PINTO0309 / PINTO_model_zoo

A repository for storing models that have been inter-converted between various frameworks. Supported frameworks are TensorFlow, PyTorch, ONNX, OpenVINO, TFJS, TFTRT, TensorFlowLite (Float32/16/INT8), EdgeTPU, CoreML.

Home Page: https://qiita.com/PINTO

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

[Blazepose] X,Y,Z coordinates from blazepose

ovshake opened this issue · comments

Issue Type

Support

OS

Ubuntu

OS architecture

x86_64

Programming Language

Python

Framework

TensorFlowLite

Model name and Weights/Checkpoints URL

Model ID: 053_BlazePose/03_pose_landmark_full_body/saved_model_tflite_tfjs_coreml_onnx/model_float32.tflite

Description

I am trying to make sense of the 33 x 5 outputs that I am getting from Blazepose. In this issue, I can see that the five values are x, y, z, visibility, and presence. When I plot x, y, and z, the 3D skeleton doesn't make any sense, but when I plot only the x and y coordinates, the result matches the picture. Does the z coordinate require some additional processing for it to be correct? I have attached my outputs.
man-standing-in-the-middle-of-a-road_2d
man-standing-in-the-middle-of-a-road_3d
man-standing-in-the-middle-of-a-road

Relevant Log Output

No response

URL or source code for simple inference testing code

from tensorflow.lite.python.interpreter import Interpreter
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import glob
import os
from sys import exit

def plot_3d_points_and_lines(points, lines, image_path):
    """Render points as a 3D scatter plot with connecting segments and save it.

    Args:
        points: Sequence of (x, y, z) triples, indexable by point index.
        lines: Iterable of (a, b) index pairs into ``points``; one red
            segment is drawn per pair.
        image_path: Path of the source image. Only its file name without
            the extension is used, to name the output file.

    The figure is written to ``output_3d/<stem>_3d.png``; the directory is
    created if it does not exist.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Transpose the list of triples into one sequence per axis.
    xs, ys, zs = zip(*points)
    ax.scatter(xs, ys, zs)

    for a, b in lines:
        start, end = points[a], points[b]
        ax.plot(
            [start[0], end[0]],
            [start[1], end[1]],
            [start[2], end[2]],
            color='r',
        )

    # Fixed camera: elevation 0, azimuth 0.
    ax.view_init(elev=0, azim=0)

    # os.path keeps the full stem for names containing several dots
    # ("img.v2.jpg" -> "img.v2") and honours the platform's path separator.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    os.makedirs('output_3d', exist_ok=True)
    out_path = os.path.join('output_3d', stem + '_3d.png')

    fig.savefig(out_path)
    # Closing the figure is sufficient cleanup. Calling plt.clf() afterwards
    # would make pyplot create a new empty figure that is never released.
    plt.close(fig)

def process_image(image_path):
    """Run the pose model on one image and save 2D and 3D landmark plots.

    The image is resized to 256x256, converted to RGB, min-max normalized
    to [-1, 1] and fed to the module-level ``model``. The second output
    tensor is reshaped to rows of five values; the first three columns are
    plotted in 3D and the first two are drawn over the image.

    Args:
        image_path: Path of the image to process.

    Raises:
        OSError: If OpenCV cannot read the image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image at {image_path}")

    frame = cv2.resize(image, (256, 256))
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = np.expand_dims(frame, axis=0).astype(np.float32)
    # NOTE(review): min-max scaling depends on the darkest/brightest pixel of
    # each image; confirm against the reference pipeline whether a fixed
    # scale is expected instead.
    cv2.normalize(frame, frame, -1, 1, cv2.NORM_MINMAX)

    model.set_tensor(input_details[0]['index'], frame)
    model.invoke()
    pose_output = model.get_tensor(output_details[1]['index'])[0]
    print(pose_output.shape)

    # Right-arm entries mirror the left-arm ones: (16, 20) replaces a
    # duplicated (16, 22) so the right wrist connects to the same three hand
    # points as the left wrist (15 -> 17, 19, 21).
    POSE_PAIRS = [
        (0, 1), (0, 4), (1, 2), (2, 3), (3, 7), (4, 5), (5, 6), (6, 8), (9, 10),
        (11, 12), (12, 14), (14, 16), (16, 22), (16, 18), (16, 20), (18, 20),
        (12, 24), (24, 26), (26, 28), (28, 32), (28, 30), (30, 32), (24, 23),
        (11, 13), (13, 15), (15, 21), (15, 17), (15, 19), (19, 17), (11, 23),
        (23, 25), (25, 27), (27, 29), (27, 31), (29, 31),
    ]

    landmarks = np.reshape(pose_output, (-1, 5))

    # Keep the 3D coordinates as floats. Casting to an unsigned integer type
    # corrupts any negative value, which scrambles the 3D plot.
    # NOTE(review): z is presumably signed (depth relative to a body
    # reference point) - confirm against the model documentation.
    pose_output_3d = landmarks[:, :3]

    # OpenCV drawing calls need integer pixel coordinates; truncate only the
    # 2D copy and hand over plain Python ints.
    pose_output_2d = [(int(x), int(y)) for x, y in landmarks[:, :2]]

    plot_3d_points_and_lines(pose_output_3d, POSE_PAIRS, image_path)
    plot_2d_points_and_lines(image_path, pose_output_2d, POSE_PAIRS)

def plot_2d_points_and_lines(image_path, points, lines):
    """Draw points and connecting segments on the image and save the result.

    The image is reloaded from ``image_path`` and resized to 256x256 before
    drawing, so ``points`` are interpreted in that 256x256 pixel space.

    Args:
        image_path: Path of the image to draw on. Its file name without the
            extension also names the output file.
        points: Sequence of (x, y) pairs, indexable by point index.
        lines: Iterable of (a, b) index pairs into ``points``.

    The annotated image is written to ``output/<stem>_2d.png``. If the image
    cannot be loaded, an error is printed and nothing is written.
    """
    image = cv2.imread(image_path)

    # Validate before resizing: cv2.resize raises on a None input, which
    # would make a later check unreachable.
    if image is None:
        print(f"Error loading image at {image_path}")
        return

    image = cv2.resize(image, (256, 256))

    # OpenCV expects points as tuples of plain ints; coerce once up front so
    # NumPy rows of any numeric dtype are accepted.
    pixel_points = [(int(p[0]), int(p[1])) for p in points]

    for center in pixel_points:
        cv2.circle(image, center, radius=5, color=(0, 255, 0), thickness=-1)

    for a, b in lines:
        cv2.line(image, pixel_points[a], pixel_points[b], color=(0, 0, 255), thickness=2)

    # os.path keeps the full stem for names containing several dots and
    # honours the platform's path separator.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    os.makedirs('output', exist_ok=True)
    out_path = os.path.join('output', stem + '_2d.png')
    cv2.imwrite(out_path, image)



# Locations of the TFLite pose-landmark model and of the image to analyse.
MODEL_PATH = "/home/ubuntu/projects/PINTO_model_zoo/053_BlazePose/03_pose_landmark_full_body/saved_model_tflite_tfjs_coreml_onnx/model_float32.tflite"
IMAGE_PATH = '/home/ubuntu/projects/PINTO_model_zoo/053_BlazePose/03_pose_landmark_full_body/man-standing-in-the-middle-of-a-road.jpg'

# Interpreter state is kept at module level because process_image() reads
# `model`, `input_details` and `output_details` as globals.
model = Interpreter(model_path=MODEL_PATH)
model.allocate_tensors()
input_details = model.get_input_details()
output_details = model.get_output_details()

# Run the full pipeline on the sample image.
image_path = IMAGE_PATH
process_image(image_path)

I am not a programming teacher. You should read the paper and the MediaPipe implementation.