Proper MeshRasterizerOpenGL usage
FlimFlamm opened this issue · comments
❓ How to properly use MeshRasterizerOpenGL
I'm looking for help/guidance (or a pointer to sample usage code!) regarding the MeshRasterizerOpenGL...
I'm multiview-rendering a large number of meshes (many of them very large), and I was hoping to speed up the process with the OpenGL rasterizer, which is said to be faster for large meshes and for multi-render scenarios.
The error I'm currently stuck on is rather confusing: can't find CUDA device with index 0 (the index it does find is -1 apparently)
Traceback (most recent call last):
File "/workspace/training/datamancer/datamancer/junk/junk2.py", line 112, in <module>
images = spherical_multiview_batched(
File "/workspace/training/datamancer/datamancer/utils/rendering_gl.py", line 388, in spherical_multiview_batched
images = renderer(meshes,
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/mesh/renderer.py", line 63, in forward
fragments = self.rasterizer(meshes_world, **kwargs)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/rasterizer_opengl.py", line 207, in forward
pix_to_face, bary_coords, zbuf = self.opengl_machinery(
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/rasterizer_opengl.py", line 281, in __call__
self.initialize_device_data(meshes_gl_ndc.device)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/rasterizer_opengl.py", line 322, in initialize_device_data
self.egl_context = global_device_context_store.get_egl_context(device)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/opengl_utils.py", line 362, in get_egl_context
self._egl_contexts[cuda_device_id] = EGLContext(
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/opengl_utils.py", line 189, in __init__
self.device = _get_cuda_device(self.cuda_device_id)
File "/workspace/miniconda3/envs/torch5/lib/python3.10/site-packages/pytorch3d/renderer/opengl/opengl_utils.py", line 123, in _get_cuda_device
raise ValueError(
ValueError: Found 1 CUDA devices, but none with CUDA id 0.
The problem seems to be EGL related, which is where I'm hoping for some guidance. According to the docs it doesn't seem like I need to be creating any EGL contexts manually, and that I should be able to just hot-swap the MeshRasterizer for the MeshRasterizerOpenGL. I suspect that my code is either misusing one or more classes, that I have a version compatibility issue somewhere, or that I have run afoul of some subtle issue (like not importing openGL before pytorch3d?)
Here are the relevant versions
- python = 3.10.11
- cuda 12.1
conda:
- torch = 2.3.1+cu121
- torchaudio = 2.3.1+cu121
- torchvision = 0.18.1+cu121
- pycuda = 2024.1
- pyopengl + accelerate = 3.1.7
- pytorch3d = 0.7.6
- numpy = 1.26.4
pip:
- same versions as conda
Here is some sample code to show how I'm using the MeshRasterizerOpenGL class:
import torch
from tqdm import tqdm
from pytorch3d.renderer.opengl.rasterizer_opengl import MeshRasterizerOpenGL
from pytorch3d.renderer import (
FoVPerspectiveCameras,
look_at_view_transform,
RasterizationSettings,
MeshRenderer,
HardPhongShader,
PointLights,
)
from datamancer.utils.mesh_processor import scale_and_center
from datamancer.utils.meshes_processor import make_meshes
def spherical_multiview_batched(
    verts,
    faces,
    image_size: int = 256,
    device="cuda:0",
):
    """Render every mesh in a batch from a fixed set of viewpoints.

    Each shape is normalised with ``scale_and_center(..., "bbox")``, packed
    into a batch with ``make_meshes`` and rendered once per viewpoint with
    ``MeshRasterizerOpenGL`` + ``HardPhongShader``. The point light is placed
    at the camera centre of the view being rendered.

    Args:
        verts: Iterable of per-shape vertex tensors (each must support
            ``.to(device)``).
        faces: Per-shape face data, forwarded unchanged to ``make_meshes``.
        image_size: Side length, in pixels, of the square output images.
        device: Device used for the meshes, cameras, lights and renderer.

    Returns:
        A CPU tensor of shape
        ``(batch_size, multiview_count, image_size, image_size, 3)`` holding
        the RGB channels of every rendered view.
    """
    render_distance = 2
    multiview_count = 12

    verts = [scale_and_center(shape_verts.to(device), "bbox") for shape_verts in verts]
    # `.to()` hands back the mesh batch that lives on `device`; the result
    # must be kept, otherwise the call has no effect when a move is needed.
    meshes = make_meshes(verts, faces, None, device=device, scale_center=False).to(device)
    batch_size = len(meshes)

    # Placeholder angles: every view currently uses elev=0 / azim=0, so all
    # `multiview_count` cameras are identical until real angles are supplied.
    dummy_azimuths = torch.zeros(multiview_count)
    dummy_elevations = torch.zeros(multiview_count)
    R, T = look_at_view_transform(
        dist=render_distance,
        elev=dummy_elevations,
        azim=dummy_azimuths,
        device=device,
        degrees=True,
    )

    # One camera batch per view: the view's pose is repeated for every mesh.
    camera_batches = [
        FoVPerspectiveCameras(
            znear=0.5,
            zfar=3.5,
            fov=90,
            device=device,
            R=R[i].repeat(batch_size, 1, 1),
            T=T[i].repeat(batch_size, 1),
        )
        for i in range(multiview_count)
    ]

    # Rasterization settings for rendering
    raster_settings = RasterizationSettings(
        image_size=image_size,
        blur_radius=0.0,
        bin_size=0,
        faces_per_pixel=1,
        max_faces_per_bin=int(max(10000, meshes._F / 5)),
    )

    outputs = torch.zeros((batch_size, multiview_count, image_size, image_size, 3))

    for i, cameras in enumerate(tqdm(camera_batches, desc="batched render")):
        # Using a standard lighting and shading model, lit from the camera.
        shader = HardPhongShader(
            device=device,
            cameras=cameras,
            lights=PointLights(
                device=device,
                location=cameras.get_camera_center(),
            ),
        )
        rasterizer = MeshRasterizerOpenGL(
            cameras=cameras,
            raster_settings=raster_settings,
        )
        # MeshRenderer.to() moves both the rasterizer and the shader, so no
        # separate .to(device) calls are needed on them.
        renderer = MeshRenderer(
            rasterizer=rasterizer,
            shader=shader,
        ).to(device)

        # Render the scene
        images = renderer(meshes, cameras=cameras)

        # PyTorch3D shaders blend to RGBA, i.e. (N, H, W, 4). Keep only RGB so
        # the view fits the 3-channel output buffer; assigning the full RGBA
        # tensor would fail with a shape mismatch.
        outputs[:, i] = images[..., :3]

    return outputs
Any pointers at all would be much appreciated; if info relevant to the issue is missing, please don't hesitate to ask and I'll provide it ASAP.
I don't think this is an installation / import order problem. Maybe you can step through the function _get_cuda_device and see what values are around. How does what you know about what GPUs you have match with EGL's data?
> I don't think this is an installation / import order problem. Maybe you can step through the function _get_cuda_device and see what values are around. How does what you know about what GPUs you have match with EGL's data?
There should be one RTX 4090 available in the system (it's a remote server, so possibly the lack of a display is related?)
Added comments to describe the results:
def _get_cuda_device(requested_device_id: int):
"""
Find an EGL device with a given CUDA device ID.

Enumerates the EGL devices, then queries each one's EGL_CUDA_DEVICE_NV
attribute and returns the first device whose value equals the request.

Args:
requested_device_id: The desired CUDA device ID, e.g. "1" for "cuda:1".
Returns:
EGL device with the desired CUDA ID.
Raises:
RuntimeError: If either eglQueryDevicesEXT call fails or reports fewer
than one device.
ValueError: If fewer than requested_device_id + 1 EGL devices were
found, or if no enumerated device reports the requested CUDA ID.
"""
# Debugger observation: `requested_device == 1` <--------------------
# NOTE(review): the traceback in this report says "CUDA id 0" -- confirm
# which id was actually requested.
num_devices = egl.EGLint()
# Debugger observation: num_devices.value == 0 <--------------------
# First query: a maximum of 0 and no output array are passed, only a pointer
# to num_devices, so this call is used purely to obtain the device count.
if (
# pyre-ignore Undefined attribute [16]
not egl.eglQueryDevicesEXT(0, None, ctypes.pointer(num_devices))
or num_devices.value < 1
):
raise RuntimeError("EGL requires a system that supports at least one device.")
# Debugger observation: num_devices.value == 1. It changes across the `if`
# above because the eglQueryDevicesEXT call inside the condition receives a
# pointer to num_devices and writes the count through it.
devices = (egl.EGLDeviceEXT * num_devices.value)() # array of size num_devices
# Debugger observation: len(devices) == 1 <--------------------
# Second query: same call, now given the array so the device handles are
# filled in.
if (
# pyre-ignore Undefined attribute [16]
not egl.eglQueryDevicesEXT(
num_devices.value, devices, ctypes.pointer(num_devices)
)
or num_devices.value < 1
):
raise RuntimeError("EGL sees no available devices.")
# Reject ids that exceed the number of enumerated EGL devices.
if len(devices) < requested_device_id + 1:
raise ValueError(
f"Device {requested_device_id} not available. Found only {len(devices)} devices."
)
# Debugger observation: num_devices.value == 1 <--------------------
# Iterate over all the EGL devices, and check if their CUDA ID matches the request.
for device in devices:
# Initialised to -1; the query below is expected to overwrite it.
available_device_id = egl.EGLAttrib(ctypes.c_int(-1))
# Debugger observation: available_device_id.contents.value == -1 <--------------------
# The return value of the query is not checked, so if the call does not
# write a value the id stays -1 and can never match the request.
# NOTE(review): presumably that happens when the EGL device does not expose
# EGL_CUDA_DEVICE_NV (e.g. a non-NVIDIA EGL device) -- confirm.
# pyre-ignore Undefined attribute [16]
egl.eglQueryDeviceAttribEXT(device, EGL_CUDA_DEVICE_NV, available_device_id)
if available_device_id.contents.value == requested_device_id:
return device
# No enumerated device reported the requested CUDA id.
raise ValueError(
f"Found {len(devices)} CUDA devices, but none with CUDA id {requested_device_id}."
)
It's finding a device, somehow, but its index is -1...
I'm going to try this on my local workstation as soon as I get a chance, to confirm or rule out the headless (display-less) setup as the cause.
EDIT: Finding some sources claiming EGL requires a display. fingers crossed they're wrong XD