KTH-RPL / DeFlow

[ICRA'24] DeFlow: Decoder of Scene Flow Network in Autonomous Driving

Get an inference result for two arbitrary input point clouds

1eethink opened this issue · comments

Hi,

How can I modify the code to take only two point clouds as input and output the scene flow between them? I don't need any ground masking or poses, because I assume the camera stays at the same location and the input point clouds contain no background.

I'm asking because even though I tried my best to change the code, there are too many arguments and parts that I would have to modify.

Thank you.

I believe this one is related to: #3 (comment)

Option A: Save your data as an HDF5 file. Please check the code attached there, save your data in the same layout, then run inference following the instructions.
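
As a rough sketch (my illustration here, not the repo's converter; the per-frame group layout with lidar / pose / ground_mask datasets is an assumption, so check the linked comment for the exact format), saving two static-camera frames could look like:

    import h5py
    import numpy as np

    # two (N, 3) float32 clouds; identity poses since the camera is static
    pc0 = np.load("frame0.npy").astype(np.float32)  # hypothetical input files
    pc1 = np.load("frame1.npy").astype(np.float32)

    with h5py.File("my_scene.h5", "w") as f:
        for ts, pc in zip(["000000", "000001"], [pc0, pc1]):
            g = f.create_group(ts)                    # one group per frame, keyed by timestamp
            g.create_dataset("lidar", data=pc)        # the raw points
            g.create_dataset("pose", data=np.eye(4))  # identity: sensor never moves
            # no background, so the ground mask removes nothing
            g.create_dataset("ground_mask", data=np.zeros(pc.shape[0], dtype=bool))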

Option B: Read the files directly. Here is a branch I made for the DynamicMap benchmark, where the dataloader reads each PCD file directly:

    import os
    from pathlib import Path

    import numpy as np
    import torch
    from torch.utils.data import Dataset

    # pcdpy3, ground_seg, xyzqwxyz_to_matrix, inv_pose_matrix and BASE_DIR
    # are helpers defined in that branch.

    class DynamicMapData(Dataset):
        def __init__(self, directory):
            super(DynamicMapData, self).__init__()
            self.scene_id = directory.split("/")[-1]
            self.directory = Path(directory) / "pcd"
            self.pcd_files = [os.path.join(self.directory, f)
                              for f in sorted(os.listdir(self.directory))
                              if f.endswith('.pcd')]
            # FIXME: ground segmentation config (av2, kitti, semindoor) is hard-coded here
            if self.scene_id == "av2":
                ground_config = f"{BASE_DIR}/conf/groundseg/av2.toml"
            elif self.scene_id == "semindoor":
                ground_config = f"{BASE_DIR}/conf/groundseg/semindoor.toml"
            else:
                ground_config = f"{BASE_DIR}/conf/groundseg/kitti.toml"
            self.groundseg = ground_seg(ground_config)

        def __len__(self):
            return len(self.pcd_files)

        def __getitem__(self, index_):
            res_dict = {
                'scene_id': self.scene_id,
                'timestamp': self.pcd_files[index_].split("/")[-1].split(".")[0],
            }
            # read frame t and frame t+1 (step back near the end so a pair always exists)
            pcd_ = pcdpy3.PointCloud.from_path(self.pcd_files[index_])
            pc0 = pcd_.np_data[:, :3]
            pose0 = xyzqwxyz_to_matrix(list(pcd_.viewpoint))
            if index_ + 1 == len(self.pcd_files):
                index_ = index_ - 2
            pcd_ = pcdpy3.PointCloud.from_path(self.pcd_files[index_ + 1])
            pc1 = pcd_.np_data[:, :3]
            pose1 = xyzqwxyz_to_matrix(list(pcd_.viewpoint))

            # move both clouds into the ego (sensor) frame, then run ground segmentation
            inv_pose0 = inv_pose_matrix(pose0)
            ego_pc0 = pc0 @ inv_pose0[:3, :3].T + inv_pose0[:3, 3]
            gm0 = np.array(self.groundseg.run(ego_pc0[:, :3].tolist()))
            inv_pose1 = inv_pose_matrix(pose1)
            ego_pc1 = pc1 @ inv_pose1[:3, :3].T + inv_pose1[:3, 3]
            gm1 = np.array(self.groundseg.run(ego_pc1[:, :3].tolist()))

            res_dict['pc0'] = torch.tensor(ego_pc0.astype(np.float32))
            res_dict['gm0'] = torch.tensor(gm0.astype(np.bool_))
            res_dict['pose0'] = torch.tensor(pose0)
            res_dict['pc1'] = torch.tensor(ego_pc1.astype(np.float32))
            res_dict['gm1'] = torch.tensor(gm1.astype(np.bool_))
            res_dict['pose1'] = torch.tensor(pose1)
            res_dict['world_pc0'] = torch.tensor(pc0.astype(np.float32))
            return res_dict
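
Usage is the standard Dataset pattern, e.g. (the path is a placeholder):

    from torch.utils.data import DataLoader

    dataset = DynamicMapData("/path/to/data/av2")
    loader = DataLoader(dataset, batch_size=1)
    for batch in loader:
        print(batch['pc0'].shape, batch['gm0'].shape)  # (1, N, 3) and (1, N)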

Hi,

Thanks for the very quick reply. Sorry for the basic question; I'm new to this field. After reading your reply, I'm still confused about how to deal with ground masking. It seems you use configuration files (xxx.toml) to build "groundseg". What if I don't have any configuration files, but fortunately have only point clouds without backgrounds? I tried to remove the code related to "gm" and "pose", but failed. Is there no way to make res_dict hold only two keys (i.e., pc0 and pc1)?

Thank you.

I set res_dict as below, as you mentioned:

    res_dict = {
        'scene_id': 'scene_id',
        'timestamp': 'key',
        'pc0': torch.tensor(self.pc0),
        'pc1': torch.tensor(self.pc1),
        'pose0': torch.tensor(np.eye(4)),
        'pose1': torch.tensor(np.eye(4)),
        'gm0': torch.tensor(np.zeros(np.shape(self.pc0))),
        'gm1': torch.tensor(np.zeros(np.shape(self.pc1))),
    }

But it fails with the error below:

    File "/home/kin/workspace/DeFlow/scripts/pl_model.py", line 230, in test_step
        batch['pc0'] = batch['pc0'][~batch['gm0']].unsqueeze(0)
    TypeError: ~ (operator.invert) is only implemented on integer and Boolean-type tensors

Did I miss something?

Cast the ground masks to Boolean dtype:

    'gm0': torch.tensor(np.zeros(np.shape(self.pc0)).astype(np.bool_)),
    'gm1': torch.tensor(np.zeros(np.shape(self.pc1)).astype(np.bool_)),
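
np.zeros defaults to float64, and PyTorch only implements ~ for integer and Boolean tensors; a quick illustration:

    import torch

    # ~torch.zeros(3)                  # float64 -> TypeError, the error above
    ~torch.zeros(3, dtype=torch.bool)  # bool -> tensor([True, True, True])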

That's weird... it leads to another error:

    File "/opt/mambaforge/envs/deflow/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
        return forward_call(*args, **kwargs)
    File "/home/kin/workspace/DeFlow/scripts/network/models/basic/make_voxels.py", line 65, in forward
        not_nan_mask = ~torch.isnan(batch_points).any(dim=1)
    IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

It's better to debug this yourself with your own data, e.g. set a breakpoint and check the shapes of the tensors.
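
For example (hypothetical lines, just to show what to check):

    # in test_step, right before the masking line that failed earlier:
    print(batch['pc0'].shape)  # expect (1, N, 3)
    print(batch['gm0'].shape)  # expect (1, N); a (1, N, 3) mask is the bug here:
    # indexing pc0 with a (1, N, 3) Boolean mask flattens the result to 1-D,
    # and make_voxels.py's .any(dim=1) then raises the IndexError above
    import pdb; pdb.set_trace()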

Okay, I'll try it. But do you assume there could be NaN values in the point cloud data?

No, you should have valid points in the data (at least one valid point)... otherwise it doesn't make sense.

Let me know if you find out why that happens.

I found out why it happened, but still can't resolve it. The reason is that when the model voxelizes the input point clouds, it returns all-zero values.

In the deflow.py file:

    self.timer[1].start("Voxelization")
    # the embedder voxelizes each cloud; points outside point_cloud_range are dropped
    pc0_before_pseudoimages, pc0_voxel_infos_lst = self.embedder(pc0s)
    pc1_before_pseudoimages, pc1_voxel_infos_lst = self.embedder(pc1s)
    self.timer[1].stop()

Can you help me figure it out? My inputs are as below:
    {'pc0': tensor([[[211.,  63.,  61.],
                     [210.,  63.,  63.],
                     [211.,  62.,  63.],
                     ...,
                     [325., 904., 136.],
                     [321., 904., 144.],
                     [320., 906., 144.]]], device='cuda:0'),
     'pc1': tensor([[[191.,  63., 336.],
                     [191.,  63., 337.],
                     [190.,  62., 339.],
                     ...,
                     [111., 673., 512.],
                     [110., 674., 512.],
                     [112., 672., 512.]]], device='cuda:0'),
     'pose0': tensor([[[1., 0., 0., 0.],
                       [0., 1., 0., 0.],
                       [0., 0., 1., 0.],
                       [0., 0., 0., 1.]]], device='cuda:0', dtype=torch.float64),
     'pose1': tensor([[[1., 0., 0., 0.],
                       [0., 1., 0., 0.],
                       [0., 0., 1., 0.],
                       [0., 0., 0., 1.]]], device='cuda:0', dtype=torch.float64),
     'gm0': tensor([[[False, False, False],
                     [False, False, False],
                     ...,
                     [False, False, False]]], device='cuda:0'),
     'gm1': tensor([[[False, False, False],
                     [False, False, False],
                     ...,
                     [False, False, False]]], device='cuda:0')}
In other words, how should I handle data whose range differs from "point_cloud_range"? (I think this is the main difference.)

There are several potential issues I notice here:

  1. gm shape should be (N,), so replace the initialization with np.zeros(pc.shape[0]) etc.; see the sketch after the config excerpt below.
  2. The pose is the sensor-center pose, i.e. the (0, 0, 0) coordinate is the sensor's position.
  3. For the range limit, here is the config:

    DeFlow/conf/config.yaml (lines 37-38 at commit 6f0321a):

    voxel_size: [0.2, 0.2, 6]
    point_cloud_range: [-51.2, -51.2, -3, 51.2, 51.2, 3]

If you change the point cloud range or voxel size here (so the voxel grid is no longer 512x512x1), you need to retrain the model for your new setting.
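
Putting the three points together, a minimal sample for your two raw clouds could look like the sketch below (my illustration, not code from the repo). Since the pretrained checkpoint assumes the range above, your points must land inside [-51.2, 51.2] x [-51.2, 51.2] x [-3, 3] around the sensor; coordinates like (325, 904, 136) fall entirely outside it, which is why the voxelizer returned all zeros:

    import numpy as np
    import torch

    def make_sample(pc0: np.ndarray, pc1: np.ndarray) -> dict:
        # center the clouds near the origin so they fall inside point_cloud_range;
        # if they span much more than ~100 units, also scale them down (or retrain)
        center = pc0.mean(axis=0)
        pc0 = (pc0 - center).astype(np.float32)
        pc1 = (pc1 - center).astype(np.float32)
        return {
            'scene_id': 'custom',
            'timestamp': '0',
            'pc0': torch.from_numpy(pc0),                # (N, 3)
            'pc1': torch.from_numpy(pc1),                # (M, 3)
            'pose0': torch.eye(4, dtype=torch.float64),  # static camera: identity pose
            'pose1': torch.eye(4, dtype=torch.float64),
            'gm0': torch.zeros(len(pc0), dtype=torch.bool),  # (N,) -- nothing masked out
            'gm1': torch.zeros(len(pc1), dtype=torch.bool),  # (M,) -- nothing masked out
        }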