OA-CNNs training S3DIS and KITTI, as well as nuScenes dataset
jing-zhao9 opened this issue · comments
Hi, getting configs for each of the other datasets is pretty easy. Just copy our SpUNet config for each dataset and then replace the model parameters with the ones we provided.
Hi, getting configs for each of the other datasets is pretty easy. Just copy our SpUNet config for each dataset and then replace the model parameters with the ones we provided.
Hello, may I ask how to choose the point_grid_size in OACNN for the nuScenes dataset? What parameter did you use to achieve your results?
Hi, 0.05 for outdoor. Use the SpUNet config as a base; I remember I also added the Lovasz loss.
Hi, 0.05 for outdoor. Use the SpUNet config as a base; I remember I also added the Lovasz loss.
Hi, I am referring to this parameter. point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]].
I remember that it should be the same. (I don't recall changing that parameter for the outdoor case.)
Hi, getting configs for each of the other datasets is pretty easy. Just copy our SpUNet config for each dataset and then replace the model parameters with the ones we provided.
This is the config file I set up based on the information above, but with it I cannot replicate the S3DIS mIoU reported in the OA-CNNs paper. (Using the ScanNet config file you provided, I was able to replicate the paper's ScanNet mIoU.) Could you tell me whether my S3DIS configuration file is incorrect? The S3DIS configuration file follows below.
# Config for training OA-CNNs on S3DIS (copied from the SpUNet S3DIS config,
# with the model section replaced by the OA-CNNs parameters).
# NOTE(review): Pointcept resolves config inheritance through the `_base_`
# key — the pasted `base` (underscores stripped by markdown rendering) would
# not be recognized. Same for the `../_base_/` directory in the path.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 10  # bs: total bs in all gpus
mix_prob = 0.8
empty_cache = False
enable_amp = True
# NOTE(review): the paste assigned sync_bn twice (True, then False); the
# second assignment wins, so the effective value was False. Kept a single
# explicit assignment — confirm which value was actually intended.
sync_bn = False
# model settings
model = dict(
    type="DefaultSegmentor",
    backbone=dict(
        type="OACNNs",
        in_channels=6,  # xyz + rgb (see feat_keys in the Collect transforms)
        num_classes=13,  # S3DIS semantic classes
        embed_channels=64,
        enc_channels=[64, 64, 128, 256],
        groups=[4, 4, 8, 16],
        enc_depth=[3, 3, 9, 8],
        dec_channels=[256, 256, 256, 256],
        # Per the thread above, the adaptive-kernel grid sizes are kept the
        # same as in the ScanNet config (unchanged for other datasets).
        point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]],
        dec_depth=[2, 2, 2, 2],
        enc_num_ref=[16, 16, 16, 16],
    ),
    # NOTE(review): the maintainer mentioned also adding a Lovasz loss on top
    # of CrossEntropy (at least for outdoor datasets) — confirm whether it
    # should be added here to match the reported results.
    criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
)
# scheduler settings
epoch = 900
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.02)
scheduler = dict(
    type="OneCycleLR",
    max_lr=optimizer["lr"],  # peak LR of the one-cycle schedule = base LR
    pct_start=0.05,  # fraction of the cycle spent increasing the LR (warm-up)
    anneal_strategy="cos",
    div_factor=10.0,  # initial_lr = max_lr / div_factor
    final_div_factor=1000.0,  # min_lr = initial_lr / final_div_factor
)
# dataset settings
dataset_type = "S3DISDataset"
# NOTE(review): absolute, machine-specific path — adjust for your environment.
data_root = "/home/cvmaster/zj/202404/Pointcept/data/s3dis"

data = dict(
    num_classes=13,
    ignore_index=-1,
    # The 13 S3DIS semantic class names, in label order.
    names=[
        "ceiling",
        "floor",
        "wall",
        "beam",
        "column",
        "window",
        "door",
        "table",
        "chair",
        "sofa",
        "bookcase",
        "board",
        "clutter",
    ],
    # Standard S3DIS protocol: train on Areas 1-4 and 6, evaluate on Area 5.
    train=dict(
        type=dataset_type,
        split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            # 5 cm voxelization, matching the maintainer's suggestion above.
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "color", "segment"),
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=100000, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=["coord", "color"],
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="Area_5",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "color", "segment"),
                return_grid_coord=True,
            ),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=["coord", "color"],
            ),
        ],
        test_mode=False,
    ),
    test=dict(
        type=dataset_type,
        split="Area_5",
        data_root=data_root,
        transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="test",
                keys=("coord", "color"),
                return_grid_coord=True,
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("coord", "color"),
                ),
            ],
            # Test-time augmentation: 5 scales x {no flip, flip} = 10 passes,
            # whose predictions are aggregated at inference time.
            aug_transform=[
                [dict(type="RandomScale", scale=[0.9, 0.9])],
                [dict(type="RandomScale", scale=[0.95, 0.95])],
                [dict(type="RandomScale", scale=[1, 1])],
                [dict(type="RandomScale", scale=[1.05, 1.05])],
                [dict(type="RandomScale", scale=[1.1, 1.1])],
                [
                    dict(type="RandomScale", scale=[0.9, 0.9]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                    dict(type="RandomFlip", p=1),
                ],
                [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
                [
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[1.1, 1.1]),
                    dict(type="RandomFlip", p=1),
                ],
            ],
        ),
    ),
)