Pointcept: a codebase for point cloud perception research. Latest works: PTv3 (CVPR'24 Oral), PPT (CVPR'24), OA-CNNs (CVPR'24), MSC (CVPR'23)

CUDA out of memory on A100 GPUs

paladin1410 opened this issue

Hi, thank you very much for sharing the code. I am trying to train a PTv2 model on my custom dataset on 8 A100 GPUs, and I am hitting this out-of-memory problem. I tried reducing num_worker to 8 and batch_size to 8, but the problem still persists. Please advise me on this problem. Here is the log file:

[2024-06-09 01:43:25,317 INFO train.py line 131 2003007] Config:
weight = None
resume = False
evaluate = True
test_only = False
seed = 7999152
save_path = 'exp/desay_sv/desayseg-pt-v2m2-0-base'
num_worker = 8
batch_size = 8
batch_size_val = None
batch_size_test = None
epoch = 1
eval_epoch = 1
sync_bn = False
enable_amp = True
empty_cache = False
empty_cache_per_epoch = False
find_unused_parameters = False
mix_prob = 0.8
param_dicts = None
hooks = [
    dict(type='CheckpointLoader'),
    dict(type='IterationTimer', warmup_iter=2),
    dict(type='InformationWriter'),
    dict(type='SemSegEvaluator'),
    dict(type='CheckpointSaver', save_freq=None),
    dict(type='PreciseEvaluator', test_last=False)
]
train = dict(type='DefaultTrainer')
test = dict(type='SemSegTester', verbose=True)
num_classes = 14
point_cloud_range = (-51.2, -51.2, -3.0, 51.2, 51.2, 5.0)
ignore_index = -1
model = dict(
    type='DefaultSegmentor',
    backbone=dict(
        type='PT-v2m2',
        in_channels=4,
        num_classes=14,
        patch_embed_depth=1,
        patch_embed_channels=48,
        patch_embed_groups=6,
        patch_embed_neighbours=8,
        enc_depths=(2, 2, 6, 2),
        enc_channels=(96, 192, 384, 512),
        enc_groups=(12, 24, 48, 64),
        enc_neighbours=(16, 16, 16, 16),
        dec_depths=(1, 1, 1, 1),
        dec_channels=(48, 96, 192, 384),
        dec_groups=(6, 12, 24, 48),
        dec_neighbours=(16, 16, 16, 16),
        grid_sizes=(0.15, 0.375, 0.9375, 2.34375),
        attn_qkv_bias=True,
        pe_multiplier=False,
        pe_bias=True,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        enable_checkpoint=False,
        unpool_backend='map'),
    criteria=[
        dict(
            type='CrossEntropyLoss',
            weight=[
                1, 3.1557, 3.1557, 6.1354, 6.3161, 7.9937, 8.7029, 2.6261,
                2.6261, 2.5492, 1.9385, 2.5492, 2.0198, 5.8585
            ],
            loss_weight=1.0,
            ignore_index=-1),
        dict(
            type='LovaszLoss',
            mode='multiclass',
            loss_weight=1.0,
            ignore_index=-1)
    ])
optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.005)
scheduler = dict(
    type='OneCycleLR',
    max_lr=0.002,
    pct_start=0.04,
    anneal_strategy='cos',
    div_factor=10.0,
    final_div_factor=100.0)
dataset_type = 'DesaySVDataset'
data_root = '/home/AppData/data_path.1/trunk/'
names = [
    'Unlabeled', 'Car', 'Bus', 'Truck', 'Other Vehicles', 'Pedestrian',
    'Bicycle', 'Cone', 'Barrier', 'Freespace', 'Sidewalk', 'Other Flat',
    'Building', 'Other Static'
]
data = dict(
    num_classes=14,
    ignore_index=-1,
    names=[
        'Unlabeled', 'Car', 'Bus', 'Truck', 'Other Vehicles', 'Pedestrian',
        'Bicycle', 'Cone', 'Barrier', 'Freespace', 'Sidewalk', 'Other Flat',
        'Building', 'Other Static'
    ],
    train=dict(
        type='DesaySVDataset',
        split='train',
        data_root='/home/AppData/data_path.1/trunk/',
        transform=[
            dict(
                type='RandomRotate',
                angle=[-1, 1],
                axis='z',
                center=[0, 0, 0],
                p=0.5),
            dict(type='RandomScale', scale=[0.9, 1.1]),
            dict(type='RandomFlip', p=0.5),
            dict(type='RandomJitter', sigma=0.005, clip=0.02),
            dict(
                type='GridSample',
                grid_size=0.05,
                hash_type='fnv',
                mode='train',
                keys=('coord', 'strength', 'segment'),
                return_grid_coord=True),
            dict(
                type='PointClip',
                point_cloud_range=(-51.2, -51.2, -3.0, 51.2, 51.2, 5.0)),
            dict(type='SphereCrop', sample_rate=0.8, mode='random'),
            dict(type='SphereCrop', point_max=120000, mode='random'),
            dict(type='ToTensor'),
            dict(
                type='Collect',
                keys=('coord', 'grid_coord', 'segment'),
                feat_keys=('coord', 'strength'))
        ],
        test_mode=False,
        ignore_index=-1,
        loop=1),
    val=dict(
        type='DesaySVDataset',
        split='val',
        data_root='/home/AppData/data_path.1/trunk/',
        transform=[
            dict(
                type='GridSample',
                grid_size=0.05,
                hash_type='fnv',
                mode='train',
                keys=('coord', 'strength', 'segment'),
                return_grid_coord=True),
            dict(
                type='PointClip',
                point_cloud_range=(-51.2, -51.2, -3.0, 51.2, 51.2, 5.0)),
            dict(type='ToTensor'),
            dict(
                type='Collect',
                keys=('coord', 'grid_coord', 'segment'),
                feat_keys=('coord', 'strength'))
        ],
        test_mode=False,
        ignore_index=-1),
    test=dict(
        type='DesaySVDataset',
        split='val',
        data_root='/home/AppData/data_path.1/trunk/',
        transform=[],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type='GridSample',
                grid_size=0.05,
                hash_type='fnv',
                mode='test',
                return_grid_coord=True,
                keys=('coord', 'strength')),
            crop=None,
            post_transform=[
                dict(
                    type='PointClip',
                    point_cloud_range=(-51.2, -51.2, -3.0, 51.2, 51.2, 5.0)),
                dict(type='ToTensor'),
                dict(
                    type='Collect',
                    keys=('coord', 'grid_coord', 'index'),
                    feat_keys=('coord', 'strength'))
            ],
            aug_transform=[
                [{
                    'type': 'RandomRotateTargetAngle',
                    'angle': [0],
                    'axis': 'z',
                    'center': [0, 0, 0],
                    'p': 1
                }],
                [{
                    'type': 'RandomRotateTargetAngle',
                    'angle': [0.5],
                    'axis': 'z',
                    'center': [0, 0, 0],
                    'p': 1
                }],
                [{
                    'type': 'RandomRotateTargetAngle',
                    'angle': [1],
                    'axis': 'z',
                    'center': [0, 0, 0],
                    'p': 1
                }],
                [{
                    'type': 'RandomRotateTargetAngle',
                    'angle': [1.5],
                    'axis': 'z',
                    'center': [0, 0, 0],
                    'p': 1
                }]
            ]),
        ignore_index=-1))
num_worker_per_gpu = 1
batch_size_per_gpu = 1
batch_size_val_per_gpu = 1
batch_size_test_per_gpu = 1

[2024-06-09 01:43:25,317 INFO train.py line 132 2003007] => Building model ...
[2024-06-09 01:43:25,414 INFO train.py line 216 2003007] Num params: 11323414
[2024-06-09 01:43:27,780 INFO train.py line 134 2003007] => Building writer ...
[2024-06-09 01:43:27,782 INFO train.py line 226 2003007] Tensorboard writer logging dir: exp/desay_sv/desayseg-pt-v2m2-0-base
[2024-06-09 01:43:27,782 INFO train.py line 136 2003007] => Building train dataset & dataloader ...
[2024-06-09 01:43:27,812 INFO defaults.py line 68 2003007] Totally 11421 x 1 samples in train set.
[2024-06-09 01:43:27,812 INFO train.py line 138 2003007] => Building val dataset & dataloader ...
[2024-06-09 01:43:27,816 INFO defaults.py line 68 2003007] Totally 2008 x 1 samples in val set.
[2024-06-09 01:43:27,817 INFO train.py line 140 2003007] => Building optimize, scheduler, scaler(amp) ...
[2024-06-09 01:43:27,819 INFO train.py line 144 2003007] => Building hooks ...
[2024-06-09 01:43:27,819 INFO misc.py line 214 2003007] => Loading checkpoint & weight ...
[2024-06-09 01:43:27,819 INFO misc.py line 250 2003007] No weight found at: None
[2024-06-09 01:43:27,819 INFO train.py line 151 2003007] >>>>>>>>>>>>>>>> Start Training >>>>>>>>>>>>>>>>
[2024-06-09 01:43:41,585 INFO misc.py line 118 2003007] Train: [1/1][1/1428] Data 0.372 (0.372) Batch 5.711 (5.711) Remain 02:15:49 loss: 3.5972 Lr: 0.00020
[2024-06-09 01:43:43,831 INFO misc.py line 118 2003007] Train: [1/1][2/1428] Data 0.003 (0.003) Batch 2.246 (2.246) Remain 00:53:22 loss: 3.4677 Lr: 0.00020
[2024-06-09 01:43:46,122 INFO misc.py line 118 2003007] Train: [1/1][3/1428] Data 0.002 (0.002) Batch 2.291 (2.291) Remain 00:54:24 loss: 3.5670 Lr: 0.00020
[2024-06-09 01:43:48,290 INFO misc.py line 118 2003007] Train: [1/1][4/1428] Data 0.002 (0.002) Batch 2.168 (2.168) Remain 00:51:26 loss: 3.3916 Lr: 0.00021
[2024-06-09 01:43:50,582 INFO misc.py line 118 2003007] Train: [1/1][5/1428] Data 0.002 (0.002) Batch 2.291 (2.229) Remain 00:52:52 loss: 3.4670 Lr: 0.00021
[2024-06-09 01:43:52,851 INFO misc.py line 118 2003007] Train: [1/1][6/1428] Data 0.004 (0.003) Batch 2.270 (2.243) Remain 00:53:09 loss: 3.4939 Lr: 0.00022
[2024-06-09 01:43:54,964 INFO misc.py line 118 2003007] Train: [1/1][7/1428] Data 0.003 (0.003) Batch 2.112 (2.210) Remain 00:52:20 loss: 3.4109 Lr: 0.00024
[2024-06-09 01:43:57,164 INFO misc.py line 118 2003007] Train: [1/1][8/1428] Data 0.005 (0.003) Batch 2.201 (2.208) Remain 00:52:15 loss: 3.4161 Lr: 0.00025
[2024-06-09 01:43:59,323 INFO misc.py line 118 2003007] Train: [1/1][9/1428] Data 0.003 (0.003) Batch 2.159 (2.200) Remain 00:52:02 loss: 3.4496 Lr: 0.00027
[2024-06-09 01:44:01,505 INFO misc.py line 118 2003007] Train: [1/1][10/1428] Data 0.003 (0.003) Batch 2.182 (2.198) Remain 00:51:56 loss: 3.3275 Lr: 0.00029
[2024-06-09 01:44:03,638 INFO misc.py line 118 2003007] Train: [1/1][11/1428] Data 0.002 (0.003) Batch 2.133 (2.189) Remain 00:51:42 loss: 3.3257 Lr: 0.00031
[2024-06-09 01:44:05,822 INFO misc.py line 118 2003007] Train: [1/1][12/1428] Data 0.002 (0.003) Batch 2.183 (2.189) Remain 00:51:39 loss: 3.2563 Lr: 0.00034
[2024-06-09 01:44:08,050 INFO misc.py line 118 2003007] Train: [1/1][13/1428] Data 0.004 (0.003) Batch 2.229 (2.193) Remain 00:51:42 loss: 3.2539 Lr: 0.00034
[2024-06-09 01:44:10,037 INFO misc.py line 118 2003007] Train: [1/1][14/1428] Data 0.003 (0.003) Batch 1.988 (2.174) Remain 00:51:14 loss: 3.3341 Lr: 0.00034
[2024-06-09 01:44:12,297 INFO misc.py line 118 2003007] Train: [1/1][15/1428] Data 0.003 (0.003) Batch 2.259 (2.181) Remain 00:51:22 loss: 3.2377 Lr: 0.00037
[2024-06-09 01:44:14,440 INFO misc.py line 118 2003007] Train: [1/1][16/1428] Data 0.002 (0.003) Batch 2.143 (2.178) Remain 00:51:15 loss: 3.2450 Lr: 0.00040
[2024-06-09 01:44:16,484 INFO misc.py line 118 2003007] Train: [1/1][17/1428] Data 0.005 (0.003) Batch 2.044 (2.169) Remain 00:51:00 loss: 3.2686 Lr: 0.00040
[2024-06-09 01:44:19,035 INFO misc.py line 118 2003007] Train: [1/1][18/1428] Data 0.004 (0.003) Batch 2.552 (2.194) Remain 00:51:33 loss: 3.1930 Lr: 0.00043
[2024-06-09 01:44:21,275 INFO misc.py line 118 2003007] Train: [1/1][19/1428] Data 0.003 (0.003) Batch 2.240 (2.197) Remain 00:51:35 loss: 3.1656 Lr: 0.00046
[2024-06-09 01:44:23,400 INFO misc.py line 118 2003007] Train: [1/1][20/1428] Data 0.003 (0.003) Batch 2.124 (2.193) Remain 00:51:27 loss: 3.1162 Lr: 0.00050
[2024-06-09 01:44:25,676 INFO misc.py line 118 2003007] Train: [1/1][21/1428] Data 0.003 (0.003) Batch 2.277 (2.197) Remain 00:51:31 loss: 3.1458 Lr: 0.00054
[2024-06-09 01:44:28,039 INFO misc.py line 118 2003007] Train: [1/1][22/1428] Data 0.002 (0.003) Batch 2.363 (2.206) Remain 00:51:41 loss: 3.1353 Lr: 0.00058
[2024-06-09 01:44:30,255 INFO misc.py line 118 2003007] Train: [1/1][23/1428] Data 0.002 (0.003) Batch 2.215 (2.207) Remain 00:51:40 loss: 3.0548 Lr: 0.00062
[2024-06-09 01:44:32,673 INFO misc.py line 118 2003007] Train: [1/1][24/1428] Data 0.004 (0.003) Batch 2.418 (2.217) Remain 00:51:52 loss: 3.0037 Lr: 0.00066
[2024-06-09 01:44:34,958 INFO misc.py line 118 2003007] Train: [1/1][25/1428] Data 0.004 (0.003) Batch 2.286 (2.220) Remain 00:51:54 loss: 2.9732 Lr: 0.00071
[2024-06-09 01:44:37,261 INFO misc.py line 118 2003007] Train: [1/1][26/1428] Data 0.003 (0.003) Batch 2.303 (2.223) Remain 00:51:57 loss: 2.9459 Lr: 0.00075
[2024-06-09 01:44:39,392 INFO misc.py line 118 2003007] Train: [1/1][27/1428] Data 0.003 (0.003) Batch 2.130 (2.220) Remain 00:51:49 loss: 2.9588 Lr: 0.00080
[2024-06-09 01:44:41,931 INFO misc.py line 118 2003007] Train: [1/1][28/1428] Data 0.004 (0.003) Batch 2.540 (2.232) Remain 00:52:05 loss: 2.8248 Lr: 0.00085
[2024-06-09 01:44:44,373 INFO misc.py line 118 2003007] Train: [1/1][29/1428] Data 0.003 (0.003) Batch 2.443 (2.240) Remain 00:52:14 loss: 2.8620 Lr: 0.00090
[2024-06-09 01:44:46,485 INFO misc.py line 118 2003007] Train: [1/1][30/1428] Data 0.003 (0.003) Batch 2.111 (2.236) Remain 00:52:05 loss: 2.8513 Lr: 0.00095
[2024-06-09 01:44:48,700 INFO misc.py line 118 2003007] Train: [1/1][31/1428] Data 0.004 (0.003) Batch 2.214 (2.235) Remain 00:52:02 loss: 3.0392 Lr: 0.00100
[2024-06-09 01:44:50,773 INFO misc.py line 118 2003007] Train: [1/1][32/1428] Data 0.004 (0.003) Batch 2.073 (2.229) Remain 00:51:52 loss: 2.9966 Lr: 0.00105
[2024-06-09 01:44:52,970 INFO misc.py line 118 2003007] Train: [1/1][33/1428] Data 0.003 (0.003) Batch 2.198 (2.228) Remain 00:51:48 loss: 2.6812 Lr: 0.00110
[2024-06-09 01:44:55,162 INFO misc.py line 118 2003007] Train: [1/1][34/1428] Data 0.003 (0.003) Batch 2.192 (2.227) Remain 00:51:44 loss: 2.6751 Lr: 0.00115
[2024-06-09 01:44:57,388 INFO misc.py line 118 2003007] Train: [1/1][35/1428] Data 0.002 (0.003) Batch 2.226 (2.227) Remain 00:51:42 loss: 2.5620 Lr: 0.00120
[2024-06-09 01:44:59,639 INFO misc.py line 118 2003007] Train: [1/1][36/1428] Data 0.003 (0.003) Batch 2.251 (2.228) Remain 00:51:41 loss: 2.7427 Lr: 0.00125
[2024-06-09 01:45:02,353 INFO misc.py line 118 2003007] Train: [1/1][37/1428] Data 0.003 (0.003) Batch 2.715 (2.242) Remain 00:51:58 loss: 2.4847 Lr: 0.00130
[2024-06-09 01:45:04,639 INFO misc.py line 118 2003007] Train: [1/1][38/1428] Data 0.002 (0.003) Batch 2.286 (2.243) Remain 00:51:58 loss: 2.1936 Lr: 0.00135
[2024-06-09 01:45:06,802 INFO misc.py line 118 2003007] Train: [1/1][39/1428] Data 0.002 (0.003) Batch 2.162 (2.241) Remain 00:51:52 loss: 2.7939 Lr: 0.00139
[2024-06-09 01:45:09,118 INFO misc.py line 118 2003007] Train: [1/1][40/1428] Data 0.003 (0.003) Batch 2.316 (2.243) Remain 00:51:53 loss: 2.4577 Lr: 0.00144
[2024-06-09 01:45:11,214 INFO misc.py line 118 2003007] Train: [1/1][41/1428] Data 0.002 (0.003) Batch 2.096 (2.239) Remain 00:51:45 loss: 2.3230 Lr: 0.00149
[2024-06-09 01:45:13,457 INFO misc.py line 118 2003007] Train: [1/1][42/1428] Data 0.003 (0.003) Batch 2.244 (2.239) Remain 00:51:43 loss: 1.8924 Lr: 0.00153
[2024-06-09 01:45:15,785 INFO misc.py line 118 2003007] Train: [1/1][43/1428] Data 0.003 (0.003) Batch 2.328 (2.242) Remain 00:51:44 loss: 2.8540 Lr: 0.00158
[2024-06-09 01:45:17,916 INFO misc.py line 118 2003007] Train: [1/1][44/1428] Data 0.003 (0.003) Batch 2.132 (2.239) Remain 00:51:38 loss: 2.3032 Lr: 0.00162
[2024-06-09 01:45:20,371 INFO misc.py line 118 2003007] Train: [1/1][45/1428] Data 0.002 (0.003) Batch 2.455 (2.244) Remain 00:51:43 loss: 2.0343 Lr: 0.00166
[2024-06-09 01:45:22,651 INFO misc.py line 118 2003007] Train: [1/1][46/1428] Data 0.002 (0.003) Batch 2.280 (2.245) Remain 00:51:42 loss: 2.1147 Lr: 0.00170
[2024-06-09 01:45:24,772 INFO misc.py line 118 2003007] Train: [1/1][47/1428] Data 0.002 (0.003) Batch 2.121 (2.242) Remain 00:51:36 loss: 2.2694 Lr: 0.00173
[2024-06-09 01:45:26,847 INFO misc.py line 118 2003007] Train: [1/1][48/1428] Data 0.002 (0.003) Batch 2.075 (2.238) Remain 00:51:28 loss: 1.8284 Lr: 0.00177
[2024-06-09 01:45:29,064 INFO misc.py line 118 2003007] Train: [1/1][49/1428] Data 0.002 (0.003) Batch 2.217 (2.238) Remain 00:51:26 loss: 2.1975 Lr: 0.00180
[2024-06-09 01:45:31,183 INFO misc.py line 118 2003007] Train: [1/1][50/1428] Data 0.003 (0.003) Batch 2.119 (2.235) Remain 00:51:20 loss: 2.4137 Lr: 0.00183
[2024-06-09 01:45:33,301 INFO misc.py line 118 2003007] Train: [1/1][51/1428] Data 0.002 (0.003) Batch 2.118 (2.233) Remain 00:51:14 loss: 2.1602 Lr: 0.00186
[2024-06-09 01:45:35,576 INFO misc.py line 118 2003007] Train: [1/1][52/1428] Data 0.002 (0.003) Batch 2.275 (2.234) Remain 00:51:13 loss: 2.3547 Lr: 0.00189
[2024-06-09 01:45:37,753 INFO misc.py line 118 2003007] Train: [1/1][53/1428] Data 0.002 (0.003) Batch 2.177 (2.233) Remain 00:51:09 loss: 1.9459 Lr: 0.00191
[2024-06-09 01:45:40,034 INFO misc.py line 118 2003007] Train: [1/1][54/1428] Data 0.002 (0.003) Batch 2.281 (2.234) Remain 00:51:08 loss: 2.0179 Lr: 0.00193
[2024-06-09 01:45:42,353 INFO misc.py line 118 2003007] Train: [1/1][55/1428] Data 0.002 (0.003) Batch 2.319 (2.235) Remain 00:51:08 loss: 1.9461 Lr: 0.00195
[2024-06-09 01:45:44,694 INFO misc.py line 118 2003007] Train: [1/1][56/1428] Data 0.002 (0.003) Batch 2.341 (2.237) Remain 00:51:09 loss: 2.0644 Lr: 0.00196
[2024-06-09 01:45:47,009 INFO misc.py line 118 2003007] Train: [1/1][57/1428] Data 0.002 (0.003) Batch 2.314 (2.239) Remain 00:51:09 loss: 1.9840 Lr: 0.00198
[2024-06-09 01:45:49,307 INFO misc.py line 118 2003007] Train: [1/1][58/1428] Data 0.002 (0.003) Batch 2.298 (2.240) Remain 00:51:08 loss: 2.4954 Lr: 0.00199
[2024-06-09 01:45:51,610 INFO misc.py line 118 2003007] Train: [1/1][59/1428] Data 0.003 (0.003) Batch 2.303 (2.241) Remain 00:51:07 loss: 1.7567 Lr: 0.00199
[2024-06-09 01:45:53,823 INFO misc.py line 118 2003007] Train: [1/1][60/1428] Data 0.002 (0.003) Batch 2.213 (2.240) Remain 00:51:04 loss: 2.4860 Lr: 0.00200
[2024-06-09 01:45:56,270 INFO misc.py line 118 2003007] Train: [1/1][61/1428] Data 0.002 (0.003) Batch 2.447 (2.244) Remain 00:51:07 loss: 1.9810 Lr: 0.00200
[2024-06-09 01:45:58,634 INFO misc.py line 118 2003007] Train: [1/1][62/1428] Data 0.002 (0.003) Batch 2.364 (2.246) Remain 00:51:07 loss: 1.5950 Lr: 0.00200
[2024-06-09 01:46:00,944 INFO misc.py line 118 2003007] Train: [1/1][63/1428] Data 0.002 (0.003) Batch 2.310 (2.247) Remain 00:51:07 loss: 2.0556 Lr: 0.00200
[2024-06-09 01:46:03,056 INFO misc.py line 118 2003007] Train: [1/1][64/1428] Data 0.002 (0.003) Batch 2.112 (2.245) Remain 00:51:01 loss: 1.8000 Lr: 0.00200
[2024-06-09 01:46:05,175 INFO misc.py line 118 2003007] Train: [1/1][65/1428] Data 0.002 (0.003) Batch 2.119 (2.243) Remain 00:50:56 loss: 2.5448 Lr: 0.00200
[2024-06-09 01:46:07,425 INFO misc.py line 118 2003007] Train: [1/1][66/1428] Data 0.003 (0.003) Batch 2.250 (2.243) Remain 00:50:54 loss: 1.6612 Lr: 0.00200
[2024-06-09 01:46:09,596 INFO misc.py line 118 2003007] Train: [1/1][67/1428] Data 0.002 (0.003) Batch 2.171 (2.242) Remain 00:50:51 loss: 1.9301 Lr: 0.00200
[2024-06-09 01:46:11,941 INFO misc.py line 118 2003007] Train: [1/1][68/1428] Data 0.002 (0.003) Batch 2.345 (2.243) Remain 00:50:50 loss: 1.6951 Lr: 0.00200
[2024-06-09 01:46:14,076 INFO misc.py line 118 2003007] Train: [1/1][69/1428] Data 0.003 (0.003) Batch 2.135 (2.242) Remain 00:50:46 loss: 2.1705 Lr: 0.00200
[2024-06-09 01:46:16,579 INFO misc.py line 118 2003007] Train: [1/1][70/1428] Data 0.002 (0.003) Batch 2.502 (2.246) Remain 00:50:49 loss: 1.3983 Lr: 0.00200
[2024-06-09 01:46:18,819 INFO misc.py line 118 2003007] Train: [1/1][71/1428] Data 0.002 (0.003) Batch 2.240 (2.246) Remain 00:50:47 loss: 1.6327 Lr: 0.00200
[2024-06-09 01:46:21,450 INFO misc.py line 118 2003007] Train: [1/1][72/1428] Data 0.002 (0.003) Batch 2.631 (2.251) Remain 00:50:52 loss: 1.4145 Lr: 0.00200
[2024-06-09 01:46:23,593 INFO misc.py line 118 2003007] Train: [1/1][73/1428] Data 0.003 (0.003) Batch 2.144 (2.250) Remain 00:50:48 loss: 2.4047 Lr: 0.00200
[2024-06-09 01:46:25,812 INFO misc.py line 118 2003007] Train: [1/1][74/1428] Data 0.002 (0.003) Batch 2.218 (2.249) Remain 00:50:45 loss: 1.5175 Lr: 0.00200
[2024-06-09 01:46:28,231 INFO misc.py line 118 2003007] Train: [1/1][75/1428] Data 0.003 (0.003) Batch 2.420 (2.252) Remain 00:50:46 loss: 1.6189 Lr: 0.00200
[2024-06-09 01:46:30,611 INFO misc.py line 118 2003007] Train: [1/1][76/1428] Data 0.003 (0.003) Batch 2.381 (2.253) Remain 00:50:46 loss: 1.8844 Lr: 0.00200
[2024-06-09 01:46:32,839 INFO misc.py line 118 2003007] Train: [1/1][77/1428] Data 0.002 (0.003) Batch 2.228 (2.253) Remain 00:50:43 loss: 1.7640 Lr: 0.00200
[2024-06-09 01:46:35,205 INFO misc.py line 118 2003007] Train: [1/1][78/1428] Data 0.002 (0.003) Batch 2.365 (2.254) Remain 00:50:43 loss: 1.6278 Lr: 0.00200
[2024-06-09 01:46:37,350 INFO misc.py line 118 2003007] Train: [1/1][79/1428] Data 0.003 (0.003) Batch 2.146 (2.253) Remain 00:50:39 loss: 1.8738 Lr: 0.00200
[2024-06-09 01:46:39,591 INFO misc.py line 118 2003007] Train: [1/1][80/1428] Data 0.002 (0.003) Batch 2.240 (2.253) Remain 00:50:36 loss: 1.3578 Lr: 0.00200
[2024-06-09 01:46:41,979 INFO misc.py line 118 2003007] Train: [1/1][81/1428] Data 0.003 (0.003) Batch 2.389 (2.255) Remain 00:50:36 loss: 1.7936 Lr: 0.00200
[2024-06-09 01:46:44,219 INFO misc.py line 118 2003007] Train: [1/1][82/1428] Data 0.002 (0.003) Batch 2.240 (2.254) Remain 00:50:34 loss: 1.7127 Lr: 0.00200
[2024-06-09 01:46:46,334 INFO misc.py line 118 2003007] Train: [1/1][83/1428] Data 0.003 (0.003) Batch 2.116 (2.253) Remain 00:50:29 loss: 1.2532 Lr: 0.00200
[2024-06-09 01:46:48,624 INFO misc.py line 118 2003007] Train: [1/1][84/1428] Data 0.002 (0.003) Batch 2.289 (2.253) Remain 00:50:28 loss: 1.9607 Lr: 0.00200
[2024-06-09 01:46:51,066 INFO misc.py line 118 2003007] Train: [1/1][85/1428] Data 0.002 (0.003) Batch 2.441 (2.255) Remain 00:50:28 loss: 1.1661 Lr: 0.00200
[2024-06-09 01:46:53,252 INFO misc.py line 118 2003007] Train: [1/1][86/1428] Data 0.003 (0.003) Batch 2.187 (2.255) Remain 00:50:25 loss: 2.1639 Lr: 0.00200
[2024-06-09 01:46:55,427 INFO misc.py line 118 2003007] Train: [1/1][87/1428] Data 0.003 (0.003) Batch 2.176 (2.254) Remain 00:50:22 loss: 1.5367 Lr: 0.00200
[2024-06-09 01:46:57,463 INFO misc.py line 118 2003007] Train: [1/1][88/1428] Data 0.003 (0.003) Batch 2.036 (2.251) Remain 00:50:16 loss: 1.3281 Lr: 0.00200
[2024-06-09 01:47:00,359 INFO misc.py line 118 2003007] Train: [1/1][89/1428] Data 0.002 (0.003) Batch 2.895 (2.259) Remain 00:50:24 loss: 1.7570 Lr: 0.00200
[2024-06-09 01:47:02,497 INFO misc.py line 118 2003007] Train: [1/1][90/1428] Data 0.002 (0.003) Batch 2.138 (2.257) Remain 00:50:20 loss: 1.9051 Lr: 0.00200
[2024-06-09 01:47:04,572 INFO misc.py line 118 2003007] Train: [1/1][91/1428] Data 0.003 (0.003) Batch 2.076 (2.255) Remain 00:50:15 loss: 1.4836 Lr: 0.00200
[2024-06-09 01:47:06,742 INFO misc.py line 118 2003007] Train: [1/1][92/1428] Data 0.003 (0.003) Batch 2.170 (2.254) Remain 00:50:11 loss: 1.5681 Lr: 0.00200
[2024-06-09 01:47:09,133 INFO misc.py line 118 2003007] Train: [1/1][93/1428] Data 0.002 (0.003) Batch 2.391 (2.256) Remain 00:50:11 loss: 2.1198 Lr: 0.00200
[2024-06-09 01:47:11,536 INFO misc.py line 118 2003007] Train: [1/1][94/1428] Data 0.003 (0.003) Batch 2.403 (2.257) Remain 00:50:11 loss: 2.1893 Lr: 0.00200
[2024-06-09 01:47:13,696 INFO misc.py line 118 2003007] Train: [1/1][95/1428] Data 0.003 (0.003) Batch 2.160 (2.256) Remain 00:50:07 loss: 1.3640 Lr: 0.00200
[2024-06-09 01:47:15,789 INFO misc.py line 118 2003007] Train: [1/1][96/1428] Data 0.002 (0.003) Batch 2.093 (2.254) Remain 00:50:02 loss: 1.5196 Lr: 0.00200
[2024-06-09 01:47:17,843 INFO misc.py line 118 2003007] Train: [1/1][97/1428] Data 0.002 (0.003) Batch 2.054 (2.252) Remain 00:49:57 loss: 1.3613 Lr: 0.00200
[2024-06-09 01:47:20,084 INFO misc.py line 118 2003007] Train: [1/1][98/1428] Data 0.003 (0.003) Batch 2.241 (2.252) Remain 00:49:55 loss: 1.2987 Lr: 0.00200
[2024-06-09 01:47:22,232 INFO misc.py line 118 2003007] Train: [1/1][99/1428] Data 0.004 (0.003) Batch 2.147 (2.251) Remain 00:49:51 loss: 1.3506 Lr: 0.00200
[2024-06-09 01:47:24,511 INFO misc.py line 118 2003007] Train: [1/1][100/1428] Data 0.003 (0.003) Batch 2.279 (2.251) Remain 00:49:49 loss: 1.2856 Lr: 0.00200
[2024-06-09 01:47:26,779 INFO misc.py line 118 2003007] Train: [1/1][101/1428] Data 0.003 (0.003) Batch 2.268 (2.252) Remain 00:49:47 loss: 1.2183 Lr: 0.00200
[2024-06-09 01:47:29,058 INFO misc.py line 118 2003007] Train: [1/1][102/1428] Data 0.002 (0.003) Batch 2.280 (2.252) Remain 00:49:45 loss: 1.9176 Lr: 0.00200
[2024-06-09 01:47:31,117 INFO misc.py line 118 2003007] Train: [1/1][103/1428] Data 0.002 (0.003) Batch 2.059 (2.250) Remain 00:49:41 loss: 1.1635 Lr: 0.00200
[2024-06-09 01:47:33,272 INFO misc.py line 118 2003007] Train: [1/1][104/1428] Data 0.002 (0.003) Batch 2.155 (2.249) Remain 00:49:37 loss: 1.5503 Lr: 0.00200
[2024-06-09 01:47:35,407 INFO misc.py line 118 2003007] Train: [1/1][105/1428] Data 0.003 (0.003) Batch 2.135 (2.248) Remain 00:49:33 loss: 1.3533 Lr: 0.00199
[2024-06-09 01:47:37,789 INFO misc.py line 118 2003007] Train: [1/1][106/1428] Data 0.002 (0.003) Batch 2.382 (2.249) Remain 00:49:33 loss: 1.5324 Lr: 0.00199
[2024-06-09 01:47:40,037 INFO misc.py line 118 2003007] Train: [1/1][107/1428] Data 0.002 (0.003) Batch 2.248 (2.249) Remain 00:49:31 loss: 1.4407 Lr: 0.00199
[2024-06-09 01:47:42,342 INFO misc.py line 118 2003007] Train: [1/1][108/1428] Data 0.002 (0.003) Batch 2.304 (2.250) Remain 00:49:29 loss: 1.6777 Lr: 0.00199
[2024-06-09 01:47:44,592 INFO misc.py line 118 2003007] Train: [1/1][109/1428] Data 0.002 (0.003) Batch 2.250 (2.250) Remain 00:49:27 loss: 0.8821 Lr: 0.00199
[2024-06-09 01:47:46,809 INFO misc.py line 118 2003007] Train: [1/1][110/1428] Data 0.003 (0.003) Batch 2.217 (2.249) Remain 00:49:24 loss: 1.5449 Lr: 0.00199
[2024-06-09 01:47:49,339 INFO misc.py line 118 2003007] Train: [1/1][111/1428] Data 0.003 (0.003) Batch 2.530 (2.252) Remain 00:49:25 loss: 1.3575 Lr: 0.00199
[2024-06-09 01:47:51,605 INFO misc.py line 118 2003007] Train: [1/1][112/1428] Data 0.003 (0.003) Batch 2.265 (2.252) Remain 00:49:23 loss: 1.4283 Lr: 0.00199
[2024-06-09 01:47:53,918 INFO misc.py line 118 2003007] Train: [1/1][113/1428] Data 0.003 (0.003) Batch 2.314 (2.253) Remain 00:49:22 loss: 1.4216 Lr: 0.00199
[2024-06-09 01:47:56,079 INFO misc.py line 118 2003007] Train: [1/1][114/1428] Data 0.002 (0.003) Batch 2.161 (2.252) Remain 00:49:18 loss: 1.3168 Lr: 0.00199
[2024-06-09 01:47:58,374 INFO misc.py line 118 2003007] Train: [1/1][115/1428] Data 0.003 (0.003) Batch 2.295 (2.252) Remain 00:49:17 loss: 1.0849 Lr: 0.00199
[2024-06-09 01:48:00,595 INFO misc.py line 118 2003007] Train: [1/1][116/1428] Data 0.002 (0.003) Batch 2.220 (2.252) Remain 00:49:14 loss: 1.1139 Lr: 0.00199
[2024-06-09 01:48:02,855 INFO misc.py line 118 2003007] Train: [1/1][117/1428] Data 0.003 (0.003) Batch 2.261 (2.252) Remain 00:49:12 loss: 1.6560 Lr: 0.00199
[2024-06-09 01:48:04,931 INFO misc.py line 118 2003007] Train: [1/1][118/1428] Data 0.002 (0.003) Batch 2.076 (2.251) Remain 00:49:08 loss: 1.9262 Lr: 0.00199
[2024-06-09 01:48:07,005 INFO misc.py line 118 2003007] Train: [1/1][119/1428] Data 0.002 (0.003) Batch 2.074 (2.249) Remain 00:49:03 loss: 1.4593 Lr: 0.00199
[2024-06-09 01:48:09,186 INFO misc.py line 118 2003007] Train: [1/1][120/1428] Data 0.002 (0.003) Batch 2.181 (2.248) Remain 00:49:00 loss: 1.3255 Lr: 0.00199
[2024-06-09 01:48:11,291 INFO misc.py line 118 2003007] Train: [1/1][121/1428] Data 0.003 (0.003) Batch 2.105 (2.247) Remain 00:48:57 loss: 0.9942 Lr: 0.00199
[2024-06-09 01:48:13,477 INFO misc.py line 118 2003007] Train: [1/1][122/1428] Data 0.004 (0.003) Batch 2.186 (2.247) Remain 00:48:54 loss: 1.4465 Lr: 0.00199
[2024-06-09 01:48:15,945 INFO misc.py line 118 2003007] Train: [1/1][123/1428] Data 0.003 (0.003) Batch 2.467 (2.249) Remain 00:48:54 loss: 1.5724 Lr: 0.00199
[2024-06-09 01:48:18,130 INFO misc.py line 118 2003007] Train: [1/1][124/1428] Data 0.003 (0.003) Batch 2.185 (2.248) Remain 00:48:51 loss: 1.7164 Lr: 0.00199
[2024-06-09 01:48:20,321 INFO misc.py line 118 2003007] Train: [1/1][125/1428] Data 0.002 (0.003) Batch 2.192 (2.248) Remain 00:48:48 loss: 1.2847 Lr: 0.00199
[2024-06-09 01:48:22,652 INFO misc.py line 118 2003007] Train: [1/1][126/1428] Data 0.003 (0.003) Batch 2.331 (2.248) Remain 00:48:47 loss: 1.2831 Lr: 0.00199
[2024-06-09 01:48:24,813 INFO misc.py line 118 2003007] Train: [1/1][127/1428] Data 0.002 (0.003) Batch 2.161 (2.248) Remain 00:48:44 loss: 1.3414 Lr: 0.00199
[2024-06-09 01:48:26,869 INFO misc.py line 118 2003007] Train: [1/1][128/1428] Data 0.002 (0.003) Batch 2.056 (2.246) Remain 00:48:39 loss: 1.3796 Lr: 0.00199
[2024-06-09 01:48:29,043 INFO misc.py line 118 2003007] Train: [1/1][129/1428] Data 0.002 (0.003) Batch 2.175 (2.245) Remain 00:48:36 loss: 1.9906 Lr: 0.00199
[2024-06-09 01:48:31,194 INFO misc.py line 118 2003007] Train: [1/1][130/1428] Data 0.003 (0.003) Batch 2.150 (2.245) Remain 00:48:33 loss: 1.8909 Lr: 0.00199
[2024-06-09 01:48:33,514 INFO misc.py line 118 2003007] Train: [1/1][131/1428] Data 0.003 (0.003) Batch 2.321 (2.245) Remain 00:48:32 loss: 1.2918 Lr: 0.00199
[2024-06-09 01:48:35,790 INFO misc.py line 118 2003007] Train: [1/1][132/1428] Data 0.002 (0.003) Batch 2.276 (2.245) Remain 00:48:30 loss: 1.8371 Lr: 0.00199
[2024-06-09 01:48:38,089 INFO misc.py line 118 2003007] Train: [1/1][133/1428] Data 0.002 (0.003) Batch 2.299 (2.246) Remain 00:48:28 loss: 1.0474 Lr: 0.00199
[2024-06-09 01:48:40,516 INFO misc.py line 118 2003007] Train: [1/1][134/1428] Data 0.002 (0.003) Batch 2.427 (2.247) Remain 00:48:27 loss: 1.4290 Lr: 0.00199
[2024-06-09 01:48:42,652 INFO misc.py line 118 2003007] Train: [1/1][135/1428] Data 0.002 (0.003) Batch 2.136 (2.246) Remain 00:48:24 loss: 1.5545 Lr: 0.00199
[2024-06-09 01:48:44,860 INFO misc.py line 118 2003007] Train: [1/1][136/1428] Data 0.003 (0.003) Batch 2.208 (2.246) Remain 00:48:22 loss: 1.6713 Lr: 0.00199
[2024-06-09 01:48:47,001 INFO misc.py line 118 2003007] Train: [1/1][137/1428] Data 0.003 (0.003) Batch 2.141 (2.245) Remain 00:48:18 loss: 1.6231 Lr: 0.00198
[2024-06-09 01:48:49,172 INFO misc.py line 118 2003007] Train: [1/1][138/1428] Data 0.002 (0.003) Batch 2.171 (2.245) Remain 00:48:15 loss: 1.2269 Lr: 0.00198
[2024-06-09 01:48:51,443 INFO misc.py line 118 2003007] Train: [1/1][139/1428] Data 0.002 (0.003) Batch 2.271 (2.245) Remain 00:48:13 loss: 2.0986 Lr: 0.00198
[2024-06-09 01:48:53,567 INFO misc.py line 118 2003007] Train: [1/1][140/1428] Data 0.003 (0.003) Batch 2.124 (2.244) Remain 00:48:10 loss: 1.3247 Lr: 0.00198
[2024-06-09 01:48:55,814 INFO misc.py line 118 2003007] Train: [1/1][141/1428] Data 0.002 (0.003) Batch 2.248 (2.244) Remain 00:48:08 loss: 1.7710 Lr: 0.00198
[2024-06-09 01:48:58,283 INFO misc.py line 118 2003007] Train: [1/1][142/1428] Data 0.002 (0.003) Batch 2.469 (2.246) Remain 00:48:08 loss: 1.3828 Lr: 0.00198
[2024-06-09 01:49:00,632 INFO misc.py line 118 2003007] Train: [1/1][143/1428] Data 0.003 (0.003) Batch 2.349 (2.247) Remain 00:48:06 loss: 1.1432 Lr: 0.00198
[2024-06-09 01:49:02,988 INFO misc.py line 118 2003007] Train: [1/1][144/1428] Data 0.002 (0.003) Batch 2.355 (2.247) Remain 00:48:05 loss: 1.7076 Lr: 0.00198
[2024-06-09 01:49:05,103 INFO misc.py line 118 2003007] Train: [1/1][145/1428] Data 0.003 (0.003) Batch 2.116 (2.246) Remain 00:48:02 loss: 1.3506 Lr: 0.00198
[2024-06-09 01:49:07,304 INFO misc.py line 118 2003007] Train: [1/1][146/1428] Data 0.002 (0.003) Batch 2.200 (2.246) Remain 00:47:59 loss: 0.8747 Lr: 0.00198
[2024-06-09 01:49:09,514 INFO misc.py line 118 2003007] Train: [1/1][147/1428] Data 0.003 (0.003) Batch 2.210 (2.246) Remain 00:47:56 loss: 1.4523 Lr: 0.00198
[2024-06-09 01:49:11,797 INFO misc.py line 118 2003007] Train: [1/1][148/1428] Data 0.003 (0.003) Batch 2.284 (2.246) Remain 00:47:54 loss: 1.4133 Lr: 0.00198
[2024-06-09 01:49:14,149 INFO misc.py line 118 2003007] Train: [1/1][149/1428] Data 0.003 (0.003) Batch 2.351 (2.247) Remain 00:47:53 loss: 1.2852 Lr: 0.00198
[2024-06-09 01:49:16,514 INFO misc.py line 118 2003007] Train: [1/1][150/1428] Data 0.002 (0.003) Batch 2.366 (2.248) Remain 00:47:52 loss: 0.9856 Lr: 0.00198
[2024-06-09 01:49:18,681 INFO misc.py line 118 2003007] Train: [1/1][151/1428] Data 0.003 (0.003) Batch 2.166 (2.247) Remain 00:47:49 loss: 1.0041 Lr: 0.00198
[2024-06-09 01:49:20,995 INFO misc.py line 118 2003007] Train: [1/1][152/1428] Data 0.003 (0.003) Batch 2.314 (2.247) Remain 00:47:47 loss: 1.1908 Lr: 0.00198
[2024-06-09 01:49:23,208 INFO misc.py line 118 2003007] Train: [1/1][153/1428] Data 0.003 (0.003) Batch 2.214 (2.247) Remain 00:47:45 loss: 1.4345 Lr: 0.00198
[2024-06-09 01:49:25,586 INFO misc.py line 118 2003007] Train: [1/1][154/1428] Data 0.002 (0.003) Batch 2.379 (2.248) Remain 00:47:44 loss: 1.0705 Lr: 0.00198
[2024-06-09 01:49:28,113 INFO misc.py line 118 2003007] Train: [1/1][155/1428] Data 0.003 (0.003) Batch 2.525 (2.250) Remain 00:47:44 loss: 0.8692 Lr: 0.00198
[2024-06-09 01:49:30,233 INFO misc.py line 118 2003007] Train: [1/1][156/1428] Data 0.004 (0.003) Batch 2.122 (2.249) Remain 00:47:40 loss: 2.2457 Lr: 0.00198
[2024-06-09 01:49:32,543 INFO misc.py line 118 2003007] Train: [1/1][157/1428] Data 0.002 (0.003) Batch 2.309 (2.249) Remain 00:47:39 loss: 1.3207 Lr: 0.00198
[2024-06-09 01:49:34,899 INFO misc.py line 118 2003007] Train: [1/1][158/1428] Data 0.003 (0.003) Batch 2.357 (2.250) Remain 00:47:37 loss: 1.3527 Lr: 0.00198
[2024-06-09 01:49:37,308 INFO misc.py line 118 2003007] Train: [1/1][159/1428] Data 0.002 (0.003) Batch 2.408 (2.251) Remain 00:47:36 loss: 1.7179 Lr: 0.00197
[2024-06-09 01:49:39,618 INFO misc.py line 118 2003007] Train: [1/1][160/1428] Data 0.003 (0.003) Batch 2.310 (2.252) Remain 00:47:34 loss: 1.0477 Lr: 0.00197
[2024-06-09 01:49:42,051 INFO misc.py line 118 2003007] Train: [1/1][161/1428] Data 0.002 (0.003) Batch 2.432 (2.253) Remain 00:47:34 loss: 1.8264 Lr: 0.00197
[2024-06-09 01:49:44,429 INFO misc.py line 118 2003007] Train: [1/1][162/1428] Data 0.003 (0.003) Batch 2.379 (2.254) Remain 00:47:32 loss: 1.1528 Lr: 0.00197
[2024-06-09 01:49:46,744 INFO misc.py line 118 2003007] Train: [1/1][163/1428] Data 0.002 (0.003) Batch 2.314 (2.254) Remain 00:47:31 loss: 1.4378 Lr: 0.00197
[2024-06-09 01:49:49,021 INFO misc.py line 118 2003007] Train: [1/1][164/1428] Data 0.003 (0.003) Batch 2.277 (2.254) Remain 00:47:29 loss: 1.6652 Lr: 0.00197
[2024-06-09 01:49:51,157 INFO misc.py line 118 2003007] Train: [1/1][165/1428] Data 0.003 (0.003) Batch 2.136 (2.253) Remain 00:47:25 loss: 1.4749 Lr: 0.00197
[2024-06-09 01:49:53,406 INFO misc.py line 118 2003007] Train: [1/1][166/1428] Data 0.002 (0.003) Batch 2.249 (2.253) Remain 00:47:23 loss: 1.4492 Lr: 0.00197
[2024-06-09 01:49:55,473 INFO misc.py line 118 2003007] Train: [1/1][167/1428] Data 0.003 (0.003) Batch 2.067 (2.252) Remain 00:47:19 loss: 1.1427 Lr: 0.00197
[2024-06-09 01:49:57,447 INFO misc.py line 118 2003007] Train: [1/1][168/1428] Data 0.002 (0.003) Batch 1.974 (2.250) Remain 00:47:15 loss: 1.6523 Lr: 0.00197
[2024-06-09 01:49:59,644 INFO misc.py line 118 2003007] Train: [1/1][169/1428] Data 0.002 (0.003) Batch 2.197 (2.250) Remain 00:47:12 loss: 1.5496 Lr: 0.00197
[2024-06-09 01:50:01,934 INFO misc.py line 118 2003007] Train: [1/1][170/1428] Data 0.002 (0.003) Batch 2.289 (2.250) Remain 00:47:10 loss: 2.3612 Lr: 0.00197
[2024-06-09 01:50:04,251 INFO misc.py line 118 2003007] Train: [1/1][171/1428] Data 0.003 (0.003) Batch 2.317 (2.251) Remain 00:47:09 loss: 1.0891 Lr: 0.00197
[2024-06-09 01:50:06,530 INFO misc.py line 118 2003007] Train: [1/1][172/1428] Data 0.002 (0.003) Batch 2.279 (2.251) Remain 00:47:07 loss: 1.6342 Lr: 0.00197
[2024-06-09 01:50:08,651 INFO misc.py line 118 2003007] Train: [1/1][173/1428] Data 0.002 (0.003) Batch 2.121 (2.250) Remain 00:47:03 loss: 1.3799 Lr: 0.00197
[2024-06-09 01:50:10,757 INFO misc.py line 118 2003007] Train: [1/1][174/1428] Data 0.003 (0.003) Batch 2.106 (2.249) Remain 00:47:00 loss: 1.0589 Lr: 0.00197
[2024-06-09 01:50:12,889 INFO misc.py line 118 2003007] Train: [1/1][175/1428] Data 0.002 (0.003) Batch 2.132 (2.249) Remain 00:46:57 loss: 1.1392 Lr: 0.00197
[2024-06-09 01:50:15,077 INFO misc.py line 118 2003007] Train: [1/1][176/1428] Data 0.002 (0.003) Batch 2.188 (2.248) Remain 00:46:54 loss: 1.2988 Lr: 0.00197
[2024-06-09 01:50:17,429 INFO misc.py line 118 2003007] Train: [1/1][177/1428] Data 0.002 (0.003) Batch 2.352 (2.249) Remain 00:46:53 loss: 1.5317 Lr: 0.00196
[2024-06-09 01:50:19,583 INFO misc.py line 118 2003007] Train: [1/1][178/1428] Data 0.003 (0.003) Batch 2.154 (2.248) Remain 00:46:50 loss: 1.5431 Lr: 0.00196
[2024-06-09 01:50:21,799 INFO misc.py line 118 2003007] Train: [1/1][179/1428] Data 0.002 (0.003) Batch 2.215 (2.248) Remain 00:46:47 loss: 1.0996 Lr: 0.00196
[2024-06-09 01:50:23,903 INFO misc.py line 118 2003007] Train: [1/1][180/1428] Data 0.003 (0.003) Batch 2.104 (2.247) Remain 00:46:44 loss: 0.9035 Lr: 0.00196
[2024-06-09 01:50:26,205 INFO misc.py line 118 2003007] Train: [1/1][181/1428] Data 0.002 (0.003) Batch 2.301 (2.248) Remain 00:46:42 loss: 1.4588 Lr: 0.00196
[2024-06-09 01:50:29,017 INFO misc.py line 118 2003007] Train: [1/1][182/1428] Data 0.003 (0.003) Batch 2.807 (2.251) Remain 00:46:44 loss: 1.7131 Lr: 0.00196
[2024-06-09 01:50:31,418 INFO misc.py line 118 2003007] Train: [1/1][183/1428] Data 0.017 (0.003) Batch 2.407 (2.252) Remain 00:46:43 loss: 1.8062 Lr: 0.00196
[2024-06-09 01:50:33,708 INFO misc.py line 118 2003007] Train: [1/1][184/1428] Data 0.003 (0.003) Batch 2.290 (2.252) Remain 00:46:41 loss: 1.2068 Lr: 0.00196
[2024-06-09 01:50:36,143 INFO misc.py line 118 2003007] Train: [1/1][185/1428] Data 0.003 (0.003) Batch 2.436 (2.253) Remain 00:46:40 loss: 0.8248 Lr: 0.00196
[2024-06-09 01:50:38,500 INFO misc.py line 118 2003007] Train: [1/1][186/1428] Data 0.002 (0.003) Batch 2.356 (2.253) Remain 00:46:38 loss: 0.9136 Lr: 0.00196
[2024-06-09 01:50:40,684 INFO misc.py line 118 2003007] Train: [1/1][187/1428] Data 0.003 (0.003) Batch 2.185 (2.253) Remain 00:46:36 loss: 1.4367 Lr: 0.00196
[2024-06-09 01:50:42,968 INFO misc.py line 118 2003007] Train: [1/1][188/1428] Data 0.002 (0.003) Batch 2.284 (2.253) Remain 00:46:33 loss: 1.8233 Lr: 0.00196
[2024-06-09 01:50:45,117 INFO misc.py line 118 2003007] Train: [1/1][189/1428] Data 0.002 (0.003) Batch 2.149 (2.253) Remain 00:46:31 loss: 1.4353 Lr: 0.00196
[2024-06-09 01:50:47,343 INFO misc.py line 118 2003007] Train: [1/1][190/1428] Data 0.003 (0.003) Batch 2.225 (2.253) Remain 00:46:28 loss: 2.8174 Lr: 0.00196
[2024-06-09 01:50:49,629 INFO misc.py line 118 2003007] Train: [1/1][191/1428] Data 0.004 (0.003) Batch 2.287 (2.253) Remain 00:46:26 loss: 1.1389 Lr: 0.00196
[2024-06-09 01:50:51,725 INFO misc.py line 118 2003007] Train: [1/1][192/1428] Data 0.003 (0.003) Batch 2.096 (2.252) Remain 00:46:23 loss: 1.3693 Lr: 0.00196
[2024-06-09 01:50:53,920 INFO misc.py line 118 2003007] Train: [1/1][193/1428] Data 0.002 (0.003) Batch 2.195 (2.252) Remain 00:46:20 loss: 1.0762 Lr: 0.00195
[2024-06-09 01:50:56,149 INFO misc.py line 118 2003007] Train: [1/1][194/1428] Data 0.002 (0.003) Batch 2.229 (2.251) Remain 00:46:18 loss: 1.1826 Lr: 0.00195
[2024-06-09 01:50:58,273 INFO misc.py line 118 2003007] Train: [1/1][195/1428] Data 0.003 (0.003) Batch 2.123 (2.251) Remain 00:46:15 loss: 1.8929 Lr: 0.00195
[2024-06-09 01:51:00,327 INFO misc.py line 118 2003007] Train: [1/1][196/1428] Data 0.003 (0.003) Batch 2.055 (2.250) Remain 00:46:11 loss: 1.4714 Lr: 0.00195
[2024-06-09 01:51:02,469 INFO misc.py line 118 2003007] Train: [1/1][197/1428] Data 0.003 (0.003) Batch 2.142 (2.249) Remain 00:46:08 loss: 1.1650 Lr: 0.00195
[2024-06-09 01:51:04,941 INFO misc.py line 118 2003007] Train: [1/1][198/1428] Data 0.002 (0.003) Batch 2.472 (2.250) Remain 00:46:07 loss: 2.7268 Lr: 0.00195
[2024-06-09 01:51:07,395 INFO misc.py line 118 2003007] Train: [1/1][199/1428] Data 0.003 (0.003) Batch 2.454 (2.251) Remain 00:46:06 loss: 1.7347 Lr: 0.00195
[2024-06-09 01:51:09,526 INFO misc.py line 118 2003007] Train: [1/1][200/1428] Data 0.002 (0.003) Batch 2.130 (2.251) Remain 00:46:03 loss: 1.7722 Lr: 0.00195
[2024-06-09 01:51:11,823 INFO misc.py line 118 2003007] Train: [1/1][201/1428] Data 0.002 (0.003) Batch 2.297 (2.251) Remain 00:46:01 loss: 2.4923 Lr: 0.00195
[2024-06-09 01:51:14,108 INFO misc.py line 118 2003007] Train: [1/1][202/1428] Data 0.002 (0.003) Batch 2.285 (2.251) Remain 00:45:59 loss: 0.6947 Lr: 0.00195
[2024-06-09 01:51:16,380 INFO misc.py line 118 2003007] Train: [1/1][203/1428] Data 0.003 (0.003) Batch 2.272 (2.251) Remain 00:45:57 loss: 1.0291 Lr: 0.00195
[2024-06-09 01:51:18,849 INFO misc.py line 118 2003007] Train: [1/1][204/1428] Data 0.002 (0.003) Batch 2.469 (2.252) Remain 00:45:56 loss: 0.7228 Lr: 0.00195
[2024-06-09 01:51:21,258 INFO misc.py line 118 2003007] Train: [1/1][205/1428] Data 0.003 (0.003) Batch 2.409 (2.253) Remain 00:45:55 loss: 1.2835 Lr: 0.00195
[2024-06-09 01:51:23,573 INFO misc.py line 118 2003007] Train: [1/1][206/1428] Data 0.003 (0.003) Batch 2.315 (2.253) Remain 00:45:53 loss: 1.8850 Lr: 0.00195
[2024-06-09 01:51:25,636 INFO misc.py line 118 2003007] Train: [1/1][207/1428] Data 0.003 (0.003) Batch 2.063 (2.253) Remain 00:45:50 loss: 0.5852 Lr: 0.00194
[2024-06-09 01:51:27,770 INFO misc.py line 118 2003007] Train: [1/1][208/1428] Data 0.003 (0.003) Batch 2.135 (2.252) Remain 00:45:47 loss: 0.9117 Lr: 0.00194
[2024-06-09 01:51:29,874 INFO misc.py line 118 2003007] Train: [1/1][209/1428] Data 0.002 (0.003) Batch 2.103 (2.251) Remain 00:45:44 loss: 0.4490 Lr: 0.00194
[2024-06-09 01:51:32,250 INFO misc.py line 118 2003007] Train: [1/1][210/1428] Data 0.003 (0.003) Batch 2.376 (2.252) Remain 00:45:42 loss: 1.0488 Lr: 0.00194
[2024-06-09 01:51:34,439 INFO misc.py line 118 2003007] Train: [1/1][211/1428] Data 0.002 (0.003) Batch 2.189 (2.252) Remain 00:45:40 loss: 0.7718 Lr: 0.00194
[2024-06-09 01:51:36,684 INFO misc.py line 118 2003007] Train: [1/1][212/1428] Data 0.003 (0.003) Batch 2.245 (2.251) Remain 00:45:37 loss: 1.1627 Lr: 0.00194
[2024-06-09 01:51:38,947 INFO misc.py line 118 2003007] Train: [1/1][213/1428] Data 0.003 (0.003) Batch 2.262 (2.252) Remain 00:45:35 loss: 1.1440 Lr: 0.00194
[2024-06-09 01:51:41,297 INFO misc.py line 118 2003007] Train: [1/1][214/1428] Data 0.003 (0.003) Batch 2.351 (2.252) Remain 00:45:33 loss: 1.3220 Lr: 0.00194
[2024-06-09 01:51:43,822 INFO misc.py line 118 2003007] Train: [1/1][215/1428] Data 0.002 (0.003) Batch 2.524 (2.253) Remain 00:45:33 loss: 0.7580 Lr: 0.00194
[2024-06-09 01:51:46,259 INFO misc.py line 118 2003007] Train: [1/1][216/1428] Data 0.002 (0.003) Batch 2.437 (2.254) Remain 00:45:32 loss: 0.8347 Lr: 0.00194
[2024-06-09 01:51:48,719 INFO misc.py line 118 2003007] Train: [1/1][217/1428] Data 0.002 (0.003) Batch 2.460 (2.255) Remain 00:45:30 loss: 1.0892 Lr: 0.00194
[2024-06-09 01:51:50,894 INFO misc.py line 118 2003007] Train: [1/1][218/1428] Data 0.003 (0.003) Batch 2.174 (2.255) Remain 00:45:28 loss: 1.5881 Lr: 0.00194
[2024-06-09 01:51:52,960 INFO misc.py line 118 2003007] Train: [1/1][219/1428] Data 0.002 (0.003) Batch 2.066 (2.254) Remain 00:45:24 loss: 1.3391 Lr: 0.00194
[2024-06-09 01:51:55,049 INFO misc.py line 118 2003007] Train: [1/1][220/1428] Data 0.003 (0.003) Batch 2.089 (2.253) Remain 00:45:21 loss: 1.6809 Lr: 0.00193
[2024-06-09 01:51:57,149 INFO misc.py line 118 2003007] Train: [1/1][221/1428] Data 0.003 (0.003) Batch 2.099 (2.252) Remain 00:45:18 loss: 1.2632 Lr: 0.00193
[2024-06-09 01:51:59,575 INFO misc.py line 118 2003007] Train: [1/1][222/1428] Data 0.004 (0.003) Batch 2.426 (2.253) Remain 00:45:17 loss: 1.3230 Lr: 0.00193
[2024-06-09 01:52:01,888 INFO misc.py line 118 2003007] Train: [1/1][223/1428] Data 0.003 (0.003) Batch 2.314 (2.253) Remain 00:45:15 loss: 1.3991 Lr: 0.00193
[2024-06-09 01:52:04,402 INFO misc.py line 118 2003007] Train: [1/1][224/1428] Data 0.003 (0.003) Batch 2.513 (2.255) Remain 00:45:14 loss: 1.2995 Lr: 0.00193
[2024-06-09 01:52:06,647 INFO misc.py line 118 2003007] Train: [1/1][225/1428] Data 0.003 (0.003) Batch 2.245 (2.255) Remain 00:45:12 loss: 1.9511 Lr: 0.00193
[2024-06-09 01:52:08,943 INFO misc.py line 118 2003007] Train: [1/1][226/1428] Data 0.003 (0.003) Batch 2.296 (2.255) Remain 00:45:10 loss: 0.9911 Lr: 0.00193
[2024-06-09 01:52:11,119 INFO misc.py line 118 2003007] Train: [1/1][227/1428] Data 0.003 (0.003) Batch 2.175 (2.254) Remain 00:45:07 loss: 1.3611 Lr: 0.00193
[2024-06-09 01:52:13,202 INFO misc.py line 118 2003007] Train: [1/1][228/1428] Data 0.003 (0.003) Batch 2.084 (2.254) Remain 00:45:04 loss: 1.2297 Lr: 0.00193
[2024-06-09 01:52:15,426 INFO misc.py line 118 2003007] Train: [1/1][229/1428] Data 0.003 (0.003) Batch 2.224 (2.254) Remain 00:45:02 loss: 0.8170 Lr: 0.00193
[2024-06-09 01:52:17,802 INFO misc.py line 118 2003007] Train: [1/1][230/1428] Data 0.002 (0.003) Batch 2.376 (2.254) Remain 00:45:00 loss: 0.9762 Lr: 0.00193
[2024-06-09 01:52:20,099 INFO misc.py line 118 2003007] Train: [1/1][231/1428] Data 0.003 (0.003) Batch 2.297 (2.254) Remain 00:44:58 loss: 1.7207 Lr: 0.00193
[2024-06-09 01:52:22,361 INFO misc.py line 118 2003007] Train: [1/1][232/1428] Data 0.003 (0.003) Batch 2.262 (2.254) Remain 00:44:56 loss: 1.5768 Lr: 0.00192
[2024-06-09 01:52:24,589 INFO misc.py line 118 2003007] Train: [1/1][233/1428] Data 0.002 (0.003) Batch 2.229 (2.254) Remain 00:44:53 loss: 1.2569 Lr: 0.00192
[2024-06-09 01:52:26,745 INFO misc.py line 118 2003007] Train: [1/1][234/1428] Data 0.002 (0.003) Batch 2.155 (2.254) Remain 00:44:51 loss: 1.1246 Lr: 0.00192
[2024-06-09 01:52:28,974 INFO misc.py line 118 2003007] Train: [1/1][235/1428] Data 0.002 (0.003) Batch 2.230 (2.254) Remain 00:44:48 loss: 2.0032 Lr: 0.00192
[2024-06-09 01:52:31,330 INFO misc.py line 118 2003007] Train: [1/1][236/1428] Data 0.003 (0.003) Batch 2.356 (2.254) Remain 00:44:46 loss: 0.9609 Lr: 0.00192
[2024-06-09 01:52:33,727 INFO misc.py line 118 2003007] Train: [1/1][237/1428] Data 0.002 (0.003) Batch 2.397 (2.255) Remain 00:44:45 loss: 1.7413 Lr: 0.00192
[2024-06-09 01:52:35,921 INFO misc.py line 118 2003007] Train: [1/1][238/1428] Data 0.002 (0.003) Batch 2.194 (2.254) Remain 00:44:42 loss: 1.3202 Lr: 0.00192
[2024-06-09 01:52:38,319 INFO misc.py line 118 2003007] Train: [1/1][239/1428] Data 0.002 (0.003) Batch 2.397 (2.255) Remain 00:44:41 loss: 0.8013 Lr: 0.00192
[2024-06-09 01:52:40,483 INFO misc.py line 118 2003007] Train: [1/1][240/1428] Data 0.003 (0.003) Batch 2.165 (2.255) Remain 00:44:38 loss: 1.1111 Lr: 0.00192
[2024-06-09 01:52:42,738 INFO misc.py line 118 2003007] Train: [1/1][241/1428] Data 0.002 (0.003) Batch 2.255 (2.255) Remain 00:44:36 loss: 1.2805 Lr: 0.00192
[2024-06-09 01:52:44,816 INFO misc.py line 118 2003007] Train: [1/1][242/1428] Data 0.002 (0.003) Batch 2.078 (2.254) Remain 00:44:33 loss: 0.8405 Lr: 0.00192
[2024-06-09 01:52:46,962 INFO misc.py line 118 2003007] Train: [1/1][243/1428] Data 0.002 (0.003) Batch 2.146 (2.253) Remain 00:44:30 loss: 1.9672 Lr: 0.00191
[2024-06-09 01:52:49,575 INFO misc.py line 118 2003007] Train: [1/1][244/1428] Data 0.003 (0.003) Batch 2.613 (2.255) Remain 00:44:29 loss: 1.1934 Lr: 0.00191
[2024-06-09 01:52:51,781 INFO misc.py line 118 2003007] Train: [1/1][245/1428] Data 0.003 (0.003) Batch 2.206 (2.255) Remain 00:44:27 loss: 2.1035 Lr: 0.00191
[2024-06-09 01:52:54,372 INFO misc.py line 118 2003007] Train: [1/1][246/1428] Data 0.003 (0.003) Batch 2.591 (2.256) Remain 00:44:26 loss: 1.0980 Lr: 0.00191
[2024-06-09 01:52:56,764 INFO misc.py line 118 2003007] Train: [1/1][247/1428] Data 0.003 (0.003) Batch 2.392 (2.257) Remain 00:44:25 loss: 0.8738 Lr: 0.00191
[2024-06-09 01:52:59,010 INFO misc.py line 118 2003007] Train: [1/1][248/1428] Data 0.002 (0.003) Batch 2.246 (2.257) Remain 00:44:22 loss: 1.1832 Lr: 0.00191
[2024-06-09 01:53:01,307 INFO misc.py line 118 2003007] Train: [1/1][249/1428] Data 0.002 (0.003) Batch 2.297 (2.257) Remain 00:44:20 loss: 1.2498 Lr: 0.00191
[2024-06-09 01:53:03,614 INFO misc.py line 118 2003007] Train: [1/1][250/1428] Data 0.003 (0.003) Batch 2.307 (2.257) Remain 00:44:18 loss: 1.1395 Lr: 0.00191
[2024-06-09 01:53:05,707 INFO misc.py line 118 2003007] Train: [1/1][251/1428] Data 0.002 (0.003) Batch 2.093 (2.256) Remain 00:44:15 loss: 1.7751 Lr: 0.00191
[2024-06-09 01:53:07,911 INFO misc.py line 118 2003007] Train: [1/1][252/1428] Data 0.002 (0.003) Batch 2.204 (2.256) Remain 00:44:13 loss: 0.8717 Lr: 0.00191
[2024-06-09 01:53:10,174 INFO misc.py line 118 2003007] Train: [1/1][253/1428] Data 0.002 (0.003) Batch 2.262 (2.256) Remain 00:44:11 loss: 0.9070 Lr: 0.00190
[2024-06-09 01:53:12,246 INFO misc.py line 118 2003007] Train: [1/1][254/1428] Data 0.003 (0.003) Batch 2.073 (2.255) Remain 00:44:07 loss: 0.7665 Lr: 0.00190
[2024-06-09 01:53:14,499 INFO misc.py line 118 2003007] Train: [1/1][255/1428] Data 0.002 (0.003) Batch 2.253 (2.255) Remain 00:44:05 loss: 1.2552 Lr: 0.00190
[2024-06-09 01:53:16,763 INFO misc.py line 118 2003007] Train: [1/1][256/1428] Data 0.002 (0.003) Batch 2.264 (2.255) Remain 00:44:03 loss: 1.3493 Lr: 0.00190
[2024-06-09 01:53:19,200 INFO misc.py line 118 2003007] Train: [1/1][257/1428] Data 0.002 (0.003) Batch 2.437 (2.256) Remain 00:44:02 loss: 1.1942 Lr: 0.00190
[2024-06-09 01:53:21,405 INFO misc.py line 118 2003007] Train: [1/1][258/1428] Data 0.003 (0.003) Batch 2.205 (2.256) Remain 00:43:59 loss: 1.3395 Lr: 0.00190
[2024-06-09 01:53:23,667 INFO misc.py line 118 2003007] Train: [1/1][259/1428] Data 0.003 (0.003) Batch 2.262 (2.256) Remain 00:43:57 loss: 0.8718 Lr: 0.00190
[2024-06-09 01:53:25,874 INFO misc.py line 118 2003007] Train: [1/1][260/1428] Data 0.002 (0.003) Batch 2.207 (2.256) Remain 00:43:54 loss: 1.1959 Lr: 0.00190
[2024-06-09 01:53:28,212 INFO misc.py line 118 2003007] Train: [1/1][261/1428] Data 0.002 (0.003) Batch 2.338 (2.256) Remain 00:43:52 loss: 0.9121 Lr: 0.00190
[2024-06-09 01:53:30,202 INFO misc.py line 118 2003007] Train: [1/1][262/1428] Data 0.003 (0.003) Batch 1.989 (2.255) Remain 00:43:49 loss: 0.6696 Lr: 0.00190
[2024-06-09 01:53:32,524 INFO misc.py line 118 2003007] Train: [1/1][263/1428] Data 0.003 (0.003) Batch 2.323 (2.255) Remain 00:43:47 loss: 0.9894 Lr: 0.00189
Traceback (most recent call last):
  File "exp/desay_sv/desayseg-pt-v2m2-0-base/code/tools/train_desay.py", line 49, in <module>
    main()
  File "exp/desay_sv/desayseg-pt-v2m2-0-base/code/tools/train_desay.py", line 38, in main
    launch(
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/engines/launch.py", line 74, in launch
    mp.spawn(
  File "/miniconda/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
    return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
  File "/miniconda/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
    while not context.join():
  File "/miniconda/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
    raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 5 terminated with the following error:
Traceback (most recent call last):
  File "/miniconda/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
    fn(i, *args)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/engines/launch.py", line 137, in _distributed_worker
    main_func(*cfg)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/tools/train_desay.py", line 20, in main_worker
    trainer.train()
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/engines/train.py", line 168, in train
    self.run_step()
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/engines/train.py", line 182, in run_step
    output_dict = self.model(input_dict)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1008, in forward
    output = self._run_ddp_forward(*inputs, **kwargs)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 969, in _run_ddp_forward
    return module_to_run(*inputs[0], **kwargs[0])
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/default.py", line 21, in forward
    seg_logits = self.backbone(input_dict)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py", line 573, in forward
    points = self.dec_stages[i](points, skip_points, cluster)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py", line 402, in forward
    return self.blocks(points)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py", line 225, in forward
    points = block(points, reference_index)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py", line 169, in forward
    self.attn(feat, coord, reference_index)
  File "/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/AppData/model/code/exp/desay_sv/desayseg-pt-v2m2-0-base/code/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py", line 118, in forward
    relation_qk = relation_qk + peb
RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 5; 79.35 GiB total capacity; 12.66 GiB already allocated; 113.19 MiB free; 14.17 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

srun: error: hzh-sdc4-e03-u14-sv-dgx03: task 0: Exited with exit code 1
srun: launch/slurm: _step_signal: Terminating StepId=3048295.0

" Tried to allocate 352.00 MiB (GPU 5; 79.35 GiB total capacity; 12.66 GiB already allocated; 113.19 MiB free; 14.17 GiB reserved in total by PyTorch)"
Hi, please check whether multiple tasks are running on a single GPU.
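One way to check that is sketched below, under the assumption that training is launched with one spawned process per GPU (as pointcept/engines/launch.py does) and that each rank exposes its index via the LOCAL_RANK environment variable; torch.cuda.mem_get_info needs a reasonably recent PyTorch:

# Run inside each worker before training starts. If two ranks print the
# same device index, or one device shows far less free memory than its
# neighbours, multiple tasks are sharing that GPU.
import os
import torch

def report_device() -> None:
    local_rank = int(os.environ.get("LOCAL_RANK", 0))
    torch.cuda.set_device(local_rank)  # pin this rank to its own card
    free, total = torch.cuda.mem_get_info()
    print(f"rank {local_rank} -> cuda:{torch.cuda.current_device()} "
          f"({free / 2**30:.1f} GiB free of {total / 2**30:.1f} GiB)")

if __name__ == "__main__":
    report_device()

Running nvidia-smi on the node while the job is up gives the same answer: one python process per GPU is the expected picture, and any extra PID on GPU 5 would account for the roughly 65 GiB that PyTorch never got to reserve.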