fcjian / TOOD

TOOD: Task-aligned One-stage Object Detection, ICCV2021 Oral

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

i changed the number of ratios, then model can not train ,where should i have to modify futher?

joeyslv opened this issue · comments

i modify the ratios=[1] to ratios=[2.444, 3.182, 1.574, 1.721, 0.994, 1.163, 0.751, 0.534] then have a error like this:

2022-03-29 15:12:23,844 - mmdet - INFO - workflow: [('train', 1)], max: 100 epochs
2022-03-29 15:12:23,844 - mmdet - INFO - Checkpoints will be saved to E:\Object-Detection\Github\radar-detection\work_dirs\radar_tood by HardDiskBackend.
D:\App\anaconda\envs\swin-t\lib\site-packages\torch\nn\functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  ..\c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Traceback (most recent call last):
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 50, in train
    self.run_iter(data_batch, train_mode=True, **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 30, in run_iter
    **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmcv\parallel\data_parallel.py", line 75, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmdet\models\detectors\base.py", line 248, in train_step
    losses = self(**data)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmcv\runner\fp16_utils.py", line 98, in new_func
    return old_func(*args, **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmdet\models\detectors\base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmdet\models\detectors\single_stage.py", line 84, in forward_train
    gt_labels, gt_bboxes_ignore)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmdet\models\dense_heads\base_dense_head.py", line 330, in forward_train
    outs = self(x)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:\App\anaconda\envs\swin-t\lib\site-packages\mmdet\models\dense_heads\tood_head.py", line 263, in forward
    b, h, w, 4).permute(0, 3, 1, 2) / stride[0]
RuntimeError: shape '[8, 32, 168, 4]' is invalid for input of size 1376256

and this is my config file

dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=1,
    train=dict(
        type='CocoDataset',
        ann_file='E:/Object-Detection/data_radar/devkit/voc07_train.json',
        img_prefix='E:/Object-Detection/data_radar/devkit/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ],
        classes=('loose_l', 'loose_s', 'poor_l', 'porous')),
    val=dict(
        type='CocoDataset',
        ann_file='E:/Object-Detection/data_radar/devkit/voc07_val.json',
        img_prefix='E:/Object-Detection/data_radar/devkit/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        classes=('loose_l', 'loose_s', 'poor_l', 'porous')),
    test=dict(
        type='CocoDataset',
        ann_file='E:/Object-Detection/data_radar/devkit/voc07_test.json',
        img_prefix='E:/Object-Detection/data_radar/devkit/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        classes=('loose_l', 'loose_s', 'poor_l', 'porous')))
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(interval=10)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='SetEpochInfoHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
model = dict(
    type='TOOD',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='TOODHead',
        num_classes=4,
        in_channels=256,
        stacked_convs=6,
        feat_channels=256,
        anchor_type='anchor_based',
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[2.444, 3.182, 1.574, 1.721, 0.994, 1.163, 0.751, 0.534],
            octave_base_scale=1,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        initial_loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            activated=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            activated=True,
            beta=2.0,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
    train_cfg=dict(
        initial_epoch=4,
        initial_assigner=dict(type='ATSSAssigner', topk=9),
        assigner=dict(type='TaskAlignedAssigner', topk=13),
        alpha=1,
        beta=6,
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
classes = ('loose_l', 'loose_s', 'poor_l', 'porous')
work_dir = './work_dirs\radar_tood'
auto_resume = False
gpu_ids = [0]