lyuwenyu / RT-DETR

[CVPR 2024] Official RT-DETR (RTDETR paddle pytorch), Real-Time DEtection TRansformer, DETRs Beat YOLOs on Real-time Object Detection. 🔥 🔥 🔥

Have a question about IoU metric: bbox

MinGiSa opened this issue · comments

When I run training with the PyTorch version, I obtain the following IoU metrics. I modified the code to increase the resolution from 640 to 1024 and used images like the one provided below:

Multi_00030

However, the -1.000 scores do not change even after further training. What should I fix, and could you provide some advice?

IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.784
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.898
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.891
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.785
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.586
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.914
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.930
best_stat: {'epoch': 70, 'coco_eval_bbox': 0.7925207858496603}
Training time 0:32:39

I think this involves how coco_metric defines objects of different sizes. In your case, it may consider them all to be large objects. If the detection results are good, you can ignore the -1.000 scores.

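(For reference: pycocotools reports -1.000 for an area bucket when no ground-truth boxes fall into it. Below is a minimal sketch for counting how your own annotations are bucketed, assuming a COCO-format json; the annotation path is a placeholder and the thresholds are the pycocotools defaults.)

# Count ground-truth boxes per COCO area bucket (pycocotools default thresholds, in pixels^2).
from pycocotools.coco import COCO

AREA_RNG = {
    'small': (0, 32 ** 2),
    'medium': (32 ** 2, 96 ** 2),
    'large': (96 ** 2, 1e5 ** 2),
}

coco = COCO('path/to/instances_val.json')  # placeholder path to your annotation file
counts = {name: 0 for name in AREA_RNG}
for ann in coco.loadAnns(coco.getAnnIds()):
    for name, (lo, hi) in AREA_RNG.items():
        if lo <= ann['area'] < hi:
            counts[name] += 1
print(counts)  # a zero count here corresponds to a -1.000 row in the eval table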

Thank you for the answer.

I have a new question about the results.
I switched the model to rtdetr_r101vd_6x_coco.yml and obtained these training results.

===========================================================
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.921
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.921
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.935
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.935
best_stat: {'epoch': 88, 'coco_eval_bbox': 0.9470352035203521}
Training time 1:32:23

===========================================================

However, when I perform inference, the results are poor. Is there any solution?

threshold 0 result
Multi_thresh

threshold 0.1 result
multi_thresh_01

=============================================================

import torch
from torch import nn
from torchvision.transforms import transforms
from PIL import Image, ImageDraw
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
from src.core import YAMLConfig
import argparse
from pathlib import Path
import time


class ImageReader:
    def __init__(self, resize=224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
        self.transform = transforms.Compose([
            # transforms.Resize((resize, resize)) if isinstance(resize, int) else transforms.Resize((resize[0], resize[1])),
            transforms.ToTensor(),
            # transforms.Normalize(mean=mean, std=std),
        ])
        self.resize = resize
        self.pil_img = None

    def __call__(self, image_path, *args, **kwargs):
        """Read the image, resize it, and return a batched tensor."""
        self.pil_img = Image.open(image_path).convert('RGB').resize((self.resize, self.resize))
        return self.transform(self.pil_img).unsqueeze(0)


class Model(nn.Module):
    def __init__(self, config=None, ckpt="") -> None:
        super().__init__()
        self.cfg = YAMLConfig(config, resume=ckpt)
        if ckpt:
            checkpoint = torch.load(ckpt, map_location='cpu')
            if 'ema' in checkpoint:
                state = checkpoint['ema']['module']
            else:
                state = checkpoint['model']
        else:
            raise AttributeError('only support resume to load model.state_dict by now.')

        # NOTE load train mode state -> convert to deploy mode
        self.cfg.model.load_state_dict(state)

        self.model = self.cfg.model.deploy()
        self.postprocessor = self.cfg.postprocessor.deploy()
        # print(self.postprocessor.deploy_mode)

    def forward(self, images, orig_target_sizes):
        outputs = self.model(images)
        return self.postprocessor(outputs, orig_target_sizes)


def get_argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default=r".\RT-DETR\rtdetr_pytorch\configs\rtdetr\rtdetr_r101vd_6x_coco.yml", help="path to the config file")
    parser.add_argument("--ckpt", default=r".\RT-DETR\rtdetr_pytorch\logs\checkpoint.pth", help="path to the checkpoint file")
    parser.add_argument("--image", default=r".\RT-DETR\rtdetr_pytorch\dataset\customDataSet\test", help="directory of images to run inference on")
    parser.add_argument("--output_dir", default=r".\RT-DETR\rtdetr_pytorch\inference", help="directory to save the results")
    parser.add_argument("--device", default="cuda")
    return parser


def main(args):
    device = torch.device(args.device)
    model = Model(config=args.config, ckpt=args.ckpt)
    model.to(device=device)
    for filename in os.listdir(args.image):
        img_path = Path(os.path.join(args.image, filename))
        reader = ImageReader(resize=1024)

        img = reader(img_path).to(device)
        size = torch.tensor([[img.shape[2], img.shape[3]]]).to(device)
        start = time.time()
        output = model(img, size)
        print(f"elapsedTime:{time.time() - start:.4f}s")
        labels, boxes, scores = output
        im = reader.pil_img
        draw = ImageDraw.Draw(im)
        thrh = 0.1

        for i in range(img.shape[0]):
            scr = scores[i]
            lab = labels[i][scr > thrh]
            box = boxes[i][scr > thrh]

            for j, b in enumerate(box):
                b = b.tolist()
                draw.rectangle(b, outline='red')
                # label each box with its own class id (the original indexed
                # labels with the batch index i, which mislabels every box)
                draw.text((b[0], b[1]), text=str(lab[j].item()), fill='blue')

        save_path = Path(args.output_dir) / img_path.name
        im.save(save_path)
        print(f"savePath:{save_path}")


if __name__ == "__main__":
    main(get_argparser().parse_args())
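(One quick check, given the poor inference results: inspect the raw score distribution the postprocessor returns before any threshold is applied, to see whether the model is confident on the test image at all. A small sketch that reuses the model, img, and size variables from the loop above; nothing here is part of the official repo API beyond that forward call.)

# Sketch: print score statistics right after the forward pass to gauge
# how confident the detector is before thresholding the outputs.
labels, boxes, scores = model(img, size)
print('max score        :', scores.max().item())
print('boxes above 0.50 :', (scores > 0.50).sum().item())
print('boxes above 0.10 :', (scores > 0.10).sum().item())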

@MinGiSa I'm having the same issue, how did you fix it?

Nope, not yet solved