I have a question about IoU metric: bbox
MinGiSa opened this issue · comments
When I run training with the PyTorch version, I obtain the following IoU metrics. I modified the code to increase the resolution from 640 to 1024 and used images like the one provided below.
However, the -1.000 scores do not change even after further training. What should I fix, and could you provide some advice?
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.784
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.898
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.891
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.785
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.586
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.914
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.930
best_stat: {'epoch': 70, 'coco_eval_bbox': 0.7925207858496603}
Training time 0:32:39
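I think this comes from how coco_metric defines objects of different sizes (small: area < 32², medium: 32² to 96², large: > 96² pixels). In your case, it probably treats all of them as large objects, so there are no small or medium ground-truth boxes to evaluate and those rows are reported as -1.000. If the detection results are good, you can ignore the -1.000 scores.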
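I think this comes from how coco_metric defines objects of different sizes (small: area < 32², medium: 32² to 96², large: > 96² pixels). In your case, it probably treats all of them as large objects, so there are no small or medium ground-truth boxes to evaluate and those rows are reported as -1.000. If the detection results are good, you can ignore the -1.000 scores.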
Thank you for the answer.
I have a new question about the results.
I changed the model with rtdetr_r101vd_6x_coco.yml and obtained these training results.
===========================================================
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.921
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.921
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.930
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.935
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.935
best_stat: {'epoch': 88, 'coco_eval_bbox': 0.9470352035203521}
Training time 1:32:23
===========================================================
However, when I perform inference, the results are poor. Is there any solution?
=============================================================
import torch
from torch import nn
from torchvision.transforms import transforms
from PIL import Image, ImageDraw
import sys
import os
# Make the parent directory of this script importable so `src.core` resolves.
# `__file__` (not the undefined name `file`) is the path of this module.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
from src.core import YAMLConfig
import argparse
from pathlib import Path
import time
class ImageReader:
    """Load an image file and convert it to a batched tensor.

    The image is opened with PIL, converted to RGB, resized to a square of
    ``resize`` x ``resize`` pixels and passed through ``transforms.ToTensor``.
    The resized PIL image of the most recent call is kept in ``pil_img`` so
    callers can draw on it.
    """

    def __init__(self, resize=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        """Build the tensor transform.

        Args:
            resize: Edge length, in pixels, of the square the image is
                resized to.
            mean: Per-channel mean. Accepted for interface compatibility but
                unused: no normalisation step is part of the transform.
            std: Per-channel standard deviation. Unused, same as ``mean``.
        """
        # Resizing is done with PIL in __call__, and no Normalize step is
        # applied, so the pipeline consists of ToTensor only.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])
        self.resize = resize
        self.pil_img = None

    def __call__(self, image_path, *args, **kwargs):
        """Read an image.

        Args:
            image_path: Path of the image file to open.

        Returns:
            The transformed image with an extra leading dimension added by
            ``unsqueeze(0)``.
        """
        # The context manager closes the underlying file deterministically;
        # convert()/resize() return new images that do not depend on it.
        with Image.open(image_path) as raw:
            self.pil_img = raw.convert('RGB').resize((self.resize, self.resize))
        return self.transform(self.pil_img).unsqueeze(0)
class Model(nn.Module):
    """Inference wrapper: a model built from a YAML config plus its postprocessor.

    The network and postprocessor are taken from ``YAMLConfig``, the weights
    are loaded from a checkpoint file, and both parts are switched to their
    deploy form via ``deploy()``.
    """

    def __init__(self, confg=None, ckpt="") -> None:
        """Build the model from a config and load checkpoint weights.

        Args:
            confg: Path of the YAML configuration passed to ``YAMLConfig``.
            ckpt: Path of the checkpoint file. Required.

        Raises:
            AttributeError: If ``ckpt`` is empty; loading weights from a
                checkpoint is the only supported way to initialise the model.
        """
        super().__init__()
        # Fail fast, before any config parsing, when no checkpoint is given.
        if not ckpt:
            raise AttributeError('only support resume to load model.state_dict by now.')

        self.cfg = YAMLConfig(confg, resume=ckpt)

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(ckpt, map_location='cpu')
        # Prefer the weights stored under 'ema' (presumably the EMA copy of
        # the model) and fall back to the plain 'model' entry.
        if 'ema' in checkpoint:
            state = checkpoint['ema']['module']
        else:
            state = checkpoint['model']

        # NOTE load train mode state -> convert to deploy mode
        self.cfg.model.load_state_dict(state)
        self.model = self.cfg.model.deploy()
        self.postprocessor = self.cfg.postprocessor.deploy()

    def forward(self, images, orig_target_sizes):
        """Run the network and post-process its raw outputs.

        Args:
            images: Input batch passed to the network.
            orig_target_sizes: Image sizes forwarded to the postprocessor.

        Returns:
            Whatever the postprocessor returns; ``main`` in this file unpacks
            it as ``(labels, boxes, scores)``.
        """
        outputs = self.model(images)
        return self.postprocessor(outputs, orig_target_sizes)
def get_argparser():
    """Create the command-line parser for the inference script.

    Default paths use forward slashes so they resolve on both Windows and
    POSIX systems (a backslash is an ordinary filename character on POSIX).
    The default device is ``cuda`` when CUDA is available and ``cpu``
    otherwise, so the script also starts on CPU-only machines.

    Returns:
        An ``argparse.ArgumentParser`` with the options ``--config``,
        ``--ckpt``, ``--image``, ``--output_dir`` and ``--device``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="./RT-DETR/rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml", help="配置文件路径")
    parser.add_argument("--ckpt", default="./RT-DETR/rtdetr_pytorch/logs/checkpoint.pth", help="权重文件路径")
    parser.add_argument("--image", default="./RT-DETR/rtdetr_pytorch/dataset/customDataSet/test", help="待推理图片路径")
    parser.add_argument("--output_dir", default="./RT-DETR/rtdetr_pytorch/inference", help="输出文件保存路径")
    parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
    return parser
def main(args):
    """Run detection on every file in ``args.image`` and save annotated copies.

    For each file the image is resized to 1024x1024, passed through the
    model, and every detection whose score exceeds the threshold is drawn
    (red box, blue label) on the resized image, which is then written to
    ``args.output_dir`` under the same file name.

    Args:
        args: Parsed command-line namespace with the attributes ``device``,
            ``config``, ``ckpt``, ``image`` and ``output_dir``.
    """
    device = torch.device(args.device)
    model = Model(confg=args.config, ckpt=args.ckpt)
    model.to(device=device)
    # Make sure layers such as dropout / batch norm run in inference mode.
    model.eval()

    # Create the output directory up front so im.save() cannot fail on it.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Loop invariants: one reader and one threshold for all images.
    reader = ImageReader(resize=1024)
    thrh = 0.1

    for filename in sorted(os.listdir(args.image)):
        img_path = Path(os.path.join(args.image, filename))
        if not img_path.is_file():
            # Sub-directories cannot be opened as images.
            continue

        img = reader(img_path).to(device)
        # NOTE(review): this is (shape[2], shape[3]) of the resized tensor.
        # The input is square here, so the order does not matter; confirm the
        # order the postprocessor expects before using non-square inputs.
        size = torch.tensor([[img.shape[2], img.shape[3]]]).to(device)

        start = time.time()
        # No gradients are needed for inference; this avoids building the
        # autograd graph and keeps memory usage down.
        with torch.no_grad():
            output = model(img, size)
        print(f"elapsedTime:{time.time() - start:.4f}s")

        labels, boxes, scores = output
        im = reader.pil_img
        draw = ImageDraw.Draw(im)
        for i in range(img.shape[0]):
            keep = scores[i] > thrh
            # Pair each kept box with its own label. Indexing the labels with
            # the batch index would tag every box with the first label.
            for lab, b in zip(labels[i][keep], boxes[i][keep]):
                xyxy = b.tolist()
                draw.rectangle(xyxy, outline='red')
                draw.text((xyxy[0], xyxy[1]), text=str(lab.item()), fill='blue')

        save_path = output_dir / img_path.name
        im.save(save_path)
        print(f"savePath:{save_path}")
# Run inference only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main(get_argparser().parse_args())