使用预训练模型在got10k上测试与leaderboard上的分数不一致
Z-Xiong opened this issue · comments
Zhuang Xiong commented
您好,我使用您提供的两个预训练模型在got10k测试,得到的结果如下:
Method | AO | SR0.50 | SR0.75 | Hz | Hardware | Language |
---|---|---|---|---|---|---|
TransT_N2 | 0.590 | 0.683 | 0.492 | 11.58 fps | 3090 | Python |
TransT_N4 | 0.610 | 0.693 | 0.533 | 11.92 fps | 3090 | Python |
而在leaderboard上的评分比这个高很多,不知是不是我的推理代码有问题,代码如下:
from got10k.trackers import Tracker
from got10k.experiments import ExperimentGOT10k
import numpy as np
import math
import torchvision.transforms.functional as tvisf
import cv2
import torch
import torch.nn.functional as F
from pytracking.utils.loading import load_network
from easydict import EasyDict as edict
class TransT(Tracker):
    """got10k-toolkit wrapper that runs a TransT network as a tracker.

    Follows the pysot-style Siamese tracking loop: ``init`` crops the
    template patch around the first-frame box and caches its features in
    the network; ``update`` crops a search region around the previous
    center, runs the network, and decodes a new box.

    NOTE(review): the caller is responsible for putting ``net`` into
    ``eval()`` mode before tracking — leaving it in training mode was the
    root cause of the depressed GOT-10k scores reported in this issue.
    """

    def __init__(self, name, net, window_penalty=0.49, exemplar_size=128, instance_size=256):
        # is_deterministic=True: no stochastic component, so the toolkit
        # runs a single pass per sequence.
        super(TransT, self).__init__(
            name=name,
            is_deterministic=True)
        self.net = net                          # TransT model (template/track interface)
        self.window_penalty = window_penalty    # cosine-window blend weight in [0, 1]
        self.exemplar_size = exemplar_size      # template patch side, px
        self.instance_size = instance_size      # search patch side, px

    def _convert_score(self, score):
        """Flatten the 2-class logits map to a 1-D foreground-probability array.

        Channel 0 of the softmax is taken as the foreground probability
        (presumably the TransT convention — confirm against the network head).
        """
        score = score.permute(2, 1, 0).contiguous().view(2, -1).permute(1, 0)
        score = F.softmax(score, dim=1).data[:, 0].cpu().numpy()
        return score

    def _convert_bbox(self, delta):
        """Flatten predicted boxes to a (4, N) numpy array of normalized
        [cx, cy, w, h] values (scaled to pixels by the caller)."""
        delta = delta.permute(2, 1, 0).contiguous().view(4, -1)
        delta = delta.data.cpu().numpy()
        return delta

    def _bbox_clip(self, cx, cy, width, height, boundary):
        """Clamp a center-format box to the image.

        boundary is image.shape[:2] == (H, W), so index 1 bounds x and
        index 0 bounds y. Width/height are floored at 10 px so the target
        cannot collapse to nothing.
        """
        cx = max(0, min(cx, boundary[1]))
        cy = max(0, min(cy, boundary[0]))
        width = max(10, min(width, boundary[1]))
        height = max(10, min(height, boundary[0]))
        return cx, cy, width, height

    def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
        """Crop a square ``original_sz`` patch centered at ``pos``, pad any
        out-of-image region with the channel mean, resize to ``model_sz``,
        and return it as a (1, C, H, W) float32 CUDA tensor.

        args:
            im: H x W x C uint8 image (RGB order per ``init``/``update``)
            pos: (x, y) center position
            model_sz: output side length after resize
            original_sz: crop side length in the source image
            avg_chans: per-channel mean used as padding value
        """
        if isinstance(pos, float):
            pos = [pos, pos]
        sz = original_sz
        im_sz = im.shape
        c = (original_sz + 1) / 2
        # floor(x + 0.5) gives identical rounding under py2 and py3
        # (round() changed to banker's rounding in py3).
        # context_xmin = round(pos[0] - c) # py2 and py3 round
        context_xmin = np.floor(pos[0] - c + 0.5)
        context_xmax = context_xmin + sz - 1
        # context_ymin = round(pos[1] - c)
        context_ymin = np.floor(pos[1] - c + 0.5)
        context_ymax = context_ymin + sz - 1
        # how far the crop extends past each image edge
        left_pad = int(max(0., -context_xmin))
        top_pad = int(max(0., -context_ymin))
        right_pad = int(max(0., context_xmax - im_sz[1] + 1))
        bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
        # shift crop coordinates into the padded image's frame
        context_xmin = context_xmin + left_pad
        context_xmax = context_xmax + left_pad
        context_ymin = context_ymin + top_pad
        context_ymax = context_ymax + top_pad
        r, c, k = im.shape
        if any([top_pad, bottom_pad, left_pad, right_pad]):
            # build a padded canvas, paste the image, fill the borders
            # with the channel average, then crop from the canvas
            size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k)
            te_im = np.zeros(size, np.uint8)
            te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
            if top_pad:
                te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
            if bottom_pad:
                te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
            if left_pad:
                te_im[:, 0:left_pad, :] = avg_chans
            if right_pad:
                te_im[:, c + left_pad:, :] = avg_chans
            im_patch = te_im[int(context_ymin):int(context_ymax + 1),
                             int(context_xmin):int(context_xmax + 1), :]
        else:
            # crop lies fully inside the image: slice directly, no copy
            im_patch = im[int(context_ymin):int(context_ymax + 1),
                          int(context_xmin):int(context_xmax + 1), :]
        if not np.array_equal(model_sz, original_sz):
            im_patch = cv2.resize(im_patch, (model_sz, model_sz))
        # HWC uint8 -> 1xCxHxW float32 tensor on GPU
        im_patch = im_patch.transpose(2, 0, 1)
        im_patch = im_patch[np.newaxis, :, :, :]
        im_patch = im_patch.astype(np.float32)
        im_patch = torch.from_numpy(im_patch)
        im_patch = im_patch.cuda()
        return im_patch

    def initialize_features(self):
        # if not getattr(self, 'features_initialized', False):
        # self.net.initialize()
        # No-op beyond the flag: network initialization is commented out
        # (presumably already done by load_network — confirm).
        self.features_initialized = True

    def init(self, image, box):
        """First-frame initialization: cache the template features.

        box is [x, y, w, h] in pixels (got10k convention).
        """
        # PIL to np.array, (H, W, C)
        image = np.array(image).astype(np.uint8)
        # 32x32 cosine window flattened to penalize large displacements;
        # 32 presumably matches the score-map side — TODO confirm.
        hanning = np.hanning(32)
        window = np.outer(hanning, hanning)
        self.window = window.flatten()
        # Initialize
        self.initialize_features()
        self.center_pos = np.array([box[0] + box[2] / 2,
                                    box[1] + box[3] / 2])
        self.size = np.array([box[2], box[3]])
        # calculate z crop size: target plus 0.5*(w+h) context on each side
        w_z = self.size[0] + (2 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        h_z = self.size[1] + (2 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        s_z = math.ceil(math.sqrt(w_z * h_z))
        # calculate channel average (used as padding color)
        self.channel_average = np.mean(image, axis=(0, 1))
        # get crop
        z_crop = self.get_subwindow(image, self.center_pos,
                                    self.exemplar_size,
                                    s_z, self.channel_average)
        # normalize to [0,1] then ImageNet mean/std
        z_crop = z_crop.float().mul(1.0 / 255.0).clamp(0.0, 1.0)
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.inplace = False
        z_crop[0] = tvisf.normalize(z_crop[0], self.mean, self.std, self.inplace)
        # initialize template feature
        self.net.template(z_crop)
        self.box = box

    def update(self, image):
        """Track one frame; returns [x, y, w, h] in pixels."""
        image = np.array(image).astype(np.uint8)
        # calculate x crop size: search region is ~4x scale of the template
        w_x = self.size[0] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        h_x = self.size[1] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        s_x = math.ceil(math.sqrt(w_x * h_x))
        # get crop
        x_crop = self.get_subwindow(image, self.center_pos,
                                    self.instance_size,
                                    round(s_x), self.channel_average)
        # normalize (same ImageNet stats as the template)
        x_crop = x_crop.float().mul(1.0 / 255.0).clamp(0.0, 1.0)
        x_crop[0] = tvisf.normalize(x_crop[0], self.mean, self.std, self.inplace)
        # track
        outputs = self.net.track(x_crop)
        score = self._convert_score(outputs['pred_logits'])
        pred_bbox = self._convert_bbox(outputs['pred_boxes'])
        # window penalty: blend raw score with the cosine window
        pscore = score * (1 - self.window_penalty) + \
                 self.window * self.window_penalty
        best_idx = np.argmax(pscore)
        bbox = pred_bbox[:, best_idx]
        # predictions are normalized to the crop; scale to pixels and map
        # from crop coordinates back to image coordinates
        bbox = bbox * s_x
        cx = bbox[0] + self.center_pos[0] - s_x / 2
        cy = bbox[1] + self.center_pos[1] - s_x / 2
        width = bbox[2]
        height = bbox[3]
        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                                height, image.shape[:2])
        # update state for the next frame
        self.center_pos = np.array([cx, cy])
        self.size = np.array([width, height])
        # center format -> corner format expected by got10k
        bbox = [cx - width / 2,
                cy - height / 2,
                width,
                height]
        self.box = bbox
        return self.box
if __name__ == '__main__':
    # ---- setup tracker ----
    settings = edict()
    settings.device = 'cuda'
    settings.description = 'TransT with default settings.'
    settings.root_dir = 'GOT-10k'
    # Fixed: the original line was missing the opening quote of the path
    # string, which is a SyntaxError as pasted.
    settings.model_path = 'TransT/pytracking/networks/transt.pth'

    model = load_network(settings.model_path)
    # Fixed: switch the network to inference mode. Without eval(),
    # BatchNorm/Dropout layers stay in training mode and the GOT-10k
    # scores come out well below the leaderboard numbers — this was the
    # root cause confirmed at the end of this issue thread.
    model.eval()
    tracker = TransT(name="TransT", net=model)

    # run the GOT-10k benchmark (test subset)
    experiment = ExperimentGOT10k(settings.root_dir, subset='test', result_dir="results", report_dir="reports")
    experiment.run(tracker)

    # report performance
    experiment.report([tracker.name])
chenxin commented
使用github上提供的测试代码,与论文中的性能应是一致的。
如果代码是用的github的代码,有可能是数据集不完整等原因导致的,可以尝试测试其他数据集的指标观察是否有异常,来排查
Zhuang Xiong commented
使用github上提供的测试代码,与论文中的性能应是一致的。 如果代码是用的github的代码,有可能是数据集不完整等原因导致的,可以尝试测试其他数据集的指标观察是否有异常,来排查
谢谢回复,我找到原因了,是因为我加载模型后没有启用eval模式,添加了eval()后就和排行榜的测试结果一致了。