Some questions about the IR measures in the code
584251395 opened this issue · comments
这个计算是否有问题,未包含batch_ideal_sorted_labels参数
def torch_precision_at_k(batch_sys_sorted_labels, k=None, gpu=False):
    ''' Precision at k

    Only the labels of the top-k ranked documents are needed, so no ideal
    (label-sorted) ranking is required.

    :param batch_sys_sorted_labels: [batch_size, ranking_size] labels ordered by the
        system's predicted ranking (descending). The batch dimension indexes queries,
        the ranking dimension indexes the documents of each query.
    :param k: cutoff value. It is clipped to ranking_size when larger; None means the
        whole ranking is used.
    :param gpu: kept for backward compatibility. Rank positions are created on the
        device of batch_sys_sorted_labels, so this flag does not affect the result.
    :return: [batch_size, 1] precision at the used cutoff. Note that the denominator
        is the used (clipped) cutoff, not the requested k.
    '''
    max_cutoff = batch_sys_sorted_labels.size(1)
    used_cutoff = max_cutoff if k is None else min(max_cutoff, k)

    # Keep only the labels of the top used_cutoff ranked documents.
    top_labels = batch_sys_sorted_labels[:, 0:used_cutoff]

    # Clamp labels into [0, 1] so that graded relevance (2, 3, ...) counts as 1.
    batch_bi_sys_sorted_labels = torch.clamp(top_labels, min=0, max=1)

    # Running number of relevant documents at each rank position.
    batch_sys_cumsum_reles = torch.cumsum(batch_bi_sys_sorted_labels, dim=1)

    # 1-based rank positions, created on the same device as the labels so the
    # division below never mixes devices.
    rank_positions = torch.arange(used_cutoff, device=batch_sys_sorted_labels.device) + 1.0
    batch_ranks = rank_positions.expand_as(batch_sys_cumsum_reles)

    batch_sys_rankwise_precision = batch_sys_cumsum_reles / batch_ranks

    # Slice (rather than index) the last column to keep the [batch_size, 1] shape.
    batch_sys_p_at_k = batch_sys_rankwise_precision[:, used_cutoff - 1:used_cutoff]
    return batch_sys_p_at_k
def torch_precision_at_ks(batch_sys_sorted_labels, ks=None, gpu=False):
    ''' Precision at ks

    :param batch_sys_sorted_labels: [batch_size, ranking_size] labels ordered by the
        system's predicted ranking (descending).
    :param ks: iterable of cutoff values (required). Cutoffs larger than ranking_size
        cannot be evaluated and their columns are filled with zeros.
    :param gpu: kept for backward compatibility. All tensors are created on the
        device of batch_sys_sorted_labels, so this flag does not affect the result.
    :return: [batch_size, len(ks)] precision values; column i corresponds to ks[i].
    '''
    ks = list(ks)
    batch_size = batch_sys_sorted_labels.size(0)
    valid_max_cutoff = batch_sys_sorted_labels.size(1)
    device = batch_sys_sorted_labels.device

    # Remember the output column of every cutoff that fits into the ranking, so
    # that padding stays aligned with ks even when ks is not sorted.
    valid_cols = [col for col, k in enumerate(ks) if k <= valid_max_cutoff]
    if not valid_cols:
        # No cutoff can be evaluated: every column is padding.
        return torch.zeros(batch_size, len(ks), device=device)

    used_ks = [ks[col] for col in valid_cols]
    max_cutoff = max(used_ks)
    # Zero-based column of each used cutoff within the rank-wise precision matrix.
    inds = torch.as_tensor(used_ks, dtype=torch.long, device=device) - 1

    top_labels = batch_sys_sorted_labels[:, 0:max_cutoff]
    # Clamp labels into [0, 1] so that graded relevance (2, 3, ...) counts as 1.
    batch_bi_sys_sorted_labels = torch.clamp(top_labels, min=0, max=1)
    # Running number of relevant documents at each rank position.
    batch_sys_cumsum_reles = torch.cumsum(batch_bi_sys_sorted_labels, dim=1)

    # 1-based rank positions on the same device as the labels.
    rank_positions = torch.arange(max_cutoff, device=device) + 1.0
    batch_ranks = rank_positions.expand_as(batch_sys_cumsum_reles)

    batch_sys_rankwise_precision = batch_sys_cumsum_reles / batch_ranks
    batch_sys_p_at_ks = batch_sys_rankwise_precision[:, inds]

    if len(used_ks) == len(ks):
        return batch_sys_p_at_ks

    # Scatter the computed columns into a zero matrix; the dtype must match the
    # computed values because indexed assignment does not cast.
    padded_p_at_ks = torch.zeros(batch_size, len(ks), dtype=batch_sys_p_at_ks.dtype, device=device)
    padded_p_at_ks[:, valid_cols] = batch_sys_p_at_ks
    return padded_p_at_ks
@584251395 你好,请仔细考虑一下,计算precision时只需要考虑排序结果的前k个位置中为1的标签,这个标签已包含在batch_sys_sorted_labels中,所以不需要batch_ideal_sorted_labels。