RuntimeError: The size of tensor a (135) must match the size of tensor b (136) at non-singleton dimension 3
Labaien96 opened this issue
Jokin Labaien commented
I am using the ResNeSt101 model for object detection. It works perfectly when I run inference with the whole model, but I get an error when I run inference with only the backbone (I want to inspect the features the backbone extracts).
import detectron2.data.transforms as T
from detectron2.data import (
    MetadataCatalog,
    build_detection_test_loader,
    build_detection_train_loader,
)
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
import torch
import random
from detectron2.utils.visualizer import Visualizer
import cv2
import matplotlib.pyplot as plt
from detectron2.utils.visualizer import ColorMode
import matplotlib.patches as patches

# Pick one random image from the dataset.
for d in random.sample(dataset_dicts, 1):
    im = cv2.imread(d["file_name"])

input_format = cfg.INPUT.FORMAT
model = build_model(cfg)
model.eval()
metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
checkpointer = DetectionCheckpointer(model).load("output/ResNest101_Faster_RCNN_model_Small_Anchors.pth")
# checkpointer.load(cfg.MODEL.WEIGHTS)
transform_gen = T.ResizeShortestEdge(
    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
)
with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
    # Apply pre-processing to the image.
    original_image = im
    if input_format == "RGB":
        # whether the model expects BGR inputs or RGB
        original_image = original_image[:, :, ::-1]
    height, width = original_image.shape[:2]
    image = transform_gen.get_transform(original_image).apply_image(original_image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    inputs = {"image": image, "height": height, "width": width}
    # predictions = model.backbone(image.unsqueeze(0).cuda())[0]
    predictions = model([inputs])[0]
This code works well. The following also works:
with torch.no_grad():
    model.backbone(torch.randn(2, 3, 574, 768))
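(As far as I can tell, this random size only works by coincidence: 768 halves cleanly at every stage, and 574's successive downsampled sizes, 287 → 144 → 72 → 36 → 18, happen to match the doubled top-down sizes on the way back up. What the FPN actually requires can be read off the backbone; this assumes model.backbone is detectron2's FPN, which exposes the requirement as a property:)

# Assumption: model.backbone is detectron2's FPN. Its size_divisibility
# property is the multiple that input H and W should be padded to --
# typically 32 for a pyramid built on stride-32 features.
print(model.backbone.size_divisibility)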
The problem begins when I run the loaded image through the backbone alone. The preprocessed image has size (3, 800, 1074), which I convert to (1, 3, 800, 1074) by adding a batch dimension:
image = image.unsqueeze(0)
with torch.no_grad():
    model.backbone(image)
Here I get this error:
RuntimeError                              Traceback (most recent call last)
<ipython-input-35-290a6709c3a6> in <module>
      1 with torch.no_grad():
----> 2     model.backbone(image_proba)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/detectron2/modeling/backbone/fpn.py in forward(self, x)
    131             top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest")
    132             lateral_features = lateral_conv(features)
--> 133             prev_features = lateral_features + top_down_features
    134             if self._fuse_type == "avg":
    135                 prev_features /= 2

RuntimeError: The size of tensor a (135) must match the size of tensor b (136) at non-singleton dimension 3
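If I trace the stride arithmetic, the failure seems to come from exactly this top-down fusion: a width of 1074 downsamples to 537 → 269 → 135 → 68 → 34, so upsampling the 68-wide map by 2 gives 136, which cannot be added to the 135-wide lateral feature (hence 135 vs. 136 at dimension 3). The full model never hits this because GeneralizedRCNN.preprocess_image pads every image up to a multiple of model.backbone.size_divisibility before calling the backbone. Below is a minimal sketch of doing the same before a direct backbone call, assuming a GeneralizedRCNN meta-architecture so that preprocess_image, device, and ImageList.from_tensors are available as in detectron2:

from detectron2.structures import ImageList

with torch.no_grad():
    # Option 1: reuse the model's own preprocessing, which normalizes
    # the image and pads H/W to a multiple of size_divisibility.
    images = model.preprocess_image([inputs])  # "inputs" dict from above
    features = model.backbone(images.tensor)   # dict of pyramid levels, e.g. "p2".."p6"

    # Option 2: pad a single CHW tensor by hand with the same helper
    # that preprocess_image uses internally (normalization omitted here).
    padded = ImageList.from_tensors(
        [image.squeeze(0).to(model.device)], model.backbone.size_divisibility
    )
    features = model.backbone(padded.tensor)

With the padding in place, 1074 becomes 1088 (34 × 32), the downsampled widths are 544 → 272 → 136 → 68 → 34, and the element-wise sum in fpn.py lines up at every level.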