AttributeError: 'DataParallel' object has no attribute 'train_model'

Question

AttributeError: 'DataParallel' object has no attribute 'train_model'

sxhxliang opened this issue 6 years ago · comments

Thanks for your implementation, but I got an error when using 4 GPUs to train this model:

# model = torch.nn.DataParallel(model, device_ids=[0,1,2,3])
Traceback (most recent call last):
File "bdd_coco.py", line 567, in <module>
model.train_model(dataset_train, dataset_val,
File "/home/user/.conda/envs/pytorch/lib/python3.5/site-packages/torch/nn/modules/module.py", line 532, in __getattr__
type(self).__name__, name))
AttributeError: 'DataParallel' object has no attribute 'train_model'

Jianyuan Wang · Answer 1 · Sat Sep 22 2018 13:29:06 GMT+0800 (China Standard Time)

@AaronLeong Note that if you use 'DataParallel', the model is wrapped in DataParallel(). This means you need to change model.function() to model.module.function() in the code that follows.
For example,
model.train_model --> model.module.train_model

Shi hua · Answer 2 · Sat Sep 22 2018 15:38:33 GMT+0800 (China Standard Time)

@jytime I have tried this setting, but only one GPU actually does any work.

`
import os
import time
import numpy as np
import scipy.misc
import scipy.ndimage
import skimage.color
import skimage.io

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

import zipfile
import urllib.request
import shutil

from config import Config
import utils
import model as modellib

import torch

# Working directory at import time; the default weights file and the log
# directory below are resolved relative to it.
ROOT_DIR = os.getcwd()

# Location of the weights file named "mask_rcnn_coco.pth" inside ROOT_DIR.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth")

# Defaults for the --logs and --year command-line options.
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
DEFAULT_DATASET_YEAR = "2018"

class CocoConfig(Config):
    """Configuration overrides used when working with the "bdd" dataset.

    Only the three attributes below are overridden here; every other
    setting is inherited from the base ``Config`` class.
    """

    # Identifier of this configuration.
    NAME = "bdd"

    # NOTE(review): presumably the per-GPU batch size; 16 is large for
    # Mask R-CNN -- confirm it fits in memory on the target GPUs.
    IMAGES_PER_GPU = 16

    # NOTE(review): presumably the number of GPUs the run is sized for;
    # the inference configuration in the script section overrides it to 1.
    GPU_COUNT = 4

class CocoDataset(utils.Dataset):
    """Dataset stored in COCO annotation format, registered as source "bdd".

    Classes and images are added under the source name ``"bdd"``; masks are
    decoded from the COCO-style ``segmentation`` field of each annotation.
    """

    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Register the classes and images of one dataset subset.

        dataset_dir: root directory; annotations are read from
            ``<dataset_dir>/annotations/instances_<subset><year>.json`` and
            image paths are built under ``<dataset_dir>/images/100k/``.
        subset: subset name; "minival" and "valminusminival" read their own
            annotation file but take their images from the "val" folder.
        year: suffix appended to the subset name.
        class_ids: category ids to load; all categories when empty/None.
        class_map: accepted for interface compatibility; not used.
        return_coco: when true, return the ``COCO`` object.
        auto_download: when exactly ``True``, call ``self.auto_download``.

        Returns the ``COCO`` object if ``return_coco`` is true, else None.
        """
        if auto_download is True:
            # NOTE(review): auto_download is not defined in this class;
            # presumably inherited from utils.Dataset -- verify before
            # running with --download.
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}/{}".format(dataset_dir, 'images/100k', subset + year)

        if not class_ids:
            # No explicit selection: use every category in the file.
            class_ids = sorted(coco.getCatIds())

        if class_ids:
            # Keep only images containing at least one selected category.
            image_ids = []
            for cat_id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[cat_id])))
            # De-duplicate: an image may contain several categories.
            image_ids = list(set(image_ids))
        else:
            # Annotation file declares no categories: take every image.
            image_ids = list(coco.imgs.keys())

        for cat_id in class_ids:
            self.add_class("bdd", cat_id, coco.loadCats(cat_id)[0]["name"])

        for img_id in image_ids:
            img = coco.imgs[img_id]
            self.add_image(
                "bdd", image_id=img_id,
                path=os.path.join(image_dir, img['file_name']),
                width=img["width"],
                height=img["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[img_id], catIds=class_ids, iscrowd=None)))

        if return_coco:
            return coco

    def load_mask(self, image_id):
        """Build the instance masks of one image.

        Returns ``(mask, class_ids)`` where ``mask`` stacks one decoded mask
        per instance along axis 2 and ``class_ids`` is an int32 array with
        one entry per instance (negated for crowd annotations). Images from
        another source, or without usable annotations, are delegated to the
        parent class.
        """
        image_info = self.image_info[image_id]

        if image_info["source"] != "bdd":
            return super(CocoDataset, self).load_mask(image_id)

        height = image_info["height"]
        width = image_info["width"]

        instance_masks = []
        class_ids = []
        for annotation in image_info["annotations"]:
            class_id = self.map_source_class_id(
                "bdd.{}".format(annotation['category_id']))
            if not class_id:
                # Falsy id: category not mapped to a usable class; skip.
                continue

            m = self.annToMask(annotation, height, width)
            if m.max() < 1:
                # Mask decoded to nothing; skip the instance.
                continue

            # .get(): tolerate annotations without an 'iscrowd' key.
            if annotation.get('iscrowd', 0):
                # Crowd instances are marked with a negative class id.
                class_id *= -1
                # A crowd mask whose size disagrees with the image is
                # replaced by a mask covering the whole image.
                if m.shape[0] != height or m.shape[1] != width:
                    m = np.ones([height, width], dtype=bool)

            instance_masks.append(m)
            class_ids.append(class_id)

        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids

        return super(CocoDataset, self).load_mask(image_id)

    def load_drivable(self, image_id, use_color_maps=False, use_one_hot_label=True):
        """Load the drivable-area map that belongs to an image.

        The map path is derived from the image path: ``.jpg`` is replaced by
        ``_drivable_color.png`` or ``_drivable_id.png`` and ``images/100k``
        by ``drivable_maps/color_labels`` or ``drivable_maps/labels``.

        use_color_maps: read the colour map instead of the id map.
        use_one_hot_label: accepted for interface compatibility; not used.

        Returns a float array. A map that is not 3-D when loaded is expanded
        to three channels, channel ``k`` being 1.0 where the value equals
        ``k``; a 3-D map is returned as loaded, converted to float.
        """
        path = self.image_info[image_id]['path']

        if use_color_maps:
            label_name = 'color_labels'
            path = path.replace('.jpg', '_drivable_color.png')
        else:
            label_name = 'labels'
            path = path.replace('.jpg', '_drivable_id.png')

        path = path.replace('images/100k', 'drivable_maps/' + label_name)

        image = skimage.io.imread(path)

        if image.ndim != 3:
            # Replicate the label plane into three channels, then compare
            # channel k with the constant k (k = 0, 1, 2).
            labels = skimage.color.gray2rgb(image)
            image = labels == np.arange(3, dtype=labels.dtype)
        return image * 1.0

    def image_reference(self, image_id):
        """Return a reference for the image.

        Images whose source is "coco" get a cocodataset.org explorer URL;
        any other source is delegated to the parent class.
        """
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        # The parent's value used to be computed and then dropped (missing
        # `return`), so every non-"coco" image yielded None.
        return super(CocoDataset, self).image_reference(image_id)

    def annToRLE(self, ann, height, width):
        """Convert an annotation's ``segmentation`` field to RLE.

        A list segmentation is converted with ``frPyObjects`` and merged
        into one RLE; a dict whose ``counts`` is a list is converted with
        ``frPyObjects``; anything else is returned unchanged (presumably
        already RLE-encoded).
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # Possibly several parts for one object: merge them.
            rles = maskUtils.frPyObjects(segm, height, width)
            return maskUtils.merge(rles)
        if isinstance(segm['counts'], list):
            return maskUtils.frPyObjects(segm, height, width)
        return segm

    def annToMask(self, ann, height, width):
        """Decode an annotation's segmentation into a mask array."""
        rle = self.annToRLE(ann, height, width)
        return maskUtils.decode(rle)

def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks,
                       source="bdd"):
    """Arrange detections in the list-of-dicts format used for COCO results.

    dataset: object providing ``get_source_class_id(class_id, source)``.
    image_ids: ids written to the ``image_id`` field; every detection is
        emitted once per id in this list.
    rois: array with one box per detection, or None when nothing was
        detected. Assumes rows are (y1, x1, y2, x2) -- TODO confirm against
        the model's output.
    class_ids, scores: per-detection class ids and scores.
    masks: array indexed as ``masks[:, :, i]`` for detection ``i``.
    source: dataset source used to translate internal class ids back to
        the dataset's category ids. Defaults to "bdd", the source this
        file registers its classes under; the previous hard-coded "coco"
        did not match any registered class.

    Returns a list of result dicts (empty when ``rois`` is None).
    """
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        for i in range(rois.shape[0]):
            bbox = np.around(rois[i], 1)
            results.append({
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_ids[i], source),
                # Reorder to [x, y, width, height].
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": scores[i],
                # encode() needs a Fortran-ordered array.
                "segmentation": maskUtils.encode(np.asfortranarray(masks[:, :, i])),
            })
    return results

def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Run detection on dataset images and print the COCO evaluation.

    model: object providing ``detect(list_of_images)``; must be the bare
        model, not a ``DataParallel`` wrapper (the wrapper has no
        ``detect`` attribute).
    dataset: dataset providing ``image_ids``, ``image_info`` and
        ``load_image``.
    coco: ``COCO`` object holding the ground-truth annotations.
    eval_type: evaluation type passed to ``COCOeval``.
    limit: when non-zero, evaluate only the first ``limit`` images.
    image_ids: images to evaluate; all dataset images when None or empty.

    Prints the summary and timing information; returns None.
    """
    # `image_ids or dataset.image_ids` raises ValueError when a caller
    # passes a multi-element numpy array, so test explicitly.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    if limit:
        image_ids = image_ids[:limit]

    # Ids used by the annotation file, in the same order as image_ids.
    coco_image_ids = [dataset.image_info[i]["id"] for i in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for image_id, coco_image_id in zip(image_ids, coco_image_ids):
        image = dataset.load_image(image_id)

        # Time only the detection call.
        t = time.time()
        r = model.detect([image])[0]
        t_prediction += (time.time() - t)

        image_results = build_coco_results(dataset, [coco_image_id],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    if not results:
        # With nothing to score, loadRes() would fail on the empty list
        # and the average below would divide by zero for zero images.
        print("No detections were produced; skipping COCO evaluation.")
        return

    coco_results = coco.loadRes(results)

    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)

if __name__ == '__main__':
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` turns every non-empty string, including "False",
        into True, so the flag is parsed explicitly instead.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected True or False, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="/data1/datasets/bdd100k",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="<year>",
                        help='Year suffix of the dataset files (default={})'.format(
                            DEFAULT_DATASET_YEAR))
    parser.add_argument('--model', required=False,
                        metavar="/path/to/weights.pth",
                        help="Path to weights .pth file, or 'coco', 'last' "
                             "or 'imagenet' (default: 'coco')")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500, type=int,
                        metavar="<image count>",
                        help='Images to use for evaluation (default=500)')
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=_str2bool)
    parser.add_argument('--lr', required=False,
                        default=None, type=float,
                        help='Learning rate (default: LEARNING_RATE from the config)')
    parser.add_argument('--batchsize', required=False,
                        default=4, type=int,
                        help='Batch size')
    parser.add_argument('--steps', required=False,
                        default=200, type=int,
                        help='steps per epoch')
    parser.add_argument('--device', required=False,
                        default="gpu",
                        help='gpu or cpu')
    args = parser.parse_args()

    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Evaluation feeds one image at a time on one GPU.
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Both commands build the model the same way.
    net = modellib.MaskRCNN(config=config, model_dir=args.logs)

    # Fall back to the CPU instead of failing inside .to("cuda") when
    # "gpu" is requested on a machine without CUDA.
    use_cuda = args.device == "gpu" and torch.cuda.is_available()
    if args.device == "gpu" and not use_cuda:
        print("CUDA is not available; running on the CPU instead.")
    device = torch.device("cuda" if use_cuda else "cpu")

    net = net.to(device)

    # Resolve the weights to load. Previously the resolved path was
    # overwritten by a hard-coded 'mask_rcnn_coco.pth', so --model had no
    # effect; the COCO weights remain the default when --model is omitted.
    if args.model:
        if args.model.lower() == "coco":
            model_path = COCO_MODEL_PATH
        elif args.model.lower() == "last":
            model_path = net.find_last()[1]
        elif args.model.lower() == "imagenet":
            model_path = config.IMAGENET_MODEL_PATH
        else:
            model_path = args.model
    else:
        model_path = COCO_MODEL_PATH

    # `net` is the bare model: custom methods such as load_pre_weights,
    # train_model and detect exist on it, not on a DataParallel wrapper,
    # and `.module` only exists when the model actually was wrapped.
    print("Loading weights ", model_path)
    net.load_pre_weights(model_path)

    # Wrap only when several GPUs are usable, and derive the device list
    # from the machine instead of assuming exactly four GPUs.
    model = net
    gpu_count = torch.cuda.device_count() if use_cuda else 0
    device_ids = list(range(min(gpu_count, config.GPU_COUNT)))
    if len(device_ids) > 1:
        print("Let's use", len(device_ids), "GPUs!")
        model = torch.nn.DataParallel(net, device_ids=device_ids)
        # NOTE(review): DataParallel only splits a batch across GPUs for
        # calls made through the wrapper itself (model(inputs)). The calls
        # below go to the wrapped module directly, so unless train_model
        # routes its forward passes through `model`, training still runs
        # on a single GPU -- which matches the behaviour reported here.

    lr = config.LEARNING_RATE if args.lr is None else args.lr
    batchsize = args.batchsize
    steps = args.steps

    print('batchsize', batchsize)
    print('lr', lr)
    print('steps', steps)

    if args.command == "train":
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year,
                                auto_download=args.download)
        dataset_train.prepare()

        dataset_val = CocoDataset()
        dataset_val.load_coco(args.dataset, "val", year=args.year,
                              auto_download=args.download)
        dataset_val.prepare()

        # Three stages: heads only, then stage 4 and up, then all layers
        # at a tenth of the learning rate.
        print("Training network heads")
        net.train_model(dataset_train, dataset_val,
                        learning_rate=lr,
                        epochs=1,
                        BatchSize=batchsize,
                        steps=steps,
                        layers='heads')

        print("Fine tune Resnet stage 4 and up")
        net.train_model(dataset_train, dataset_val,
                        learning_rate=lr,
                        epochs=1,
                        BatchSize=batchsize,
                        steps=steps,
                        layers='4+')

        print("Fine tune all layers")
        net.train_model(dataset_train, dataset_val,
                        learning_rate=lr / 10,
                        epochs=2,
                        BatchSize=batchsize,
                        steps=steps,
                        layers='all')

    elif args.command == "evaluate":
        dataset_val = CocoDataset()
        coco = dataset_val.load_coco(args.dataset, "val", year=args.year,
                                     return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        # Pass the bare model: evaluate_coco calls model.detect().
        evaluate_coco(net, dataset_val, coco, "bbox", limit=args.limit)
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))

`

Jianyuan Wang · Answer 3 · Sat Sep 22 2018 17:39:51 GMT+0800 (China Standard Time)

It's weird since I have checked it on my computer and it works well... I guess you could find some help from this
I will probably only have time to look into this after December.

Liyun · Answer 4 · Mon Aug 24 2020 16:41:36 GMT+0800 (China Standard Time)

@AaronLeong Notably, if you use 'DataParallel', the model will be wrapped in DataParallel(). It means you need to change the model.function() to model.module.function() in the following codes.
For example,
model.train_model --> model.module.train_model

I tried this, but it still does not work: it just opened multiple Python threads on the GPU, but only one GPU did any work.
So I think model.module.xxx works around the error caused by DataParallel, but it puts the problem back in its original state: instead of DataParallel's multi-GPU execution, we are back to single-GPU execution of the underlying module.

Yicong Li · Answer 5 · Sun Jan 17 2021 03:39:17 GMT+0800 (China Standard Time)

@zhangliyun9120 Hi, did you solve the problem? I am in the same situation.

Shiva · Answer 6 · Thu Apr 20 2023 22:08:11 GMT+0800 (China Standard Time)

Hey guys,
I ran into the same situation while working with Kaggle's 2× GPU setup: I can see that the two GPUs are not sharing the memory load.