'MyDataset' object has no attribute 'get_labels'

Question

'MyDataset' object has no attribute 'get_labels'

kuri54 opened this issue 3 years ago · comments

When I try to use my own Dataset class, I get the error 'MyDataset' object has no attribute 'get_labels' and cannot proceed.

The content of the Dataloader is as follows, and there is nothing strange about it.
It processes the image data and label data in .npz format.

class MyDataset(data.Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image = self.images[index]
        label = self.labels[index]

        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return image, label

train_dataset = MyDataset(train_imgs, train_labels, transform=transform)
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               sampler=ImbalancedDatasetSampler(train_dataset),
                                               batch_size= batch_size,
                                               shuffle=True,
                                               num_workers=2)

Is there something wrong with the code?
I don't think it's a typo.

How can I fix it so that it works correctly?

Jaehyeop Choi · Answer 1 · Fri Jul 09 2021 15:53:43 GMT+0800 (China Standard Time)

add this in your dataset code, and turn off the shuffle

def get_labels(self): return self.labels

kuri54 · Answer 2 · Mon Jul 12 2021 07:53:38 GMT+0800 (China Standard Time)

@jaebbb
Thank you!
Thanks to you, it is now working properly.

Until then, I had been running the ImbalancedDatasetSampler class by creating my own to match my dataset.

class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    def __init__(self, dataset, indices=None, num_samples=None):
       
        self.indices = list(range(len(dataset))) \
            if indices is None else indices

        self.num_samples = len(self.indices) \
            if num_samples is None else num_samples

        label_to_count = {}
        for idx in self.indices:
            label = self._get_label(dataset, idx)
            if label in label_to_count:
                label_to_count[label] += 1
            else:
                label_to_count[label] = 1

        weights  = [1.0 for idx in self.indices]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx):
        dataset_type = type(dataset)
        if dataset_type is torchvision.datasets.MNIST:
            return dataset.train_labels[idx].item()
        elif dataset_type is torchvision.datasets.ImageFolder:
            return dataset.imgs[idx][1]
        else:
            return dataset.__getitem__(idx)[1]

    def __iter__(self):
        return (self.indices[i] for i in torch.multinomial(
            self.weights, self.num_samples, replacement=True))

    def __len__(self):
        return self.num_samples

Joevaen · Answer 3 · Thu Aug 12 2021 17:48:25 GMT+0800 (China Standard Time)

add this in your dataset code, and turn off the shuffle

def get_labels(self): return self.labels

hi, why I modify the dataset class (source code), it still doesn;t work?
this is my dataset:

class CincDataset(Dataset):
    def __init__(self, root_dir, transform=None, phase='train2017'):
        super().__init__()
        self.root_dir = root_dir
        self.transform = transform
        self.phase = phase
        
        data_dir = os.path.join(root_dir, phase)
        list_data = os.listdir(data_dir)
        self.list_data = list_data
        
    def __len__(self):
        return len(self.list_data)
    
    def __getitem__(self, idx):
        recordname = self.list_data[idx]
        filename = os.path.join(self.root_dir, self.phase, recordname)
        data = np.load(filename)
        input_0 = data['input_0']
        input_1 = data['input_1']
        input_2 = data['input_2']
        input_3 = data['input_3']
        beat_0 = data['beat_0']
        beat_1 = data['beat_1']
        beat_2 = data['beat_2']
        beat_3 = data['beat_3']
        rhythm_0 = data['rhythm_0']
        rhythm_1 = data['rhythm_1'] 
        rhythm_2 = data['rhythm_2']
        rhythm_3 = data['rhythm_3']
        freq = data['freq']
        #label = np.array([data['label']])
        label = data['label']
        
        if self.transform is not None:
            input_0, input_1, input_2, input_3,
            beat_0, beat_1, beat_2, beat_3,
            rhythm_0, rhythm_1, rhythm_2, rhythm_3,
            freq, label = self.transform(
            input_0=input_0, input_1=input_1, input_2=input_2, input_3=input_3,
            beat_0=beat_0, beat_1=beat_1, beat_2=beat_2, beat_3=beat_3,
            rhythm_0=rhythm_0, rhythm_1=rhythm_1, rhythm_2=rhythm_2, rhythm_3=rhythm_3,
            freq=freq, label=label)
        input_0 = torch.from_numpy(input_0.astype(np.float32))
        input_1 = torch.from_numpy(input_1.astype(np.float32))
        input_2 = torch.from_numpy(input_2.astype(np.float32))
        input_3 = torch.from_numpy(input_3.astype(np.float32))
        beat_0 = torch.from_numpy(beat_0.astype(np.float32))
        beat_1 = torch.from_numpy(beat_1.astype(np.float32))
        beat_2 = torch.from_numpy(beat_2.astype(np.float32))
        beat_3 = torch.from_numpy(beat_3.astype(np.float32))
        rhythm_0 = torch.from_numpy(rhythm_0.astype(np.float32))
        rhythm_1 = torch.from_numpy(rhythm_1.astype(np.float32))
        rhythm_2 = torch.from_numpy(rhythm_2.astype(np.float32))
        rhythm_3 = torch.from_numpy(rhythm_3.astype(np.float32))
        freq = torch.from_numpy(freq.astype(np.float32))
        label = torch.from_numpy(label.astype(np.int32)).long()
        return input_0, input_1, input_2, input_3, \
            beat_0, beat_1, beat_2, beat_3, \
            rhythm_0, rhythm_1, rhythm_2, rhythm_3, \
            freq, label

kuri54 · Answer 4 · Fri Aug 13 2021 10:13:12 GMT+0800 (China Standard Time)

hi, I faced the same problem.

You need to include a function to get the label in that class.
As you can see in imbalanced-dataset-sampler/torchsampler/imbalanced.py, the code to get the labels looks like this

def _get_labels(self, dataset):
        if self.callback_get_label:
            return self.callback_get_label(dataset)
        elif isinstance(dataset, torchvision.datasets.MNIST):
            return dataset.train_labels.tolist()
        elif isinstance(dataset, torchvision.datasets.ImageFolder):
            return [x[1] for x in dataset.imgs]
        elif isinstance(dataset, torchvision.datasets.DatasetFolder):
            return dataset.samples[:][1]
        elif isinstance(dataset, torch.utils.data.Subset):
            return dataset.dataset.imgs[:][1]
        elif isinstance(dataset, torch.utils.data.Dataset):
            return dataset.get_labels()
        else:
            raise NotImplementedError

If you want to get labels from a custom Dataset class, you will need to define them yourself as this code does not do it.

In the end, I added def get_labels(self): return self.labels to the Dataset class at the top of this page, and it works fine now.

Linghao Wang · Answer 5 · Sun Jun 12 2022 23:19:32 GMT+0800 (China Standard Time)

You need to include a function to get the label in that class. As you can see in imbalanced-dataset-sampler/torchsampler/imbalanced.py, the code to get the labels looks like this
Hi,
Should we do shuffle=False for training, validating and testing ?