'MyDataset' object has no attribute 'get_labels'
kuri54 opened this issue · comments
When I try to use my own Dataset class, I get the error 'MyDataset' object has no attribute 'get_labels'
and cannot proceed.
The content of the Dataloader is as follows, and there is nothing strange about it.
It processes the image data and label data in .npz format.
class MyDataset(data.Dataset):
    """Map-style dataset over in-memory images and their labels.

    Args:
        images: Indexable collection of images.
        labels: Indexable collection of labels, aligned with ``images``.
        transform: Optional callable invoked as ``transform(image=image)``;
            the value under the ``"image"`` key of its result replaces the
            image.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        image = self.images[index]
        label = self.labels[index]
        if self.transform is not None:
            # The transform is keyword-called and returns a mapping.
            image = self.transform(image=image)["image"]
        return image, label
# NOTE(review): a custom sampler is passed together with shuffle=True here;
# DataLoader documents these two options as mutually exclusive - confirm.
train_dataset = MyDataset(train_imgs, train_labels, transform=transform)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    sampler=ImbalancedDatasetSampler(train_dataset),
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
Is there something wrong with the code?
I don't think it's a typo.
How can I fix it so that it works correctly?
add this in your dataset code, and turn off the shuffle
def get_labels(self):
    """Return the labels stored on this dataset (``self.labels``)."""
    return self.labels
@jaebbb
Thank you!
Thanks to you, it is now working properly.
Until then, I had been running the ImbalancedDatasetSampler class by creating my own to match my dataset.
class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Draw dataset indices with weights inversely proportional to class size.

    Args:
        dataset: Dataset whose labels determine the sampling weights.
        indices: Indices to draw from; defaults to every index of ``dataset``.
        num_samples: Number of indices drawn per iteration; defaults to
            ``len(indices)``.
    """

    def __init__(self, dataset, indices=None, num_samples=None):
        self.indices = list(range(len(dataset))) if indices is None else indices
        self.num_samples = (
            len(self.indices) if num_samples is None else num_samples
        )

        # Look every label up once; it is needed for counting and weighting.
        labels = [self._get_label(dataset, idx) for idx in self.indices]

        # Number of samples per label.
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # Weight each sample by the inverse of its class count so that rare
        # classes are drawn as often as frequent ones.
        weights = [1.0 / label_to_count[label] for label in labels]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx):
        """Return the label of the sample at ``idx``.

        MNIST and ImageFolder datasets are read through their stored label
        attributes; any other dataset is indexed and the second element of
        the returned item is used.
        """
        dataset_type = type(dataset)
        if dataset_type is torchvision.datasets.MNIST:
            return dataset.train_labels[idx].item()
        elif dataset_type is torchvision.datasets.ImageFolder:
            return dataset.imgs[idx][1]
        else:
            return dataset.__getitem__(idx)[1]

    def __iter__(self):
        """Yield ``num_samples`` indices drawn with replacement by weight."""
        return (self.indices[i] for i in torch.multinomial(
            self.weights, self.num_samples, replacement=True))

    def __len__(self):
        """Return the number of indices produced per iteration."""
        return self.num_samples
add this in your dataset code, and turn off the shuffle
def get_labels(self):
    """Return the labels stored on this dataset (``self.labels``)."""
    return self.labels
Hi, I modified the dataset class (source code), but it still doesn't work. Why?
this is my dataset:
class CincDataset(Dataset):
    """Dataset that loads one record file per sample from ``root_dir/phase``.

    Every file is read with ``np.load`` and must provide the keys
    ``input_0..3``, ``beat_0..3``, ``rhythm_0..3``, ``freq`` and ``label``.

    Args:
        root_dir: Directory that contains the ``phase`` sub-directory.
        transform: Optional callable that receives all 14 arrays as keyword
            arguments and returns them as a 14-tuple in the same order.
        phase: Name of the sub-directory holding the record files.
    """

    def __init__(self, root_dir, transform=None, phase='train2017'):
        super().__init__()
        self.root_dir = root_dir
        self.transform = transform
        self.phase = phase
        data_dir = os.path.join(root_dir, phase)
        self.list_data = os.listdir(data_dir)

    def __len__(self):
        """Return the number of record files found in the phase directory."""
        return len(self.list_data)

    def __getitem__(self, idx):
        """Load record ``idx`` and return its 14 fields as tensors.

        Returns:
            A 14-tuple ``(input_0..3, beat_0..3, rhythm_0..3, freq, label)``;
            ``label`` is an int64 tensor, all other fields are float32.
        """
        recordname = self.list_data[idx]
        filename = os.path.join(self.root_dir, self.phase, recordname)
        data = np.load(filename)

        input_0 = data['input_0']
        input_1 = data['input_1']
        input_2 = data['input_2']
        input_3 = data['input_3']
        beat_0 = data['beat_0']
        beat_1 = data['beat_1']
        beat_2 = data['beat_2']
        beat_3 = data['beat_3']
        rhythm_0 = data['rhythm_0']
        rhythm_1 = data['rhythm_1']
        rhythm_2 = data['rhythm_2']
        rhythm_3 = data['rhythm_3']
        freq = data['freq']
        label = data['label']

        if self.transform is not None:
            # The target list is parenthesised so that all 14 names belong to
            # ONE assignment. Without it, only the last line (`freq, label`)
            # is the assignment target and the preceding lines are no-op
            # expression statements.
            (input_0, input_1, input_2, input_3,
             beat_0, beat_1, beat_2, beat_3,
             rhythm_0, rhythm_1, rhythm_2, rhythm_3,
             freq, label) = self.transform(
                input_0=input_0, input_1=input_1,
                input_2=input_2, input_3=input_3,
                beat_0=beat_0, beat_1=beat_1, beat_2=beat_2, beat_3=beat_3,
                rhythm_0=rhythm_0, rhythm_1=rhythm_1,
                rhythm_2=rhythm_2, rhythm_3=rhythm_3,
                freq=freq, label=label)

        signals = (
            input_0, input_1, input_2, input_3,
            beat_0, beat_1, beat_2, beat_3,
            rhythm_0, rhythm_1, rhythm_2, rhythm_3,
            freq,
        )
        tensors = tuple(
            torch.from_numpy(array.astype(np.float32)) for array in signals
        )
        label = torch.from_numpy(label.astype(np.int32)).long()
        return (*tensors, label)
hi, I faced the same problem.
You need to include a function to get the label in that class.
As you can see in imbalanced-dataset-sampler/torchsampler/imbalanced.py, the code to get the labels looks like this
def _get_labels(self, dataset):
    """Return the labels of ``dataset``, dispatching on the dataset type.

    A user-supplied ``callback_get_label`` takes precedence; otherwise known
    torchvision dataset types are read through their own attributes, and any
    other ``torch.utils.data.Dataset`` must provide a ``get_labels()`` method.

    Raises:
        NotImplementedError: If ``dataset`` matches none of the branches.
    """
    if self.callback_get_label:
        return self.callback_get_label(dataset)
    elif isinstance(dataset, torchvision.datasets.MNIST):
        return dataset.train_labels.tolist()
    elif isinstance(dataset, torchvision.datasets.ImageFolder):
        return [x[1] for x in dataset.imgs]
    elif isinstance(dataset, torchvision.datasets.DatasetFolder):
        # NOTE(review): `samples[:][1]` is element 1 of a copy of `samples`,
        # not the second field of every sample - confirm this is intended.
        return dataset.samples[:][1]
    elif isinstance(dataset, torch.utils.data.Subset):
        # NOTE(review): same `[:][1]` pattern as above - confirm.
        return dataset.dataset.imgs[:][1]
    elif isinstance(dataset, torch.utils.data.Dataset):
        # Custom datasets land here, so they must define get_labels().
        return dataset.get_labels()
    else:
        raise NotImplementedError
If you want to get labels from a custom Dataset class, you will need to define them yourself as this code does not do it.
In the end, I added def get_labels(self): return self.labels
to the Dataset class at the top of this page, and it works fine now.
You need to include a function to get the label in that class. As you can see in imbalanced-dataset-sampler/torchsampler/imbalanced.py, the code to get the labels looks like this
Hi,
Should we set shuffle=False
for training, validation, and testing?