Trouble with `dataset_benchmark` and `tensors_benchmark`. Continual Learning plugins do not work.

Question

Trouble with `dataset_benchmark` and `tensors_benchmark`. Continual Learning plugins do not work.

Vincent710129 opened this issue 10 months ago · comments

🐛 Describe the bug
I prepared two pairs of training and test PyTorch datasets from two different domains and used the EWC plugin to perform domain-incremental continual learning (CL). However, the evaluation results suggest that no CL is taking place: they are the same as the results of naive fine-tuning. This happens with both `dataset_benchmark` and `tensors_benchmark`.

🐜 To Reproduce
Due to the confidentiality of my dataset, I cannot share it here. Below is my working script, provided for reference to help with debugging.

import torch
import argparse
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn import Module, ReLU, Linear, Softmax
from avalanche.benchmarks.generators import dataset_benchmark, tensors_benchmark
from avalanche.benchmarks.utils import AvalancheDataset, make_classification_dataset
from avalanche.training.supervised import EWC
from avalanche.evaluation.metrics import (
     forgetting_metrics,
     accuracy_metrics,
     loss_metrics,
     bwt_metrics
)
from avalanche.logging import InteractiveLogger, TensorboardLogger
from avalanche.training.plugins import EvaluationPlugin


# Create torch dataset class
class dataSet(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Both inputs are converted with ``torch.as_tensor``. Features are stored
    in ``x`` and labels in ``targets``.
    """

    def __init__(self, x, y):
        """Store ``x`` and ``y`` as tensors."""
        features = torch.as_tensor(x)
        self.x = features
        labels = torch.as_tensor(y)
        self.targets = labels

    def __len__(self):
        """Return the size of the leading dimension of ``x``."""
        return self.x.size(0)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.x[index, :]
        label = self.targets[index]
        return sample, label


# Create MLP model
class MLP(Module):
    """Fully connected network with three ReLU-activated hidden layers.

    Layer widths: ``in_num -> hidden_num1 -> hidden_num2 -> hidden_num3 ->
    out_num``. The output of the last linear layer is returned as-is; no
    activation is applied to it.
    """

    def __init__(self, in_num, out_num, hidden_num1, hidden_num2, hidden_num3):
        super().__init__()

        # First hidden layer.
        self.fc1 = Linear(in_num, hidden_num1)
        self.relu1 = ReLU()

        # Second hidden layer.
        self.fc2 = Linear(hidden_num1, hidden_num2)
        self.relu2 = ReLU()

        # Third hidden layer.
        self.fc3 = Linear(hidden_num2, hidden_num3)
        self.relu3 = ReLU()

        # Output layer.
        self.fc4 = Linear(hidden_num3, out_num)

    def forward(self, x):
        """Run ``x`` through the three hidden layers and the output layer."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))
        return self.fc4(hidden)


def main(args):
    """Train an MLP with Avalanche's EWC strategy on two experiences.

    Loads two train/test splits from ``.npy`` files, wraps them as Avalanche
    classification datasets, builds a two-experience benchmark with
    ``dataset_benchmark`` and, after training on each experience, evaluates
    on the whole test stream.

    Args:
        args: Parsed command-line namespace. Reads ``dataset``, ``cuda``,
            ``optim``, ``lr``, ``momentum``, ``ewc_lambda``, ``ewc_mode``,
            ``decay_factor``, ``epochs`` and ``minibatch_size``. The
            namespace is not modified.

    Returns:
        list: One entry per training experience, each being the value
        returned by ``strategy.eval`` on the test stream.

    Raises:
        ValueError: If ``args.dataset`` or ``args.optim`` is not a supported
            choice, or if ``args.cuda`` is smaller than -1.
    """
    # --- CONFIG
    # Validate every option up front so a bad value fails before any data is
    # loaded, instead of surfacing later as an unbound local variable.
    dataset_paths = {"covid": "covid_dataset", "diab": "diabetes_dataset"}
    if args.dataset not in dataset_paths:
        raise ValueError(
            f"Unknown dataset {args.dataset!r}; "
            f"expected one of {sorted(dataset_paths)}."
        )
    path = dataset_paths[args.dataset]

    if args.optim not in ("sgd", "adam"):
        raise ValueError(
            f"Unknown optimizer {args.optim!r}; expected 'sgd' or 'adam'."
        )

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if args.cuda < -1:
        raise ValueError("cuda must be -1 or >= 0.")

    in_num = 155        # the number of input features
    hidden_num1 = 256   # number of neurons in hidden layers
    hidden_num2 = 128   # number of neurons in hidden layers
    hidden_num3 = 128   # number of neurons in hidden layers
    out_num = 3         # number of classes

    # Prepare training and test datasets for both distributions.
    # NOTE(review): ``path`` is concatenated with the file name without a
    # separator (e.g. "covid_datasetx_train_1.npy"). If ``path`` is meant to
    # be a directory, this should be ``os.path.join(path, ...)`` - confirm.
    x_train_1 = np.load(path + 'x_train_1.npy').astype(np.float32)
    x_test_1 = np.load(path + 'x_test_1.npy').astype(np.float32)
    x_train_2 = np.load(path + 'x_train_2.npy').astype(np.float32)
    x_test_2 = np.load(path + 'x_test_2.npy').astype(np.float32)

    # Labels are loaded as integer class indices (not one-hot). They are cast
    # to int64 explicitly: ``np.int_`` is 32-bit on some platforms, while
    # ``CrossEntropyLoss`` expects 64-bit integer class-index targets.
    y_train_1 = np.load(path + 'y_train_1.npy').astype(np.int64)
    y_test_1 = np.load(path + 'y_test_1.npy').astype(np.int64)
    y_train_2 = np.load(path + 'y_train_2.npy').astype(np.int64)
    y_test_2 = np.load(path + 'y_test_2.npy').astype(np.int64)

    # Instantiate datasets
    train_1 = dataSet(x_train_1, y_train_1)
    test_1 = dataSet(x_test_1, y_test_1)
    train_2 = dataSet(x_train_2, y_train_2)
    test_2 = dataSet(x_test_2, y_test_2)

    # Every split gets the same task label (0).
    train_1 = make_classification_dataset(train_1, task_labels=0)
    test_1 = make_classification_dataset(test_1, task_labels=0)
    train_2 = make_classification_dataset(train_2, task_labels=0)
    test_2 = make_classification_dataset(test_2, task_labels=0)

    # Use the selected GPU if CUDA is available, otherwise fall back to CPU.
    device = torch.device(
        f"cuda:{args.cuda}"
        if torch.cuda.is_available() and args.cuda >= 0
        else "cpu"
    )
    print(f"Using device: {device}")
    # ---------

    # --- SCENARIO CREATION
    # generic_scenario = tensors_benchmark(
    #     train_tensors=[(x_train_1, y_train_1), (x_train_2, y_train_2)],
    #     test_tensors=[(x_test_1, y_test_1), (x_test_2, y_test_2)],
    #     task_labels=[0, 1]
    # )
    generic_scenario = dataset_benchmark([train_1, train_2], [test_1, test_2])
    # ---------

    # MODEL CREATION
    model = MLP(in_num, out_num, hidden_num1, hidden_num2, hidden_num3).to(device)
    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(), lr=args.lr, momentum=args.momentum
        )
    else:  # 'adam' (validated above)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss()

    # DEFINE THE EVALUATION PLUGIN AND LOGGER
    interactive_logger = InteractiveLogger()

    eval_plugin = EvaluationPlugin(
        accuracy_metrics(
            minibatch=False, epoch=True, experience=True, stream=True
        ),
        loss_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        forgetting_metrics(experience=True, stream=True),
        bwt_metrics(experience=True, stream=True),
        loggers=[interactive_logger]
    )

    # The decay factor is only passed in "online" mode. It is kept in a local
    # variable so the caller's ``args`` namespace is left untouched.
    decay_factor = None if args.ewc_mode == 'separate' else args.decay_factor

    # Create the strategy. Keyword arguments are used throughout so the call
    # does not depend on the positional order of the constructor parameters.
    # NOTE(review): ``ewc_lambda`` is the EWC penalty hyperparameter; if the
    # results match naive fine-tuning, a larger value may be worth trying -
    # confirm against the Avalanche EWC documentation.
    strategy = EWC(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        ewc_lambda=args.ewc_lambda,
        mode=args.ewc_mode,
        decay_factor=decay_factor,
        train_epochs=args.epochs,
        device=device,
        train_mb_size=args.minibatch_size,
        evaluator=eval_plugin,
    )

    # Train on each experience in turn, evaluating on the full test stream
    # after each one.
    print("Starting experiment...")
    results = []
    for experience in generic_scenario.train_stream:
        print("Start training on experience ", experience.current_experience)

        strategy.train(experience)
        print("End training on experience", experience.current_experience)
        print("Computing accuracy on the test set")
        results.append(strategy.eval(generic_scenario.test_stream))

    return results


if __name__ == "__main__":
    # Command-line options as (flag, add_argument keyword arguments) pairs.
    # They are registered in list order, which is also the --help order.
    cli_options = [
        (
            "--dataset",
            {
                "type": str,
                "choices": ["covid", "diab"],
                "default": "covid",
                "help": "Choose between covid and diab.",
            },
        ),
        (
            "--ewc_mode",
            {
                "type": str,
                "choices": ["separate", "online"],
                "default": "separate",
                "help": "Choose between EWC and online.",
            },
        ),
        (
            "--ewc_lambda",
            {
                "type": float,
                "default": 0.4,
                "help": "Penalty hyperparameter for EWC",
            },
        ),
        (
            "--decay_factor",
            {
                "type": float,
                "default": 0.1,
                "help": "Decay factor for importance when ewc_mode is online.",
            },
        ),
        (
            "--optim",
            {
                "type": str,
                "choices": ["sgd", "adam"],
                "default": "sgd",
                "help": "Optimizer.",
            },
        ),
        ("--lr", {"type": float, "default": 1e-3, "help": "Learning rate."}),
        ("--momentum", {"type": float, "default": 9e-1, "help": "Momentum."}),
        (
            "--epochs",
            {"type": int, "default": 300, "help": "Number of training epochs."},
        ),
        (
            "--minibatch_size",
            {"type": int, "default": 128, "help": "Minibatch size."},
        ),
        (
            "--cuda",
            {
                "type": int,
                "default": 0,
                "help": "Specify GPU id to use. Use CPU if -1.",
            },
        ),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    main(args)

🐝 Expected behavior
I expect the EWC plugin to perform domain-incremental CL with EWC on my self-defined datasets from 2 different domains with dataset_benchmark.

🐞 Screenshots
Results from EWC:

Results from Naive Fine-tuning:

Results from my own CL strategy: