eschmidbauer / metaseq

Repo for external large-scale work

How to load the small model

  1. Clone this repo and install all dependencies:
git clone https://github.com/facebookresearch/metaseq.git
cd metaseq
pip3 install -e .
  2. Install Megatron-LM as described in the official setup.md:
git clone --branch fairseq_v2 https://github.com/ngoyal2707/Megatron-LM.git
cd Megatron-LM
pip3 install six regex
pip3 install -e .
  3. Create a directory in which to save the model and tokenizer:
mkdir -p add_opt
cd add_opt
  4. Download the 350m model checkpoint:
wget https://dl.fbaipublicfiles.com/opt/v1_20220502/350m/reshard.pt
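Note that the 350m checkpoint is a single reshard.pt file; the larger OPT models are split into multiple reshard-model_part-*.pt shards, which is why the scripts below contain commented-out multi-part paths.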
  5. Comment out this line, since the rank is only needed to initialize different random seeds across pipeline-parallel (pp) ranks.

  6. Create the following Python script and save it as run_model.py:

import os

from transformers import BartTokenizerFast
from megatron.initialize import initialize_megatron
from metaseq import checkpoint_utils

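# directory created in step 3; adjust to your local setup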
path = "/home/patrick/add_opt"

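# Saving the tokenizer writes the vocab.json and merges.txt files that
# metaseq expects; BART reuses the same GPT-2 byte-level BPE vocabulary as OPT.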
tokenizer = BartTokenizerFast.from_pretrained("facebook/bart-large")
tokenizer.save_pretrained(path)

# arguments taken from: https://arxiv.org/pdf/2205.01068.pdf | table 1
initialize_megatron(args_defaults={
    "micro_batch_size": 1, 
    "num_layers": 24, 
    "hidden_size": 1024, 
    "num_attention_heads": 16,
    "max_position_embeddings": 2048, # TODO check if it is the correct args
    "encoder_seq_length": 2048 # TODO check if it is the correct args
})

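# The 350m model is a single shard; the commented-out list below shows the
# two-shard layout used by the larger, model-parallel checkpoints.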
checkpoint = checkpoint_utils.load_model_ensemble_and_task(
#    [os.path.join(path, "reshard-model_part-0.pt"), os.path.join(path, "reshard-model_part-1.pt")],
    [os.path.join(path, "reshard.pt")],
    arg_overrides={
        "vocab_filename": os.path.join(path, "vocab.json"),
        "merges_filename": os.path.join(path, "merges.txt"),
    }
)

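# load_model_ensemble_and_task returns (models, args, task);
# take the first (and only) model and switch it to eval mode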
model = checkpoint[0][0].eval()


# forward passes
def single_batch_forward_logits(prompts):
    input_ids = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).input_ids
    logits = model(input_ids)[0]
    return logits

prompts = [
    "Today is a beautiful day and I want to",
    "In the city of",
    "Paris is the capital of France and",
    "Computers and mobile phones have taken",
]

meta_logits = single_batch_forward_logits(prompts)
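# drop into a debugger to inspect meta_logits interactively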
import ipdb; ipdb.set_trace()
  1. Now run:
torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1

Make sure the logits of the Hugging Face model match the meta_logits values.
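A minimal comparison sketch, assuming the ported checkpoint is published on the Hugging Face Hub as facebook/opt-350m and that the installed transformers version provides OPTForCausalLM (adjust the model id as needed):

import torch
from transformers import AutoTokenizer, OPTForCausalLM

hf_tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
hf_model = OPTForCausalLM.from_pretrained("facebook/opt-350m").eval()

# same prompts as in run_model.py
# NOTE: the two tokenizers may insert different special tokens,
# so align input_ids between the two models before comparing
inputs = hf_tokenizer(prompts, return_tensors="pt", padding=True)
with torch.no_grad():
    hf_logits = hf_model(**inputs).logits

# meta_logits comes from the metaseq forward pass above
print(torch.allclose(hf_logits, meta_logits, atol=1e-3))
print((hf_logits - meta_logits).abs().max())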

How to run the 350m model

#!/usr/bin/env python3
import os

from transformers import GPT2Tokenizer
from megatron.initialize import initialize_megatron
from metaseq import checkpoint_utils
import torch

path = "/home/patrick/add_opt"

# arguments taken from: https://arxiv.org/pdf/2205.01068.pdf | table 1
initialize_megatron(args_defaults={
    "micro_batch_size": 1, 
    "num_layers": 24, 
    "hidden_size": 1024, 
    "num_attention_heads": 16,
    "max_position_embeddings": 2048, # TODO check if it is the correct args
    "encoder_seq_length": 2048 # TODO check if it is the correct args
})

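# OPT uses the GPT-2 byte-level BPE; this community repo provides
# matching vocab.json and merges.txt files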
tokenizer = GPT2Tokenizer.from_pretrained("patrickvonplaten/opt_gpt2_tokenizer")
tokenizer.save_pretrained(path)

checkpoint = checkpoint_utils.load_model_ensemble_and_task(
    [os.path.join(path, "reshard.pt")],
#    [os.path.join(path, "reshard-model_part-0.pt"), os.path.join(path, "reshard-model_part-1.pt")],
    arg_overrides={
        "vocab_filename": os.path.join(path, "vocab.json"),
        "merges_filename": os.path.join(path, "merges.txt"),
    }
)

model = checkpoint[0][0].eval()


# forward passes
def single_batch_forward_logits(prompts):
#    input_ids = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).input_ids
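    # padding is disabled here: the GPT-2 tokenizer has no pad token by default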
    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
    logits = model(input_ids)[0]
    return logits

prompts = [
    "Today is a beautiful day and I want to",
    "In the city of",
    "Paris is the capital of France and",
    "Computers and mobile phones have taken",
]

print("Next word generation")
for prompt in prompts:
    print("-------------")
    print(f"Prompt: {prompt}...\n")
    logits = single_batch_forward_logits(prompt)
    pred_next_token = torch.argmax(logits[0, -1], -1)
    next_token = tokenizer.convert_ids_to_tokens([pred_next_token.item()])
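    # "Ġ" is the byte-level BPE marker for a leading space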
    next_token = next_token[0].replace("Ġ", "")
    print(f"Next word: {next_token}")
    print("-------------")
