eschmidbauer / metaseq

Repo for external large-scale work

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

How to load the small model

  1. Clone this repo and install all dependencies:
git clone <url-of-this-metaseq-repo>
cd metaseq
pip3 install -e .
  1. Install Megatron-LM as described in the official metaseq setup instructions:
git clone --branch fairseq_v2 https://github.com/ngoyal2707/Megatron-LM.git
cd Megatron-LM
pip3 install six regex
pip3 install -e .
  1. Create a directory where you save the model and tokenizer
mkdir -p add_opt
cd add_opt
  1. Download the 350m model as shown here.
  1. Comment out this line (the link to the exact line is missing from this copy), since the rank is only needed to initialize different random seeds across pipeline-parallel (pp) ranks.

  2. Create the following Python script:

import os

from transformers import BartTokenizerFast
from megatron import get_args
from megatron.initialize import initialize_megatron
from metaseq import checkpoint_utils

path = "/home/patrick/add_opt"

tokenizer = BartTokenizerFast.from_pretrained("facebook/bart-large")

# Initialize Megatron before loading the checkpoint.
# arguments taken from: | table 1
# NOTE(review): the link in the line above was lost; it presumably pointed to
# the OPT paper (350m row of table 1) -- confirm.
initialize_megatron(args_defaults={
    "micro_batch_size": 1,
    "num_layers": 24,
    "hidden_size": 1024,
    "num_attention_heads": 16,
    "max_position_embeddings": 2048,  # TODO check if it is the correct args
    "encoder_seq_length": 2048,  # TODO check if it is the correct args
})

# Load the checkpoint through metaseq, pointing it at the tokenizer files
# stored next to the model in `path`.
checkpoint = checkpoint_utils.load_model_ensemble_and_task(
#    [os.path.join(path, ""), os.path.join(path, "")],
    # TODO: the checkpoint file name is missing (empty string); fill in the
    # name of the downloaded/resharded checkpoint file inside `path`.
    [os.path.join(path, "")],
    arg_overrides={
        "vocab_filename": os.path.join(path, "vocab.json"),
        "merges_filename": os.path.join(path, "merges.txt"),
    },
)

# The first entry of the first returned element is used as the model;
# eval() switches it to inference mode.
model = checkpoint[0][0].eval()

# forward passes
def single_batch_forward_logits(prompts):
    """Run one forward pass over ``prompts`` and return the model's logits.

    The prompts are tokenized into a single PyTorch batch with padding and
    truncation enabled, the token ids are fed to the module-level ``model``,
    and the first element of its output (treated as the logits by this
    script) is returned.
    """
    encoded = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    model_output = model(encoded.input_ids)
    return model_output[0]

# Example prompts fed through the model as one batch.
prompts = [
    "Today is a beautiful day and I want to",
    "In the city of",
    "Paris is the capital of France and",
    "Computers and mobile phones have taken",
]

# Logits produced by the metaseq checkpoint for the whole prompt batch.
meta_logits = single_batch_forward_logits(prompts)
# Stop in an interactive debugger so meta_logits can be inspected and compared
# by hand (requires the third-party `ipdb` package to be installed).
import ipdb; ipdb.set_trace()
  1. Now run:
torchrun <your_script>.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1

Make sure the logits of the HF models correspond to the meta_logits values.

7. How to run the 350m model

#!/usr/bin/env python3
import os

from transformers import AutoTokenizer, GPT2Tokenizer
from megatron.initialize import initialize_megatron
from metaseq import checkpoint_utils
import torch

path = "/home/patrick/add_opt"
metaseq_path = "/home/patrick/metaseq"

# Initialize Megatron before loading the checkpoint.
# arguments taken from: | table 1
# NOTE(review): the link in the line above was lost; it presumably pointed to
# the OPT paper (350m row of table 1) -- confirm.
initialize_megatron(args_defaults={
    "micro_batch_size": 1,
    "num_layers": 24,
    "hidden_size": 1024,
    "num_attention_heads": 16,
    "max_position_embeddings": 2048,  # TODO check if it is the correct args
    "encoder_seq_length": 2048,  # TODO check if it is the correct args
})

tokenizer = GPT2Tokenizer.from_pretrained("patrickvonplaten/opt_gpt2_tokenizer")

# Load the checkpoint through metaseq, pointing it at the tokenizer files
# stored next to the model in `path`.
checkpoint = checkpoint_utils.load_model_ensemble_and_task(
    # TODO: the checkpoint file name is missing (empty string); fill in the
    # name of the downloaded/resharded checkpoint file inside `path`.
    [os.path.join(path, "")],
#    [os.path.join(path, ""), os.path.join(path, "")],
    arg_overrides={
        "vocab_filename": os.path.join(path, "vocab.json"),
        "merges_filename": os.path.join(path, "merges.txt"),
    },
)

# The first entry of the first returned element is used as the model;
# eval() switches it to inference mode.
model = checkpoint[0][0].eval()

# forward passes
def single_batch_forward_logits(prompts):
    """Run one forward pass over ``prompts`` and return the model's logits.

    The input is tokenized into PyTorch tensors without padding or
    truncation, the token ids are fed to the module-level ``model``, and the
    first element of its output (treated as the logits by this script) is
    returned.
    """
    # Earlier variant, kept for reference:
    #    input_ids = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).input_ids
    encoded = tokenizer(prompts, return_tensors="pt")
    model_output = model(encoded.input_ids)
    return model_output[0]

# Example prompts; each one is run through the model separately below.
prompts = [
    "Today is a beautiful day and I want to",
    "In the city of",
    "Paris is the capital of France and",
    "Computers and mobile phones have taken",
]

# Greedy next-token prediction: one forward pass per prompt, then pick the
# highest-scoring token at the last position.
print("Next word generation")
for prompt in prompts:
    print(f"Prompt: {prompt}...\n")
    logits = single_batch_forward_logits(prompt)
    # Index [0, -1] selects the first sequence and its final position.
    pred_next_token = torch.argmax(logits[0, -1], -1)
    # A single id goes in, so exactly one token string comes back.
    (next_token,) = tokenizer.convert_ids_to_tokens([pred_next_token])
    # Strip the "Ġ" marker that GPT-2 style tokenizers use for a leading space.
    next_token = next_token.replace("Ġ", "")
    print(f"Next word: {next_token}")


Repo for external large-scale work

License:MIT License


Languages: Python 97.5%, Cython 1.2%, HTML 0.8%, Cuda 0.5%