This repository includes tests for using DeepSpeed with transformers
- Prepare the machine following Stas' guide. Install CUDA 11.3:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda-repo-ubuntu2004-11-3-local_11.3.1-465.19.01-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2004-11-3-local_11.3.1-465.19.01-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu2004-11-3-local/7fa2af80.pub
sudo apt-get update
sudo apt-get -y install cuda-11.3
- Install DeepSpeed and other dependencies
pip install torch==1.10.2+cu113 -f https://download.pytorch.org/whl/torch_stable.html
pip install transformers==4.15.0
# DS_BUILD_OPS=1 pip install git+https://github.com/microsoft/DeepSpeed.git
# deepspeed
sudo apt install libaio-dev
pip install triton==1.0.0
git clone https://github.com/microsoft/DeepSpeed
cd DeepSpeed
mkdir deepspeed/ops/transformer_inference
DS_BUILD_TRANSFORMER_INFERENCE=1 DS_BUILD_UTILS=1 pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 | tee build.log
cd ..
/home/ubuntu/transformers-deepspeed/DeepSpeed/build/lib.linux-x86_64-3.8/deepspeed/ops/transformer_inference/transformer_inference_op.cpython-38-x86_64-linux-gnu.so
2. Test script
python3 -m deepspeed.launcher.runner --num_gpus 2 run_infernence_gpt-neo.py
Number of parameters * 4 / number of GPUs + 2-3 GB (kernel + GPU memory) = GPU memory needed per GPU
- check gpus
nvidia-smi
- watch nvidia-smi
watch -n0.1 nvidia-smi
- check deepspeed
#ds_report
python3 -m deepspeed.env_report
gpt-neo
python3 -m deepspeed.launcher.runner --num_gpus 2 run_infernence_gpt-neo.py
work with ranks
python3 -m deepspeed.launcher.runner --num_gpus 2 run_infernence_gpt-neo.py
memory allocation test
python3 -m deepspeed.launcher.runner --num_gpus 2 memory_allocation_test.py
gptj
python3 -m deepspeed.launcher.runner --num_gpus 4 gptj.py
python3 -m deepspeed.launcher.runner --num_gpus 2 api.py
curl --request POST \
--url http://localhost:8500/ \
--header 'Content-Type: application/json' \
--data '{
"inputs":"Deepspeed is"
}'
cd io
python3 server.py
curl --request POST \
--url http://localhost:8500/ \
--header 'Content-Type: application/json' \
--data '{
"inputs": "Hugging Face can do",
"paramters": {
"min_length": 75,
"max_length": 250
}
}'
python3 -m deepspeed.launcher.runner --num_gpus 4 run_mp_example.py
python3 -m deepspeed.launcher.runner --num_gpus 4 t5_mp.py