# Mamba original paper: https://arxiv.org/ftp/arxiv/papers/2312/2312.00752.pdf
# Launch commands for finetune_mamba.py.
# NOTE(review): these look like ALTERNATIVE ways to start the same job, not
# sequential steps -- pick one. Replace every path/to/your/... placeholder
# with a real path before running.

# 1) Plain Python launch (no distributed launcher).
python finetune_mamba.py --output_dir path/to/your/dir --model_name_or_path path/to/your/model

# 2) DeepSpeed launcher; the script is additionally given a DeepSpeed JSON
#    config via --deepspeed.
deepspeed finetune_mamba.py --output_dir path/to/your/dir --model_name_or_path path/to/your/model --deepspeed path/to/your/deepspeed_config.json

# 3) torchrun launch: 2 processes per node across 4 nodes, still passing the
#    DeepSpeed config to the script.
#    The documented torchrun option is --nnodes (plural); the previous
#    --nnode spelling only worked through argparse prefix abbreviation.
#    NOTE(review): a multi-node run normally also needs rendezvous settings
#    on every node (e.g. --node_rank/--master_addr/--master_port, or
#    --rdzv_backend/--rdzv_endpoint) -- confirm how these are supplied.
torchrun --nproc_per_node=2 --nnodes=4 finetune_mamba.py --output_dir path/to/your/dir --model_name_or_path path/to/your/model --deepspeed path/to/your/deepspeed_config.json