Notebook

In [1]:

%cd /workspace/axolotl

/workspace/axolotl

In [2]:

!accelerate config --config_file configs/accelerate/default_config.yaml default

Setting ds_accelerator to cuda (auto detect)
accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml

In [10]:

!cat examples/openllama/qlora.yml

# Based on https://gist.github.com/fearnworks/723709806cebc67bafe1eb8138e7efbd
base_model: openlm-research/open_llama_3b_600bt_preview
base_model_config: openlm-research/open_llama_3b_600bt_preview
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false
push_dataset_to_hub:
datasets:
  # - path: AtlasUnified/Code-Instruct-Sets
  #   data_files:
  #     - unmasked-set-1.jsonl
  #     - unmasked-set-2.jsonl
  #     - unmasked-set-3.jsonl
  #     - unmasked-set-4.jsonl
  #   type: alpaca_code_instruct
  # - path: winglian/pygmalion-cleaned
  #   data_files:
  #     - v13_no_ai.cleaned.jsonl
  #   type: pygmalion
  #   shards: 4
  # - path: winglian/evals
  #   data_files:
  #     - hf/ARC-Challenge.jsonl
  #     - hf/ARC-Easy.jsonl
  #     - hf/riddle_sense.jsonl
  #   type: explainchoice:chat
  # - path: winglian/evals
  #   data_files:
  #     - hf/gsm8k.jsonl
  #     - custom/logic_inference_oa.jsonl
  #   type: alpaca_chat.load_qa
  # - path: winglian/evals
  #   data_files:
  #     - custom/in_context_qa.jsonl
  #   type: context_qa
  # - path: winglian/evals
  #   data_files:
  #     - custom/in_context_qa.jsonl
  #   type: context_qa.load_404
  # - path: winglian/evals
  #   data_files:
  #     - custom/jokes_explained_500up.jsonl
  #   type: sharegpt_jokes
  # - path: winglian/evals
  #   data_files:
  #     - custom/classify-self-chat.sharegpt.jsonl
  #     - custom/coding-self-chat.sharegpt.jsonl
  #     - custom/prose-gpt4.sharegpt.jsonl
  #     - custom/prose-rewrite-gpt4.sharegpt.jsonl
  #   type: sharegpt_simple
  # - path: winglian/evals
  #   data_files:
  #     - custom/guanaco-cleaned.en.jsonl
  #   type: sharegpt_simple.load_guanaco
  # - path: winglian/evals
  #   data_files:
  #     - openai/tldr.jsonl
  #   type: summarizetldr:chat
  # - path: winglian/evals
  #   data_files:
  #     - hellaswag/hellaswag.jsonl
  #   type: explainchoice:chat
  #   shards: 60
  # - path: metaeval/ScienceQA_text_only
  #   type: concisechoice:chat
  #   shards: 13
  # - path: teknium/GPTeacher-General-Instruct
  #   data_files: 
  #     - gpt4-instruct-similarity-0.6-dataset.json
  #   type: gpteacher:chat
  - path: QingyiSi/Alpaca-CoT
    data_files:
      # - chain-of-thought/formatted_cot_data/aqua_train.jsonl
      # - Chain-of-Thought/formatted_cot_data/creak_train.json
      # - Chain-of-Thought/formatted_cot_data/ecqa_train.json
      # - Chain-of-Thought/formatted_cot_data/esnli_train.json
      - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
      # - Chain-of-Thought/formatted_cot_data/qasc_train.json
      # - Chain-of-Thought/formatted_cot_data/qed_train.json
      # - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
      # - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
      # - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
    type: "alpaca:chat"
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
wandb_project: openllama-7b-qlora-gsm8k
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: ./qlora-out
batch_size: 36
micro_batch_size: 9
num_epochs: 3
optimizer: paged_adamw_32bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
# stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
early_stopping_patience: 3
resume_from_checkpoint:
auto_resume_from_checkpoints: true
local_rank:
logging_steps: 1
xformers_attention: false
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 5
save_steps: 10
debug:
deepspeed:
weight_decay: 0.000001
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"

In [ ]:

!accelerate launch scripts/finetune.py examples/openllama/qlora.yml

Setting ds_accelerator to cuda (auto detect)

===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
================================================================================
bin /root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/libbitsandbytes_cuda118.so
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/nvidia/lib64'), PosixPath('/usr/local/nvidia/lib')}
  warn(msg)
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...
  warn(msg)
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}
  warn(msg)
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK1tFOFrWbmoa2ckCJYhzgBHKTSMeR/AeuScCCzugqlI utensilcandel@gmail.com')}
  warn(msg)
CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so'), PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0')}.. We'll flip a coin and try one of these, in order to fail forward.
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/libbitsandbytes_cuda118.so...
Setting ds_accelerator to cuda (auto detect)
INFO:root:loading tokenizer...
Using pad_token, but it is not set yet.
INFO:root:Loading prepared packed dataset from disk at last_run_prepared/21a0611c6c2b67b31f00097fa2a91c26...
INFO:root:Prepared packed dataset loaded from disk...
INFO:root:loading model and peft_config...
INFO:root:converting PEFT model w/ prepare_model_for_int8_training
/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/peft/utils/other.py:76: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
  warnings.warn(
INFO:root:found linear modules: ['k_proj', 'gate_proj', 'q_proj', 'v_proj', 'o_proj', 'up_proj', 'down_proj']
trainable params: 101703680 || all params: 1917425280 || trainable%: 5.304179571472011
INFO:root:Compiling torch model
INFO:root:Pre-saving adapter config to ./qlora-out
INFO:root:Starting trainer...
INFO:root:Using Auto-resume functionality to start with checkpoint at qlora-out/checkpoint-40
wandb: Currently logged in as: utensil. Use `wandb login --relogin` to force relogin
wandb: Tracking run with wandb version 0.15.3
wandb: Run data is saved locally in /workspace/axolotl/wandb/run-20230531_043745-ggfx5q40
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run peach-feather-14
wandb: ⭐️ View project at https://wandb.ai/utensil/openllama-7b-qlora-gsm8k
wandb: 🚀 View run at https://wandb.ai/utensil/openllama-7b-qlora-gsm8k/runs/ggfx5q40
{'loss': 0.7336, 'learning_rate': 0.0001, 'epoch': 1.71}                        
{'loss': 0.7318, 'learning_rate': 9.493508311612874e-05, 'epoch': 1.75}         
{'loss': 0.7294, 'learning_rate': 8.98831678012568e-05, 'epoch': 1.79}          
{'loss': 0.7361, 'learning_rate': 8.485722224954237e-05, 'epoch': 1.83}         
{'loss': 0.692, 'learning_rate': 7.987014799113397e-05, 'epoch': 1.88}          
 62%|██████████████████████████▉                | 45/72 [04:57<06:00, 13.33s/it]
                                                                                
{'eval_loss': 0.7622343897819519, 'eval_runtime': 4.0149, 'eval_samples_per_second': 1.993, 'eval_steps_per_second': 0.249, 'epoch': 1.88}
 62%|██████████████████████████▉                | 45/72 [05:01<06:00, 13.33s/it]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  7.40it/s]
{'loss': 0.7289, 'learning_rate': 7.493474677412794e-05, 'epoch': 1.92}         
{'loss': 0.7027, 'learning_rate': 7.006368770266421e-05, 'epoch': 1.96}         
{'loss': 0.7396, 'learning_rate': 6.526947471551798e-05, 'epoch': 2.0}          
 67%|████████████████████████████▋              | 48/72 [07:49<11:07, 27.80s/it]

Below are ad hoc cells handling issues during training

In [18]:

!apt install lsof

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  lsof
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 253 kB of archives.
After this operation, 458 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 lsof amd64 4.93.2+dfsg-1.1build2 [253 kB]
Fetched 253 kB in 1s (364 kB/s)0m
debconf: delaying package configuration, since apt-utils is not installed

78Selecting previously unselected package lsof.
(Reading database ... 21634 files and directories currently installed.)
Preparing to unpack .../lsof_4.93.2+dfsg-1.1build2_amd64.deb ...
7Progress: [  0%] [..........................................................] 87Progress: [ 20%] [###########...............................................] 8Unpacking lsof (4.93.2+dfsg-1.1build2) ...
7Progress: [ 40%] [#######################...................................] 8Setting up lsof (4.93.2+dfsg-1.1build2) ...
7Progress: [ 60%] [##################################........................] 87Progress: [ 80%] [##############################################............] 8
78

In [19]:

!lsof /dev/nvidia*

COMMAND    PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
docker-in    1 root    0u   CHR     1,3      0t0    6 /dev/null
bash         7 root    0u   CHR     1,3      0t0    6 /dev/null
sshd        19 root    0u   CHR     1,3      0t0    6 /dev/null
sshd        19 root    1u   CHR     1,3      0t0    6 /dev/null
sshd        19 root    2u   CHR     1,3      0t0    6 /dev/null
jupyter-l 2308 root    0r   CHR     1,3      0t0    6 /dev/null
jupyter-l 2308 root   12r   CHR     1,9      0t0   11 /dev/urandom
python3   2541 root    4r   CHR     1,9      0t0   11 /dev/urandom
python3   2947 root  mem    CHR 195,255           472 /dev/nvidiactl
python3   2947 root  mem    CHR   195,0           473 /dev/nvidia0
python3   2947 root  mem    CHR   234,0           481 /dev/nvidia-uvm
python3   2947 root    3r   CHR     1,9      0t0   11 /dev/urandom
python3   2947 root  132u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   2947 root  133u   CHR   234,0      0t0  481 /dev/nvidia-uvm
python3   2947 root  134u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  135u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  136u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  139u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   2947 root  140u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  141u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  142u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  145u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  147u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  148u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  149u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  151u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  152u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  153u   CHR   195,0      0t0  473 /dev/nvidia0
python3   2947 root  154u   CHR   195,0      0t0  473 /dev/nvidia0
python3   3545 root    4r   CHR     1,9      0t0   11 /dev/urandom
python3   4493 root  mem    CHR 195,255           472 /dev/nvidiactl
python3   4493 root  mem    CHR   195,0           473 /dev/nvidia0
python3   4493 root  mem    CHR   234,0           481 /dev/nvidia-uvm
python3   4493 root    3r   CHR     1,9      0t0   11 /dev/urandom
python3   4493 root  132u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   4493 root  133u   CHR   234,0      0t0  481 /dev/nvidia-uvm
python3   4493 root  134u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  135u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  136u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  139u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   4493 root  140u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  141u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  142u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  145u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  146u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  147u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  148u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  150u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  151u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  152u   CHR   195,0      0t0  473 /dev/nvidia0
python3   4493 root  153u   CHR   195,0      0t0  473 /dev/nvidia0
sh        4950 root   10u   CHR     5,0      0t0   13 /dev/tty
python3   5051 root  mem    CHR 195,255           472 /dev/nvidiactl
python3   5051 root  mem    CHR   195,0           473 /dev/nvidia0
python3   5051 root  mem    CHR   234,0           481 /dev/nvidia-uvm
python3   5051 root    3r   CHR     1,9      0t0   11 /dev/urandom
python3   5051 root  132u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   5051 root  133u   CHR   234,0      0t0  481 /dev/nvidia-uvm
python3   5051 root  134u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  135u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  136u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  139u   CHR 195,255      0t0  472 /dev/nvidiactl
python3   5051 root  140u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  141u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  142u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  145u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  146u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  147u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  148u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  150u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  151u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  152u   CHR   195,0      0t0  473 /dev/nvidia0
python3   5051 root  153u   CHR   195,0      0t0  473 /dev/nvidia0
tmux:\x20 5801 root    0u   CHR     1,3      0t0    6 /dev/null
tmux:\x20 5801 root    1u   CHR     1,3      0t0    6 /dev/null
tmux:\x20 5801 root    2u   CHR     1,3      0t0    6 /dev/null
nvitop    5817 root    3u   CHR 195,255      0t0  472 /dev/nvidiactl
nvitop    5817 root    4u   CHR   195,0      0t0  473 /dev/nvidia0
nvitop    5817 root    5u   CHR   195,0      0t0  473 /dev/nvidia0
nvitop    5817 root    6u   CHR   195,0      0t0  473 /dev/nvidia0

In [13]:

!ps aux|grep python|grep finetune|awk '{print $2}'|xargs kill -9

In [8]:

!kill -9 2960 

In [9]:

!ps aux|grep python

root        2353  0.7  0.0 576260 110108 ?       Sl   12:51   0:05 /root/miniconda3/envs/py3.9/bin/python3 /root/miniconda3/envs/py3.9/bin/jupyter-lab --allow-root --no-browser --port=8888 --ip=* --ServerApp.token=sc --ServerApp.allow_origin=* --ServerApp.preferred_dir=/workspace/
root        2636  1.6  0.0 770824 63020 ?        Ssl  12:52   0:12 /root/miniconda3/envs/py3.9/bin/python3 -m ipykernel_launcher -f /root/.local/share/jupyter/runtime/kernel-b2638c7c-467b-4866-a969-c97f1b037796.json
root        3776  3.5  0.0 316080 90152 pts/2    Sl+  12:55   0:19 /root/miniconda3/envs/py3.9/bin/python3 /root/miniconda3/envs/py3.9/bin/nvitop -m full
root        5019  0.0  0.0   2880   952 pts/3    Ss+  13:04   0:00 /usr/bin/sh -c ps aux|grep python
root        5022  0.0  0.0   3836  1968 pts/3    S+   13:04   0:00 grep python

In [ ]:

!pwd

In [ ]:

!apt install zip
!zip -r last_run_prepared.zip -xi last_run_prepared

!pip install nvitop

In [ ]:

!nvitop -m full