From 245fe47ece22a4b7822449b126715aaa8ec25aba Mon Sep 17 00:00:00 2001 From: hiyouga Date: Fri, 3 May 2024 02:58:23 +0800 Subject: [PATCH] update webui and add CLIs --- Dockerfile | 2 +- README.md | 6 +- README_zh.md | 6 +- examples/extras/badam/sft.sh | 2 +- examples/extras/fsdp_qlora/sft.sh | 2 +- examples/extras/galore/sft.sh | 2 +- examples/extras/llama_pro/sft.sh | 2 +- examples/extras/loraplus/sft.sh | 2 +- examples/extras/mod/sft.sh | 2 +- examples/full_multi_gpu/multi_node.sh | 2 +- examples/full_multi_gpu/predict.sh | 2 +- examples/full_multi_gpu/single_node.sh | 2 +- examples/inference/api_demo.sh | 2 +- examples/inference/cli_demo.sh | 2 +- examples/inference/evaluate.sh | 2 +- examples/inference/web_demo.sh | 2 +- examples/lora_multi_gpu/ds_zero3.sh | 3 +- examples/lora_multi_gpu/multi_node.sh | 2 +- examples/lora_multi_gpu/single_node.sh | 2 +- examples/lora_single_gpu/dpo.sh | 2 +- examples/lora_single_gpu/orpo.sh | 2 +- examples/lora_single_gpu/ppo.sh | 2 +- examples/lora_single_gpu/predict.sh | 2 +- examples/lora_single_gpu/prepare.sh | 2 +- examples/lora_single_gpu/pretrain.sh | 2 +- examples/lora_single_gpu/reward.sh | 2 +- examples/lora_single_gpu/sft.sh | 2 +- examples/lora_single_gpu/sft_mllm.sh | 2 +- examples/merge_lora/merge.sh | 2 +- examples/merge_lora/quantize.sh | 2 +- examples/qlora_single_gpu/aqlm.sh | 2 +- examples/qlora_single_gpu/awq.sh | 2 +- examples/qlora_single_gpu/bitsandbytes.sh | 2 +- examples/qlora_single_gpu/gptq.sh | 2 +- requirements.txt | 1 + setup.py | 1 + src/api_demo.py | 16 --- src/cli_demo.py | 49 -------- src/evaluate.py | 9 -- src/export_model.py | 9 -- src/llmtuner/__init__.py | 10 +- src/llmtuner/api/__init__.py | 4 - src/llmtuner/api/app.py | 3 +- src/llmtuner/chat/chat_model.py | 43 +++++++ src/llmtuner/cli.py | 39 ++++++ src/llmtuner/eval/__init__.py | 4 - src/llmtuner/eval/evaluator.py | 2 +- src/llmtuner/extras/callbacks.py | 145 ++++++++++------------ src/llmtuner/extras/constants.py | 8 +- src/llmtuner/extras/logging.py | 34 +++-- src/llmtuner/extras/ploting.py | 25 +++- src/llmtuner/hparams/parser.py | 4 +- src/llmtuner/train/__init__.py | 4 - src/llmtuner/train/tuner.py | 8 +- src/llmtuner/webui/__init__.py | 4 - src/llmtuner/webui/common.py | 11 +- src/llmtuner/webui/components/export.py | 2 +- src/llmtuner/webui/components/train.py | 6 +- src/llmtuner/webui/engine.py | 4 +- src/llmtuner/webui/interface.py | 6 +- src/llmtuner/webui/runner.py | 77 +++++------- src/llmtuner/webui/utils.py | 108 ++++++++-------- src/{train_bash.py => train.py} | 4 +- src/train_web.py | 9 -- src/web_demo.py | 9 -- 65 files changed, 363 insertions(+), 372 deletions(-) delete mode 100644 src/api_demo.py delete mode 100644 src/cli_demo.py delete mode 100644 src/evaluate.py delete mode 100644 src/export_model.py create mode 100644 src/llmtuner/cli.py rename src/{train_bash.py => train.py} (67%) delete mode 100644 src/train_web.py delete mode 100644 src/web_demo.py diff --git a/Dockerfile b/Dockerfile index c3d231b5..4b8bb084 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,4 +11,4 @@ RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen] VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] EXPOSE 7860 -CMD [ "python", "src/train_web.py" ] +CMD [ "llamafactory-cli webui" ] diff --git a/README.md b/README.md index 04e5aa5b..8caac93f 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,7 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec ```bash export CUDA_VISIBLE_DEVICES=0 # `set CUDA_VISIBLE_DEVICES=0` 
for Windows export GRADIO_SERVER_PORT=7860 # `set GRADIO_SERVER_PORT=7860` for Windows -python src/train_web.py # or python -m llmtuner.webui.interface +llamafactory-cli webui ```
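
For readers migrating from the removed launcher scripts, the dispatcher added in `src/llmtuner/cli.py` routes each subcommand to the function the old entry points used to call. A rough mapping is sketched below (flags are passed through unchanged; only the subcommand names are shown, not their arguments):

```bash
llamafactory-cli api      # replaces src/api_demo.py      (run_api)
llamafactory-cli chat     # replaces src/cli_demo.py      (run_chat)
llamafactory-cli eval     # replaces src/evaluate.py      (run_eval)
llamafactory-cli export   # replaces src/export_model.py  (export_model)
llamafactory-cli train    # replaces src/train_bash.py    (run_exp)
llamafactory-cli webchat  # replaces src/web_demo.py      (run_web_demo)
llamafactory-cli webui    # replaces src/train_web.py     (run_web_ui)
```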
For Alibaba Cloud users @@ -392,12 +392,12 @@ docker compose -f ./docker-compose.yml up -d See [examples/README.md](examples/README.md) for usage. -Use `python src/train_bash.py -h` to display arguments description. +Use `llamafactory-cli train -h` to display arguments description. ### Deploy with OpenAI-style API and vLLM ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 python src/api_demo.py \ +CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api \ --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ --template llama3 \ --infer_backend vllm \ diff --git a/README_zh.md b/README_zh.md index 2240c688..27522232 100644 --- a/README_zh.md +++ b/README_zh.md @@ -346,7 +346,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl ```bash export CUDA_VISIBLE_DEVICES=0 # Windows 使用 `set CUDA_VISIBLE_DEVICES=0` export GRADIO_SERVER_PORT=7860 # Windows 使用 `set GRADIO_SERVER_PORT=7860` -python src/train_web.py # 或 python -m llmtuner.webui.interface +llamafactory-cli webui ```
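
Once the API server from the deployment section above is running, it can be smoke-tested with a plain HTTP request. A minimal sketch, assuming the app exposes the usual OpenAI-compatible `/v1/chat/completions` route; the interactive docs printed at startup (`http://localhost:8000/docs`) list the exact schema:

```bash
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [{"role": "user", "content": "Hello"}]
  }'
```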
阿里云用户指南 @@ -392,12 +392,12 @@ docker compose -f ./docker-compose.yml up -d 使用方法请参考 [examples/README_zh.md](examples/README_zh.md)。 -您可以执行 `python src/train_bash.py -h` 来查看参数文档。 +您可以执行 `llamafactory-cli train -h` 来查看参数文档。 ### 利用 vLLM 部署 OpenAI API ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 python src/api_demo.py \ +CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api \ --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ --template llama3 \ --infer_backend vllm \ diff --git a/examples/extras/badam/sft.sh b/examples/extras/badam/sft.sh index c2319caa..61167dad 100644 --- a/examples/extras/badam/sft.sh +++ b/examples/extras/badam/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/extras/fsdp_qlora/sft.sh b/examples/extras/fsdp_qlora/sft.sh index e8b9ece7..9eb70a53 100644 --- a/examples/extras/fsdp_qlora/sft.sh +++ b/examples/extras/fsdp_qlora/sft.sh @@ -7,7 +7,7 @@ pip install "bitsandbytes>=0.43.0" CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ --config_file ../../accelerate/fsdp_config.yaml \ - ../../../src/train_bash.py \ + ../../../src/train.py \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-70b-hf \ diff --git a/examples/extras/galore/sft.sh b/examples/extras/galore/sft.sh index da1779ed..283673e7 100644 --- a/examples/extras/galore/sft.sh +++ b/examples/extras/galore/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/extras/llama_pro/sft.sh b/examples/extras/llama_pro/sft.sh index 573078ff..3e26e0a6 100644 --- a/examples/extras/llama_pro/sft.sh +++ b/examples/extras/llama_pro/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path ../../../models/llama2-7b-pro \ diff --git a/examples/extras/loraplus/sft.sh b/examples/extras/loraplus/sft.sh index cb334e7d..8d152d9e 100644 --- a/examples/extras/loraplus/sft.sh +++ b/examples/extras/loraplus/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/extras/mod/sft.sh b/examples/extras/mod/sft.sh index 2c8f04a3..5219751f 100644 --- a/examples/extras/mod/sft.sh +++ b/examples/extras/mod/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh index d1382bc2..a1ffc0ee 100644 --- a/examples/full_multi_gpu/multi_node.sh +++ b/examples/full_multi_gpu/multi_node.sh @@ -6,7 +6,7 @@ python -m torch.distributed.run \ --node_rank $RANK \ --master_addr $MASTER_ADDR \ --master_port $MASTER_PORT \ - ../../src/train_bash.py \ + ../../src/train.py \ --deepspeed ../deepspeed/ds_z3_config.json \ --stage sft \ --do_train \ diff --git a/examples/full_multi_gpu/predict.sh b/examples/full_multi_gpu/predict.sh index 801df85a..7c2e458f 100644 --- a/examples/full_multi_gpu/predict.sh +++ 
b/examples/full_multi_gpu/predict.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ --config_file ../accelerate/single_config.yaml \ - ../../src/train_bash.py \ + ../../src/train.py \ --stage sft \ --do_predict \ --model_name_or_path ../../saves/LLaMA2-7B/full/sft \ diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh index ea4acf90..73c7662d 100644 --- a/examples/full_multi_gpu/single_node.sh +++ b/examples/full_multi_gpu/single_node.sh @@ -1,6 +1,6 @@ #!/bin/bash -deepspeed --num_gpus 4 ../../src/train_bash.py \ +deepspeed --num_gpus 4 ../../src/train.py \ --deepspeed ../deepspeed/ds_z3_config.json \ --stage sft \ --do_train \ diff --git a/examples/inference/api_demo.sh b/examples/inference/api_demo.sh index aee86595..6f0f1b2e 100644 --- a/examples/inference/api_demo.sh +++ b/examples/inference/api_demo.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python ../../src/api_demo.py \ +CUDA_VISIBLE_DEVICES=0 API_PORT=8000 llamafactory-cli api \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --template default \ diff --git a/examples/inference/cli_demo.sh b/examples/inference/cli_demo.sh index 3e4a1e4e..bc762411 100644 --- a/examples/inference/cli_demo.sh +++ b/examples/inference/cli_demo.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/cli_demo.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --template default \ diff --git a/examples/inference/evaluate.sh b/examples/inference/evaluate.sh index 1fc6ccf8..5030329d 100644 --- a/examples/inference/evaluate.sh +++ b/examples/inference/evaluate.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/evaluate.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --template fewshot \ diff --git a/examples/inference/web_demo.sh b/examples/inference/web_demo.sh index 8d6ed09d..a58cd2a0 100644 --- a/examples/inference/web_demo.sh +++ b/examples/inference/web_demo.sh @@ -1,7 +1,7 @@ #!/bin/bash # add `--visual_inputs True` to load MLLM -CUDA_VISIBLE_DEVICES=0 python ../../src/web_demo.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --template default \ diff --git a/examples/lora_multi_gpu/ds_zero3.sh b/examples/lora_multi_gpu/ds_zero3.sh index f429d15b..bc74a6de 100644 --- a/examples/lora_multi_gpu/ds_zero3.sh +++ b/examples/lora_multi_gpu/ds_zero3.sh @@ -1,6 +1,7 @@ #!/bin/bash +# ZeRO-3 enables weight sharding on multiple GPUs -deepspeed --num_gpus 4 ../../src/train_bash.py \ +deepspeed --num_gpus 4 ../../src/train.py \ --deepspeed ../deepspeed/ds_z3_config.json \ --stage sft \ --do_train \ diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh index 85a3e026..a58cac20 100644 --- a/examples/lora_multi_gpu/multi_node.sh +++ b/examples/lora_multi_gpu/multi_node.sh @@ -3,7 +3,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ --config_file ../accelerate/master_config.yaml \ - ../../src/train_bash.py \ + ../../src/train.py \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh index 04529cf0..c0719c04 100644 --- 
a/examples/lora_multi_gpu/single_node.sh +++ b/examples/lora_multi_gpu/single_node.sh @@ -2,7 +2,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ --config_file ../accelerate/single_config.yaml \ - ../../src/train_bash.py \ + ../../src/train.py \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/dpo.sh b/examples/lora_single_gpu/dpo.sh index 56a2dfc3..2cb6cb01 100644 --- a/examples/lora_single_gpu/dpo.sh +++ b/examples/lora_single_gpu/dpo.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage dpo \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/orpo.sh b/examples/lora_single_gpu/orpo.sh index 407907b1..335707bf 100644 --- a/examples/lora_single_gpu/orpo.sh +++ b/examples/lora_single_gpu/orpo.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage orpo \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/ppo.sh b/examples/lora_single_gpu/ppo.sh index 6a5b770e..9eccb05e 100644 --- a/examples/lora_single_gpu/ppo.sh +++ b/examples/lora_single_gpu/ppo.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage ppo \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/predict.sh b/examples/lora_single_gpu/predict.sh index eb9a18c0..250efed1 100644 --- a/examples/lora_single_gpu/predict.sh +++ b/examples/lora_single_gpu/predict.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_predict \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/prepare.sh b/examples/lora_single_gpu/prepare.sh index e86de636..277f9b7a 100644 --- a/examples/lora_single_gpu/prepare.sh +++ b/examples/lora_single_gpu/prepare.sh @@ -1,7 +1,7 @@ #!/bin/bash # use `--tokenized_path` in training script to load data -CUDA_VISIBLE_DEVICES= python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES= llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/pretrain.sh b/examples/lora_single_gpu/pretrain.sh index 59bdfe62..0782f00c 100644 --- a/examples/lora_single_gpu/pretrain.sh +++ b/examples/lora_single_gpu/pretrain.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage pt \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/reward.sh b/examples/lora_single_gpu/reward.sh index 1212d082..678809fd 100644 --- a/examples/lora_single_gpu/reward.sh +++ b/examples/lora_single_gpu/reward.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage rm \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/sft.sh b/examples/lora_single_gpu/sft.sh index 3bfbc9b8..2047e21f 100644 --- a/examples/lora_single_gpu/sft.sh +++ b/examples/lora_single_gpu/sft.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft 
\ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/lora_single_gpu/sft_mllm.sh b/examples/lora_single_gpu/sft_mllm.sh index 7e900918..53e37262 100644 --- a/examples/lora_single_gpu/sft_mllm.sh +++ b/examples/lora_single_gpu/sft_mllm.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path llava-hf/llava-1.5-7b-hf \ diff --git a/examples/merge_lora/merge.sh b/examples/merge_lora/merge.sh index c50bd6ad..186e64a4 100644 --- a/examples/merge_lora/merge.sh +++ b/examples/merge_lora/merge.sh @@ -1,7 +1,7 @@ #!/bin/bash # DO NOT use quantized model or quantization_bit when merging lora weights -CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli export \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --template default \ diff --git a/examples/merge_lora/quantize.sh b/examples/merge_lora/quantize.sh index aeedbe66..4a104645 100644 --- a/examples/merge_lora/quantize.sh +++ b/examples/merge_lora/quantize.sh @@ -1,7 +1,7 @@ #!/bin/bash # NEED TO run `merge.sh` before using this script -CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli export \ --model_name_or_path ../../models/llama2-7b-sft \ --template default \ --export_dir ../../models/llama2-7b-sft-int4 \ diff --git a/examples/qlora_single_gpu/aqlm.sh b/examples/qlora_single_gpu/aqlm.sh index 68eb4482..1e0a71ca 100644 --- a/examples/qlora_single_gpu/aqlm.sh +++ b/examples/qlora_single_gpu/aqlm.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path BlackSamorez/Llama-2-7b-AQLM-2Bit-1x16-hf \ diff --git a/examples/qlora_single_gpu/awq.sh b/examples/qlora_single_gpu/awq.sh index b0f1f46b..c13c8134 100644 --- a/examples/qlora_single_gpu/awq.sh +++ b/examples/qlora_single_gpu/awq.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path TheBloke/Llama-2-7B-AWQ \ diff --git a/examples/qlora_single_gpu/bitsandbytes.sh b/examples/qlora_single_gpu/bitsandbytes.sh index 84bbb426..27f48d41 100644 --- a/examples/qlora_single_gpu/bitsandbytes.sh +++ b/examples/qlora_single_gpu/bitsandbytes.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path meta-llama/Llama-2-7b-hf \ diff --git a/examples/qlora_single_gpu/gptq.sh b/examples/qlora_single_gpu/gptq.sh index a971b09f..5b1b80e1 100644 --- a/examples/qlora_single_gpu/gptq.sh +++ b/examples/qlora_single_gpu/gptq.sh @@ -1,6 +1,6 @@ #!/bin/bash -CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train \ --stage sft \ --do_train \ --model_name_or_path TheBloke/Llama-2-7B-GPTQ \ diff --git a/requirements.txt b/requirements.txt index ecba3ce1..f4818ed2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ sse-starlette matplotlib fire packaging +pyyaml diff --git a/setup.py b/setup.py index 6a03138d..f7589eb8 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def main(): python_requires=">=3.8.0", install_requires=get_requires(), extras_require=extra_require, + 
entry_points={"console_scripts": ["llamafactory-cli = llmtuner.cli:main"]}, classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", diff --git a/src/api_demo.py b/src/api_demo.py deleted file mode 100644 index a7140675..00000000 --- a/src/api_demo.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - -import uvicorn - -from llmtuner import ChatModel, create_app - - -def main(): - chat_model = ChatModel() - app = create_app(chat_model) - print("Visit http://localhost:{}/docs for API document.".format(os.environ.get("API_PORT", 8000))) - uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1) - - -if __name__ == "__main__": - main() diff --git a/src/cli_demo.py b/src/cli_demo.py deleted file mode 100644 index ba828f51..00000000 --- a/src/cli_demo.py +++ /dev/null @@ -1,49 +0,0 @@ -from llmtuner import ChatModel -from llmtuner.extras.misc import torch_gc - - -try: - import platform - - if platform.system() != "Windows": - import readline # noqa: F401 -except ImportError: - print("Install `readline` for a better experience.") - - -def main(): - chat_model = ChatModel() - messages = [] - print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.") - - while True: - try: - query = input("\nUser: ") - except UnicodeDecodeError: - print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.") - continue - except Exception: - raise - - if query.strip() == "exit": - break - - if query.strip() == "clear": - messages = [] - torch_gc() - print("History has been removed.") - continue - - messages.append({"role": "user", "content": query}) - print("Assistant: ", end="", flush=True) - - response = "" - for new_text in chat_model.stream_chat(messages): - print(new_text, end="", flush=True) - response += new_text - print() - messages.append({"role": "assistant", "content": response}) - - -if __name__ == "__main__": - main() diff --git a/src/evaluate.py b/src/evaluate.py deleted file mode 100644 index 705a6e42..00000000 --- a/src/evaluate.py +++ /dev/null @@ -1,9 +0,0 @@ -from llmtuner import Evaluator - - -def main(): - Evaluator().eval() - - -if __name__ == "__main__": - main() diff --git a/src/export_model.py b/src/export_model.py deleted file mode 100644 index 4baeb2c3..00000000 --- a/src/export_model.py +++ /dev/null @@ -1,9 +0,0 @@ -from llmtuner import export_model - - -def main(): - export_model() - - -if __name__ == "__main__": - main() diff --git a/src/llmtuner/__init__.py b/src/llmtuner/__init__.py index b3a980a5..a3a97450 100644 --- a/src/llmtuner/__init__.py +++ b/src/llmtuner/__init__.py @@ -1,11 +1,3 @@ # Level: api, webui > chat, eval, train > data, model > extras, hparams -from .api import create_app -from .chat import ChatModel -from .eval import Evaluator -from .train import export_model, run_exp -from .webui import create_ui, create_web_demo - - -__version__ = "0.7.0" -__all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"] +__version__ = "0.7.1.dev0" diff --git a/src/llmtuner/api/__init__.py b/src/llmtuner/api/__init__.py index d7059fbd..e69de29b 100644 --- a/src/llmtuner/api/__init__.py +++ b/src/llmtuner/api/__init__.py @@ -1,4 +0,0 @@ -from .app import create_app - - -__all__ = ["create_app"] diff --git a/src/llmtuner/api/app.py b/src/llmtuner/api/app.py index 3f06fef1..36918d1b 100644 --- a/src/llmtuner/api/app.py +++ b/src/llmtuner/api/app.py @@ -224,7 +224,8 @@ def create_app(chat_model: "ChatModel") 
-> "FastAPI": return app -if __name__ == "__main__": +def run_api(): chat_model = ChatModel() app = create_app(chat_model) + print("Visit http://localhost:{}/docs for API document.".format(os.environ.get("API_PORT", 8000))) uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1) diff --git a/src/llmtuner/chat/chat_model.py b/src/llmtuner/chat/chat_model.py index ba58dd2e..97ae87d7 100644 --- a/src/llmtuner/chat/chat_model.py +++ b/src/llmtuner/chat/chat_model.py @@ -2,6 +2,7 @@ import asyncio from threading import Thread from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence +from ..extras.misc import torch_gc from ..hparams import get_infer_args from .hf_engine import HuggingfaceEngine from .vllm_engine import VllmEngine @@ -95,3 +96,45 @@ class ChatModel: **input_kwargs, ) -> List[float]: return await self.engine.get_scores(batch_input, **input_kwargs) + + +def run_chat(): + try: + import platform + + if platform.system() != "Windows": + import readline # noqa: F401 + except ImportError: + print("Install `readline` for a better experience.") + + chat_model = ChatModel() + messages = [] + print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.") + + while True: + try: + query = input("\nUser: ") + except UnicodeDecodeError: + print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.") + continue + except Exception: + raise + + if query.strip() == "exit": + break + + if query.strip() == "clear": + messages = [] + torch_gc() + print("History has been removed.") + continue + + messages.append({"role": "user", "content": query}) + print("Assistant: ", end="", flush=True) + + response = "" + for new_text in chat_model.stream_chat(messages): + print(new_text, end="", flush=True) + response += new_text + print() + messages.append({"role": "assistant", "content": response}) diff --git a/src/llmtuner/cli.py b/src/llmtuner/cli.py new file mode 100644 index 00000000..1b5bd658 --- /dev/null +++ b/src/llmtuner/cli.py @@ -0,0 +1,39 @@ +import sys +from enum import Enum, unique + +from .api.app import run_api +from .chat.chat_model import run_chat +from .eval.evaluator import run_eval +from .train.tuner import export_model, run_exp +from .webui.interface import run_web_demo, run_web_ui + + +@unique +class Command(str, Enum): + API = "api" + CHAT = "chat" + EVAL = "eval" + EXPORT = "export" + TRAIN = "train" + WEBDEMO = "webchat" + WEBUI = "webui" + + +def main(): + command = sys.argv.pop(1) + if command == Command.API: + run_api() + elif command == Command.CHAT: + run_chat() + elif command == Command.EVAL: + run_eval() + elif command == Command.EXPORT: + export_model() + elif command == Command.TRAIN: + run_exp() + elif command == Command.WEBDEMO: + run_web_demo() + elif command == Command.WEBUI: + run_web_ui() + else: + raise NotImplementedError("Unknown command: {}".format(command)) diff --git a/src/llmtuner/eval/__init__.py b/src/llmtuner/eval/__init__.py index 95ce0377..e69de29b 100644 --- a/src/llmtuner/eval/__init__.py +++ b/src/llmtuner/eval/__init__.py @@ -1,4 +0,0 @@ -from .evaluator import Evaluator - - -__all__ = ["Evaluator"] diff --git a/src/llmtuner/eval/evaluator.py b/src/llmtuner/eval/evaluator.py index 7446c6f5..4ea134c6 100644 --- a/src/llmtuner/eval/evaluator.py +++ b/src/llmtuner/eval/evaluator.py @@ -118,6 +118,6 @@ class Evaluator: f.write(score_info) -if __name__ == "__main__": +def run_eval(): evaluator = Evaluator() 
evaluator.eval() diff --git a/src/llmtuner/extras/callbacks.py b/src/llmtuner/extras/callbacks.py index 6e347c3c..fbe6f373 100644 --- a/src/llmtuner/extras/callbacks.py +++ b/src/llmtuner/extras/callbacks.py @@ -1,14 +1,18 @@ import json +import logging import os +import signal import time +from concurrent.futures import ThreadPoolExecutor from datetime import timedelta -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict +import transformers from transformers import TrainerCallback from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, has_length -from .constants import LOG_FILE_NAME -from .logging import get_logger +from .constants import TRAINER_LOG +from .logging import LoggerHandler, get_logger from .misc import fix_valuehead_checkpoint @@ -33,20 +37,32 @@ class FixValueHeadModelCallback(TrainerCallback): class LogCallback(TrainerCallback): - def __init__(self, runner=None): - self.runner = runner - self.in_training = False + def __init__(self, output_dir: str) -> None: + self.aborted = False + self.do_train = False + self.webui_mode = bool(int(os.environ.get("LLAMABOARD_ENABLED", "0"))) + if self.webui_mode: + signal.signal(signal.SIGABRT, self._set_abort) + self.logger_handler = LoggerHandler(output_dir) + logging.root.addHandler(self.logger_handler) + transformers.logging.add_handler(self.logger_handler) + + def _set_abort(self, signum, frame) -> None: + self.aborted = True + + def _reset(self, max_steps: int = 0) -> None: self.start_time = time.time() self.cur_steps = 0 - self.max_steps = 0 + self.max_steps = max_steps self.elapsed_time = "" self.remaining_time = "" - def timing(self): + def _timing(self, cur_steps: int) -> None: cur_time = time.time() elapsed_time = cur_time - self.start_time - avg_time_per_step = elapsed_time / self.cur_steps if self.cur_steps != 0 else 0 - remaining_time = (self.max_steps - self.cur_steps) * avg_time_per_step + avg_time_per_step = elapsed_time / cur_steps if cur_steps != 0 else 0 + remaining_time = (self.max_steps - cur_steps) * avg_time_per_step + self.cur_steps = cur_steps self.elapsed_time = str(timedelta(seconds=int(elapsed_time))) self.remaining_time = str(timedelta(seconds=int(remaining_time))) @@ -54,36 +70,27 @@ class LogCallback(TrainerCallback): r""" Event called at the beginning of training. """ - if state.is_local_process_zero: - self.in_training = True - self.start_time = time.time() - self.max_steps = state.max_steps + if args.should_log: + self.do_train = True + self._reset(max_steps=state.max_steps) - if args.save_on_each_node: - if not state.is_local_process_zero: - return - else: - if not state.is_world_process_zero: - return + if args.should_save: + os.makedirs(args.output_dir, exist_ok=True) + self.thread_pool = ThreadPoolExecutor(max_workers=1) - if os.path.exists(os.path.join(args.output_dir, LOG_FILE_NAME)) and args.overwrite_output_dir: - logger.warning("Previous log file in this folder will be deleted.") - os.remove(os.path.join(args.output_dir, LOG_FILE_NAME)) - - def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of training. 
- """ - if state.is_local_process_zero: - self.in_training = False - self.cur_steps = 0 - self.max_steps = 0 + if ( + args.should_save + and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG)) + and args.overwrite_output_dir + ): + logger.warning("Previous trainer log in this folder will be deleted.") + os.remove(os.path.join(args.output_dir, TRAINER_LOG)) def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" Event called at the end of an substep during gradient accumulation. """ - if state.is_local_process_zero and self.runner is not None and self.runner.aborted: + if self.aborted: control.should_epoch_stop = True control.should_training_stop = True @@ -91,42 +98,41 @@ class LogCallback(TrainerCallback): r""" Event called at the end of a training step. """ - if state.is_local_process_zero: - self.cur_steps = state.global_step - self.timing() - if self.runner is not None and self.runner.aborted: - control.should_epoch_stop = True - control.should_training_stop = True + if args.should_log: + self._timing(cur_steps=state.global_step) - def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + if self.aborted: + control.should_epoch_stop = True + control.should_training_stop = True + + def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" - Event called after an evaluation phase. + Event called at the end of training. """ - if state.is_local_process_zero and not self.in_training: - self.cur_steps = 0 - self.max_steps = 0 + self.thread_pool.shutdown(wait=True) + self.thread_pool = None - def on_predict( - self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", *other, **kwargs + def on_prediction_step( + self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs ): r""" - Event called after a successful prediction. + Event called after a prediction step. """ - if state.is_local_process_zero and not self.in_training: - self.cur_steps = 0 - self.max_steps = 0 + eval_dataloader = kwargs.pop("eval_dataloader", None) + if args.should_log and has_length(eval_dataloader) and not self.do_train: + if self.max_steps == 0: + self.max_steps = len(eval_dataloader) + + self._timing(cur_steps=self.cur_steps + 1) + + def _write_log(self, output_dir: str, logs: Dict[str, Any]): + with open(os.path.join(output_dir, TRAINER_LOG), "a", encoding="utf-8") as f: + f.write(json.dumps(logs) + "\n") def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs) -> None: r""" - Event called after logging the last logs. + Event called after logging the last logs, `args.should_log` has been applied. 
""" - if args.save_on_each_node: - if not state.is_local_process_zero: - return - else: - if not state.is_world_process_zero: - return - logs = dict( current_steps=self.cur_steps, total_steps=self.max_steps, @@ -141,26 +147,13 @@ class LogCallback(TrainerCallback): elapsed_time=self.elapsed_time, remaining_time=self.remaining_time, ) - if self.runner is not None: + logs = {k: v for k, v in logs.items() if v is not None} + if self.webui_mode and "loss" in logs and "learning_rate" in logs and "epoch" in logs: logger.info( "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format( - logs["loss"] or 0, logs["learning_rate"] or 0, logs["epoch"] or 0 + logs["loss"], logs["learning_rate"], logs["epoch"] ) ) - os.makedirs(args.output_dir, exist_ok=True) - with open(os.path.join(args.output_dir, "trainer_log.jsonl"), "a", encoding="utf-8") as f: - f.write(json.dumps(logs) + "\n") - - def on_prediction_step( - self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs - ): - r""" - Event called after a prediction step. - """ - eval_dataloader = kwargs.pop("eval_dataloader", None) - if state.is_local_process_zero and has_length(eval_dataloader) and not self.in_training: - if self.max_steps == 0: - self.max_steps = len(eval_dataloader) - self.cur_steps += 1 - self.timing() + if args.should_save and self.thread_pool is not None: + self.thread_pool.submit(self._write_log, args.output_dir, logs) diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 0329b374..bf542e69 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -24,8 +24,6 @@ IGNORE_INDEX = -100 LAYERNORM_NAMES = {"norm", "ln"} -LOG_FILE_NAME = "trainer_log.jsonl" - METHODS = ["full", "freeze", "lora"] MLLM_LIST = ["LLaVA1.5"] @@ -34,10 +32,16 @@ MOD_SUPPORTED_MODELS = ["bloom", "falcon", "gemma", "llama", "mistral", "mixtral PEFT_METHODS = ["lora"] +RUNNING_LOG = "running_log.txt" + SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() +TRAINER_CONFIG = "trainer_config.yaml" + +TRAINER_LOG = "trainer_log.jsonl" + TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", "Reward Modeling": "rm", diff --git a/src/llmtuner/extras/logging.py b/src/llmtuner/extras/logging.py index bb270776..430b8a48 100644 --- a/src/llmtuner/extras/logging.py +++ b/src/llmtuner/extras/logging.py @@ -1,5 +1,9 @@ import logging +import os import sys +from concurrent.futures import ThreadPoolExecutor + +from .constants import RUNNING_LOG class LoggerHandler(logging.Handler): @@ -7,19 +11,35 @@ class LoggerHandler(logging.Handler): Logger handler used in Web UI. 
""" - def __init__(self): + def __init__(self, output_dir: str) -> None: super().__init__() - self.log = "" + formatter = logging.Formatter( + fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S" + ) + self.setLevel(logging.INFO) + self.setFormatter(formatter) - def reset(self): - self.log = "" + os.makedirs(output_dir, exist_ok=True) + self.running_log = os.path.join(output_dir, RUNNING_LOG) + if os.path.exists(self.running_log): + os.remove(self.running_log) - def emit(self, record): + self.thread_pool = ThreadPoolExecutor(max_workers=1) + + def _write_log(self, log_entry: str) -> None: + with open(self.running_log, "a", encoding="utf-8") as f: + f.write(log_entry + "\n\n") + + def emit(self, record) -> None: if record.name == "httpx": return + log_entry = self.format(record) - self.log += log_entry - self.log += "\n\n" + self.thread_pool.submit(self._write_log, log_entry) + + def close(self) -> None: + self.thread_pool.shutdown(wait=True) + return super().close() def get_logger(name: str) -> logging.Logger: diff --git a/src/llmtuner/extras/ploting.py b/src/llmtuner/extras/ploting.py index fd3cb8a3..e53f1f89 100644 --- a/src/llmtuner/extras/ploting.py +++ b/src/llmtuner/extras/ploting.py @@ -1,7 +1,7 @@ import json import math import os -from typing import List +from typing import Any, Dict, List from transformers.trainer import TRAINER_STATE_NAME @@ -10,6 +10,7 @@ from .packages import is_matplotlib_available if is_matplotlib_available(): + import matplotlib.figure import matplotlib.pyplot as plt @@ -21,7 +22,7 @@ def smooth(scalars: List[float]) -> List[float]: EMA implementation according to TensorBoard. """ last = scalars[0] - smoothed = list() + smoothed = [] weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function for next_val in scalars: smoothed_val = last * weight + (1 - weight) * next_val @@ -30,7 +31,27 @@ def smooth(scalars: List[float]) -> List[float]: return smoothed +def gen_loss_plot(trainer_log: List[Dict[str, Any]]) -> "matplotlib.figure.Figure": + plt.close("all") + plt.switch_backend("agg") + fig = plt.figure() + ax = fig.add_subplot(111) + steps, losses = [], [] + for log in trainer_log: + if log.get("loss", None): + steps.append(log["current_steps"]) + losses.append(log["loss"]) + + ax.plot(steps, losses, color="#1f77b4", alpha=0.4, label="original") + ax.plot(steps, smooth(losses), color="#1f77b4", label="smoothed") + ax.legend() + ax.set_xlabel("step") + ax.set_ylabel("loss") + return fig + + def plot_loss(save_dictionary: os.PathLike, keys: List[str] = ["loss"]) -> None: + plt.switch_backend("agg") with open(os.path.join(save_dictionary, TRAINER_STATE_NAME), "r", encoding="utf-8") as f: data = json.load(f) diff --git a/src/llmtuner/hparams/parser.py b/src/llmtuner/hparams/parser.py index 977d7cf4..7fdd3234 100644 --- a/src/llmtuner/hparams/parser.py +++ b/src/llmtuner/hparams/parser.py @@ -10,6 +10,7 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version +from ..extras.constants import TRAINER_CONFIG from ..extras.logging import get_logger from ..extras.misc import check_dependencies, get_current_device from .data_args import DataArguments @@ -251,7 +252,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and can_resume_from_checkpoint ): last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and 
len(os.listdir(training_args.output_dir)) > 0: + files = os.listdir(training_args.output_dir) + if last_checkpoint is None and len(files) > 0 and (len(files) != 1 or files[0] != TRAINER_CONFIG): raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.") if last_checkpoint is not None: diff --git a/src/llmtuner/train/__init__.py b/src/llmtuner/train/__init__.py index 6c22bc15..e69de29b 100644 --- a/src/llmtuner/train/__init__.py +++ b/src/llmtuner/train/__init__.py @@ -1,4 +0,0 @@ -from .tuner import export_model, run_exp - - -__all__ = ["export_model", "run_exp"] diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index a2eb121f..6822ffb5 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -23,9 +23,9 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None): +def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []): model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args) - callbacks = [LogCallback()] if callbacks is None else callbacks + callbacks.append(LogCallback(training_args.output_dir)) if finetuning_args.stage == "pt": run_pt(model_args, data_args, training_args, finetuning_args, callbacks) @@ -88,7 +88,3 @@ def export_model(args: Optional[Dict[str, Any]] = None): tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") - - -if __name__ == "__main__": - run_exp() diff --git a/src/llmtuner/webui/__init__.py b/src/llmtuner/webui/__init__.py index 3e82dd69..e69de29b 100644 --- a/src/llmtuner/webui/__init__.py +++ b/src/llmtuner/webui/__init__.py @@ -1,4 +0,0 @@ -from .interface import create_ui, create_web_demo - - -__all__ = ["create_ui", "create_web_demo"] diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py index 9af4c439..a33e3db7 100644 --- a/src/llmtuner/webui/common.py +++ b/src/llmtuner/webui/common.py @@ -4,6 +4,7 @@ from collections import defaultdict from typing import Any, Dict, Optional from peft.utils import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME +from yaml import safe_dump, safe_load from ..extras.constants import ( DATA_CONFIG, @@ -29,7 +30,7 @@ DEFAULT_CACHE_DIR = "cache" DEFAULT_CONFIG_DIR = "config" DEFAULT_DATA_DIR = "data" DEFAULT_SAVE_DIR = "saves" -USER_CONFIG = "user.config" +USER_CONFIG = "user_config.yaml" def get_save_dir(*args) -> os.PathLike: @@ -47,7 +48,7 @@ def get_save_path(config_path: str) -> os.PathLike: def load_config() -> Dict[str, Any]: try: with open(get_config_path(), "r", encoding="utf-8") as f: - return json.load(f) + return safe_load(f) except Exception: return {"lang": None, "last_model": None, "path_dict": {}, "cache_dir": None} @@ -60,13 +61,13 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona user_config["last_model"] = model_name user_config["path_dict"][model_name] = model_path with open(get_config_path(), "w", encoding="utf-8") as f: - json.dump(user_config, f, indent=2, ensure_ascii=False) + safe_dump(user_config, f) def load_args(config_path: str) -> Optional[Dict[str, Any]]: try: with open(get_save_path(config_path), "r", encoding="utf-8") as f: - return json.load(f) + return safe_load(f) except Exception: return None @@ -74,7 +75,7 @@ def load_args(config_path: str) -> Optional[Dict[str, Any]]: def 
save_args(config_path: str, config_dict: Dict[str, Any]) -> str: os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) with open(get_save_path(config_path), "w", encoding="utf-8") as f: - json.dump(config_dict, f, indent=2, ensure_ascii=False) + safe_dump(config_dict, f) return str(get_save_path(config_path)) diff --git a/src/llmtuner/webui/components/export.py b/src/llmtuner/webui/components/export.py index 4c224736..64273882 100644 --- a/src/llmtuner/webui/components/export.py +++ b/src/llmtuner/webui/components/export.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Dict, Generator, List from ...extras.misc import torch_gc from ...extras.packages import is_gradio_available -from ...train import export_model +from ...train.tuner import export_model from ..common import get_save_dir from ..locales import ALERTS diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py index c9671289..c709b916 100644 --- a/src/llmtuner/webui/components/train.py +++ b/src/llmtuner/webui/components/train.py @@ -245,7 +245,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): resume_btn = gr.Checkbox(visible=False, interactive=False) - process_bar = gr.Slider(visible=False, interactive=False) + progress_bar = gr.Slider(visible=False, interactive=False) with gr.Row(): output_box = gr.Markdown() @@ -263,14 +263,14 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir=output_dir, config_path=config_path, resume_btn=resume_btn, - process_bar=process_bar, + progress_bar=progress_bar, output_box=output_box, loss_viewer=loss_viewer, ) ) input_elems.update({output_dir, config_path}) - output_elems = [output_box, process_bar, loss_viewer] + output_elems = [output_box, progress_bar, loss_viewer] cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) diff --git a/src/llmtuner/webui/engine.py b/src/llmtuner/webui/engine.py index cebac3b9..964d65a2 100644 --- a/src/llmtuner/webui/engine.py +++ b/src/llmtuner/webui/engine.py @@ -41,7 +41,7 @@ class Engine: init_dict["train.dataset"] = {"choices": list_dataset().choices} init_dict["eval.dataset"] = {"choices": list_dataset().choices} init_dict["train.output_dir"] = {"value": "train_{}".format(get_time())} - init_dict["train.config_path"] = {"value": "{}.json".format(get_time())} + init_dict["train.config_path"] = {"value": "{}.yaml".format(get_time())} init_dict["eval.output_dir"] = {"value": "eval_{}".format(get_time())} init_dict["infer.image_box"] = {"visible": False} @@ -51,7 +51,7 @@ class Engine: yield self._update_component(init_dict) - if self.runner.alive and not self.demo_mode and not self.pure_chat: + if self.runner.running and not self.demo_mode and not self.pure_chat: yield {elem: elem.__class__(value=value) for elem, value in self.runner.running_data.items()} if self.runner.do_train: yield self._update_component({"train.resume_btn": {"value": True}}) diff --git a/src/llmtuner/webui/interface.py b/src/llmtuner/webui/interface.py index abca16c5..feb2a20a 100644 --- a/src/llmtuner/webui/interface.py +++ b/src/llmtuner/webui/interface.py @@ -68,5 +68,9 @@ def create_web_demo() -> gr.Blocks: return demo -if __name__ == "__main__": +def run_web_ui(): create_ui().queue().launch(server_name="0.0.0.0", server_port=None, share=False, inbrowser=True) + + +def run_web_demo(): + create_web_demo().queue().launch(server_name="0.0.0.0", 
server_port=None, share=False, inbrowser=True) diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index d53a4dfe..b14271b7 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -1,22 +1,19 @@ -import logging import os -import time -from threading import Thread -from typing import TYPE_CHECKING, Any, Dict, Generator +import signal +from copy import deepcopy +from subprocess import Popen, TimeoutExpired +from typing import TYPE_CHECKING, Any, Dict, Generator, Optional -import transformers +import psutil from transformers.trainer import TRAINING_ARGS_NAME from transformers.utils import is_torch_cuda_available -from ..extras.callbacks import LogCallback from ..extras.constants import TRAINING_STAGES -from ..extras.logging import LoggerHandler from ..extras.misc import get_device_count, torch_gc from ..extras.packages import is_gradio_available -from ..train import run_exp from .common import get_module, get_save_dir, load_args, load_config, save_args from .locales import ALERTS -from .utils import gen_cmd, gen_plot, get_eval_results, update_process_bar +from .utils import gen_cmd, get_eval_results, get_trainer_info, save_cmd if is_gradio_available(): @@ -34,24 +31,18 @@ class Runner: self.manager = manager self.demo_mode = demo_mode """ Resume """ - self.thread: "Thread" = None + self.trainer: Optional["Popen"] = None self.do_train = True self.running_data: Dict["Component", Any] = None """ State """ self.aborted = False self.running = False - """ Handler """ - self.logger_handler = LoggerHandler() - self.logger_handler.setLevel(logging.INFO) - logging.root.addHandler(self.logger_handler) - transformers.logging.add_handler(self.logger_handler) - - @property - def alive(self) -> bool: - return self.thread is not None def set_abort(self) -> None: self.aborted = True + if self.trainer is not None: + for children in psutil.Process(self.trainer.pid).children(): # abort the child process + os.kill(children.pid, signal.SIGABRT) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] @@ -85,13 +76,11 @@ class Runner: if not from_preview and not is_torch_cuda_available(): gr.Warning(ALERTS["warn_no_cuda"][lang]) - self.logger_handler.reset() - self.trainer_callback = LogCallback(self) return "" def _finalize(self, lang: str, finish_info: str) -> str: finish_info = ALERTS["info_aborted"][lang] if self.aborted else finish_info - self.thread = None + self.trainer = None self.aborted = False self.running = False self.running_data = None @@ -270,11 +259,12 @@ class Runner: gr.Warning(error) yield {output_box: error} else: - args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) - run_kwargs = dict(args=args, callbacks=[self.trainer_callback]) self.do_train, self.running_data = do_train, data - self.thread = Thread(target=run_exp, kwargs=run_kwargs) - self.thread.start() + args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) + env = deepcopy(os.environ) + env["CUDA_VISIBLE_DEVICES"] = os.environ.get("CUDA_VISIBLE_DEVICES", "0") + env["LLAMABOARD_ENABLED"] = "1" + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() def preview_train(self, data): @@ -291,9 +281,6 @@ class Runner: def monitor(self): get = lambda elem_id: self.running_data[self.manager.get_elem_by_id(elem_id)] - self.aborted = False - self.running = True - lang = 
get("top.lang") model_name = get("top.model_name") finetuning_type = get("top.finetuning_type") @@ -301,28 +288,31 @@ class Runner: output_path = get_save_dir(model_name, finetuning_type, output_dir) output_box = self.manager.get_elem_by_id("{}.output_box".format("train" if self.do_train else "eval")) - process_bar = self.manager.get_elem_by_id("{}.process_bar".format("train" if self.do_train else "eval")) + progress_bar = self.manager.get_elem_by_id("{}.progress_bar".format("train" if self.do_train else "eval")) loss_viewer = self.manager.get_elem_by_id("train.loss_viewer") if self.do_train else None - while self.thread is not None and self.thread.is_alive(): + while self.trainer is not None: if self.aborted: yield { output_box: ALERTS["info_aborting"][lang], - process_bar: gr.Slider(visible=False), + progress_bar: gr.Slider(visible=False), } else: + running_log, running_progress, running_loss = get_trainer_info(output_path) return_dict = { - output_box: self.logger_handler.log, - process_bar: update_process_bar(self.trainer_callback), + output_box: running_log, + progress_bar: running_progress, } - if self.do_train: - plot = gen_plot(output_path) - if plot is not None: - return_dict[loss_viewer] = plot + if self.do_train and running_loss is not None: + return_dict[loss_viewer] = running_loss yield return_dict - time.sleep(2) + try: + self.trainer.wait(2) + self.trainer = None + except TimeoutExpired: + continue if self.do_train: if os.path.exists(os.path.join(output_path, TRAINING_ARGS_NAME)): @@ -337,16 +327,11 @@ class Runner: return_dict = { output_box: self._finalize(lang, finish_info), - process_bar: gr.Slider(visible=False), + progress_bar: gr.Slider(visible=False), } - if self.do_train: - plot = gen_plot(output_path) - if plot is not None: - return_dict[loss_viewer] = plot - yield return_dict - def save_args(self, data): + def save_args(self, data: dict): output_box = self.manager.get_elem_by_id("train.output_box") error = self._initialize(data, do_train=True, from_preview=True) if error: diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py index 74f74e6a..2ad1e62c 100644 --- a/src/llmtuner/webui/utils.py +++ b/src/llmtuner/webui/utils.py @@ -1,10 +1,13 @@ import json import os from datetime import datetime -from typing import TYPE_CHECKING, Any, Dict, Optional +from typing import Any, Dict, List, Optional, Tuple +from yaml import safe_dump + +from ..extras.constants import RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG from ..extras.packages import is_gradio_available, is_matplotlib_available -from ..extras.ploting import smooth +from ..extras.ploting import gen_loss_plot from .locales import ALERTS @@ -12,30 +15,6 @@ if is_gradio_available(): import gradio as gr -if is_matplotlib_available(): - import matplotlib.figure - import matplotlib.pyplot as plt - - -if TYPE_CHECKING: - from ..extras.callbacks import LogCallback - - -def update_process_bar(callback: "LogCallback") -> "gr.Slider": - if not callback.max_steps: - return gr.Slider(visible=False) - - percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0 - label = "Running {:d}/{:d}: {} < {}".format( - callback.cur_steps, callback.max_steps, callback.elapsed_time, callback.remaining_time - ) - return gr.Slider(label=label, value=percentage, visible=True) - - -def get_time() -> str: - return datetime.now().strftime(r"%Y-%m-%d-%H-%M-%S") - - def can_quantize(finetuning_type: str) -> "gr.Dropdown": if finetuning_type != "lora": return gr.Dropdown(value="none", 
interactive=False) @@ -57,14 +36,19 @@ def check_json_schema(text: str, lang: str) -> None: gr.Warning(ALERTS["err_json_schema"][lang]) +def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]: + no_skip_keys = ["packing"] + return {k: v for k, v in args.items() if (k in no_skip_keys) or (v is not None and v is not False and v != "")} + + def gen_cmd(args: Dict[str, Any]) -> str: args.pop("disable_tqdm", None) args["plot_loss"] = args.get("do_train", None) current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0") cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)] - for k, v in args.items(): - if v is not None and v is not False and v != "": - cmd_lines.append(" --{} {} ".format(k, str(v))) + for k, v in clean_cmd(args).items(): + cmd_lines.append(" --{} {} ".format(k, str(v))) + cmd_text = "\\\n".join(cmd_lines) cmd_text = "```bash\n{}\n```".format(cmd_text) return cmd_text @@ -76,29 +60,49 @@ def get_eval_results(path: os.PathLike) -> str: return "```json\n{}\n```\n".format(result) -def gen_plot(output_path: str) -> Optional["matplotlib.figure.Figure"]: - log_file = os.path.join(output_path, "trainer_log.jsonl") - if not os.path.isfile(log_file) or not is_matplotlib_available(): - return +def get_time() -> str: + return datetime.now().strftime(r"%Y-%m-%d-%H-%M-%S") - plt.close("all") - plt.switch_backend("agg") - fig = plt.figure() - ax = fig.add_subplot(111) - steps, losses = [], [] - with open(log_file, "r", encoding="utf-8") as f: - for line in f: - log_info: Dict[str, Any] = json.loads(line) - if log_info.get("loss", None): - steps.append(log_info["current_steps"]) - losses.append(log_info["loss"]) - if len(losses) == 0: - return +def get_trainer_info(output_path: os.PathLike) -> Tuple[str, "gr.Slider", Optional["gr.Plot"]]: + running_log = "" + running_progress = gr.Slider(visible=False) + running_loss = None - ax.plot(steps, losses, color="#1f77b4", alpha=0.4, label="original") - ax.plot(steps, smooth(losses), color="#1f77b4", label="smoothed") - ax.legend() - ax.set_xlabel("step") - ax.set_ylabel("loss") - return fig + running_log_path = os.path.join(output_path, RUNNING_LOG) + if os.path.isfile(running_log_path): + with open(running_log_path, "r", encoding="utf-8") as f: + running_log = f.read() + + trainer_log_path = os.path.join(output_path, TRAINER_LOG) + if os.path.isfile(trainer_log_path): + trainer_log: List[Dict[str, Any]] = [] + with open(trainer_log_path, "r", encoding="utf-8") as f: + for line in f: + trainer_log.append(json.loads(line)) + + if len(trainer_log) != 0: + latest_log = trainer_log[-1] + percentage = latest_log["percentage"] + label = "Running {:d}/{:d}: {} < {}".format( + latest_log["current_steps"], + latest_log["total_steps"], + latest_log["elapsed_time"], + latest_log["remaining_time"], + ) + running_progress = gr.Slider(label=label, value=percentage, visible=True) + + if is_matplotlib_available(): + running_loss = gr.Plot(gen_loss_plot(trainer_log)) + + return running_log, running_progress, running_loss + + +def save_cmd(args: Dict[str, Any]) -> str: + output_dir = args["output_dir"] + os.makedirs(output_dir, exist_ok=True) + + with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + safe_dump(clean_cmd(args), f) + + return os.path.join(output_dir, TRAINER_CONFIG) diff --git a/src/train_bash.py b/src/train.py similarity index 67% rename from src/train_bash.py rename to src/train.py index 9ddd0586..6a3212cb 100644 --- a/src/train_bash.py +++ b/src/train.py @@ -1,4 +1,4 @@ -from llmtuner 
import run_exp +from llmtuner.train.tuner import run_exp def main(): @@ -7,7 +7,7 @@ def main(): def _mp_fn(index): # For xla_spawn (TPUs) - main() + run_exp() if __name__ == "__main__": diff --git a/src/train_web.py b/src/train_web.py deleted file mode 100644 index 8327f4dd..00000000 --- a/src/train_web.py +++ /dev/null @@ -1,9 +0,0 @@ -from llmtuner import create_ui - - -def main(): - create_ui().queue().launch(server_name="0.0.0.0", server_port=None, share=False, inbrowser=True) - - -if __name__ == "__main__": - main() diff --git a/src/web_demo.py b/src/web_demo.py deleted file mode 100644 index 3b57ee73..00000000 --- a/src/web_demo.py +++ /dev/null @@ -1,9 +0,0 @@ -from llmtuner import create_web_demo - - -def main(): - create_web_demo().queue().launch(server_name="0.0.0.0", server_port=None, share=False, inbrowser=True) - - -if __name__ == "__main__": - main()
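
Two follow-up notes on the launcher changes above. First, the new per-run artifacts make it possible to follow a Web-UI-started job from a terminal as well; a minimal sketch, using the example output directory from the LoRA scripts (the file names come from `RUNNING_LOG`, `TRAINER_LOG` and `TRAINER_CONFIG` in `src/llmtuner/extras/constants.py`):

```bash
OUTPUT_DIR=saves/LLaMA2-7B/lora/sft      # example path; substitute your own run directory
cat "$OUTPUT_DIR"/trainer_config.yaml    # arguments dumped by save_cmd() before the trainer subprocess starts
tail -f "$OUTPUT_DIR"/running_log.txt    # logger output captured by the new LoggerHandler
tail -f "$OUTPUT_DIR"/trainer_log.jsonl  # progress records appended by LogCallback.on_log
```

Second, the Dockerfile's `CMD [ "llamafactory-cli webui" ]` uses exec form with a single element, so Docker will look for an executable literally named `llamafactory-cli webui`; splitting it into `CMD [ "llamafactory-cli", "webui" ]` (or using shell form) gives the intended behavior.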