From e44a4f07f09bbee55c10ccee91dd858256c36054 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 27 Jun 2024 20:14:48 +0800
Subject: [PATCH] tiny fix

---
 README.md                             |  2 +-
 README_zh.md                          |  2 +-
 docker/docker-cuda/Dockerfile         | 36 ++++++++++++++-------------
 docker/docker-cuda/docker-compose.yml |  2 +-
 docker/docker-npu/Dockerfile          |  4 +--
 src/llamafactory/cli.py               |  4 +--
 src/llamafactory/hparams/parser.py    |  4 +--
 7 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 45ac23d8..44aed7e8 100644
--- a/README.md
+++ b/README.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .

diff --git a/README_zh.md b/README_zh.md
index c5fd4f69..7e3d51ad 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .

diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile
index 44aaf538..718390a8 100644
--- a/docker/docker-cuda/Dockerfile
+++ b/docker/docker-cuda/Dockerfile
@@ -2,11 +2,14 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
 FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define environments
+ENV MAX_JOBS=4
+
 # Define installation arguments
 ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASH_ATTN=false
+ARG INSTALL_FLASHATTN=false
 ARG PIP_INDEX=https://pypi.org/simple
 
 # Set the working directory
@@ -14,34 +17,33 @@ WORKDIR /app
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url $PIP_INDEX
-RUN pip config set global.extra-index-url $PIP_INDEX
-RUN python -m pip install --upgrade pip
-RUN python -m pip install -r requirements.txt
+RUN pip config set global.index-url "$PIP_INDEX" && \
+    pip config set global.extra-index-url "$PIP_INDEX" && \
+    python -m pip install --upgrade pip && \
+    python -m pip install -r requirements.txt
+
+# Rebuild flash attention
+RUN pip uninstall -y transformer-engine flash-attn && \
+    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+        pip uninstall -y ninja && pip install ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
+    fi
 
 # Copy the rest of the application into the image
 COPY . /app
 
 # Install the LLaMA Factory
 RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" = "true" ]; then \
+    if [ "$INSTALL_BNB" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
     fi; \
-    if [ "$INSTALL_VLLM" = "true" ]; then \
+    if [ "$INSTALL_VLLM" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
     fi; \
-    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e .[$EXTRA_PACKAGES] && \
-    pip uninstall -y transformer-engine flash-attn
-
-# Rebuild flash-attn
-RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \
-        ninja --version || \
-        (pip uninstall -y ninja && pip install ninja) && \
-        MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation \
-    fi;
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]

diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml
index 4ccb0c04..16267dc3 100644
--- a/docker/docker-cuda/docker-compose.yml
+++ b/docker/docker-cuda/docker-compose.yml
@@ -7,7 +7,7 @@ services:
         INSTALL_BNB: false
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
-        INSTALL_FLASH_ATTN: false
+        INSTALL_FLASHATTN: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:

diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile
index 71ab3daf..e413d4e3 100644
--- a/docker/docker-npu/Dockerfile
+++ b/docker/docker-npu/Dockerfile
@@ -2,6 +2,7 @@
 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags
 FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04
 
+# Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 
 # Define installation arguments
@@ -27,8 +28,7 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e ".[$EXTRA_PACKAGES]" && \
-    pip uninstall -y transformer-engine flash-attn
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]

diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py
index c92f556b..48eb2898 100644
--- a/src/llamafactory/cli.py
+++ b/src/llamafactory/cli.py
@@ -91,7 +91,7 @@ def main():
             master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
             master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
             logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
-            subproc = subprocess.run(
+            process = subprocess.run(
                 (
                     "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
                     "--master_addr {master_addr} --master_port {master_port} {file_name} {args}"
@@ -106,7 +106,7 @@ def main():
                 ),
                 shell=True,
             )
-            sys.exit(subproc.returncode)
+            sys.exit(process.returncode)
         else:
             run_exp()
     elif command == Command.WEBDEMO:

diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index d4bcfbc6..6017907c 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -199,8 +199,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
         if not is_torch_bf16_gpu_available():
             raise ValueError("This device does not support `pure_bf16`.")
 
-        if training_args.deepspeed:
-            raise ValueError("`pure_bf16` is incompatible with DeepSpeed.")
+        if is_deepspeed_zero3_enabled():
+            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")
 
         if training_args.fp16 or training_args.bf16:
             raise ValueError("Turn off mixed precision training when using `pure_bf16`.")