From e44a4f07f09bbee55c10ccee91dd858256c36054 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 27 Jun 2024 20:14:48 +0800
Subject: [PATCH] tiny fix

---
 README.md                             |  2 +-
 README_zh.md                          |  2 +-
 docker/docker-cuda/Dockerfile         | 36 ++++++++++++++-------------
 docker/docker-cuda/docker-compose.yml |  2 +-
 docker/docker-npu/Dockerfile          |  4 +--
 src/llamafactory/cli.py               |  4 +--
 src/llamafactory/hparams/parser.py    |  4 +--
 7 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 45ac23d8..44aed7e8 100644
--- a/README.md
+++ b/README.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .

diff --git a/README_zh.md b/README_zh.md
index c5fd4f69..7e3d51ad 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .

diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile
index 44aaf538..718390a8 100644
--- a/docker/docker-cuda/Dockerfile
+++ b/docker/docker-cuda/Dockerfile
@@ -2,11 +2,14 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
 FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define environments
+ENV MAX_JOBS=4
+
 # Define installation arguments
 ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASH_ATTN=false
+ARG INSTALL_FLASHATTN=false
 ARG PIP_INDEX=https://pypi.org/simple
 
 # Set the working directory
@@ -14,34 +17,33 @@ WORKDIR /app
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url $PIP_INDEX
-RUN pip config set global.extra-index-url $PIP_INDEX
-RUN python -m pip install --upgrade pip
-RUN python -m pip install -r requirements.txt
+RUN pip config set global.index-url "$PIP_INDEX" && \
+    pip config set global.extra-index-url "$PIP_INDEX" && \
+    python -m pip install --upgrade pip && \
+    python -m pip install -r requirements.txt
+
+# Rebuild flash attention
+RUN pip uninstall -y transformer-engine flash-attn && \
+    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+        pip uninstall -y ninja && pip install ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
+    fi
 
 # Copy the rest of the application into the image
 COPY . /app
 
 # Install the LLaMA Factory
 RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" = "true" ]; then \
+    if [ "$INSTALL_BNB" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
     fi; \
-    if [ "$INSTALL_VLLM" = "true" ]; then \
+    if [ "$INSTALL_VLLM" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
     fi; \
-    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e .[$EXTRA_PACKAGES] && \
-    pip uninstall -y transformer-engine flash-attn
-
-# Rebuild flash-attn
-RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \
-        ninja --version || \
-        (pip uninstall -y ninja && pip install ninja) && \
-        MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation \
-    fi;
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]

diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml
index 4ccb0c04..16267dc3 100644
--- a/docker/docker-cuda/docker-compose.yml
+++ b/docker/docker-cuda/docker-compose.yml
@@ -7,7 +7,7 @@ services:
         INSTALL_BNB: false
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
-        INSTALL_FLASH_ATTN: false
+        INSTALL_FLASHATTN: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:

diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile
index 71ab3daf..e413d4e3 100644
--- a/docker/docker-npu/Dockerfile
+++ b/docker/docker-npu/Dockerfile
@@ -2,6 +2,7 @@
 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags
 FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04
 
+# Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 
 # Define installation arguments
@@ -27,8 +28,7 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e ".[$EXTRA_PACKAGES]" && \
-    pip uninstall -y transformer-engine flash-attn
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]

diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py
index c92f556b..48eb2898 100644
--- a/src/llamafactory/cli.py
+++ b/src/llamafactory/cli.py
@@ -91,7 +91,7 @@ def main():
             master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
             master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
             logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
-            subproc = subprocess.run(
+            process = subprocess.run(
                 (
                     "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
                     "--master_addr {master_addr} --master_port {master_port} {file_name} {args}"
@@ -106,7 +106,7 @@ def main():
                 ),
                 shell=True,
             )
-            sys.exit(subproc.returncode)
+            sys.exit(process.returncode)
         else:
             run_exp()
     elif command == Command.WEBDEMO:

diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index d4bcfbc6..6017907c 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -199,8 +199,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
         if not is_torch_bf16_gpu_available():
             raise ValueError("This device does not support `pure_bf16`.")
 
-        if training_args.deepspeed:
-            raise ValueError("`pure_bf16` is incompatible with DeepSpeed.")
+        if is_deepspeed_zero3_enabled():
+            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")
 
         if training_args.fp16 or training_args.bf16:
             raise ValueError("Turn off mixed precision training when using `pure_bf16`.")