Forked from p04798526/LLaMA-Factory-Mirror

Merge pull request #4461 from hzhaoy/feature/support-flash-attn

support flash-attn in Dockerfile

Commit 64b131dcfa
@@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg INSTALL_FLASH_ATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .
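The new INSTALL_FLASH_ATTN argument defaults to false, so flash-attn stays out of the image unless requested. To bake it in, flip the flag at build time; a minimal sketch reusing the paths and tag from the command above (omitted arguments fall back to their Dockerfile defaults):

docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg INSTALL_FLASH_ATTN=true \
    -t llamafactory:latest .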
@@ -6,6 +6,7 @@ FROM nvcr.io/nvidia/pytorch:24.02-py3
 ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
+ARG INSTALL_FLASH_ATTN=false
 ARG PIP_INDEX=https://pypi.org/simple

 # Set the working directory
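ARG values like these exist only while the image is being built; they are not environment variables in the running container. A minimal sketch of the pattern this commit relies on, with a hypothetical echo step standing in for the real work, showing how a build argument gates a later RUN in the same stage:

ARG INSTALL_FLASH_ATTN=false
RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then echo "flash-attn will be rebuilt"; fi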
@@ -35,6 +36,13 @@ RUN EXTRA_PACKAGES="metrics"; \
     pip install -e .[$EXTRA_PACKAGES] && \
     pip uninstall -y transformer-engine flash-attn

+# Rebuild flash-attn
+RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \
+        ninja --version || \
+        (pip uninstall -y ninja && pip install ninja) && \
+        MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation; \
+    fi
+
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
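The ninja --version || (pip uninstall -y ninja && pip install ninja) chain ensures a working ninja is available before compilation starts, and MAX_JOBS=4 caps the number of parallel compile jobs, since building flash-attn from source is memory-hungry. After building the image with the flag enabled, a quick sanity check could look like this (a sketch, assuming the llamafactory:latest tag from the build example above and an NVIDIA container runtime):

docker run --rm --gpus all llamafactory:latest \
    python -c "import flash_attn; print(flash_attn.__version__)"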
@@ -7,6 +7,7 @@ services:
         INSTALL_BNB: false
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
+        INSTALL_FLASH_ATTN: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
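On the Compose path the flag is read from this file rather than from the command line, so enabling flash-attn means setting INSTALL_FLASH_ATTN: true here, or overriding it for a single build. A sketch of the rebuild-and-restart cycle, assuming the compose file sits next to the CUDA Dockerfile:

cd docker/docker-cuda
docker compose build --build-arg INSTALL_FLASH_ATTN=true
docker compose up -d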