From c88b1be9f3dfaf5fe65448dea20fc697b4f257bd Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Tue, 25 Jun 2024 15:13:07 +0800 Subject: [PATCH 1/2] support flash-attn in Dockerfile --- docker/docker-cuda/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 827b7b3c..06a172f0 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -35,6 +35,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip install -e .[$EXTRA_PACKAGES] && \ pip uninstall -y transformer-engine flash-attn +# Rebuild flash-attn +RUN ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation + # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] From e19491b0f0446f2fb2154cf14e0b2fbba5b54808 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Thu, 27 Jun 2024 00:11:04 +0800 Subject: [PATCH 2/2] add flash-attn installation flag in Dockerfile --- README.md | 1 + README_zh.md | 1 + docker/docker-cuda/Dockerfile | 9 ++++++--- docker/docker-cuda/docker-compose.yml | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4b42edd7..cdca8333 100644 --- a/README.md +++ b/README.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . 
diff --git a/README_zh.md b/README_zh.md index 3926c09d..d26c8268 100644 --- a/README_zh.md +++ b/README_zh.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 06a172f0..44aaf538 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -6,6 +6,7 @@ FROM nvcr.io/nvidia/pytorch:24.02-py3 ARG INSTALL_BNB=false ARG INSTALL_VLLM=false ARG INSTALL_DEEPSPEED=false +ARG INSTALL_FLASH_ATTN=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory @@ -36,9 +37,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip uninstall -y transformer-engine flash-attn # Rebuild flash-attn -RUN ninja --version || \ - (pip uninstall -y ninja && pip install ninja) && \ - MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation +RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \ + ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation; \ + fi # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 1c0a3c75..ad269cb0 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -7,6 +7,7 @@ services: INSTALL_BNB: false INSTALL_VLLM: false INSTALL_DEEPSPEED: false + INSTALL_FLASH_ATTN: false PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: