From c88b1be9f3dfaf5fe65448dea20fc697b4f257bd Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Tue, 25 Jun 2024 15:13:07 +0800 Subject: [PATCH 1/2] support flash-attn in Dockerfile --- docker/docker-cuda/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 827b7b3c..06a172f0 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -35,6 +35,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip install -e .[$EXTRA_PACKAGES] && \ pip uninstall -y transformer-engine flash-attn +# Rebuild flash-attn +RUN ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation + # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] From e19491b0f0446f2fb2154cf14e0b2fbba5b54808 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Thu, 27 Jun 2024 00:11:04 +0800 Subject: [PATCH 2/2] add flash-attn installation flag in Dockerfile --- README.md | 1 + README_zh.md | 1 + docker/docker-cuda/Dockerfile | 9 ++++++--- docker/docker-cuda/docker-compose.yml | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4b42edd7..cdca8333 100644 --- a/README.md +++ b/README.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . 
diff --git a/README_zh.md b/README_zh.md index 3926c09d..d26c8268 100644 --- a/README_zh.md +++ b/README_zh.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 06a172f0..44aaf538 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -6,6 +6,7 @@ FROM nvcr.io/nvidia/pytorch:24.02-py3 ARG INSTALL_BNB=false ARG INSTALL_VLLM=false ARG INSTALL_DEEPSPEED=false +ARG INSTALL_FLASH_ATTN=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory @@ -36,9 +37,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip uninstall -y transformer-engine flash-attn # Rebuild flash-attn -RUN ninja --version || \ - (pip uninstall -y ninja && pip install ninja) && \ - MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation +RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \ + ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation; \ + fi # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 1c0a3c75..ad269cb0 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -7,6 +7,7 @@ services: INSTALL_BNB: false INSTALL_VLLM: false INSTALL_DEEPSPEED: false + INSTALL_FLASH_ATTN: false PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: