diff --git a/Dockerfile b/Dockerfile
index 0a35e355..45849601 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,44 @@
-FROM nvcr.io/nvidia/pytorch:24.01-py3
+# Use the NVIDIA official image with PyTorch 2.3.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
+FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define installation arguments
+ARG INSTALL_BNB=false
+ARG INSTALL_VLLM=false
+ARG INSTALL_DEEPSPEED=false
+ARG PIP_INDEX=https://pypi.org/simple
+
+# Set the working directory
 WORKDIR /app
 
+# Install the requirements
 COPY requirements.txt /app/
-RUN pip install -r requirements.txt
+RUN pip config set global.index-url $PIP_INDEX
+RUN python -m pip install --upgrade pip
+RUN python -m pip install -r requirements.txt
 
+# Copy the rest of the application into the image
 COPY . /app/
-RUN pip install -e .[metrics,bitsandbytes,qwen]
+
+# Install the LLaMA Factory
+RUN EXTRA_PACKAGES="metrics"; \
+    if [ "$INSTALL_BNB" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
+    fi; \
+    if [ "$INSTALL_VLLM" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
+    fi; \
+    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
+    fi; \
+    pip install -e .[$EXTRA_PACKAGES] && \
+    pip uninstall -y transformer-engine
+
+# Set up volumes
 VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
+
+# Expose port 7860 for the LLaMA Board
 EXPOSE 7860
 
-CMD [ "llamafactory-cli", "webui" ]
+# Expose port 8000 for the API service
+EXPOSE 8000
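Note: the optional extras are now composed at build time from the `ARG`s above. A minimal sketch of a non-default build, assuming this Dockerfile as-is (the mirror URL is an illustrative placeholder, not a recommendation):

```bash
# Enable the bitsandbytes extra and point pip at a custom index.
# INSTALL_BNB=true makes the RUN step above append ",bitsandbytes" to EXTRA_PACKAGES.
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=true \
    --build-arg PIP_INDEX=https://mirror.example.com/simple \
    -t llamafactory:bnb .
```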
diff --git a/README.md b/README.md
index 4dea65b9..35dacd2e 100644
--- a/README.md
+++ b/README.md
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
 Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr
 
 ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
 
-#### Use local environment
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-
-
-#### Use Docker
+### Build Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
    --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### Use Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> Use Docker Compose to build the image via `docker compose up -d`.
 
 <details><summary>Details about volume</summary>
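Note: port 8000 is newly published for the API service, but the image no longer sets a default `CMD`, so nothing listens there until a command is started. A sketch under those assumptions, reusing the example config from the repo (the request body and model name are illustrative):

```bash
# Launch the OpenAI-style API in the running container (default API port 8000).
docker exec -d llamafactory llamafactory-cli api examples/inference/llama3_lora_sft.yaml

# Query it from the host once the model has loaded.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'
```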
diff --git a/README_zh.md b/README_zh.md
index ab0e8cb7..0ddb8b19 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -405,9 +405,9 @@ Docker 镜像:
 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s
 
 ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
 
-#### 使用本地环境
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-#### 使用 Docker
+### 构建 Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
     --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### 使用 Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
 
 <details><summary>数据卷详情</summary>
diff --git a/docker-compose.yml b/docker-compose.yml
index 9602a3e3..b3e4a34d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,17 +1,23 @@
 version: '3.8'
 
 services:
-  llama-factory:
+  llamafactory:
     build:
       dockerfile: Dockerfile
       context: .
-    container_name: llama_factory
+      args:
+        INSTALL_BNB: false
+        INSTALL_VLLM: false
+        INSTALL_DEEPSPEED: false
+        PIP_INDEX: https://pypi.org/simple
+    container_name: llamafactory
     volumes:
       - ./hf_cache:/root/.cache/huggingface/
       - ./data:/app/data
       - ./output:/app/output
     ports:
       - "7860:7860"
+      - "8000:8000"
     ipc: host
     deploy:
       resources:
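Note: a sketch of the Compose workflow the new TIP points at; `--build` forces a rebuild when the build args in `docker-compose.yml` change, and `exec` targets the renamed `llamafactory` service:

```bash
# Build the image with the args from docker-compose.yml and start detached.
docker compose up -d --build

# Run CLI commands inside the running service, e.g. the web UI.
docker compose exec llamafactory llamafactory-cli webui
```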