diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..ce67d58a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,11 @@
+.vscode
+.git
+.github
+.venv
+cache
+data
+examples
+.dockerignore
+.gitattributes
+.gitignore
+Dockerfile
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..7f930148
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
+
+WORKDIR /app
+
+COPY requirements.txt /app/
+RUN pip install -r requirements.txt && \
+    pip install tiktoken && \
+    pip install transformers_stream_generator
+
+COPY . /app/
+
+VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
+EXPOSE 7860
+
+CMD [ "python", "src/train_web.py" ]
diff --git a/README.md b/README.md
index a0f7d9cc..a0c16008 100644
--- a/README.md
+++ b/README.md
@@ -651,6 +651,32 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 > [!TIP]
 > We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128` at 4/8-bit predict.
 
+### Dockerize Training
+
+#### Prerequisites
+
+Make sure a Docker environment is available on the host machine; Docker Compose is supported as well.
+
+#### Docker support
+
+```bash
+docker build -f ./Dockerfile -t llama-factory:latest .
+
+docker run --gpus=all -v ./hf_cache:/root/.cache/huggingface/ -v ./data:/app/data -v ./output:/app/output -p 7860:7860 --shm-size 16G --name llama_factory -d llama-factory:latest
+```
+
+#### Docker Compose support
+
+```bash
+docker compose -f ./docker-compose.yml up -d
+```
+
+> [!TIP]
+> Details about volumes:
+> * hf_cache: Reuse the Hugging Face cache on the host machine. It can be reassigned if a cache already exists in a different host directory.
+> * data: Place datasets in this directory on the host machine so that they can be selected in the LLaMA Board GUI.
+> * output: Set the export directory to this location so that the merged result can be accessed directly on the host machine.
+
 ## Projects using LLaMA Factory
 
 1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..267ea694
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,23 @@
+version: '3.8'
+
+services:
+  llama-factory:
+    build:
+      dockerfile: Dockerfile
+      context: .
+    container_name: llama_factory
+    volumes:
+      - ./hf_cache:/root/.cache/huggingface/
+      - ./data:/app/data
+      - ./output:/app/output
+    ports:
+      - "7860:7860"
+    shm_size: 16G
+    deploy:
+      resources:
+        reservations:
+          devices:
+          - driver: nvidia
+            count: "all"
+            capabilities: [gpu]
+    restart: unless-stopped
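Both the `docker run` command and the Compose file above rely on the host being able to hand GPUs to containers. A minimal sanity-check sketch before building, assuming the NVIDIA Container Toolkit is installed (the `nvidia/cuda:11.8.0-base-ubuntu20.04` image is only a convenient test image, chosen to match the CUDA 11.8 base in the Dockerfile):

```bash
# Verify the Docker CLI and the Compose plugin are available.
docker --version
docker compose version

# Confirm containers can see the GPUs; this requires the NVIDIA Container Toolkit.
# The CUDA image here is just a throwaway test image, not part of the patch.
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu20.04 nvidia-smi
```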
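As the tip in the README section notes, the `hf_cache` mount is reassignable. A sketch of reusing a Hugging Face cache that already lives elsewhere on the host, by swapping the first volume of the `docker run` command from the patch (`/data/hf_cache` is a placeholder for the actual host directory):

```bash
# Same command as in the README hunk, but mounting an existing host cache;
# /data/hf_cache is a placeholder path, replace it with your real cache directory.
docker run --gpus=all \
  -v /data/hf_cache:/root/.cache/huggingface/ \
  -v ./data:/app/data \
  -v ./output:/app/output \
  -p 7860:7860 --shm-size 16G \
  --name llama_factory -d llama-factory:latest
```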
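Once the container is up, whether started with `docker run` or `docker compose`, two standard commands help confirm that LLaMA Board started and that the GPUs are visible inside the container (the container name `llama_factory` matches the patch):

```bash
# Follow the startup logs of the LLaMA Board web UI, which listens on port 7860.
docker logs -f llama_factory

# Check that the GPUs are visible from inside the running container.
docker exec -it llama_factory nvidia-smi
```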