diff --git a/Dockerfile b/Dockerfile
index 0a35e355..45849601 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,44 @@
-FROM nvcr.io/nvidia/pytorch:24.01-py3
+# Use the NVIDIA official image with PyTorch 2.3.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
+FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define installation arguments
+ARG INSTALL_BNB=false
+ARG INSTALL_VLLM=false
+ARG INSTALL_DEEPSPEED=false
+ARG PIP_INDEX=https://pypi.org/simple
+
+# Set the working directory
 WORKDIR /app
 
+# Install the requirements
 COPY requirements.txt /app/
-RUN pip install -r requirements.txt
+RUN pip config set global.index-url $PIP_INDEX
+RUN python -m pip install --upgrade pip
+RUN python -m pip install -r requirements.txt
 
+# Copy the rest of the application into the image
 COPY . /app/
-RUN pip install -e .[metrics,bitsandbytes,qwen]
+
+# Install the LLaMA Factory
+RUN EXTRA_PACKAGES="metrics"; \
+    if [ "$INSTALL_BNB" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
+    fi; \
+    if [ "$INSTALL_VLLM" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
+    fi; \
+    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
+    fi; \
+    pip install -e .[$EXTRA_PACKAGES] && \
+    pip uninstall -y transformer-engine
+
+# Set up volumes
 VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
+
+# Expose port 7860 for the LLaMA Board
 EXPOSE 7860
 
-CMD [ "llamafactory-cli", "webui" ]
+# Expose port 8000 for the API service
+EXPOSE 8000
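Note: the optional extras are now composed at build time from the `ARG`s above. A minimal sketch of a non-default build, assuming this Dockerfile as-is (the mirror URL is an illustrative placeholder, not a recommendation):

```bash
# Enable the bitsandbytes extra and point pip at a custom index.
# INSTALL_BNB=true makes the RUN step above append ",bitsandbytes" to EXTRA_PACKAGES.
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=true \
    --build-arg PIP_INDEX=https://mirror.example.com/simple \
    -t llamafactory:bnb .
```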
diff --git a/README.md b/README.md
index 4dea65b9..35dacd2e 100644
--- a/README.md
+++ b/README.md
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
 Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr
 
 ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
 
-#### Use local environment
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-
-
-#### Use Docker
+### Build Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
    --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### Use Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> Use Docker Compose to build the image via `docker compose up -d`.
 
 <details><summary>Details about volume</summary>
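Note: port 8000 is newly published for the API service, but the image no longer sets a default `CMD`, so nothing listens there until a command is started. A sketch under those assumptions, reusing the example config from the repo (the request body and model name are illustrative):

```bash
# Launch the OpenAI-style API in the running container (default API port 8000).
docker exec -d llamafactory llamafactory-cli api examples/inference/llama3_lora_sft.yaml

# Query it from the host once the model has loaded.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'
```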
diff --git a/README_zh.md b/README_zh.md
index ab0e8cb7..0ddb8b19 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -405,9 +405,9 @@ Docker 镜像:
 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s
 
 ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
 
-#### 使用本地环境
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-#### 使用 Docker
+### 构建 Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
     --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### 使用 Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
 
 <details><summary>数据卷详情</summary>
diff --git a/docker-compose.yml b/docker-compose.yml
index 9602a3e3..b3e4a34d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,17 +1,23 @@
 version: '3.8'
 
 services:
-  llama-factory:
+  llamafactory:
     build:
       dockerfile: Dockerfile
       context: .
-    container_name: llama_factory
+      args:
+        INSTALL_BNB: false
+        INSTALL_VLLM: false
+        INSTALL_DEEPSPEED: false
+        PIP_INDEX: https://pypi.org/simple
+    container_name: llamafactory
     volumes:
       - ./hf_cache:/root/.cache/huggingface/
       - ./data:/app/data
       - ./output:/app/output
     ports:
       - "7860:7860"
+      - "8000:8000"
     ipc: host
     deploy:
       resources:
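Note: a sketch of the Compose workflow the new TIP points at; `--build` forces a rebuild when the build args in `docker-compose.yml` change, and `exec` targets the renamed `llamafactory` service:

```bash
# Build the image with the args from docker-compose.yml and start detached.
docker compose up -d --build

# Run CLI commands inside the running service, e.g. the web UI.
docker compose exec llamafactory llamafactory-cli webui
```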