update examples
This commit is contained in:
parent 93a0245474
commit dae83f4199
@@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
 ```
 
+#### Multimodal Supervised Fine-Tuning
+
+```bash
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
+```
+
 #### Reward Modeling
 
 ```bash
@@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
 ```
 
-#### Multimodal Supervised Fine-Tuning
-
-```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
-```
-
 #### Preprocess Dataset
 
 It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset.

@@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
 ```
 
+#### Multimodal Supervised Fine-Tuning
+
+```bash
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
+```
+
 #### Reward Modeling
 
 ```bash
@@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
 ```
 
-#### Multimodal Supervised Fine-Tuning
-
-```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
-```
-
 #### Preprocess Dataset
 
 It is useful for large datasets; use `tokenized_path` in the config to load the preprocessed dataset.

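The `tokenized_path` option mentioned in the Preprocess Dataset note corresponds to the preprocessing example config touched later in this commit. A minimal sketch of the relevant dataset block, with values copied from that config (the save path shown is just the example value used there):

```yaml
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
tokenized_path: saves/llama3-8b/dataset/sft  # load the preprocessed dataset from this path (see the note above)
```
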
@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

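Across the example configs below, the same change repeats: `val_size` moves out of the dataset block and into the eval block. For reference, a sketch of the two sections as they read after the hunks above (keys and values reconstructed from the diff context):

```yaml
# dataset
dataset: identity,alpaca_gpt4_en
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

# eval
val_size: 0.1
per_device_eval_batch_size: 1
evaluation_strategy: steps
eval_steps: 500
```
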
@@ -8,12 +8,14 @@ do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 
+# ddp
+ddp_timeout: 180000000
+
 # dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +36,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +36,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -14,7 +14,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -32,9 +31,10 @@ learning_rate: 0.0001
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
-pure_bf16: true
+fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -13,7 +13,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -31,9 +30,10 @@ learning_rate: 0.0001
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
-pure_bf16: true
+fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +36,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -13,7 +13,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -11,7 +11,6 @@ lora_target: q_proj,v_proj
 dataset: c4_demo
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -32,6 +31,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 tokenized_path: saves/llama3-8b/dataset/sft

@@ -13,7 +13,6 @@ dataset: mllm_demo
 template: vicuna
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -8,15 +8,11 @@ do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 
-# ddp
-ddp_timeout: 180000000
-
 # dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500

@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500