add only tune lm and mm_proj

2024-05-27 19:00:15 +08:00 · 2024-05-27 19:00:15 +08:00 · 7ae9a4726c
parent 57eb13b75d
commit 7ae9a4726c
3 changed files with 0 additions and 138 deletions
--- a/sites/paligemma-pt.yaml
+++ b/sites/paligemma-pt.yaml
@ -1,49 +0,0 @@
 # Full-parameter SFT configuration for google/paligemma-3b-mix-448 on the
 # mllm_pt_demo dataset, launched through DeepSpeed.
 # model
 model_name_or_path: google/paligemma-3b-mix-448
 visual_inputs: true
 # NOTE(review): presumably makes the multimodal projector trainable; confirm
 # against the argument definition in the training code.
 tune_mm_proj: true
 #print_param_status: true
 # method
 stage: sft
 do_train: true
 finetuning_type: full
 # ddp
 ddp_timeout: 180000000
 deepspeed: examples/deepspeed/ds_z2_offload_config.json
 # dataset
 dataset: mllm_pt_demo
 dataset_dir: data
 template: gemma
 cutoff_len: 2048
 # NOTE(review): max_samples is 3 while num_train_epochs is 100; this looks
 # like a smoke-test setting. Confirm before reusing for a real run.
 max_samples: 3
 #val_size: 0.0001
 overwrite_cache: true
 preprocessing_num_workers: 16
 # output
 output_dir: saves/paligemma/full/sft_llava_pt_test
 logging_steps: 1
 save_steps: 50
 plot_loss: true
 overwrite_output_dir: true
 #save_strategy: epoch
 #save_total_limit: 2
 # train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 16
 learning_rate: 0.00001
 num_train_epochs: 100
 lr_scheduler_type: cosine
 # A fractional warmup belongs in warmup_ratio (fraction of total training
 # steps); warmup_steps expects a whole number of optimizer steps.
 warmup_ratio: 0.1
 #bf16: true
 pure_bf16: true
 # eval
 do_eval: false
 #per_device_eval_batch_size: 1
 #evaluation_strategy: steps
 #eval_steps: 500
--- a/sites/paligemma.yaml
+++ b/sites/paligemma.yaml
@ -1,49 +0,0 @@
 # Full-parameter SFT configuration for google/paligemma-3b-mix-448 on the
 # mllm_demo dataset, launched through DeepSpeed.
 # model
 model_name_or_path: google/paligemma-3b-mix-448
 visual_inputs: true
 #print_param_status: true
 use_fast_tokenizer: false
 # method
 stage: sft
 do_train: true
 finetuning_type: full
 # ddp
 ddp_timeout: 180000000
 deepspeed: examples/deepspeed/ds_z2_offload_config.json
 # dataset
 dataset: mllm_demo
 dataset_dir: data
 template: gemma
 cutoff_len: 2048
 # NOTE(review): max_samples is 3 while num_train_epochs is 100; this looks
 # like a smoke-test setting. Confirm before reusing for a real run.
 max_samples: 3
 #val_size: 0.0001
 overwrite_cache: true
 preprocessing_num_workers: 16
 # output
 output_dir: saves/paligemma/full/sft_llava_1k
 logging_steps: 1
 save_steps: 50
 plot_loss: true
 overwrite_output_dir: true
 #save_strategy: epoch
 #save_total_limit: 2
 # train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 16
 learning_rate: 0.00001
 num_train_epochs: 100
 lr_scheduler_type: cosine
 # A fractional warmup belongs in warmup_ratio (fraction of total training
 # steps); warmup_steps expects a whole number of optimizer steps.
 warmup_ratio: 0.1
 #bf16: true
 pure_bf16: true
 # eval
 do_eval: false
 #per_device_eval_batch_size: 1
 #evaluation_strategy: steps
 #eval_steps: 500
--- a/sites/paligemma_lora.yaml
+++ b/sites/paligemma_lora.yaml
@ -1,40 +0,0 @@
 ### LoRA SFT configuration for google/paligemma-3b-mix-448 on the mllm_demo
 ### dataset, with adapters targeting q_proj and v_proj.
 ### model
 model_name_or_path: google/paligemma-3b-mix-448
 visual_inputs: true
 use_fast_tokenizer: false
 ### method
 stage: sft
 do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 ### dataset
 dataset: mllm_demo
 template: gemma
 cutoff_len: 1024
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
 ### output
 output_dir: saves/paligemma/lora/sft_mllm
 logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 ### train
 per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 0.0001
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 # A fractional warmup belongs in warmup_ratio (fraction of total training
 # steps); warmup_steps expects a whole number of optimizer steps.
 warmup_ratio: 0.1
 fp16: true
 ### eval
 val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500