diff --git a/sites/paligemma-pt.yaml b/sites/paligemma-pt.yaml
deleted file mode 100644
index 4305cf5f..00000000
--- a/sites/paligemma-pt.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-# model
-model_name_or_path: google/paligemma-3b-mix-448
-visual_inputs: true
-tune_mm_proj: true
-#print_param_status: true
-
-# method
-stage: sft
-do_train: true
-finetuning_type: full
-
-# ddp
-ddp_timeout: 180000000
-deepspeed: examples/deepspeed/ds_z2_offload_config.json
-
-# dataset
-dataset: mllm_pt_demo
-dataset_dir: data
-template: gemma
-cutoff_len: 2048
-max_samples: 3
-#val_size: 0.0001
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-# output
-output_dir: saves/paligemma/full/sft_llava_pt_test
-logging_steps: 1
-save_steps: 50
-plot_loss: true
-overwrite_output_dir: true
-#save_strategy: epoch
-#save_total_limit: 2
-
-# train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 16
-learning_rate: 0.00001
-num_train_epochs: 100
-lr_scheduler_type: cosine
-warmup_steps: 0.1
-#bf16: true
-pure_bf16: true
-
-# eval
-do_eval: false
-#per_device_eval_batch_size: 1
-#evaluation_strategy: steps
-#eval_steps: 500
diff --git a/sites/paligemma.yaml b/sites/paligemma.yaml
deleted file mode 100644
index f3257cfc..00000000
--- a/sites/paligemma.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-# model
-model_name_or_path: google/paligemma-3b-mix-448
-visual_inputs: true
-#print_param_status: true
-use_fast_tokenizer: false
-
-# method
-stage: sft
-do_train: true
-finetuning_type: full
-
-# ddp
-ddp_timeout: 180000000
-deepspeed: examples/deepspeed/ds_z2_offload_config.json
-
-# dataset
-dataset: mllm_demo
-dataset_dir: data
-template: gemma
-cutoff_len: 2048
-max_samples: 3
-#val_size: 0.0001
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-# output
-output_dir: saves/paligemma/full/sft_llava_1k
-logging_steps: 1
-save_steps: 50
-plot_loss: true
-overwrite_output_dir: true
-#save_strategy: epoch
-#save_total_limit: 2
-
-# train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 16
-learning_rate: 0.00001
-num_train_epochs: 100
-lr_scheduler_type: cosine
-warmup_steps: 0.1
-#bf16: true
-pure_bf16: true
-
-# eval
-do_eval: false
-#per_device_eval_batch_size: 1
-#evaluation_strategy: steps
-#eval_steps: 500
diff --git a/sites/paligemma_lora.yaml b/sites/paligemma_lora.yaml
deleted file mode 100644
index 0693a6ae..00000000
--- a/sites/paligemma_lora.yaml
+++ /dev/null
@@ -1,40 +0,0 @@
-### model
-model_name_or_path: google/paligemma-3b-mix-448
-visual_inputs: true
-use_fast_tokenizer: false
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: q_proj,v_proj
-
-### dataset
-dataset: mllm_demo
-template: gemma
-cutoff_len: 1024
-max_samples: 1000
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/paligemma/lora/sft_mllm
-logging_steps: 10
-save_steps: 500
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 0.0001
-num_train_epochs: 3.0
-lr_scheduler_type: cosine
-warmup_steps: 0.1
-fp16: true
-
-### eval
-val_size: 0.1
-per_device_eval_batch_size: 1
-evaluation_strategy: steps
-eval_steps: 500