diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
index 9d965f88..f5d93d50 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
index 5633cd3f..6e5a9a55 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_1_single.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
index e4a05d0c..90929864 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
index 1e839cc3..f85cfba3 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_2_single.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
index 042e210d..943ccbdc 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
index bf5b0f16..f174adc4 100644
--- a/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
+++ b/results/lora_sft_2/Llama2-7B/llama2_lora_sft_3_single.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true
diff --git a/results/lora_sft_2/test/test1.yaml b/results/lora_sft_2/test/test1.yaml
index ba87111a..b12061d8 100644
--- a/results/lora_sft_2/test/test1.yaml
+++ b/results/lora_sft_2/test/test1.yaml
@@ -29,7 +29,7 @@ learning_rate: 1.0e-4
 num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
-fp16: true
+bf16: true
 ddp_timeout: 180000000
 max_steps: 1000
 include_num_input_tokens_seen: true