update scripts

hiyouga 2024-08-09 19:16:23 +08:00
parent c87023d539
commit 86f7099fa3
8 changed files with 29 additions and 16 deletions

View File

@@ -537,9 +537,10 @@ docker exec -it llamafactory bash
 <details><summary>Details about volume</summary>

-- hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
-- data: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
-- output: Set export dir to this location so that the merged result can be accessed directly on the host machine.
+- `hf_cache`: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
+- `ms_cache`: Similar to Hugging Face cache but for ModelScope users.
+- `data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
+- `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.

 </details>
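For reference, a minimal sketch of how these four volumes typically map into the container when it is started manually; the container-side paths and image name below are assumptions, not values taken from this commit.

```bash
# Hypothetical mount layout for the volumes listed above; host paths come from the list,
# while the container-side paths and image tag are assumed.
docker run -it --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface \
    -v ./ms_cache:/root/.cache/modelscope \
    -v ./data:/app/data \
    -v ./output:/app/output \
    llamafactory:latest bash
```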

View File

@@ -537,9 +537,10 @@ docker exec -it llamafactory bash
 <details><summary>数据卷详情</summary>

-- hf_cache使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
-- data宿主机中存放数据集的文件夹路径。
-- output将导出目录设置为该路径后即可在宿主机中访问导出后的模型。
+- `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
+- `ms_cache`:类似 Hugging Face 缓存文件夹,为 ModelScope 用户提供。
+- `data`:宿主机中存放数据集的文件夹路径。
+- `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。

 </details>

View File

@@ -0,0 +1,5 @@
+#!/bin/bash
+
+python scripts/pissa_init.py \
+    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
+    --output_dir models/llama3-8b-pissa

View File

@@ -43,7 +43,7 @@ def calculate_lr(
     dataset_dir: str = "data",
     template: str = "default",
     cutoff_len: int = 1024,  # i.e. maximum input length during training
-    is_mistral: bool = False,  # mistral model uses a smaller learning rate,
+    is_mistral_or_gemma: bool = False,  # mistral and gemma models opt for a smaller learning rate,
     packing: bool = False,
 ):
     r"""
@@ -84,7 +84,7 @@ def calculate_lr(
     valid_ratio = valid_tokens / total_tokens
     batch_valid_len = batch_max_len * valid_ratio
     lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
-    lr = lr / 6.0 if is_mistral else lr
+    lr = lr / 6.0 if is_mistral_or_gemma else lr
     print(
         "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
             lr, valid_ratio * 100, batch_valid_len
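A hedged usage sketch for the renamed flag, assuming the script is exposed as a CLI through `fire` like the other scripts touched in this commit; the model, dataset, batch size, and cutoff length below are placeholders.

```bash
# Hypothetical invocation; Gemma models now also receive the 1/6 reduction applied
# after the lr ~ sqrt(effective batch size) scaling rule shown in the hunk above.
python scripts/cal_lr.py \
    --model_name_or_path google/gemma-7b \
    --batch_size 512 \
    --dataset alpaca_en_demo \
    --cutoff_len 1024 \
    --is_mistral_or_gemma True
```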

View File

@@ -19,7 +19,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

 import fire
 import torch
@@ -47,8 +47,8 @@ def block_expansion(
     model_name_or_path: str,
     output_dir: str,
     num_expand: int,
-    shard_size: Optional[str] = "2GB",
-    save_safetensors: Optional[bool] = False,
+    shard_size: str = "2GB",
+    save_safetensors: bool = True,
 ):
     r"""
     Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
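A hedged example of calling the expansion script with the new defaults; the model path, output directory, and `num_expand` value are placeholders.

```bash
# Hypothetical block-expansion call; with this commit the expanded checkpoint is
# written as safetensors shards by default (save_safetensors now defaults to True).
python scripts/llama_pro.py \
    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
    --output_dir models/llama3-8b-pro \
    --num_expand 8
```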

View File

@@ -16,7 +16,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import Any, Dict, Optional
+from typing import Any, Dict

 import fire
 import torch
@@ -86,7 +86,10 @@ def save_config(input_dir: str, output_dir: str):
 def llamafy_baichuan2(
-    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+    input_dir: str,
+    output_dir: str,
+    shard_size: str = "2GB",
+    save_safetensors: bool = True,
 ):
     r"""
     Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
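A hedged conversion sketch; both paths are placeholders.

```bash
# Hypothetical conversion call; the converted weights are now saved as safetensors
# by default (save_safetensors defaults to True after this change).
python scripts/llamafy_baichuan2.py \
    --input_dir path_to_baichuan2_model \
    --output_dir path_to_llamafied_model
```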

View File

@@ -16,7 +16,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import Any, Dict, Optional
+from typing import Any, Dict

 import fire
 import torch
@@ -139,7 +139,10 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str):
 def llamafy_qwen(
-    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+    input_dir: str,
+    output_dir: str,
+    shard_size: str = "2GB",
+    save_safetensors: bool = False,
 ):
     r"""
     Converts the Qwen models in the same format as LLaMA2.

View File

@@ -31,7 +31,7 @@ if TYPE_CHECKING:
 def quantize_pissa(
     model_name_or_path: str,
     output_dir: str,
-    pissa_iter: int = 4,
+    pissa_iter: int = 16,
     lora_alpha: int = None,
     lora_rank: int = 16,
     lora_dropout: float = 0,
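A hedged sketch mirroring the shell script added earlier in this commit, with the new default spelled out explicitly; the model path and output directory are placeholders.

```bash
# Hypothetical PiSSA initialization call; pissa_iter (the iteration count used by the
# fast-SVD initialization) now defaults to 16 instead of 4.
python scripts/pissa_init.py \
    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
    --output_dir models/llama3-8b-pissa \
    --pissa_iter 16
```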