Merge branch 'hiyouga:main' into main
This commit is contained in:
commit
8d53ec2b5f
|
@ -289,7 +289,7 @@ huggingface-cli login
|
||||||
| datasets | 2.14.3 | 2.19.1 |
|
| datasets | 2.14.3 | 2.19.1 |
|
||||||
| accelerate | 0.27.2 | 0.30.1 |
|
| accelerate | 0.27.2 | 0.30.1 |
|
||||||
| peft | 0.9.0 | 0.11.1 |
|
| peft | 0.9.0 | 0.11.1 |
|
||||||
| trl | 0.8.1 | 0.8.6 |
|
| trl | 0.8.2 | 0.8.6 |
|
||||||
|
|
||||||
| Optional | Minimum | Recommend |
|
| Optional | Minimum | Recommend |
|
||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
|
@ -345,6 +345,8 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
|
||||||
|
|
||||||
<details><summary>For Ascend NPU users</summary>
|
<details><summary>For Ascend NPU users</summary>
|
||||||
|
|
||||||
|
Join [NPU user group](assets/wechat_npu.jpg).
|
||||||
|
|
||||||
To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**.
|
To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**.
|
||||||
|
|
||||||
| Requirement | Minimum | Recommend |
|
| Requirement | Minimum | Recommend |
|
||||||
|
|
|
@ -289,7 +289,7 @@ huggingface-cli login
|
||||||
| datasets | 2.14.3 | 2.19.1 |
|
| datasets | 2.14.3 | 2.19.1 |
|
||||||
| accelerate | 0.27.2 | 0.30.1 |
|
| accelerate | 0.27.2 | 0.30.1 |
|
||||||
| peft | 0.9.0 | 0.11.1 |
|
| peft | 0.9.0 | 0.11.1 |
|
||||||
| trl | 0.8.1 | 0.8.6 |
|
| trl | 0.8.2 | 0.8.6 |
|
||||||
|
|
||||||
| 可选项 | 至少 | 推荐 |
|
| 可选项 | 至少 | 推荐 |
|
||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
|
@ -345,6 +345,8 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
|
||||||
|
|
||||||
<details><summary>昇腾 NPU 用户指南</summary>
|
<details><summary>昇腾 NPU 用户指南</summary>
|
||||||
|
|
||||||
|
加入 [NPU 用户群](assets/wechat_npu.jpg)。
|
||||||
|
|
||||||
如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。
|
如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。
|
||||||
|
|
||||||
| 依赖项 | 至少 | 推荐 |
|
| 依赖项 | 至少 | 推荐 |
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 146 KiB After Width: | Height: | Size: 145 KiB |
Binary file not shown.
After Width: | Height: | Size: 146 KiB |
|
@ -7,7 +7,7 @@
|
||||||
"hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
|
"hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
|
||||||
"ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
|
"ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
|
||||||
"script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)",
|
"script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)",
|
||||||
"file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)",
|
"file_name": "该目录下数据集文件夹或文件的名称(若上述参数未指定,则此项必需)",
|
||||||
"formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)",
|
"formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)",
|
||||||
"ranking": "是否为偏好数据集(可选,默认:False)",
|
"ranking": "是否为偏好数据集(可选,默认:False)",
|
||||||
"subset": "数据集子集的名称(可选,默认:None)",
|
"subset": "数据集子集的名称(可选,默认:None)",
|
||||||
|
|
|
@ -89,7 +89,7 @@ def preprocess_supervised_dataset(
|
||||||
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma case
|
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma case
|
||||||
image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
|
image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
|
||||||
input_ids += [image_token_id] * getattr(processor, "image_seq_length")
|
input_ids += [image_token_id] * getattr(processor, "image_seq_length")
|
||||||
labels += [image_token_id] * getattr(processor, "image_seq_length")
|
labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length")
|
||||||
|
|
||||||
for turn_idx, (source_ids, target_ids) in enumerate(
|
for turn_idx, (source_ids, target_ids) in enumerate(
|
||||||
template.encode_multiturn(
|
template.encode_multiturn(
|
||||||
|
|
|
@ -65,7 +65,7 @@ def check_dependencies() -> None:
|
||||||
require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
|
require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
|
||||||
require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
|
require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
|
||||||
require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
|
require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
|
||||||
require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1")
|
require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2")
|
||||||
|
|
||||||
|
|
||||||
def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
|
def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
|
||||||
|
|
Loading…
Reference in New Issue