Merge branch 'hiyouga:main' into main
Commit 8d53ec2b5f
```diff
@@ -289,7 +289,7 @@ huggingface-cli login
 | datasets     | 2.14.3  | 2.19.1    |
 | accelerate   | 0.27.2  | 0.30.1    |
 | peft         | 0.9.0   | 0.11.1    |
-| trl          | 0.8.1   | 0.8.6     |
+| trl          | 0.8.2   | 0.8.6     |
 
 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
```
```diff
@@ -345,6 +345,8 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
 
 <details><summary>For Ascend NPU users</summary>
 
+Join [NPU user group](assets/wechat_npu.jpg).
+
 To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**.
 
 | Requirement | Minimum | Recommend |
```
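The hunk above adds a pointer to the NPU user group next to the existing torch-npu and Ascend CANN Kernels instructions. As a quick sanity check after installing those two components, the sketch below verifies that PyTorch can see the NPU backend; it assumes torch-npu exposes the usual `torch.npu` namespace, and the device index `npu:0` is a placeholder for whatever the environment provides.

```python
# Minimal environment check, assuming torch-npu and the CANN toolkit are installed;
# importing torch_npu registers the Ascend NPU backend with PyTorch.
import torch
import torch_npu  # noqa: F401

if torch.npu.is_available():
    print(f"NPUs visible: {torch.npu.device_count()}")
    x = torch.ones(2, 2, device="npu:0")  # allocate a small tensor on the first NPU
    print(x * 2)
else:
    print("No Ascend NPU detected; check the torch-npu and CANN installation.")
```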
```diff
@@ -289,7 +289,7 @@ huggingface-cli login
 | datasets     | 2.14.3  | 2.19.1    |
 | accelerate   | 0.27.2  | 0.30.1    |
 | peft         | 0.9.0   | 0.11.1    |
-| trl          | 0.8.1   | 0.8.6     |
+| trl          | 0.8.2   | 0.8.6     |
 
 | 可选项       | 至少    | 推荐      |
 | ------------ | ------- | --------- |
```
```diff
@@ -345,6 +345,8 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
 
 <details><summary>昇腾 NPU 用户指南</summary>
 
+加入 [NPU 用户群](assets/wechat_npu.jpg)。
+
 如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。
 
 | 依赖项 | 至少 | 推荐 |
```
Binary file changed (image): 146 KiB → 145 KiB
Binary file added (image): 146 KiB
```diff
@@ -7,7 +7,7 @@
   "hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
   "ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
   "script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)",
-  "file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)",
+  "file_name": "该目录下数据集文件夹或文件的名称(若上述参数未指定,则此项必需)",
   "formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)",
   "ranking": "是否为偏好数据集(可选,默认:False)",
   "subset": "数据集子集的名称(可选,默认:None)",
```
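The changed line broadens the description of `file_name`: it may now name a folder of dataset files under the data directory, not only a single file. Below is a minimal sketch of what a corresponding `dataset_info.json` entry could look like, written as a Python dict and serialized with the standard library; the dataset name `my_dataset` and its values are hypothetical placeholders, not taken from the repository.

```python
import json

# Hypothetical entry: "file_name" points at a folder of dataset files
# (rather than a single file), matching the updated description above.
dataset_info = {
    "my_dataset": {
        "file_name": "my_dataset",  # folder or file under the data directory
        "formatting": "alpaca",     # default dataset format
        "ranking": False,           # not a preference dataset
    }
}

with open("dataset_info.json", "w", encoding="utf-8") as f:
    json.dump(dataset_info, f, ensure_ascii=False, indent=2)
```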
```diff
@@ -89,7 +89,7 @@ def preprocess_supervised_dataset(
     if processor is not None and hasattr(processor, "image_seq_length"):  # paligemma case
         image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
         input_ids += [image_token_id] * getattr(processor, "image_seq_length")
-        labels += [image_token_id] * getattr(processor, "image_seq_length")
+        labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length")
 
     for turn_idx, (source_ids, target_ids) in enumerate(
         template.encode_multiturn(
```
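The changed line stops the prepended image tokens from being scored by the loss: their label positions are filled with `IGNORE_INDEX` instead of the image token id. A minimal sketch of the effect, with hypothetical values (`IGNORE_INDEX = -100` as in the common Hugging Face convention, an image prefix of length 3, and a dummy vocabulary size):

```python
import torch
import torch.nn.functional as F

IGNORE_INDEX = -100   # label value that cross-entropy is told to skip
image_seq_length = 3  # hypothetical number of prepended image tokens
image_token_id = 99   # hypothetical image token id
text_ids = [11, 12, 13]

input_ids = [image_token_id] * image_seq_length + text_ids
# Before the fix the image positions carried real token ids and were scored;
# after the fix they are masked out of the loss entirely.
labels = [IGNORE_INDEX] * image_seq_length + text_ids

logits = torch.randn(len(input_ids), 32000)  # (sequence, vocab), dummy values
loss = F.cross_entropy(logits, torch.tensor(labels), ignore_index=IGNORE_INDEX)
print(loss)  # only the three text positions contribute
```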
```diff
@@ -65,7 +65,7 @@ def check_dependencies() -> None:
     require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
     require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
     require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
-    require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1")
+    require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2")
 
 
 def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
```
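This hunk raises the minimum trl version checked at startup to match the README table above. The import of `require_version` is not shown in the hunk; assuming it is the helper from `transformers.utils.versions` (an assumption, since the hunk only shows the call sites), it reads the installed version through `importlib.metadata` and raises `ImportError` with the hint when the requirement is not met:

```python
# A minimal sketch, assuming require_version comes from transformers.utils.versions.
from transformers.utils.versions import require_version

try:
    require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2")
    print("trl version satisfies the requirement")
except ImportError as exc:
    # Raised, for example, when trl 0.8.1 is the installed version.
    print(exc)
```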