Merge pull request #4309 from EliMCosta/patch-1

Add Magpie and Webinstruct dataset samples
This commit is contained in:
hoshi-hiyouga 2024-06-18 23:30:19 +08:00 committed by GitHub
commit 10316dd8ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 27 additions and 3 deletions

View File

@@ -270,6 +270,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
</details>

View File

@@ -270,6 +270,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
</details>

View File

@@ -524,13 +524,13 @@
"prompt": "text"
}
},
-"fileweb": {
+"fineweb": {
"hf_hub_url": "HuggingFaceFW/fineweb",
"columns": {
"prompt": "text"
}
},
-"fileweb_edu": {
+"fineweb_edu": {
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
"columns": {
"prompt": "text"
@@ -550,5 +550,25 @@
"prompt": "content"
},
"folder": "python"
},
"Magpie-Pro-300K-Filtered": {
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
"columns": {
"messages": "conversations"
},
"tags": {
"role_tag": "from",
"content_tag": "value",
"user_tag": "human",
"assistant_tag": "gpt"
},
"formatting": "sharegpt"
},
"WebInstructSub": {
"hf_hub_url": "TIGER-Lab/WebInstructSub",
"columns": {
"prompt": "question",
"response": "answer"
}
}
}
}