Add Magpie and WebInstruct dataset samples

Adds two dataset samples with claimed superior performance: Magpie (from Allen AI) and WebInstruct (from TIGER-Lab).
This commit is contained in:
Eli Costa 2024-06-15 19:31:56 -03:00 committed by GitHub
parent 29c1f31baa
commit 74e49cca95
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 23 additions and 3 deletions

View File

@ -524,13 +524,13 @@
"prompt": "text" "prompt": "text"
} }
}, },
"fileweb": { "fineweb": {
"hf_hub_url": "HuggingFaceFW/fineweb", "hf_hub_url": "HuggingFaceFW/fineweb",
"columns": { "columns": {
"prompt": "text" "prompt": "text"
} }
}, },
"fileweb_edu": { "fineweb_edu": {
"hf_hub_url": "HuggingFaceFW/fineweb-edu", "hf_hub_url": "HuggingFaceFW/fineweb-edu",
"columns": { "columns": {
"prompt": "text" "prompt": "text"
@ -550,5 +550,25 @@
"prompt": "content" "prompt": "content"
}, },
"folder": "python" "folder": "python"
},
"Magpie-Pro-300K-Filtered": {
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
"columns": {
"messages": "conversations"
},
"tags": {
"role_tag": "from",
"content_tag": "value",
"user_tag": "human",
"assistant_tag": "gpt"
},
"formatting": "sharegpt"
},
"WebInstructSub": {
"hf_hub_url": "TIGER-Lab/WebInstructSub",
"columns": {
"prompt": "question",
"response": "answer"
}
} }
} }