Add Magpie and Webinstruct dataset samples
Adds two dataset samples that are claimed to offer superior performance: Magpie (from Allen AI) and Webinstruct (from TIGER-Lab).
This commit is contained in:
parent
29c1f31baa
commit
74e49cca95
|
@ -524,13 +524,13 @@
|
||||||
"prompt": "text"
|
"prompt": "text"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fileweb": {
|
"fineweb": {
|
||||||
"hf_hub_url": "HuggingFaceFW/fineweb",
|
"hf_hub_url": "HuggingFaceFW/fineweb",
|
||||||
"columns": {
|
"columns": {
|
||||||
"prompt": "text"
|
"prompt": "text"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fileweb_edu": {
|
"fineweb_edu": {
|
||||||
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
||||||
"columns": {
|
"columns": {
|
||||||
"prompt": "text"
|
"prompt": "text"
|
||||||
|
@ -550,5 +550,25 @@
|
||||||
"prompt": "content"
|
"prompt": "content"
|
||||||
},
|
},
|
||||||
"folder": "python"
|
"folder": "python"
|
||||||
|
},
|
||||||
|
"Magpie-Pro-300K-Filtered": {
|
||||||
|
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
|
||||||
|
"columns": {
|
||||||
|
"messages": "conversations"
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"role_tag": "from",
|
||||||
|
"content_tag": "value",
|
||||||
|
"user_tag": "human",
|
||||||
|
"assistant_tag": "gpt"
|
||||||
|
},
|
||||||
|
"formatting": "sharegpt"
|
||||||
|
},
|
||||||
|
"WebInstructSub": {
|
||||||
|
"hf_hub_url": "TIGER-Lab/WebInstructSub",
|
||||||
|
"columns": {
|
||||||
|
"prompt": "question",
|
||||||
|
"response": "answer"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue