From 9468ee9012bfe7124fc5cc2acebcfe03a6d0cdee Mon Sep 17 00:00:00 2001
From: Marco <121761685+mlinmg@users.noreply.github.com>
Date: Thu, 30 Nov 2023 16:21:34 +0100
Subject: [PATCH] Update dataset_info.json

Added the Nectar dataset, already preprocessed and split into SFT and RL
sets. A preprompt was added to each instruction, since it has been seen
that this increases instruction following.
---
 data/dataset_info.json | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/data/dataset_info.json b/data/dataset_info.json
index faa7931d..78d6a922 100644
--- a/data/dataset_info.json
+++ b/data/dataset_info.json
@@ -266,6 +266,13 @@
     "columns": {
       "prompt": "content"
     }
+  },
+  "nectar_rlaif": {
+    "hf_hub_url": "mlinmg/RLAIF-Nectar",
+    "ranking": true
+  },
+  "nectar_sft": {
+    "hf_hub_url": "mlinmg/SFT-Nectar"
   },
   "starcoder": {
     "hf_hub_url": "bigcode/starcoderdata",