SlimOrca aligner
This commit is contained in:
parent
d0daaa01f9
commit
36d7a75966
|
@ -53,17 +53,20 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr"
|
||||||
if len(messages) == 0:
|
if len(messages) == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
n_sys = 0
|
||||||
prompt = []
|
prompt = []
|
||||||
response = []
|
response = []
|
||||||
for turn_idx, message in enumerate(messages):
|
for turn_idx, message in enumerate(messages):
|
||||||
if turn_idx % 2 == 0:
|
accept_tags = [dataset_attr.user_tag, dataset_attr.observation_tag, dataset_attr.assistant_tag, dataset_attr.function_tag]
|
||||||
accept_tags = [dataset_attr.user_tag, dataset_attr.observation_tag]
|
|
||||||
else:
|
|
||||||
accept_tags = [dataset_attr.assistant_tag, dataset_attr.function_tag]
|
|
||||||
|
|
||||||
if message[dataset_attr.role_tag] not in accept_tags:
|
if message[dataset_attr.role_tag] == "system":
|
||||||
|
outputs["system"].append(message[dataset_attr.content_tag])
|
||||||
|
n_sys += 1
|
||||||
|
elif message[dataset_attr.role_tag] not in accept_tags:
|
||||||
|
print("sytem attr", dataset_attr.system)
|
||||||
|
print("accepted tags", accept_tags)
|
||||||
raise ValueError("Invalid role tag in {}.".format(messages))
|
raise ValueError("Invalid role tag in {}.".format(messages))
|
||||||
|
else:
|
||||||
prompt.append(
|
prompt.append(
|
||||||
{"role": tag_mapping[message[dataset_attr.role_tag]], "content": message[dataset_attr.content_tag]}
|
{"role": tag_mapping[message[dataset_attr.role_tag]], "content": message[dataset_attr.content_tag]}
|
||||||
)
|
)
|
||||||
|
@ -72,9 +75,10 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr"
|
||||||
response.append(last_message)
|
response.append(last_message)
|
||||||
outputs["prompt"].append(prompt)
|
outputs["prompt"].append(prompt)
|
||||||
outputs["response"].append(response)
|
outputs["response"].append(response)
|
||||||
|
if n_sys == 0:
|
||||||
outputs["system"].append(examples[dataset_attr.system][i] if dataset_attr.system else "")
|
outputs["system"].append(examples[dataset_attr.system][i] if dataset_attr.system else "")
|
||||||
outputs["tools"].append(examples[dataset_attr.tools][i] if dataset_attr.tools else "")
|
outputs["tools"].append(examples[dataset_attr.tools][i] if dataset_attr.tools else "")
|
||||||
|
assert n_sys <= 1
|
||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue