LLaMA-Factory-Mirror/jsonl_data/jsonl_to_json.py

22 lines
579 B
Python
Raw Permalink Normal View History

2024-08-12 15:23:43 +08:00
import json
def jsonl_to_json(jsonl_file, json_file):
    """Convert a JSON Lines file into a single JSON array file.

    Each non-blank line of ``jsonl_file`` is parsed as one JSON value; the
    values are written, in order, as one JSON array to ``json_file``.

    Args:
        jsonl_file: Path of the UTF-8 encoded JSON Lines input file.
        json_file: Path of the JSON output file (created or overwritten).

    Raises:
        ValueError: If a non-blank input line is not valid JSON. The message
            names the input file and the 1-based line number.
        OSError: If either file cannot be opened.
    """
    records = []
    # Parse the whole input before touching the output, so a malformed input
    # line never leaves a truncated or half-written output file behind.
    with open(jsonl_file, 'r', encoding='utf-8') as src:
        for lineno, raw in enumerate(src, start=1):
            text = raw.strip()
            if not text:
                # Blank lines (e.g. a trailing empty line) carry no record.
                continue
            try:
                records.append(json.loads(text))
            except json.JSONDecodeError as err:
                raise ValueError(
                    f'{jsonl_file}:{lineno}: invalid JSON line: {err}'
                ) from err

    with open(json_file, 'w', encoding='utf-8') as dst:
        # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable
        # instead of escaping it to \uXXXX sequences.
        json.dump(records, dst, ensure_ascii=False, indent=2)
        dst.write('\n')
# Script driver: convert the alpaca_zh dataset from JSON Lines to JSON.
# Both paths are relative to the current working directory.
jsonl_file, json_file = 'jsonl_data/alpaca_zh.jsonl', 'data/alpaca_zh.json'
jsonl_to_json(jsonl_file=jsonl_file, json_file=json_file)