2024-08-12 15:23:43 +08:00
|
|
|
import json
|
|
|
|
|
|
|
|
def jsonl_to_json(jsonl_file, json_file):
    """Convert a JSON Lines file into a file containing one JSON array.

    Each non-blank line of ``jsonl_file`` is copied verbatim (it is not
    re-serialized) as one element of the array written to ``json_file``.

    Args:
        jsonl_file: Path of the UTF-8 JSON Lines file to read.
        json_file: Path of the UTF-8 JSON file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Read everything before opening the output so that converting a file
    # in place (same input and output path) does not truncate the input.
    with open(jsonl_file, 'r', encoding='utf-8') as src:
        # strip() removes "\n" / "\r\n" without eating the last character of
        # a final line that has no trailing newline; blank lines are skipped
        # because they would otherwise become empty array elements.
        records = [line.strip() for line in src if line.strip()]

    with open(json_file, 'w', encoding='utf-8') as dst:
        dst.write('[\n')
        if records:
            # Separate elements with commas (no trailing comma after the
            # last one), keeping one record per line with a one-space indent.
            dst.write(',\n'.join(' ' + record for record in records))
            dst.write('\n')
        # Close the array so the result is valid JSON.
        dst.write(']\n')
|
|
|
|
|
|
|
|
|
|
|
|
# Script driver: convert the alpaca_zh JSON Lines file to a JSON file.
# Both paths are relative to the current working directory.
jsonl_file, json_file = 'jsonl_data/alpaca_zh.jsonl', 'data/alpaca_zh.json'
jsonl_to_json(jsonl_file=jsonl_file, json_file=json_file)
|