# Origin: commit 9698ef97 ("add: add jsonl to json py script"),
# file jsonl_data/jsonl_to_json.py.
"""Convert a JSON Lines file into a single JSON array file."""

import json


def jsonl_to_json(jsonl_file, json_file):
    """Convert a JSON Lines file into a JSON file holding one array.

    Every non-blank line of ``jsonl_file`` is parsed as an independent JSON
    value; the values are written to ``json_file`` as a single JSON array,
    in input order.

    Args:
        jsonl_file: Path of the UTF-8 encoded JSON Lines input file.
        json_file: Path of the UTF-8 encoded JSON output file (overwritten).

    Raises:
        ValueError: If a non-blank line is not valid JSON. The message
            names the file and the 1-based line number. The output file is
            not touched in that case.
        OSError: If either file cannot be opened.
    """
    # NOTE(review): each line is treated as one complete JSON value (standard
    # JSON Lines). If the input wraps every record in extra characters
    # (e.g. "[{...}]" per line), unwrap here -- confirm against the data.
    records = []
    with open(jsonl_file, 'r', encoding='utf-8') as src:
        for line_no, raw_line in enumerate(src, start=1):
            text = raw_line.strip()
            if not text:
                # Tolerate blank lines (commonly a trailing one at EOF).
                continue
            try:
                records.append(json.loads(text))
            except json.JSONDecodeError as err:
                raise ValueError(
                    f"{jsonl_file}:{line_no}: invalid JSON: {err}"
                ) from err

    # Parse everything before opening the output so that a malformed input
    # never leaves a truncated or half-written JSON file behind.
    with open(json_file, 'w', encoding='utf-8') as dst:
        # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable
        # instead of emitting \uXXXX escapes.
        json.dump(records, dst, ensure_ascii=False, indent=2)
        dst.write('\n')


if __name__ == '__main__':
    # Default paths used when the file is run as a script; guarded so that
    # importing this module has no side effects.
    jsonl_file = 'jsonl_data/alpaca_zh.jsonl'
    json_file = 'data/alpaca_zh.json'
    jsonl_to_json(jsonl_file, json_file)