# Code notes for explanation
# Before using this script, install ijson: pip install ijson
# For details, see the official page: https://pypi.org/project/ijson/
import json

import ijson
# Stream a (potentially very large) JSON array of objects with ijson and save
# the first `sample_size` records, keeping only the id/title/description
# fields, to `output_path`. Only one record at a time is held while parsing,
# so the input file never has to fit in memory.
#
# Expects `train_path` to be defined before this point.

flag = 0  # NOTE(review): not used in this snippet; kept in case later code reads it

# ijson prefix of each wanted field -> key used in the output record.
wanted_fields = {
    "item.id": "id",
    "item.title": "title",
    "item.description": "description",
}
sample_size = 10
output_path = "data/train_10.json"

data = []
record = None  # record currently being assembled; None while between items

# ijson reads bytes, so the input is opened in binary mode.
with open(train_path, "rb") as file:
    for prefix, event, value in ijson.parse(file):
        # For a top-level array, each element is reported under the prefix
        # "item". Matching on that prefix (not on every start_map/end_map)
        # keeps nested objects inside an element from resetting or
        # duplicating the record.
        if prefix == "item" and event == "start_map":
            record = {}
        elif prefix == "item" and event == "end_map":
            data.append(record)
            record = None
            if len(data) == sample_size:
                # Enough samples collected; stop before parsing the rest.
                break
        elif record is not None and prefix in wanted_fields:
            record[wanted_fields[prefix]] = value

# Written after the loop so that inputs with fewer than `sample_size` records
# still produce an output file. The explicit UTF-8 encoding is required
# because ensure_ascii=False emits non-ASCII characters verbatim, and the
# context manager guarantees the file is flushed and closed.
with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(data, out_file, ensure_ascii=False)