# 数据载入 (data loading)
def load_data(train_fname='test_data/data_valid.json', max_docs=10000):
    """Load, label and segment documents from a JSON-lines file.

    Each line of the file is parsed as one JSON object. Its ``"fact"``
    field is collected as the raw text, and ``util.get_label`` is called
    with the kinds ``"accu"``, ``"law"`` and ``"time"`` to obtain the three
    labels. After reading, every fact is passed through ``util.cut_line``
    (presumably word segmentation -- confirm against ``util``).

    Args:
        train_fname: Path of the UTF-8 encoded file to read, one JSON
            object per line.
        max_docs: Maximum number of lines to read from the start of the
            file.

    Returns:
        A 4-tuple ``(facts, accu_label, article_label, imprison_label)`` of
        parallel lists, one entry per document read. ``facts`` holds the
        results of ``util.cut_line``.
    """
    facts = []
    accu_label = []
    article_label = []
    imprison_label = []

    with open(train_fname, 'r', encoding='utf-8') as f:
        for k, line in enumerate(f, start=1):
            if k > max_docs:
                break
            # The file is already decoded as UTF-8 by open(); json.loads()
            # no longer accepts an `encoding` keyword (removed in Python
            # 3.9), so passing it would raise TypeError.
            line_dict = json.loads(line)
            facts.append(line_dict["fact"])
            accu_label.append(util.get_label(line_dict, "accu"))
            article_label.append(util.get_label(line_dict, "law"))
            imprison_label.append(util.get_label(line_dict, "time"))
            print('第'+str(k)+'个文档处理完!')

    if util.DEBUG:
        print("DEBUG: training file loaded.")

    facts = [util.cut_line(fact) for fact in facts]
    if util.DEBUG:
        print("DEBUG: training data segmented.")

    if util.DUMP:
        # Optionally persist the processed data via the file's dump helper.
        dump_processed_data_to_file(facts, accu_label, article_label, imprison_label)

    print('load_data sucess!')
    return facts, accu_label, article_label, imprison_label
# 数据切片 (data slicing)
def slice_data(slice_size=None):
if slice_size is None:
alltext, accu_label, law_label, time_label = load_data()
else:
alltext, accu_label, law_label, time_label = load_data()
randnum = random.randint