import pandas as pd
# Field delimiter: read_file splits every input line on this string.
sep="|"
def read_key(dict_key, arr_fileld, idx=None):
    """Register every key found in the selected fields of one record.

    Each selected field is treated as a comma-separated list of keys. A key
    seen for the first time is assigned ``len(dict_key) + 1``, so ids start
    at 1 and follow first-seen order. Keys already present keep their id.

    Args:
        dict_key: Mapping of key -> id; updated in place.
        arr_fileld: The fields of one record (a sequence of strings).
        idx: Positions in ``arr_fileld`` whose keys should be registered.
            ``None`` or an empty sequence registers nothing.

    Raises:
        IndexError: If a position in ``idx`` is out of range for
            ``arr_fileld``.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean "no columns".
    if idx is None or len(idx) == 0:
        return
    for index in idx:
        for key in arr_fileld[index].split(","):
            # len() is evaluated before the insert, so a new key receives the
            # next free id and an existing key is left untouched.
            dict_key.setdefault(key, len(dict_key) + 1)
def replace_key(dict_key, arr_fileld, idx=None):
    """Rebuild one record with the keys in the selected fields replaced by ids.

    Each selected field is split on "," and every key is replaced by the
    string form of its id from ``dict_key``; the ids are re-joined with ",".
    Fields at other positions are copied through unchanged.

    Args:
        dict_key: Mapping of key -> id.
        arr_fileld: The fields of one record (a sequence of strings).
        idx: Positions in ``arr_fileld`` whose keys should be replaced.

    Returns:
        The resulting fields joined with "|", or ``""`` when ``idx`` is
        ``None`` or empty.

    Raises:
        KeyError: If a key in a selected field is missing from ``dict_key``.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean "no columns".
    if idx is None or len(idx) == 0:
        return ""
    # Build the lookup once so the per-field membership test is O(1).
    selected = set(idx)
    res = []
    for i, field in enumerate(arr_fileld):
        if i in selected:
            field = ",".join(str(dict_key[key]) for key in field.split(","))
        res.append(field)
    # Literal "|" kept from the original; it has the same value as the
    # module-level ``sep``.
    return "|".join(res)
def read_file(file_name, file_out, idx):
    """Rewrite a delimited file with the keys in selected columns replaced by ids.

    The input is read twice: the first pass collects every key found in the
    selected columns (via ``read_key``), the second pass writes each record
    to ``file_out`` with those keys replaced by their ids (via
    ``replace_key``).

    Args:
        file_name: Path of the UTF-8 input file; fields are separated by
            the module-level ``sep``.
        file_out: Path of the UTF-8 output file; overwritten if it exists.
        idx: Column positions whose keys are replaced. When empty,
            ``replace_key`` returns "" for every record and nothing is
            written to the output file.
    """
    dict_key = {}

    # Pass 1: build the key -> id table.
    with open(file_name, "r", encoding="utf-8") as f:
        for line in f:
            # Drop the line terminator first; otherwise a key in the last
            # column would be registered as "key\n" and get a different id
            # from the same key seen elsewhere.
            read_key(dict_key, line.rstrip("\n").split(sep), idx)

    # Pass 2: emit the records with ids substituted.
    with open(file_name, "r", encoding="utf-8") as f:
        with open(file_out, "w", encoding="utf-8") as fout:
            for line in f:
                body = line.rstrip("\n")
                res = replace_key(dict_key, body.split(sep), idx)
                if idx:
                    # Re-attach exactly the terminator the input line had
                    # (nothing for a final line without a newline), so
                    # records stay on separate lines even when the last
                    # column was replaced.
                    res += line[len(body):]
                fout.write(res)
import json
from datetime import datetime

# Load the list of names from the configuration file.
with open("../conf/names.json", "r") as names_file:
    names = json.load(names_file)

# Collect the positions of the entries that contain "room_id", or that
# contain both "room" and "idx". These positions are handed to read_file
# as the column indexes to process.
idx = [
    position
    for position, name in enumerate(names)
    if "room_id" in name or ("room" in name and "idx" in name)
]

# Run the conversion and report how long it took.
started_at = datetime.now()
read_file("../data/xxx", "../data/out", idx)
finished_at = datetime.now()
time_span = finished_at - started_at
print("time", time_span)