文件数据较大时，csv 转 excel 可能会发生数据丢失，这时可以利用 python 对数据进行简单的校验处理。
#!/usr/bin/env python
'''
文件类型
province,city,district,year,month,day,carrier, in_num,out_num,dwell_num
河北省,秦皇岛市,海港区,20180816,3289.0-1132.0-1167.0,3334.0-1139.0-1181.0,58281.0-18489.0-23436.0,cmcc-telecom-unicom
'''
def wrirteList2Disk(li=None):
    """Append the given text lines to ``./td.csv`` using GBK encoding.

    The file is opened in ``a+`` mode, so it is created when missing and
    existing content is kept; every call adds to the end of the file.

    Args:
        li: Iterable of strings to write. No line terminators are added,
            so each item must already end with a newline if one is wanted.
            ``None`` (the default) writes nothing.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which is
    # shared between calls and is a well-known source of surprises.
    lines = [] if li is None else li
    with open("./td.csv", 'a+', encoding='gbk') as f:
        f.writelines(lines)
# Order in which the per-carrier values are emitted for every metric.
_CARRIER_ORDER = ("cmcc", "unicom", "telecom")


def _strip_float_suffix(value):
    """Return *value* without a trailing ".0" (e.g. "3289.0" -> "3289").

    Only the suffix is removed; a ".0" elsewhere in the text (as in
    "10.05") is left untouched.
    """
    return value[:-2] if value.endswith(".0") else value


def _flatten_record(fields):
    """Build the output columns for one comma-split input record.

    ``fields[0:4]`` (province, city, district, date) are copied as-is.
    ``fields[4]``, ``fields[5]`` and ``fields[6]`` (in / out / dwell values)
    each hold "-"-separated numbers whose positions correspond to the
    "-"-separated carrier names in ``fields[7]``. For every metric the values
    are emitted in cmcc, unicom, telecom order.

    Raises:
        IndexError: if the record has fewer than eight fields or a metric
            has fewer values than there are carriers.
        ValueError: if one of the three carriers is missing from ``fields[7]``.
    """
    carriers = fields[7].split("-")
    positions = [carriers.index(name) for name in _CARRIER_ORDER]
    columns = [fields[0], fields[1], fields[2], fields[3]]
    for metric in (fields[4], fields[5], fields[6]):
        values = metric.split("-")
        columns.extend(_strip_float_suffix(values[pos]) for pos in positions)
    return columns


num0 = 0        # records skipped because their province field is '0'
num_total = 0   # records converted and queued for writing
result = []
with open("./1201.txt", 'r', encoding='utf-8') as f:
    # Iterate the file object directly instead of readlines() so the whole
    # input is not loaded into memory at once.
    for line in f:
        line = line.rstrip("\n")
        if not line.strip():
            # A blank line has no fields to index; skip it rather than crash.
            continue
        fields = line.split(",")
        if fields[0] == '0':
            # Echo and count the invalid record here: this is the only place
            # such a record is seen, since it is never converted.
            print(line)
            num0 += 1
            continue
        result.append(','.join(_flatten_record(fields)) + "\n")
        num_total += 1
wrirteList2Disk(result)
print("num0=%d" % num0)
print("num_total=%d" % num_total)