import os

import numpy as np
import pandas as pd
# Read the whole worksheet into one DataFrame.
# `read_excel` is called without the former `encoding='gbk'` argument: current
# pandas releases reject that keyword with a TypeError.
data = pd.read_excel('C:/Users/cory511/Desktop/放射科病例一览表0101-0430.xls')
# Empty table with the same columns (and dtypes) as `data`. Slicing zero rows
# works even when the sheet is empty or its index has no label 0.
df = data.iloc[0:0]
# Split the full table into sub-tables at blank separator rows and export each
# sub-table to its own CSV file.
def split_on_blank_rows(frame, start=1):
    """Yield ``(stop, group)`` for every run of rows delimited by a blank row.

    A row is treated as blank when every one of its cells is null.

    Args:
        frame: DataFrame to split.
        start: Positional index of the first row to examine. The default of 1
            keeps the original script's behaviour of never looking at row 0.
            NOTE(review): confirm that skipping row 0 is intentional.

    Yields:
        ``(stop, group)`` pairs. ``group`` holds the contiguous rows collected
        since the previous blank row (it is empty when two blank rows are
        adjacent). ``stop`` is the position of the blank row that closed the
        group, or ``len(frame)`` for a final group that is not followed by a
        blank row.
    """
    # One vectorised pass instead of a per-row apply/lambda.
    is_blank = frame.isnull().all(axis=1).to_numpy()
    total_rows = len(frame)
    group_start = start
    for pos in range(start, total_rows):
        if is_blank[pos]:
            yield pos, frame.iloc[group_start:pos]
            group_start = pos + 1
    # Rows after the last blank row would otherwise be dropped silently.
    if group_start < total_rows:
        yield total_rows, frame.iloc[group_start:]


# Guarded so the helper above can be imported without touching the filesystem;
# running the file as a script still performs the export.
if __name__ == "__main__":
    for stop, group in split_on_blank_rows(data):
        # Each file is named after the row position that closed the group.
        group.to_csv('C:/Users/cory511/Desktop/JM/%i.csv' % stop)
# Merge the exported sub-table CSV files into one report table.

# Report column -> (row, column) position of its value inside a sub-table as
# read back from CSV. The key order is the column order of the report.
CELL_FOR_COLUMN = {
    '检查号': (0, 1),
    '姓名': (0, 2),
    '性别': (0, 4),
    '年龄': (0, 6),
    '门诊号': (0, 7),
    '住院号': (0, 9),
    '申请科室': (0, 10),
    '检查日期': (0, 11),
    '报告': (0, 12),
    '检查项目': (1, 2),
    '临床诊断': (2, 2),
    '影像诊断': (3, 2),
    'unknow': (4, 2),
}


def build_report_row(part):
    """Flatten one sub-table into a single report record.

    Args:
        part: DataFrame read from one exported sub-table CSV.

    Returns:
        A dict keyed by report column name. A cell whose row does not exist in
        ``part`` is filled with NaN, so short sub-tables still give a full
        record.

    Raises:
        IndexError: if ``part`` has rows but fewer columns than the positions
            in ``CELL_FOR_COLUMN`` require.
    """
    row_count = len(part)
    # Read cells directly rather than appending NaN padding rows to `part`.
    return {
        column: part.iat[row, col] if row < row_count else np.nan
        for column, (row, col) in CELL_FOR_COLUMN.items()
    }


# Guarded so the helper above can be imported without touching the filesystem;
# running the file as a script still builds and writes the report.
if __name__ == "__main__":
    split_dir = 'C:/Users/cory511/Desktop/JM'
    # List the exported sub-table files.
    fileList = os.listdir(split_dir)
    records = []
    # NOTE(review): the first directory entry is skipped, as in the original
    # script. os.listdir returns entries in arbitrary order, so confirm which
    # entry this is meant to exclude.
    for file_name in fileList[1:]:
        # Join with the directory so the read does not depend on the current
        # working directory. `on_bad_lines='skip'` (pandas >= 1.3) replaces
        # the removed `error_bad_lines=False`.
        part = pd.read_csv(
            os.path.join(split_dir, file_name),
            encoding='utf-8',
            on_bad_lines='skip',
        )
        records.append(build_report_row(part))
    # Build the table once; DataFrame.append no longer exists in pandas 2.x and
    # copied the whole table on every call.
    JF = pd.DataFrame(records, columns=list(CELL_FOR_COLUMN))
    JF.to_csv('C:/Users/cory511/Desktop/报告.csv')
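# NOTE(review): stray non-code text "防掉落" was left at the end of the script; kept here as a comment so it is not evaluated as an undefined name.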