python实现excel数据提取

提取excel所需要的列或者区域

# -*- coding: utf-8 -*- 
import  xdrlib ,sys
import  xlrd
import xlwt
import datetime
import os 
from xlutils.copy import copy
    

def file_name(file_dir):  
    L=[]  
    for root, dirs, files in os.walk(file_dir): 
      for file in files: 
        if os.path.splitext(file)[1] == '.xls': 
          L.append(os.path.join(root, file)) 
    return L 
 
def open_excel(file= '1.csv'):
    try:
        data = xlrd.open_workbook(file)
        return data
    except Exception as e:
        print (e)

def get_sheetname(filename):
    data = xlrd.open_workbook(filename)

    # 查看工作表
    list = data.sheet_names()
    #print("sheets:" + str(data.sheet_names()))
    return list
 
def excel_table_byname(file, colnameindex=0, by_name=u'分页1(打印1份)'):
    data = open_excel(file)
    #print(file)
    list =[]

    table = data.sheet_by_name(by_name) 

    nrows = table.nrows
 
    colnames = table.row_values(colnameindex)
    
    for rownum in range(0, nrows): 
         row = table.row_values(rownum)
         if row: 
             app = [] 
             for i in range(len(colnames)):
                app.append(row[i])
             list.append(app) 
    return list

def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        pass
 
    try:
        import unicodedata
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
 
    return False    
    
def write_excel_xls_append(path, value):
    index = len(value)  # 获取需要写入数据的行数
    workbook = xlrd.open_workbook(path)  # 打开工作簿
    sheets = workbook.sheet_names()  # 获取工作簿中的所有表格
    worksheet = workbook.sheet_by_name(sheets[0])  # 获取工作簿中所有表格中的的第一个表格
    rows_old = worksheet.nrows  # 获取表格中已存在的数据的行数
    new_workbook = copy(workbook)  # 将xlrd对象拷贝转化为xlwt对象
    new_worksheet = new_workbook.get_sheet(0)  # 获取转化后工作簿中的第一个表格
    for i in range(0, index):
        new_worksheet.write(i+rows_old, j, value[i])  # 追加写入数据,注意是从i+rows_old行开始写入
    new_workbook.save(path)  # 保存工作簿
    print("xls格式表格【追加】写入数据成功!")
    
    
 
def main():
    file_list=file_name('F:\\新建文件夹\\')
    #print(file_list)
    sheet_list_lipei=[]
    

    workbook = xlrd.open_workbook('1.xls')
    sheets = workbook.sheet_names()
    worksheet = workbook.sheet_by_name(sheets[0])
    rows_old = worksheet.nrows
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)  # 获取转化后工作簿中的第一个表格
     
    writebook = xlwt.Workbook()
    sheet = writebook.add_sheet('test',cell_overwrite_ok=True)
    table_result=["理赔单号","保险公司","序号","姓名","证件号","个单号/个人编号","直赔结算地点","直赔结算时间","商家流水号","保险公司流水号","理赔金额"]

    for i in range(len(table_result)):
       sheet.write(0,i,table_result[i])
    
    for x in range(len(file_list)):   
        sheet_list = get_sheetname(file_list[x])
        for y in range(len(sheet_list)):
            tables = excel_table_byname(file_list[x],0,str(sheet_list[y]))
            for sheet_row in tables:
                #print(sheet_row)
                for z in range(len(sheet_row)):
                    str1 = str(sheet_row[z])
                    if '理赔单号:' in str1:
                        sheet_list_lipei.append(str(sheet_list[y]))
                        #print(sheet_list_lipei)                    

                        check ='理赔单号:'
                        company='保险公司:'
                        num='序号'
                        name='姓名'
                        id='证件号'
                        id_num='个单号/个人编号'
                        place='直赔结算地点'
                        time='直赔结算时间'
                        flowid='商家流水号'
                        insure_flow_id='保险公司流水号'
                        money='理赔金额'
                        
                        check_row=99999
                        check_col=99999
                        id_row=99999
                        id_col=99999
                        id_num_row=99999
                        id_num_col=99999
                        place_row=99999
                        place_col=99999
                        time_row=99999
                        time_col=99999
                        flowid_row=99999
                        flowid_col=99999
                        insure_flow_id_row=99999
                        insure_flow_id_col=99999
                        money_row=99999
                        money_col=99999
                        
                        
                        name_list=[]
                        id_list=[]
                        id_num_list=[]
                        place_list=[]
                        time_list=[]
                        flowid_list=[]
                        insure_flow_id_list=[]
                        money_list=[]
                        
    
                        workbook = xlrd.open_workbook('1.xls')
                        sheets = workbook.sheet_names()
                        worksheet = workbook.sheet_by_name(sheets[0])
                        rows_old = worksheet.nrows
                        new_workbook = copy(workbook)
                        new_worksheet = new_workbook.get_sheet(0)  # 获取转化后工作簿中的第一个表格
                        

                        count=0
                        count_all=0
                        check_value=''
                        
                       
                        for row in tables:
                            count_all=count_all+1
                            if is_number(str(row[0])):
                                #print(str(row[0]))
                                count=count+1
                                #print(count)
                            for j in range(len(row)):
                                s = str(row[j])
                                if check in s:
                                    check_value=s
                                if company in s:
                                    company_value=s
                                if name in s:
                                    check_col=j
                                    #print(check_col)
                                    check_row=count_all
                                    #print(check_row)
                                if id in s:
                                    id_col=j
                                    #print("id_col:",(id_col))
                                    id_row=count_all
                                    #print("id_row:",(id_row))
                                if id_num in s:
                                    id_num_col=j
                                    #print("id_num_col:",(id_num_col))
                                    id_num_row=count_all
                                    #print("id_num_row:",(id_num_row))
                                if place in s:
                                    place_col=j
                                    #print(place_col)
                                    place_row=count_all
                                    #print(place_row)   
                                if time in s:
                                    time_col=j
                                    #print(time_col)
                                    time_row=count_all
                                    #print(time_row)
                                if flowid in s:
                                    flowid_col=j
                                    #print(flowid_col)
                                    flowid_row=count_all
                                    #print(flowid_row)
                                if insure_flow_id in s:
                                    insure_flow_id_col=j
                                    #print(insure_flow_id_col)
                                    insure_flow_id_row=count_all
                                    #print(insure_flow_id_row)
                                if money in s:
                                    money_col=j
                                    #print(money_col)
                                    money_row=count_all
                                    #print(money_row)
                                
                            if count_all> check_row:
                                name_list.append(row[check_col])
                            if count_all> id_row:
                                id_list.append(row[id_col])
                            if count_all> id_num_row:
                                id_num_list.append(row[id_num_col])  
                            if count_all> place_row:
                                place_list.append(row[place_col]) 
                            if count_all> time_row:
                                time_list.append(row[time_col])
                            if count_all> flowid_row:
                                flowid_list.append(row[flowid_col])   
                            if count_all> insure_flow_id_row:
                                insure_flow_id_list.append(row[insure_flow_id_col])  
                            if count_all> money_row:
                                money_list.append(row[money_col])    
                            
                        print(name_list)
                        #rows_old = sheet.nrows+1
                        
                        for i in range(count):
                            if check_value!='':
                                #sheet.write(i+1,rows_old+0,check_value)
                                sheet.write(i+rows_old,0,check_value)
                                
                                #new_worksheet.write(i+rows_old, j, check_value)
                            if company_value!='':    
                                #sheet.write(i+1,1,company_value)
                                sheet.write(i+rows_old,1,company_value)
                                #new_worksheet.write(i+rows_old, j, company_value)
                            sheet.write(i+rows_old,2,i+1)
                            
                            if len(name_list)>0:
                               # sheet.write(i+1,3,name_list[i])
                                sheet.write(i+rows_old,3,name_list[i])
                                #new_worksheet.write(i+rows_old, j, name_list)
                            if len(id_list)>0:
                                #sheet.write(i+1,4,id_list[i-rows_old]) 
                                sheet.write(i+rows_old,4,id_list[i])
                            if len(id_num_list)>0:
                                #sheet.write(i+1,5,id_num_list[i-rows_old])
                                sheet.write(i+1,5,id_num_list[i])
                            if len(place_list)>0:
                                #sheet.write(i+1,6,place_list[i-rows_old])
                                sheet.write(i+rows_old,6,place_list[i])
                            if len(time_list)>0:    
                                #sheet.write(i+1,7,time_list[i-rows_old])
                                sheet.write(i+rows_old,7,time_list[i])
                            if len(flowid_list)>0:   
                                #sheet.write(i+1,8,flowid_list[i-rows_old])
                                sheet.write(i+rows_old,8,flowid_list[i])
                            if len(insure_flow_id_list)>0:
                                #sheet.write(i+1,9,insure_flow_id_list[i-rows_old])
                                sheet.write(i+rows_old,9,insure_flow_id_list[i])
                            if len(money_list)>0:
                                #sheet.write(i+1,10,money_list[i-rows_old])
                                sheet.write(i+rows_old,10,money_list[i])
                            
                            
                            writebook.save('1.xls')

           
 
if __name__=="__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值