如何用Python对目录下的所有xls文件进行合并输出

在电商行业中,每天都要去导流量来源信息进行商品数据分析,并需要对多天的数据进行粘贴复制,合并成一个数据表。多个表复制意味着重复的工作,基于此,怎样使用脚本对多个表进行合并,提高工作的效率呢?

脚本步骤
  • 路径拼接,形成完整的路径列表
  • 读取数据
  • 保存数据
路径拼接

找出在特定的目录下都有哪些需要合并的子表,返回完整路径列表

def IsSubString(SubStrList, Str):
    '''''
    #判断字符串Str是否包含序列SubStrList中的每一个子字符串
    #>>>SubStrList=['F','EMS','txt']
    #>>>Str='F06925EMS91.txt'
    #>>>IsSubString(SubStrList,Str)#return True (or False)
    '''
    flag = True
    for substr in SubStrList:
        if not (substr in Str):
            flag = False
    return flag


def fn_get_filelist(FindPath, FlagStr=[]):
    '''''
    #获取目录中指定的文件名
    #>>>FlagStr=['F','EMS','txt'] #要求文件名称中包含这些字符
    #>>>FileList=GetFileList(FindPath,FlagStr) #
    '''
    import os
    FileList = []
    FileNames = os.listdir(FindPath)
    if len(FileNames) > 0:
        for fn in FileNames:
            if len(FlagStr) > 0:
                # 返回指定类型的文件名
                if IsSubString(FlagStr, fn):
                    fullfilename = os.path.join(FindPath, fn)
                    FileList.append(fullfilename)
            else:
                # 默认直接返回所有文件名
                fullfilename = os.path.join(FindPath, fn)
                FileList.append(fullfilename)
                # 对文件名排序
    if (len(FileList) > 0):
        FileList.sort()
    for i in range(len(FileList)):
        print(FileList[i])

    return FileList
读取每一行数据,可以控制从哪一行开始读取
def read_excel(file_name):
    work = xlrd.open_workbook(file_name)
    sheet = work.sheet_by_index(0)
    rows = sheet.nrows

    rows_list = []
    for i in range(1, rows):
        rows_list.append(sheet.row_values(i))
    return rows_list

如上实现了xlrd读取数据,从第一行开始读取,排除表明。

合并数据
def gather_excel(fn):
    workbook = xlwt.Workbook(encoding='utf-8')
    ws = workbook.add_sheet("流量来源详情")

    title_data = ["来源名称", "访客数", "浏览量", "支付金额", "浏览量占比", "店内跳转人数", "跳出本店人数", "收藏人数", "加购人数", "下单买家数", "下单转换率", "支付件数",
                  "支付买家数", "支付转换率", "直接支付买家", "收藏商品-支付买家数", "粉丝支付买家数", "加购商品-支付买家数"]
    target_list = [title_data]

    for n in range(len(title_data)):
        ws.write(0, n, target_list[0][n])

    index = 1

    for i in range(len(fn)):

        data_list = read_excel(fn[i])
        length_data_list = len(data_list)

        for j in range(length_data_list):
            list_line = data_list[j]

            target_list.append(list_line)
            for n in range(len(list_line)):
                ws.write(index, n, target_list[index][n])
            list_line.clear()
            index = index + 1
    save_result(workbook)

如果对于合并数据有所不接可以看我以前的文章,有详细的介绍,Python实现数据的读取和存储

存储数据
def save_result(wb):
    wb.save(target_file)
    print("存储成功!!!")
完整的代码
# This is a sample Python script.
import xlrd, xlwt

target_file = "./target_result_v1.xls"


# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
def IsSubString(SubStrList, Str):
    '''''
    #判断字符串Str是否包含序列SubStrList中的每一个子字符串
    #>>>SubStrList=['F','EMS','txt']
    #>>>Str='F06925EMS91.txt'
    #>>>IsSubString(SubStrList,Str)#return True (or False)
    '''
    flag = True
    for substr in SubStrList:
        if not (substr in Str):
            flag = False
    return flag


def fn_get_filelist(FindPath, FlagStr=[]):
    '''''
    #获取目录中指定的文件名
    #>>>FlagStr=['F','EMS','txt'] #要求文件名称中包含这些字符
    #>>>FileList=GetFileList(FindPath,FlagStr) #
    '''
    import os
    FileList = []
    FileNames = os.listdir(FindPath)
    if len(FileNames) > 0:
        for fn in FileNames:
            if len(FlagStr) > 0:
                # 返回指定类型的文件名
                if IsSubString(FlagStr, fn):
                    fullfilename = os.path.join(FindPath, fn)
                    FileList.append(fullfilename)
            else:
                # 默认直接返回所有文件名
                fullfilename = os.path.join(FindPath, fn)
                FileList.append(fullfilename)
                # 对文件名排序
    if (len(FileList) > 0):
        FileList.sort()
    for i in range(len(FileList)):
        print(FileList[i])

    return FileList


def gather_excel(fn):
    workbook = xlwt.Workbook(encoding='utf-8')
    ws = workbook.add_sheet("流量来源详情")

    title_data = ["来源名称", "访客数", "浏览量", "支付金额", "浏览量占比", "店内跳转人数", "跳出本店人数", "收藏人数", "加购人数", "下单买家数", "下单转换率", "支付件数",
                  "支付买家数", "支付转换率", "直接支付买家", "收藏商品-支付买家数", "粉丝支付买家数", "加购商品-支付买家数"]
    target_list = [title_data]

    for n in range(len(title_data)):
        ws.write(0, n, target_list[0][n])

    index = 1

    for i in range(len(fn)):

        data_list = read_excel(fn[i])
        length_data_list = len(data_list)

        for j in range(length_data_list):
            list_line = data_list[j]

            target_list.append(list_line)
            for n in range(len(list_line)):
                ws.write(index, n, target_list[index][n])
            list_line.clear()
            index = index + 1
    save_result(workbook)


def save_result(wb):
    wb.save(target_file)
    print("存储成功!!!")


def read_excel(file_name):
    work = xlrd.open_workbook(file_name)
    sheet = work.sheet_by_index(0)
    rows = sheet.nrows

    rows_list = []
    for i in range(1, rows):
        rows_list.append(sheet.row_values(i))
    return rows_list


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


def process():
    fs = fn_get_filelist("F:/pythonCode/project1/tmp", ['xls'])

    gather_excel(fs)


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    process()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/

合并的结果

在这里插入图片描述

欢迎关注:CodeJames

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值