Python办公自动化，有效告别繁琐操作，955不是梦，建议收藏！

快乐星球没有乐

已于 2024-01-09 20:40:20 修改

阅读量57

点赞数

文章标签： excel python 爬虫 数据挖掘 数据分析

于 2023-05-24 13:58:48 首次发布

本文链接：https://blog.csdn.net/m0_58477260/article/details/130845590

版权

报表合并

将文件夹下所有 Excel 文件合并为一个文件。注意：这里只合并每个 Excel 文件的第一个 sheet；如果待合并的文件包含多个 sheet，请自行修改代码中的 all_to_one() 函数。合并步骤如下：

获取路径下所有文件。
新建一个excel文件，用于存储全部数据。
逐个打开需要合并的excel文件，逐行读取数据，再用一个列表来保存每行数据，最后在该列表中存储所有的数据。
向excel文件中逐行写入。

# coding:utf-8

import xlrd
import os
import xlwt
from xlutils.copy import copy

def get_allfile_msg(file_dir):
    """
    Collect the Excel files located directly inside ``file_dir``.

    Only the top level of the directory is inspected: sub-directories are
    reported by name but never descended into.

    :param file_dir: directory to scan
    :return: tuple ``(root, dirs, files)`` where ``root`` is the scanned
        directory, ``dirs`` the names of its immediate sub-directories and
        ``files`` the file names ending in ``.xls`` or ``.xlsx``. When the
        directory cannot be walked (for example it does not exist),
        ``(file_dir, [], [])`` is returned so callers can always unpack
        three values.
    """
    # os.walk yields the top directory first; returning from inside the loop
    # deliberately stops before any sub-directory is visited.
    for root, dirs, files in os.walk(file_dir):
        excel_files = [name for name in files if name.endswith(('.xls', '.xlsx'))]
        return root, dirs, excel_files
    # os.walk yielded nothing: report an empty result instead of returning None.
    return file_dir, [], []


def get_allfile_url(root, files):
    """
    Build the path of every file by joining the directory and the file name
    with a forward slash.

    :param root: directory that contains the files
    :param files: collection of file names
    :return: list of ``root + '/' + name`` strings, in the order of ``files``
    """
    return [root + '/' + name for name in files]


def all_to_one(root, allFile_url, file_name='allExcel.xls', title=None, have_title=True, top=None):
    """
    Merge the first sheet of every workbook in ``allFile_url`` into one file.

    :param root: directory in which the merged workbook is written
    :param allFile_url: list with the paths of the workbooks to merge
    :param file_name: name of the merged workbook, created inside ``root``
    :param title: passed through to ``create_excel``
    :param have_title: when true, the leading header rows are copied from the
        first merged workbook only and skipped in every later one
    :param top: number of header rows at the top of each sheet. When omitted,
        a module-level ``top`` is used if one is defined (this is how the
        script originally configured it); otherwise 1 is assumed.
    :return: None
    """
    if top is None:
        # Backward compatibility: the header height used to be read from a
        # module-level ``top`` that only exists when the file runs as a script.
        top = globals().get('top', 1)
    header_rows = max(top, 0)

    # Create the output workbook that will receive the rows of all inputs.
    file_name = root + '/' + file_name
    create_excel(file_name, title)
    output_path = os.path.normcase(os.path.abspath(file_name))

    list_row_data = []
    header_kept = False
    for url in allFile_url:
        # The output lives in the scanned folder, so a previous run's result
        # can show up among the inputs; merging it would shift the headers.
        if os.path.normcase(os.path.abspath(url)) == output_path:
            continue

        print('打开%s文件' % url)
        # NOTE(review): xlrd 2.0+ reads only .xls files; .xlsx inputs presumably
        # need an older xlrd - confirm against the installed version.
        excel = xlrd.open_workbook(url)
        # Only the first sheet of each workbook is merged.
        table = excel.sheet_by_index(0)
        print('该文件行数为：%d，列数为：%d' % (table.nrows, table.ncols))

        # Keep the header rows of the first merged workbook, drop them afterwards.
        first_row = header_rows if (have_title and header_kept) else 0
        for i in range(first_row, table.nrows):
            list_row_data.append(table.row_values(i))
        header_kept = True

    print('总数据量为%d' % len(list_row_data))
    # Write the collected rows into the output workbook.
    add_row(list_row_data, file_name)

# Create the workbook file_name with a single sheet and an optional header row.
def create_excel(file_name, title):
    """
    Create a new workbook containing one sheet named ``sheet1`` and save it.

    :param file_name: path of the workbook to create (an existing file is
        overwritten by the save)
    :param title: sequence of header values written to row 0, one per column;
        ``None`` or an empty sequence leaves the sheet empty
    :return: None
    """
    print('创建文件%s' % file_name)
    a = xlwt.Workbook()
    # cell_overwrite_ok lets later writes replace cells that were already set.
    table = a.add_sheet('sheet1', cell_overwrite_ok=True)
    # Guarding on title keeps the title=None call of the script working.
    if title:
        for col, text in enumerate(title):
            table.write(0, col, text)
    a.save(file_name)

# Append rows to the first sheet of an existing workbook.
def add_row(list_row_data, file_name):
    """
    Write ``list_row_data`` below the rows already present in the first sheet
    of ``file_name`` and save the workbook back to the same path.

    :param list_row_data: list of rows, each row being a list of cell values
    :param file_name: path of the workbook to extend (overwritten on save)
    :return: None
    """
    # Read the workbook to learn how many rows the first sheet already has.
    source_book = xlrd.open_workbook(file_name)
    first_free_row = source_book.sheet_by_index(0).nrows

    # The workbook opened for reading is copied into a writable one.
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)

    for offset, values in enumerate(list_row_data):
        for col, value in enumerate(values):
            target_sheet.write(first_free_row + offset, col, value)

    # Saving under the same name replaces the original file.
    writable_book.save(file_name)
    print('合并完成')

if __name__ == '__main__':
    # Script entry point: merge the Excel files found in file_dir into one workbook.
    # Folder that holds the workbooks to merge. The backslashes are escaped
    # explicitly ('\\'); a raw string (r'...') would be an equivalent spelling.
    file_dir = '.\\01 报表合并\\word'
    # Number of header rows at the top of each workbook. all_to_one reads this
    # module-level name and skips that many leading rows in every workbook
    # after the first one.
    top = 2
    # Name of the merged workbook; all_to_one creates it inside root.
    file_name = 'save_demo.xls'

    # Scanned directory, its sub-directory names and the Excel file names in it.
    root, dirs, files = get_allfile_msg(file_dir)
    # Prefix every file name with the directory to obtain a list of paths.
    allFile_url = get_allfile_url(root, files)
    # have_title=True keeps the header rows of the first workbook only.
    all_to_one(root, allFile_url, file_name=file_name, title=None, have_title=True)

批量word转pdf

import win32com.client
import pythoncom
import os

class Word_2_PDF(object):
    """
    Open one Word document through COM automation and export it as PDF.

    Instances can be used as context managers, which closes the document and
    quits Word even when the export raises.
    """

    # Word enumeration values used by this class.
    _WD_EXPORT_FORMAT_PDF = 17              # WdExportFormat.wdExportFormatPDF
    _WD_EXPORT_DOCUMENT_WITH_MARKUP = 7     # WdExportItem.wdExportDocumentWithMarkup
    _WD_EXPORT_CREATE_NO_BOOKMARKS = 0      # WdExportCreateBookmarks.wdExportCreateNoBookmarks
    _WD_DO_NOT_SAVE_CHANGES = 0             # WdSaveOptions.wdDoNotSaveChanges

    def __init__(self, filepath, Debug=False):
        """
        :param filepath: path of the Word document to open
        :param Debug: when true the Word window is made visible
        """
        self.wordApp = win32com.client.Dispatch('word.Application')
        self.wordApp.Visible = Debug
        try:
            self.myDoc = self.wordApp.Documents.Open(filepath)
        except Exception:
            # Do not leave the Word application running when the open fails.
            self.wordApp.Quit()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def export_pdf(self, output_file_path):
        """
        Export the opened document as a PDF file.

        :param output_file_path: path of the PDF file to write
        :return: None
        """
        self.myDoc.ExportAsFixedFormat(
            output_file_path,
            self._WD_EXPORT_FORMAT_PDF,
            Item=self._WD_EXPORT_DOCUMENT_WITH_MARKUP,
            CreateBookmarks=self._WD_EXPORT_CREATE_NO_BOOKMARKS,
        )

    def close(self):
        """Close the document without saving it, then quit Word."""
        try:
            self.myDoc.Close(SaveChanges=self._WD_DO_NOT_SAVE_CHANGES)
        finally:
            # Quit even when closing the document fails.
            self.wordApp.Quit()

if __name__ == '__main__':
    # Script entry point: convert every Word document found below the current
    # working directory to PDF.

    rootpath = os.getcwd()  # directory tree searched for Word documents
    save_path = os.getcwd()   # directory that receives the generated PDFs
    pythoncom.CoInitialize()
    try:
        for parent, dirnames, filenames in os.walk(rootpath):
            for filename in filenames:
                # splitext keeps dots inside the name ('v1.2 plan.docx' -> 'v1.2 plan').
                title, extension = os.path.splitext(filename)
                if extension.lower() not in ('.doc', '.docx', '.docm'):
                    continue
                # Names containing '~$' are skipped (Word's temporary lock files).
                if '~$' in filename:
                    continue
                # The file lives in `parent`, which differs from rootpath for
                # documents found in sub-directories.
                a = Word_2_PDF(os.path.join(parent, filename), True)
                try:
                    # NOTE: documents that share a name overwrite each other's
                    # PDF because all output goes to save_path.
                    a.export_pdf(os.path.join(save_path, title + '.pdf'))
                finally:
                    # Quit Word after each document so no instance is left behind.
                    a.close()
    finally:
        pythoncom.CoUninitialize()
    print('转化完成')

合同生成

from openpyxl import load_workbook
from docx import Document
from os import listdir
'''
定义替换函数
'''
def replace_text(old_text, new_text, doc=None):
    """
    Replace ``old_text`` with ``new_text`` in the paragraphs and tables of a
    document.

    Paragraph text is replaced run by run, so a placeholder that is spread
    over more than one run is not matched. Table text is replaced per cell.
    Runs and cells that do not contain ``old_text`` are left untouched.

    :param old_text: text to search for; an empty value makes the call a no-op
    :param new_text: replacement text
    :param doc: document to modify; when omitted the module-level
        ``document`` is used
    :return: None
    """
    # str.replace('', x) would insert x between every character.
    if not old_text:
        return
    if doc is None:
        doc = document

    # Body paragraphs: rewrite only the runs that contain the placeholder.
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)

    # Tables: assigning cell.text rebuilds the cell content, so it is done
    # only for cells that actually contain the placeholder.
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                if old_text in cell.text:
                    cell.text = cell.text.replace(old_text, new_text)
# Locate the Word template and the Excel data file in the current directory.
docx_name = None
xlsx_name = None
for file in listdir():
    print(file, 'listdir')
    # Files starting with '~$' are the lock files Office creates while a
    # document is open; they are not valid inputs.
    if file.startswith('~$'):
        continue
    if '模板.docx' in file:
        docx_name = file
    if '信息.xlsx' in file:
        xlsx_name = file
if docx_name is None or xlsx_name is None:
    # Fail with a clear message instead of a NameError further down.
    raise FileNotFoundError(
        "expected a file containing '模板.docx' and a file containing "
        "'信息.xlsx' in the current directory"
    )

# Read the data from the first sheet of the Excel workbook.
wb = load_workbook(xlsx_name)
sheetx0 = wb.sheetnames
sheetx = wb[sheetx0[0]]

# Column whose value is used as the name of each generated file.
filename_pos = 1
# Generate one document per data row of the sheet.
# Row 1 holds the placeholder texts and data rows are read from row 3 on.
# NOTE(review): row 2 is skipped, presumably a description row - confirm
# against the workbook layout.
for row in range(3, sheetx.max_row + 1):
    # max_row can include blank trailing rows; a row without a value in
    # column 1 is ignored.
    if sheetx.cell(row=row, column=1).value is None:
        continue
    # Start from a fresh copy of the template. The name must stay
    # module-level because replace_text works on the global `document`.
    document = Document(docx_name)
    for col in range(1, sheetx.max_column + 1):
        # Placeholder text taken from the header row of this column.
        old_text = sheetx.cell(row=1, column=col).value
        if old_text is None:
            # max_column can include blank columns; nothing to replace.
            continue
        # Value of the current row for this column; blank cells become an
        # empty string instead of the literal text 'None'.
        new_text = sheetx.cell(row=row, column=col).value
        replace_text(str(old_text), '' if new_text is None else str(new_text))
    # The output file is named after the value in column filename_pos.
    filename = str(sheetx.cell(row=row, column=filename_pos).value)
    document.save("%s.docx" % (filename))
print('合同生成完毕！')

Python经验分享

学好 Python，不论是用于就业、数据分析，还是做副业赚钱，都很不错，但要学会 Python 还是需要一个学习规划。最后给大家分享一份全套的 Python 学习资料，给那些想学习 Python 的小伙伴们一点帮助！

Python学习路线

这里把Python常用的技术点做了整理，有各个领域的知识点汇总，可以按照上面的知识点找对应的学习资源。
在这里插入图片描述

学习软件

Python常用的开发软件，会给大家节省很多时间。
在这里插入图片描述

学习视频

编程学习一定要多多看视频，书籍和视频结合起来学习才能事半功倍。
在这里插入图片描述

100道练习题

在这里插入图片描述

实战案例

光学理论是没用的，学习编程切忌纸上谈兵，一定要动手实操，将自己学到的知识运用到实际当中。
在这里插入图片描述
最后祝大家天天进步！！

上面这份完整版的Python全套学习资料已经上传至CSDN官方，朋友如果需要可以直接微信扫描下方CSDN官方认证二维码免费领取【保证100%免费】。

快乐星球没有乐

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python办公自动化，有效告别繁琐操作，955不是梦，建议收藏！

这里仅支持合并 Excel 文件中的第一个 sheet，如果合并的 Excel 文件有多个 sheet，请大家自行修改代码中的 all_to_one() 函数。逐个打开需要合并的 Excel 文件，逐行读取数据，再用一个列表来保存每行数据，最后在该列表中存储所有的数据。学好 Python 不论是就业、数据分析还是做副业赚钱都不错，但要学会 Python 还是要有一个学习规划。这里把 Python 常用的技术点做了整理，有各个领域的知识点汇总，可以按照上面的知识点找对应的学习资源。光学理论是没用的，学习编程切忌纸上谈兵，一定要动手实操，将自己学到的知识运用到实际当中。
复制链接

扫一扫