向chatgpt3.5 询问如何使用python3.*加载xlwings库处理excel表格

本文链接：https://blog.csdn.net/u014805297/article/details/132291779

首先说感受：可能一顿分析几小时，编写代码十几分钟，然后运行几秒钟，
有这个时间，可能报价早完成了，
但是因为这个平台上的报价表格式，具有一定的通用性，
如果当下碰到一个项次比较多的内容没有匹配到，
可以写一段正则表达式，先测试一下，以便下次使用。

需求分析

需求图
主要数据在上图中的B2-B5，B列这几行，
我需要将B2-B5 复制粘贴到B8-B11，并且拆分中间的数据，
按照图中C8-J11这样分布。

所用工具

chargpt3.5
python3.*
pythonde的第三方库:xlwings 用于操作 Excel
- 问：为什么是xlwings，而不是其他库。
- 答：参考xlwings库：Excel与Python的完美结合（附使用文档）。

询问（调教）chatgpt

使用python 并且加载xlwings库，写一段处理excel的代码：
判断当前文件夹中，是只有一个或多个.xlsx 或者.xls文件，
如果只有一个，打开这个.xlsx 或者.xls文件，判断是否有多个工作表，
如果没有，就选取第一个工作表，
如果有多个工作表，依次查看后方的工作表是否有数据，
如果没数据，就选取第一个工作表，不处理其他工作表。
复制B2到B列有数据的最后一行，粘贴在最后一行的下方。

我向chatgpt输入上述内容后，返回如下内容：

import os
import xlwings as xw

def process_excel_file(file_path):
    wb = xw.Book(file_path)
    ws = wb.sheets.active

    if len(wb.sheets) > 1:
        for i in range(1, len(wb.sheets)):
            temp_ws = wb.sheets[i]
            if temp_ws.used_range.shape == (1, 1) and temp_ws.used_range.value[0][0] is None:
                continue
            else:
                ws = temp_ws
                break  
    else:
        if ws.used_range.shape == (1, 1) and ws.used_range.value[0][0] is None:
            wb.close()
            return  

    last_row = ws.range('B' + str(ws.cells.last_cell.row)).end('up').row
    value_to_copy = ws.range('B2').expand('down').value

    
    ws.range('B' + str(last_row + 1)).value = value_to_copy

    wb.save()
    wb.close()

def main():
    current_folder = os.getcwd()
    excel_files = [f for f in os.listdir(current_folder) if f.endswith(('.xlsx', '.xls'))]

    if len(excel_files) == 1:
        process_excel_file(excel_files[0])
    elif len(excel_files) > 1:
        print("当前文件夹中存在多个 Excel 文件，请仅保留一个文件。")
    else:
        print("当前文件夹中不存在 Excel 文件。")

if __name__ == "__main__":
    main()

上面这块代码保存为*.py后，使用IDLE打开，并且F5运行，
在同一个目录下，发现扩展名为.xls和.files的表格文件，
并且打开.xls后，代码复制粘贴后的结果，不是自己想要的。
遂，得让chatgpt更改代码。
在这里插入图片描述

再次问chatgpt3.5

修改：复制B2到B列有数据的最后一行，粘贴在最后一行的下方，一行一个。最后保存的文件格式为xlsx

import os
import xlwings as xw

def process_excel_file(file_path):
    # 打开 Excel 文件
    wb = xw.Book(file_path)
    ws = wb.sheets.active

    # 判断工作表数量
    if len(wb.sheets) > 1:
        # 遍历工作表
        for i in range(1, len(wb.sheets)):
            temp_ws = wb.sheets[i]
            if temp_ws.used_range.shape == (1, 1) and temp_ws.used_range.value[0][0] is None:
                continue  # 跳过无数据的工作表
            else:
                ws = temp_ws
                break  # 使用第一个有数据的工作表
    else:
        if ws.used_range.shape == (1, 1) and ws.used_range.value[0][0] is None:
            wb.close()
            return  # 关闭工作簿并退出

    last_row = ws.range('B' + str(ws.cells.last_cell.row)).end('up').row
    values_to_copy = ws.range('B2').expand('down').value

    # 逐行复制和粘贴
    for value in values_to_copy:
        if value:
            last_row += 1
            ws.range('B' + str(last_row)).value = value

    wb.save()
    wb.close()

def main():
    current_folder = os.getcwd()
    excel_files = [f for f in os.listdir(current_folder) if f.endswith(('.xlsx', '.xls'))]

    for excel_file in excel_files:
        if excel_file.lower().endswith('.xlsx'):
            new_file_path = os.path.splitext(excel_file)[0] + '_modified.xlsx'
        else:
            new_file_path = os.path.splitext(excel_file)[0] + '_modified.xls'

        process_excel_file(excel_file)
        os.rename(excel_file, new_file_path)

if __name__ == "__main__":
    main()

以下是上面两个代码段，具体的差异之处（也是让chatgpt帮我分析的）：

工作表选择：

第一段代码中，仅选择第一个非空的工作表作为目标工作表。
```
ws = wb.sheets.active
```

第二段代码中，遍历了所有工作表，找到第一个非空的工作表作为目标工作表。

for i in range(1, len(wb.sheets)):
    temp_ws = wb.sheets[i]
    if temp_ws.used_range.shape == (1, 1) and temp_ws.used_range.value[0][0] is None:
        continue
    else:
        ws = temp_ws
        break

数据复制和粘贴：

第一段代码中，将整列数据一次性复制粘贴到目标工作表的末尾。

value_to_copy = ws.range('B2').expand('down').value
ws.range('B' + str(last_row + 1)).value = value_to_copy

第二段代码中，逐行复制非空单元格的值，然后逐行粘贴到目标工作表的末尾。

values_to_copy = ws.range('B2').expand('down').value
for value in values_to_copy:
    if value:
        last_row += 1
        ws.range('B' + str(last_row)).value = value

处理多个文件：

第一段代码中，仅处理当前目录下的一个 Excel 文件。

if len(excel_files) == 1:
    process_excel_file(excel_files[0])

第二段代码中，使用循环处理当前目录下的所有 Excel 文件，并在处理后重命名保存。
```
for excel_file in excel_files:
    process_excel_file(excel_file)
    # ...
```

文件保存：

第一段代码中，每次处理一个 Excel 文件后，保存并关闭工作簿。
```
wb.save()
wb.close()
```
第二段代码中，在每次复制粘贴数据后都保存工作簿，最后在循环结束后再关闭工作簿。
```
for value in values_to_copy:
    # ...
wb.save()
wb.close()
```

文件命名和保存：

第二段代码中，对每个 Excel 文件处理后，生成修改后的文件名并保存。

if excel_file.lower().endswith('.xlsx'):
    new_file_path = os.path.splitext(excel_file)[0] + '_modified.xlsx'
else:
    new_file_path = os.path.splitext(excel_file)[0] + '_modified.xls'

process_excel_file(excel_file)
os.rename(excel_file, new_file_path)

综上所述，这些是两段代码在工作表选择、数据复制粘贴、处理多个文件、文件保存和文件命名等方面的具体差异之处。

以下是上面【再次问chatgpt3.5】这部分的代码运行后的excel表格内容
在这里插入图片描述
excel列的内容复制，已经是完成了。

再再问chatgpt3.5

不是每行之间隔一行，而是从上面复制数据后，放在原始数据下方两个空行下方

import os
import xlwings as xw

def process_excel_file(file_path):
    # 打开 Excel 文件
    app = xw.App(visible=False)  # 以不可见模式启动 Excel
    wb = app.books.open(file_path)
    ws = wb.sheets.active

    # 判断工作表数量
    if len(wb.sheets) > 1:
        # 遍历工作表
        for i in range(1, len(wb.sheets)):
            temp_ws = wb.sheets[i]
            if temp_ws.used_range.shape == (1, 1) and temp_ws.used_range.value[0][0] is None:
                continue  # 跳过无数据的工作表
            else:
                ws = temp_ws
                break  # 使用第一个有数据的工作表
    else:
        if ws.used_range.shape == (1, 1) and ws.used_range.value[0][0] is None:
            wb.close()
            app.quit()
            return  # 关闭工作簿、Excel 并退出

    last_row = ws.range('B' + str(ws.cells.last_cell.row)).end('up').row
    values_to_copy = ws.range('B2').expand('down').value

    # 保存原始数据下方两个空行的位置
    insert_row = last_row + 3

    # 逐行复制和粘贴
    for value in values_to_copy:
        if value:
            ws.range('B' + str(insert_row)).value = value
            insert_row += 1

    # 保存修改并关闭工作簿、Excel
    new_file_path = os.path.splitext(file_path)[0] + '_modified.xlsx'
    wb.save(new_file_path)
    wb.close()
    app.quit()

def main():
    current_folder = os.getcwd()
    excel_files = [f for f in os.listdir(current_folder) if f.endswith(('.xlsx', '.xls'))]

    for excel_file in excel_files:
        process_excel_file(excel_file)

if __name__ == "__main__":
    main()

调教省略

由于篇幅原因，一篇文章字数太多，多数人就没看下去的欲望，
于是我把调教思路分享给大家（具体问问题思路，请开启科学上网美国区域的节点，然后查看下面的连接）……：https://chat.openai.com/share/90a8ee54-30cc-4131-a153-c50df572c487

目前可用代码

可以补充：比如可以增加适配其他格式的正则表达式。

# 导入必要的模块
import os  # 导入操作系统模块
import xlwings as xw  # 导入 xlwings 库，用于操作 Excel
import re  # 导入正则表达式模块
import psutil  # 导入 psutil 库，用于处理进程信息

# 关闭所有 Excel 进程的函数
def close_excel_processes():
    # 遍历所有进程
    for process in psutil.process_iter():
        try:
            # 获取进程信息
            process_info = process.as_dict(attrs=['pid', 'name'])
            # 如果进程名为 EXCEL.EXE
            if process_info['name'] == 'EXCEL.EXE':
                # 终止该进程
                os.kill(process_info['pid'], 9)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass
        
# 打开 Excel 文件，返回应用程序和工作簿对象的函数
def open_excel_file(file_path):
    # 打开 Excel 文件，返回应用程序和工作簿对象的函数
    app = xw.App(visible=False)
    # 打开指定路径的工作簿
    wb = app.books.open(file_path)
    return app, wb

# 获取活动工作表的函数
def get_active_sheet(wb):
    # 如果工作簿中有多个工作表
    if len(wb.sheets) > 1:
        # 遍历工作簿中的工作表
        for i in range(1, len(wb.sheets)):
            temp_ws = wb.sheets[i]
            # 遍历工作簿中的工作表
            if temp_ws.used_range.shape == (1, 1) and temp_ws.used_range.value[0][0] is None:
                continue
            else:
                return temp_ws # 返回第一个非空工作表
    else:
        return wb.sheets.active # 返回活动工作表

def insert_field_names(ws, insert_row):
    # 定义字段名称列表
    field_names = ["物料名称", "名称", "口径", "磅级（压力）", "连接方式", "阀门材料", "阀座密封面", "阀瓣密封面", "阀杆材料"]
    # 在指定位置插入字段名称
    ws.range('B' + str(insert_row)).value = field_names


# 处理具有8个匹配项的行的函数
# 例：碳钢闸阀\DN25 CL150 RF\ASTM A105 13Cr/13Cr/13Cr\API602
def process_row_8(matches, row):
    # 逐列写入匹配项的内容
    row.offset(0, 1).value = matches.group(1)
    row.offset(0, 2).value = matches.group(2)
    row.offset(0, 3).value = matches.group(3)
    row.offset(0, 4).value = matches.group(4)
    row.offset(0, 5).value = matches.group(5)
    row.offset(0, 6).value = matches.group(6)
    row.offset(0, 7).value = matches.group(7)
    row.offset(0, 8).value = matches.group(8)
    
# 处理具有4个匹配项的行的函数
# 例：衬氟塑料隔膜阀\G41F-16C-100\20#/PTFE 内衬
def process_row_4(matches, row):
    # 逐列写入匹配项的内容
    row.offset(0, 1).value = matches.group(1)
    row.offset(0, 2).value = "DN"+matches.group(3)
    row.offset(0, 3).value = "PN"+matches.group(2)
    row.offset(0, 5).value = matches.group(4)

# 处理国标
# 例：[闸阀\Z41H-16C-20]的函数
def process_row_5(matches, row):
    # 逐列写入匹配项的内容
    row.offset(0, 1).value = matches.group(1) #名称
    row.offset(0, 2).value = "DN"+matches.group(3)
    row.offset(0, 3).value = "PN"+matches.group(2)


def process_excel_file(file_path):
    app, wb = open_excel_file(file_path)
    ws = get_active_sheet(wb)

    last_row = ws.range('B' + str(ws.cells.last_cell.row)).end('up').row
    values_to_copy = ws.range('B2').expand('down').value

    insert_row = last_row + 3
    for value in values_to_copy:
        if value:
            ws.range('B' + str(insert_row)).value = value
            insert_row += 1

    for row in ws.range('B' + str(last_row + 3) + ':B' + str(insert_row - 1)):
        data = row.value

        # 匹配规则1
        matches = re.match(r'(.+?)\\(DN\d+)\s(CL\d+|PN\d+)\s(.*)\\ASTM\s([A-Z\d\-]+)\s(.*)/(.*)/(.*)\\(.*)', data)
        if matches:
            insert_row = last_row + 2
            insert_field_names(ws, insert_row)
            process_row_8(matches, row)
            continue

        # 匹配规则2 
        matches = re.match(r'(.+?)\\(DN\d+)\s(CL\d+|PN\d+)\s(.*)\\ASTM\s([A-Z\d\-]+)\s(.*)/(.*)\\(.*)', data)
        if matches:
            insert_row = last_row + 2
            insert_field_names(ws, insert_row)
            process_row_8(matches, row)
            continue

        # 匹配规则3  匹配 截止阀\J41H-50- 32\A105   匹配 截止阀\J41H-50-32\A105
        matches = re.match(r'(.+?)\\.+?-(.+?)-(.+?)\\(.+)', data)
        if matches:
            insert_row = last_row + 2
            insert_field_names(ws, insert_row)
            process_row_4(matches, row)
            continue

        # 匹配规则4  匹配 [闸阀\Z41H-16C-20]的函数
        matches = re.match(r'(.+?)\\.+?-(.+)-(\d+)', data)
        if matches:
            insert_row = last_row + 2
            insert_field_names(ws, insert_row)
            process_row_5(matches, row)
            continue



    new_file_path = os.path.splitext(file_path)[0] + '_modified.xlsx'
    wb.save(new_file_path)
    wb.close()
    app.quit()

def main():
    current_folder = os.getcwd()
    excel_files = [f for f in os.listdir(current_folder) if f.lower().endswith(('.xlsx', '.xls'))]

    if len(excel_files) == 1:
        process_excel_file(excel_files[0])
    elif len(excel_files) > 1:
        print("当前文件夹中存在多个 Excel 文件，请仅保留一个文件。")
    else:
        print("当前文件夹中不存在 Excel 文件。")

    close_excel_processes()

if __name__ == "__main__":
    main()