Python-数据分析实战项目

最近咸鱼上接了一个数据清洗/数据处理的单子 要求用的是Python,还要有配置文件来控制选择默认的寄件人信息和存放原excel的文件夹和新excel的文件夹....真的很难顶啊 后续还得打包成exe后发给甲方,也是耗时4个小时成功完成

首先贴甲方要求

处理这个excel中的数据,填充到给定模板,其中二选一的部分不用填充为空

第一步 思路很明确导库和基础准备,再根据原excel中的补发信息写正则表达式
r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$' #补发信息对应的正则表达式
#导入所需库
import pandas as pd
import re
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# 使用Tkinter选择文件
Tk().withdraw()  # 不显示Tkinter主窗口
file_path = askopenfilename(title="选择包含数据的Excel文件", filetypes=[("Excel files", "*.xlsx *.xls")])

# 读取原始Excel文件,指定engine为openpyxl
df_raw = pd.read_excel(file_path, engine='openpyxl')

第二步 创建一个格式与模板相同的data字典
        data = {
            "收件人姓名(必填)": [],
            "收件人手机(二选一)": [],
            "收件人电话(二选一)": [],
            "收件人地址(必填)": [],
            "寄件人姓名": [],
            "寄件人手机(二选一)": [],
            "寄件人电话(二选一)": [],
            "寄件人地址": [],
            "物品类型(最多4个字)": [],
            "包裹备注": [],
            "订单编号": [],
            "包裹重量": [],
            "包裹体积": []
        }
第三步 根据甲方发的默认寄货人信息以及正则提取到的信息放入excel中,然后导出excel
# 默认寄件人信息(可以根据实际情况调整)
sender_name = "田所浩二"
sender_phone = "1145141919810"
sender_address = "我也不知道我在哪里"

# 提取信息
for index, row in df_raw.iterrows():
    info = row['补发信息']
    shop_name = row['店铺']

    # 正则表达式匹配
    match = re.search(r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$', info, re.DOTALL)
    if match:
        recipient_name = match.group(1)
        order_number = match.group(2)
        recipient_phone = match.group(3)
        recipient_address = match.group(4)
        item_type = match.group(6)

        data["收件人姓名(必填)"].append(recipient_name)
        data["收件人手机(二选一)"].append(recipient_phone)
        data["收件人电话(二选一)"].append("")
        data["收件人地址(必填)"].append(recipient_address)
        data["寄件人姓名"].append(sender_name)
        data["寄件人手机(二选一)"].append(sender_phone)
        data["寄件人电话(二选一)"].append("")
        data["寄件人地址"].append(sender_address)
        data["物品类型(最多4个字)"].append(item_type)
        data["包裹备注"].append(shop_name + "+" + item_type)
        data["订单编号"].append(order_number)
        data["包裹重量"].append("")
        data["包裹体积"].append("")

# 创建DataFrame
df_processed = pd.DataFrame(data)

# 保存为新的Excel文件
output_file_path = "processed_物流订单信息.xlsx"
df_processed.to_excel(output_file_path, index=False)

print(f"Excel文件已生成:{output_file_path}")
至此,第一版最基础的版本完成了,整体代码如下
import pandas as pd
import re
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# 使用Tkinter选择文件
Tk().withdraw()  # 不显示Tkinter主窗口
file_path = askopenfilename(title="选择包含数据的Excel文件", filetypes=[("Excel files", "*.xlsx *.xls")])

# 读取原始Excel文件,指定engine为openpyxl
df_raw = pd.read_excel(file_path, engine='openpyxl')



# 准备提取的数据字典
data = {
    "收件人姓名(必填)": [],
    "收件人手机(二选一)": [],
    "收件人电话(二选一)": [],
    "收件人地址(必填)": [],
    "寄件人姓名": [],
    "寄件人手机(二选一)": [],
    "寄件人电话(二选一)": [],
    "寄件人地址": [],
    "物品类型(最多4个字)": [],
    "包裹备注": [],
    "订单编号": [],
    "包裹重量": [],
    "包裹体积": []
}

# 默认寄件人信息(可以根据实际情况调整)
sender_name = "水族"
sender_phone = "15622726880"
sender_address = "广东省潮州市饶平县钱东镇热带鱼渔场"

# 提取信息
for index, row in df_raw.iterrows():
    info = row['补发信息']
    shop_name = row['店铺']

    # 正则表达式匹配
    match = re.search(r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$', info, re.DOTALL)
    if match:
        recipient_name = match.group(1)
        order_number = match.group(2)
        recipient_phone = match.group(3)
        recipient_address = match.group(4)
        item_type = match.group(6)

        data["收件人姓名(必填)"].append(recipient_name)
        data["收件人手机(二选一)"].append(recipient_phone)
        data["收件人电话(二选一)"].append("")
        data["收件人地址(必填)"].append(recipient_address)
        data["寄件人姓名"].append(sender_name)
        data["寄件人手机(二选一)"].append(sender_phone)
        data["寄件人电话(二选一)"].append("")
        data["寄件人地址"].append(sender_address)
        data["物品类型(最多4个字)"].append(item_type)
        data["包裹备注"].append(shop_name + "+" + item_type)
        data["订单编号"].append(order_number)
        data["包裹重量"].append("")
        data["包裹体积"].append("")

# 创建DataFrame
df_processed = pd.DataFrame(data)

# 保存为新的Excel文件
output_file_path = "processed_物流订单信息.xlsx"
df_processed.to_excel(output_file_path, index=False)

print(f"Excel文件已生成:{output_file_path}")
现在让我们看看我们还有哪些要求没达到的?嗷,可视化的选择原文件夹和新文件夹以及寄件人信息的更改。废话不多说直接贴源码,也是很简单的使用了tk来可视化(给的钱太少了,不想多做活儿)
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import re

class DataProcessingApp:
    def __init__(self, root):
        self.root = root
        self.root.title("数据处理应用")

        # 创建按钮
        self.btn_select_orig = tk.Button(self.root, text="选择原文件夹", command=self.select_orig_folder)
        self.btn_select_orig.pack(pady=10)

        self.btn_select_dest = tk.Button(self.root, text="选择新文件夹", command=self.select_dest_folder)
        self.btn_select_dest.pack(pady=10)

        self.btn_edit_sender = tk.Button(self.root, text="编辑寄件人信息", command=self.edit_sender_info)
        self.btn_edit_sender.pack(pady=10)

        self.btn_process_files = tk.Button(self.root, text="处理文件", command=self.process_files)
        self.btn_process_files.pack(pady=20)

        # 初始化文件夹路径变量
        self.orig_folder_path = ""
        self.dest_folder_path = ""

        # 初始化寄件人信息
        self.sender_name = "田所浩二"
        self.sender_phone = "1145141919810"
        self.sender_address = "我是谁我在哪儿我也不知道啊"

    def select_orig_folder(self):
        self.orig_folder_path = filedialog.askdirectory(title="选择原文件夹")
        print(f"选择的原文件夹:{self.orig_folder_path}")

    def select_dest_folder(self):
        self.dest_folder_path = filedialog.askdirectory(title="选择新文件夹")
        print(f"选择的新文件夹:{self.dest_folder_path}")

    def edit_sender_info(self):
        # 创建编辑寄件人信息的窗口
        self.edit_sender_window = tk.Toplevel(self.root)
        self.edit_sender_window.title("编辑寄件人信息")

        # 添加姓名、电话、地址的输入框和标签
        tk.Label(self.edit_sender_window, text="姓名:").grid(row=0, column=0)
        self.entry_name = tk.Entry(self.edit_sender_window)
        self.entry_name.insert(tk.END, self.sender_name)
        self.entry_name.grid(row=0, column=1)

        tk.Label(self.edit_sender_window, text="电话:").grid(row=1, column=0)
        self.entry_phone = tk.Entry(self.edit_sender_window)
        self.entry_phone.insert(tk.END, self.sender_phone)
        self.entry_phone.grid(row=1, column=1)

        tk.Label(self.edit_sender_window, text="地址:").grid(row=2, column=0)
        self.entry_address = tk.Entry(self.edit_sender_window, width=50)
        self.entry_address.insert(tk.END, self.sender_address)
        self.entry_address.grid(row=2, column=1)

        # 添加确认按钮
        tk.Button(self.edit_sender_window, text="确认", command=self.save_sender_info).grid(row=3, columnspan=2, pady=10)

    def save_sender_info(self):
        # 从输入框获取寄件人信息并更新
        self.sender_name = self.entry_name.get().strip()
        self.sender_phone = self.entry_phone.get().strip()
        self.sender_address = self.entry_address.get().strip()

        # 关闭编辑窗口
        self.edit_sender_window.destroy()

    def process_files(self):
        if self.orig_folder_path == "" or self.dest_folder_path == "":
            messagebox.showwarning("警告", "请先选择原文件夹和新文件夹!")
            return

        # 读取并处理文件的逻辑
        print("开始处理文件...")

        # 示例:假设读取原文件夹中的所有Excel文件并处理
        for filename in os.listdir(self.orig_folder_path):
            if filename.endswith(".xlsx") or filename.endswith(".xls"):
                file_path = os.path.join(self.orig_folder_path, filename)
                df_raw = pd.read_excel(file_path, engine='openpyxl')

                # 进行数据处理,将处理后的结果保存到新文件夹
                # 示例:假设按照之前的逻辑处理数据并保存

                # 创建DataFrame并保存为新的Excel文件
                df_processed = self.process_data(df_raw)
                output_file_path = os.path.join(self.dest_folder_path, f"processed_{filename}")
                df_processed.to_excel(output_file_path, index=False)
                print(f"处理完成:{output_file_path}")

        print("所有文件处理完成!")

    def process_data(self, df_raw):
        # 示例处理数据的函数,可以根据需要修改
        data = {
            "收件人姓名(必填)": [],
            "收件人手机(二选一)": [],
            "收件人电话(二选一)": [],
            "收件人地址(必填)": [],
            "寄件人姓名": [],
            "寄件人手机(二选一)": [],
            "寄件人电话(二选一)": [],
            "寄件人地址": [],
            "物品类型(最多4个字)": [],
            "包裹备注": [],
            "订单编号": [],
            "包裹重量": [],
            "包裹体积": []
        }

        # 提取信息
        for index, row in df_raw.iterrows():
            info = row['补发信息']
            shop_name = row['店铺']

            # 正则表达式匹配
            match = re.search(r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$', info, re.DOTALL)
            if match:
                recipient_name = match.group(1)
                order_number = match.group(2)
                recipient_phone = match.group(3)
                recipient_address = match.group(4)
                item_type = match.group(6)

                data["收件人姓名(必填)"].append(recipient_name)
                data["收件人手机(二选一)"].append(recipient_phone)
                data["收件人电话(二选一)"].append("")
                data["收件人地址(必填)"].append(recipient_address)
                data["寄件人姓名"].append(self.sender_name)
                data["寄件人手机(二选一)"].append(self.sender_phone)
                data["寄件人电话(二选一)"].append("")
                data["寄件人地址"].append(self.sender_address)
                data["物品类型(最多4个字)"].append(item_type)
                data["包裹备注"].append(shop_name + "+" + item_type)
                data["订单编号"].append(order_number)
                data["包裹重量"].append("")
                data["包裹体积"].append("")

        df_processed = pd.DataFrame(data)
        return df_processed

if __name__ == "__main__":
    root = tk.Tk()
    app = DataProcessingApp(root)
    root.mainloop()

大概做完了是这个鸟样吧.....

你别说,它还真丑(反正不是我用,凑合凑合就好)

好的!现在的确是能够修改了,但是每一次打开它的信息都得重新选择,怎么办呢?是时候请出我们的configparser了!也是直接贴源码

在运行后,python文件对应的文件夹下会生成一个config.ini,里面就保存着配置信息,后续就不需要再反复配置了!小伙伴们也可以多使用这个来配置,挺方便的

import os
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import re
import configparser

class DataProcessingApp:
    def __init__(self, root):
        self.root = root
        self.root.title("数据处理应用")

        # 创建按钮
        self.btn_select_orig = tk.Button(self.root, text="选择原文件夹", command=self.select_orig_folder)
        self.btn_select_orig.pack(pady=10)

        self.btn_select_dest = tk.Button(self.root, text="选择新文件夹", command=self.select_dest_folder)
        self.btn_select_dest.pack(pady=10)

        self.btn_edit_sender = tk.Button(self.root, text="编辑寄件人信息", command=self.edit_sender_info)
        self.btn_edit_sender.pack(pady=10)

        self.btn_process_files = tk.Button(self.root, text="处理文件", command=self.process_files)
        self.btn_process_files.pack(pady=20)

        # 初始化文件夹路径变量
        self.orig_folder_path = ""
        self.dest_folder_path = ""

        # 初始化寄件人信息
        self.sender_name = ""
        self.sender_phone = ""
        self.sender_address = ""

        # 加载配置文件中的路径和寄件人信息
        self.load_config()

    def load_config(self):
        # 创建或加载配置文件
        self.config_file = "../Process/config.ini"
        self.config = configparser.ConfigParser()

        if os.path.exists(self.config_file):
            self.config.read(self.config_file)
            self.orig_folder_path = self.config.get("Paths", "OrigFolder", fallback="")
            self.dest_folder_path = self.config.get("Paths", "DestFolder", fallback="")
            self.sender_name = self.config.get("Sender", "Name", fallback="")
            self.sender_phone = self.config.get("Sender", "Phone", fallback="")
            self.sender_address = self.config.get("Sender", "Address", fallback="")

    def save_config(self):
        # 保存配置信息到配置文件
        self.config["Paths"] = {
            "OrigFolder": self.orig_folder_path,
            "DestFolder": self.dest_folder_path
        }

        self.config["Sender"] = {
            "Name": self.sender_name,
            "Phone": self.sender_phone,
            "Address": self.sender_address
        }

        with open(self.config_file, "w") as configfile:
            self.config.write(configfile)

    def select_orig_folder(self):
        self.orig_folder_path = filedialog.askdirectory(title="选择原文件夹")
        print(f"选择的原文件夹:{self.orig_folder_path}")
        self.save_config()

    def select_dest_folder(self):
        self.dest_folder_path = filedialog.askdirectory(title="选择新文件夹")
        print(f"选择的新文件夹:{self.dest_folder_path}")
        self.save_config()

    def edit_sender_info(self):
        # 创建编辑寄件人信息的窗口
        self.edit_sender_window = tk.Toplevel(self.root)
        self.edit_sender_window.title("编辑寄件人信息")

        # 添加姓名、电话、地址的输入框和标签
        tk.Label(self.edit_sender_window, text="姓名:").grid(row=0, column=0)
        self.entry_name = tk.Entry(self.edit_sender_window)
        self.entry_name.insert(tk.END, self.sender_name)
        self.entry_name.grid(row=0, column=1)

        tk.Label(self.edit_sender_window, text="电话:").grid(row=1, column=0)
        self.entry_phone = tk.Entry(self.edit_sender_window)
        self.entry_phone.insert(tk.END, self.sender_phone)
        self.entry_phone.grid(row=1, column=1)

        tk.Label(self.edit_sender_window, text="地址:").grid(row=2, column=0)
        self.entry_address = tk.Entry(self.edit_sender_window, width=50)
        self.entry_address.insert(tk.END, self.sender_address)
        self.entry_address.grid(row=2, column=1)

        # 添加确认按钮
        tk.Button(self.edit_sender_window, text="确认", command=self.save_sender_info).grid(row=3, columnspan=2, pady=10)

    def save_sender_info(self):
        # 从输入框获取寄件人信息并更新
        self.sender_name = self.entry_name.get().strip()
        self.sender_phone = self.entry_phone.get().strip()
        self.sender_address = self.entry_address.get().strip()

        # 关闭编辑窗口
        self.edit_sender_window.destroy()
        # 保存寄件人信息到配置文件
        self.save_config()

    def process_files(self):
        if self.orig_folder_path == "" or self.dest_folder_path == "":
            messagebox.showwarning("警告", "请先选择原文件夹和新文件夹!")
            return

        # 读取并处理文件的逻辑
        print("开始处理文件...")

        # 示例:假设读取原文件夹中的所有Excel文件并处理
        for filename in os.listdir(self.orig_folder_path):
            if filename.endswith(".xlsx") or filename.endswith(".xls"):
                file_path = os.path.join(self.orig_folder_path, filename)
                df_raw = pd.read_excel(file_path, engine='openpyxl')

                # 进行数据处理,将处理后的结果保存到新文件夹
                # 示例:假设按照之前的逻辑处理数据并保存

                # 创建DataFrame并保存为新的Excel文件
                df_processed = self.process_data(df_raw)
                output_file_path = os.path.join(self.dest_folder_path, f"processed_{filename}")
                df_processed.to_excel(output_file_path, index=False)
                print(f"处理完成:{output_file_path}")

        print("所有文件处理完成!")

    def process_data(self, df_raw):
        # 示例处理数据的函数,可以根据需要修改
        data = {
            "收件人姓名(必填)": [],
            "收件人手机(二选一)": [],
            "收件人电话(二选一)": [],
            "收件人地址(必填)": [],
            "寄件人姓名": [],
            "寄件人手机(二选一)": [],
            "寄件人电话(二选一)": [],
            "寄件人地址": [],
            "物品类型(最多4个字)": [],
            "包裹备注": [],
            "订单编号": [],
            "包裹重量": [],
            "包裹体积": []
        }

        # 提取信息
        for index, row in df_raw.iterrows():
            info = row['补发信息']
            shop_name = row['店铺']

            # 正则表达式匹配
            match = re.search(r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$', info, re.DOTALL)
            if match:
                recipient_name = match.group(1)
                order_number = match.group(2)
                recipient_phone = match.group(3)
                recipient_address = match.group(4)
                item_type = match.group(6)

                data["收件人姓名(必填)"].append(recipient_name)
                data["收件人手机(二选一)"].append(recipient_phone)
                data["收件人电话(二选一)"].append("")
                data["收件人地址(必填)"].append(recipient_address)
                data["寄件人姓名"].append(self.sender_name)
                data["寄件人手机(二选一)"].append(self.sender_phone)
                data["寄件人电话(二选一)"].append("")
                data["寄件人地址"].append(self.sender_address)
                data["物品类型(最多4个字)"].append(item_type)
                data["包裹备注"].append(shop_name + "+" + item_type)
                data["订单编号"].append(order_number)
                data["包裹重量"].append("")
                data["包裹体积"].append("")

        df_processed = pd.DataFrame(data)
        return df_processed

if __name__ == "__main__":
    root = tk.Tk()
    app = DataProcessingApp(root)
    root.mainloop()
现在要求的功能已经全部实现,但是不出意外的话出意外了,甲方又要改这改那的。。。真的很烦,一会儿给我的原表格中加了一个订单编号,一会儿要我这里的报错写详细点,那里的报错写详细点....真的很无语,直接上完整版的吧!

唉...已经打包好了跟我说要保证四行
import os
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import re
import configparser
from datetime import datetime
from openpyxl import Workbook
from openpyxl.reader.excel import load_workbook
from openpyxl.utils import get_column_letter

class DataProcessingApp:
    def __init__(self, root):
        self.root = root
        self.root.title("数据处理应用")

        self.btn_select_orig = tk.Button(self.root, text="选择原文件夹", command=self.select_orig_folder)
        self.btn_select_orig.pack(pady=10)

        self.btn_select_dest = tk.Button(self.root, text="选择新文件夹", command=self.select_dest_folder)
        self.btn_select_dest.pack(pady=10)

        self.btn_edit_sender = tk.Button(self.root, text="编辑寄件人信息", command=self.edit_sender_info)
        self.btn_edit_sender.pack(pady=10)

        self.btn_process_files = tk.Button(self.root, text="处理文件", command=self.process_files)
        self.btn_process_files.pack(pady=20)

        self.orig_folder_path = ""
        self.dest_folder_path = ""

        self.sender_name = ""
        self.sender_phone = ""
        self.sender_address = ""

        self.load_config()

    def load_config(self):
        self.config_file = "config.ini"
        self.config = configparser.ConfigParser()

        if os.path.exists(self.config_file):
            self.config.read(self.config_file)
            self.orig_folder_path = self.config.get("Paths", "OrigFolder", fallback="")
            self.dest_folder_path = self.config.get("Paths", "DestFolder", fallback="")
            self.sender_name = self.config.get("Sender", "Name", fallback="")
            self.sender_phone = self.config.get("Sender", "Phone", fallback="")
            self.sender_address = self.config.get("Sender", "Address", fallback="")

    def save_config(self):
        self.config["Paths"] = {
            "OrigFolder": self.orig_folder_path,
            "DestFolder": self.dest_folder_path
        }

        self.config["Sender"] = {
            "Name": self.sender_name,
            "Phone": self.sender_phone,
            "Address": self.sender_address
        }

        with open(self.config_file, "w") as configfile:
            self.config.write(configfile)

    def select_orig_folder(self):
        self.orig_folder_path = filedialog.askdirectory(title="选择原文件夹")
        print(f"选择的原文件夹:{self.orig_folder_path}")
        self.save_config()

    def select_dest_folder(self):
        self.dest_folder_path = filedialog.askdirectory(title="选择新文件夹")
        print(f"选择的新文件夹:{self.dest_folder_path}")
        self.save_config()

    def edit_sender_info(self):
        self.edit_sender_window = tk.Toplevel(self.root)
        self.edit_sender_window.title("编辑寄件人信息")

        tk.Label(self.edit_sender_window, text="姓名:").grid(row=0, column=0)
        self.entry_name = tk.Entry(self.edit_sender_window)
        self.entry_name.insert(tk.END, self.sender_name)
        self.entry_name.grid(row=0, column=1)

        tk.Label(self.edit_sender_window, text="电话:").grid(row=1, column=0)
        self.entry_phone = tk.Entry(self.edit_sender_window)
        self.entry_phone.insert(tk.END, self.sender_phone)
        self.entry_phone.grid(row=1, column=1)

        tk.Label(self.edit_sender_window, text="地址:").grid(row=2, column=0)
        self.entry_address = tk.Entry(self.edit_sender_window, width=50)
        self.entry_address.insert(tk.END, self.sender_address)
        self.entry_address.grid(row=2, column=1)

        tk.Button(self.edit_sender_window, text="确认", command=self.save_sender_info).grid(row=3, columnspan=2, pady=10)

    def save_sender_info(self):
        self.sender_name = self.entry_name.get().strip()
        self.sender_phone = self.entry_phone.get().strip()
        self.sender_address = self.entry_address.get().strip()

        self.edit_sender_window.destroy()
        self.save_config()

    def process_files(self):
        if self.orig_folder_path == "" or self.dest_folder_path == "":
            messagebox.showwarning("警告", "请先选择原文件夹和新文件夹!")
            return

        print("开始处理文件...")

        success_count = 0
        failure_count = 0
        failure_msgs = []

        for filename in os.listdir(self.orig_folder_path):
            if filename.endswith(".xlsx") or filename.endswith(".xls"):
                file_path = os.path.join(self.orig_folder_path, filename)
                try:
                    df_raw = pd.read_excel(file_path, engine='openpyxl')

                    df_processed = self.process_data(df_raw)
                    
                    #用时间戳命名文件
                    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    output_file_name = f"{os.path.splitext(filename)[0]}_{timestamp}.xlsx"
                    output_file_path = os.path.join(self.dest_folder_path, output_file_name)

                    df_processed.to_excel(output_file_path, index=False)
                    print(f"处理完成:{output_file_path}")
                    success_count += 1

                    # 调整列宽
                    self.adjust_column_width(output_file_path)

                except Exception as e:
                    failure_count += 1
                    failure_msgs.append(f"处理文件 '{filename}' 出错:{str(e)}")

        if failure_count > 0:
            messagebox.showerror("处理失败", f"处理完成:成功处理 {success_count} 个文件,失败 {failure_count} 个文件。\n\n错误信息:\n" + "\n".join(failure_msgs))
        else:
            messagebox.showinfo("处理完成", f"成功处理所有文件:{success_count} 个文件。")

        print("所有文件处理完成!")

    def process_data(self, df_raw):
        data = {
            "收件人姓名(必填)": [],
            "收件人手机(二选一)": [],
            "收件人电话(二选一)": [],
            "收件人地址(必填)": [],
            "寄件人姓名": [],
            "寄件人手机(二选一)": [],
            "寄件人电话(二选一)": [],
            "寄件人地址": [],
            "物品类型(最多4个字)": [],
            "包裹备注": [],
            "订单编号": [],
            "包裹重量": [],
            "包裹体积": []
        }

        invalid_format_orders = []  # 用于存储格式错误的订单编号

#******************************************************************************************

        //简单讲解一下这里,就是要求补发信息一定一定是四行,否则的话会导致match不匹配
        //就会导致对应行的数据整个不出现在处理后的excel当中
        //然后修改成这样的代码里面就会else-except抛出错误弹窗告诉小白员工哪里的数据出问题了赶紧去修改
        #请忽略上面的//,我的评价是写java写的(雾)

        for index, row in df_raw.iterrows():
            index_info = str(row['订单编号'])  # 将订单编号转换为字符串类型
            info = row['补发信息']
            shop_name = row['店铺']

            match = re.search(r'^(.*?)\[(\d+)\]\n(\d+)\n(.*?)\[(\d+)\]\n(.*?)$', info, re.DOTALL)
            if match:
                recipient_name = match.group(1)
                order_number = match.group(2)
                recipient_phone = match.group(3)
                recipient_address = match.group(4)
                item_type = match.group(6)
                recipient_name = recipient_name + "[" + order_number + "]"
                data["收件人姓名(必填)"].append(recipient_name)
                data["收件人手机(二选一)"].append(recipient_phone)
                data["收件人电话(二选一)"].append("")
                data["收件人地址(必填)"].append(recipient_address)
                data["寄件人姓名"].append(self.sender_name)
                data["寄件人手机(二选一)"].append(self.sender_phone)
                data["寄件人电话(二选一)"].append("")
                data["寄件人地址"].append(self.sender_address)
                data["物品类型(最多4个字)"].append("")
                data["包裹备注"].append(shop_name + "+" + item_type)
                data["订单编号"].append(index_info)
                data["包裹重量"].append("")
                data["包裹体积"].append("")
            else:
                # 格式错误的情况
                invalid_format_orders.append(index_info)

        if invalid_format_orders:
            messagebox.showerror("格式错误",
                                 f"订单编号:{', '.join(invalid_format_orders)} 出现格式错误,请务必保证为4行。")

        df_processed = pd.DataFrame(data)
        return df_processed

    def adjust_column_width(self, excel_file):
        wb = Workbook()
        wb = load_workbook(excel_file)
        ws = wb.active

        for col in ws.columns:
            max_length = 0
            column = col[0].column_letter  # 获取列字母表示

            # 计算每列中最长单元格的内容长度
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass

            adjusted_width = (max_length + 2) * 1.2  # 根据内容长度调整列宽度
            ws.column_dimensions[column].width = adjusted_width  # 设置列宽

        # 保存调整后的 Excel 文件
        wb.save(excel_file)

if __name__ == "__main__":
    root = tk.Tk()
    app = DataProcessingApp(root)
    root.mainloop()

  • 7
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值