【Python】Python读取Excel数据,进行字符串处理


一、工具类

新建目录:TzqUtils
目录下建工具类:

1.1、日期工具类DateTool.py

DateTool.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import pandas
import datetime


class DateTool:
    """
    日期工具类
    """

    def __init__(self):
        pass

    # 得到日期的每隔N月的几号
    @staticmethod
    def get_date_every_n_months_m_days(date_i, n_i, day_i):
        import datetime
        year = date_i.year
        month = date_i.month + n_i
        if month > 12:
            year += 1
            month -= 12
        # new_date = datetime.date(year, month, date.day)
        new_date = datetime.date(year, month, day_i)
        return new_date

    @staticmethod
    def get_months_last_day(date_i):
        import calendar
        return calendar.monthrange(date_i.year, date_i.month)[1]

    # @staticmethod
    # def get_now_date_string():
    #     now = datetime.datetime.now()
    #     formatted_time = now.strftime("%Y%m%d-%H%M%S")
    #     # print(formatted_time)
    #     return formatted_time

    # 得到日期字符串
    @staticmethod
    def get_now_date_string(format_s="%Y%m%d-%H%M%S"):
        now = datetime.datetime.now()
        formatted_time = now.strftime(format_s)
        # print(formatted_time)
        return formatted_time


if __name__ == '__main__':
    date_string = "2022-02-01"
    date_obj = pandas.to_datetime(date_string)
    date_obj2 = DateTool.get_date_every_n_months_m_days(date_obj, 1, 15)
    print(str(date_obj2))

    print(DateTool.get_months_last_day(date_obj))
    print(DateTool.get_now_date_string())

1.2、读取excel功能封装类ExcelTool.py

ExcelTool.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import pandas as pd


class ExcelTool:
    """
    Excel数据处理工具类
    """

    def __init__(self):
        pass

    # 获取excel的数据
    @staticmethod
    def get_excel_data_f(excel_file_name_i: str, sheet_name_i: str):
        xls = pd.ExcelFile(excel_file_name_i)  # 利用ExcelFile打开文件存储在xls变量中
        data_frame = xls.parse(sheet_name_i)  # 将文件中的Sheet1工作表中内容存储在salesDf变量中
        all_data_list_o = data_frame  # data_frame.values
        return all_data_list_o


if __name__ == '__main__':
    # excel_path = 'E:\\learn\\test\\数据处理测试表.xlsx'  # 将文件地址存储在xls_path变量中
    excel_path = "D:\\tzq\\excel_all\\" \
                 "20240510-1725-TZQ-log配置.xlsx"
    sheet_name = "附1-LOG规则"
    # out_scripts_file = r"d:\test\20240424-0917-TZQ_LOG规则.sql"
    out_filename = "d:\\test\\" + DateTool.get_now_date_string("%Y%m%d-%H%M") \
                       + "-TZQ_LOG规则(20230518版本).sql"
    
    all_data_list = ExcelTool.get_excel_data_f(excel_path, sheet_name)
    print(type(all_data_list))
    # 获取 DataFrame 的数据
    print(str(all_data_list.iloc[1, 0]))

1.3、映射转换工具类,数据间关系的映射MappingTool.py

MappingTool.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import sqlite3
from TzqUtils.CommonTool import *


class MappingTool:
    """
    映射转换工具类,数据间关系的映射
    """

    def __init__(self):
        pass

	# 根据name,得到code,使用sqlite3方式
	@staticmethod
	def get_tzq_system_source_code(tzq_system_source_name_string, out_filename_path):
	    tzq_system_source_name_string = tzq_system_source_name_string.lower()
	    # 连接到SQLite内存数据库
	    conn = sqlite3.connect(':memory:')
	    cursor = conn.cursor()
	    # 创建临时表
	    cursor.execute('CREATE TABLE tzq_system_source_code_tmp (code TEXT, name TEXT)')
	    # 插入数据
	    cursor.execute("INSERT INTO tzq_system_source_code_tmp (code, name) VALUES ('100', 'tzq')")
	    cursor.execute("INSERT INTO tzq_system_source_code_tmp (code, name) VALUES ('101', 'is')")
	    cursor.execute("INSERT INTO tzq_system_source_code_tmp (code, name) VALUES ('102', 'god')")
	    # 提交事务
	    conn.commit()
	    # 查询数据
	    cursor.execute('SELECT code FROM tzq_system_source_code_tmp WHERE name = ?', (tzq_system_source_name_string,))
	    # 检查结果集是否为空
	    result = cursor.fetchone()
	    # 如果result为None,则数据不存在;否则,数据存在。
	    if result is None:
	        print("-- tzq_system_source 的 “" + tzq_system_source_name_string + "” 数据不存在")
	        # 记录到本地日志文件
	        write_file_append(out_filename_path, "-- tzq_system_source 的 “" + tzq_system_source_name_string + "” 数据不存在\n")
	        conn.close()
	        return ''
	    else:
	        # print("数据存在")
	        conn.close()
	        return result[0]

    # 由名称找编码
    @staticmethod
    def get_tzq_code_by_tzq_name_f(tzq_name_i: str, out_filename: str):
        tzq_code_string_o = ""
        conn = sqlite3.connect(r'D:\Python\python_project_location\TzqUtils\plan.db')
        cursor = conn.cursor()
        cursor.execute(
            "select distinct tzq_code from tzq_company_mapping_t where tzq_name = ? "
            "limit 1"
            , (tzq_name_i, )
        )
        # 检查结果集是否为空
        # result = cursor.fetchone()
        results = cursor.fetchall()
        # 如果result为None,则数据不存在;否则,数据存在。
        if results is None:
            print("-- [get_tzq_code_by_tzq_name_f] : \"" + tzq_name_i + "\" 的 tzq code 数据不存在!")
            CommonTool.append_file_content_f(
                out_filename,
                "-- [get_tzq_code_by_tzq_name_f] : \"" + tzq_name_i + "\" 的 tzq code 数据不存在!")
            cursor.close()
            conn.close()
            return None
        else:
            # print("数据存在")
            # print("results = “" + str(results) + "”")
            for row in results:
                tzq_code_string_o = row[0]
                # print(row[0])
            cursor.close()
            conn.close()
            return tzq_code_string_o


if __name__ == '__main__':
    tzq_name_string = "tangzhiqiang"
    tzq_code_string = MappingTool.get_tzq_code_by_tzq_name_f(tzq_name_string)
    print("tzq_code_string = “" + str(tzq_code_string) + "”")

三、封装实体类

新建目录:TzqModel
目录下建实体类:
TzqLog.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-


class TzqLog:

    """
    Tzq日志表 :tzq_log_t 表的封装类
    get_insert_script : 生成 insert 脚本

    """

    def __init__(self):
        # 表名
        self.__table_name: str = "tzq_log_t"

        # 属性字典(表字段)
        self.__value = {
            "log_code": "",
            "log_name": ""
        }

    @property
    def value(self):
        return self.__value

    @value.setter
    def value(self, value: dict):
        self.__value = value

    @value.deleter
    def value(self):
        del self.__value

    def get_insert_script(self):
        # 字段取值
        if str(self.__value["log_source_code"]) == "nan":
            log_source_code_tmp = "null"
        else:
            log_source_code_tmp = "'" + str(self.__value["log_source_code"]) + "'"

        if str(self.__value["log_amount"]) == "nan":
            log_amount_tmp = "null::int8"
        else:
            log_amount_tmp = str(self.__value["log_amount"])

        # 拼接 insert 脚本
        sql_scripts_tmp = (
            "INSERT INTO tzq_log_t (log_id, log_dimension_id, "
            "log_name, log_source_code, parent_id, enable_flag, created_by, creation_date, "
            "last_updated_by, last_update_date, sys_description, tzq_system_id) "
            "select tmp.* from (select nextval('seq_tzq_log_t') as log_id, " +
            log_dimension_id_tmp + " as log_dimension_id, '" +
            str(self.__value["log_name"]) + "' as log_name, '" +
            str(self.__value["log_source_code"]) + "' as log_source_code, " +
            parent_id_tmp + " as parent_id, " +
            "'Y' as enable_flag, -1 as created_by, CURRENT_TIMESTAMP as creation_date, "
            "-1 as last_updated_by, CURRENT_TIMESTAMP as last_update_date, null as sys_description, "
            "'666666' as tzq_system_id" +
            ") tmp where not exists ("
            "select 1 from " + self.__table_name + " where " +
            "log_dimension_id = " + log_dimension_id_tmp + " and "
            "log_name = '" + str(self.__value["log_name"]) + "' and " +
            "log_source_code = '" + str(self.__value["log_source_code"]) + "'"
            ")"
            ";\n")

        return sql_scripts_tmp


if __name__ == '__main__':
    obj = TzqLog()
    # obj.value["log_item_name"] = "tzq log name"
    # obj.value["log_item_code"] = "LOG0001"
    print(obj)
    print(obj.get_insert_script())




四、服务类

新建目录:TzqService
目录下建实体类:
TzqLogService.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
from TzqUtils.ExcelTool import *
from TzqUtils.MappingTool import *
from TzqUtils.DateTool import *
from TzqModel.TzqLog import *


class TzqLogService:

    """
    计划系统来源: tzq_log_t 表的封装类
    """

    def __init__(self):
        pass

    # 根据 name 找 code
    @staticmethod
    def get_tzq_code_from_name_f(log_name_i: str, out_filename_path: str):
        str_tmp = ""
        str_key = log_name_i.lower()
        # print("str_key = “" + str_key + "”")
        try:
            code_map_dict = {
                "tzq": "100"
                , "is": "101"
                , "god": "102"
                }
            str_tmp = code_map_dict[str_key]
        except KeyError:
            print_error_string_tmp = (
                "-- get_tzq_code_from_name_f : tzq_source_name 值 “"
                + log_name_i + "” 不存在!\n")
            print(print_error_string_tmp)
            # 记录到本地日志文件
            CommonTool.append_file_content_f(out_filename_path, print_error_string_tmp)
        return str_tmp

    # insert 脚本写入 SQL 文件
    @staticmethod
    def write_to_file_f(excel_path_i: str, sheet_name_i: str, output_file_i: str):
        all_data_frame_tmp = ExcelTool.get_excel_data_f(excel_path_i, sheet_name_i)
        sql_script_string: str = ""
        tmp_obj = TzqLog()
        data_frame_list = all_data_frame_tmp.iloc[:, :].values
        now_date_string = DateTool.get_now_date_string()

        sql_script_string = sql_script_string \
            + "/* ************************* tzq880808 日志系统表 " + now_date_string \
            + " start ************************************ */\n"

        for row_index, row_item in enumerate(data_frame_list):
            # 在循环中输出列表的每个元素
            # print("-- row_index = " + str(row_index) + ' , ' + str(row_item))
            if row_item[31] == "新增" and str(row_item[0]) != "nan" and str(row_item[26]) != "nan":
                for column_index, cell_item in enumerate(row_item):
                    # print("-- column_index = " + str(column_index) + ' , ' + str(cell_item))
                    # 取 excel 列值 给 字典
                    if column_index == 0:
                        tmp_obj.value["tzq_log_code"] = cell_item
                    if column_index == 26:
                        tzq_source_name_tmp = cell_item
                tzq_source_name_list_tmp = CommonTool.string_split_f(tzq_source_name_tmp, "/")
                for tzq_source_name_list_item_tmp in tzq_source_name_list_tmp:
                    tmp_obj.value["tzq_source"] = \
                        TzqLogService.get_tzq_code_from_name_f(str(tzq_source_name_list_item_tmp)
                                                                                , output_file_i)
                    print(
                        "-- tzq_log_code = '" + str(tmp_obj.value["tzq_log_code"]) + "'"
                        " , tzq_source_name = '" + str(tzq_source_name_list_item_tmp) + "'"
                        " , tzq_source_code = '" + str(tmp_obj.value["tzq_source"]) + "'"
                         )
                    sql_script_string = sql_script_string + (
                        "-- tzq_log_code = '" + str(tmp_obj.value["tzq_log_code"]) + "'"
                        " , tzq_source_name = '" + str(tzq_source_name_list_item_tmp) + "'"
                        " , tzq_source_code = '" + str(tmp_obj.value["tzq_source"]) + "'"
                        "\n")
                    sql_script_string = sql_script_string + tmp_obj.get_insert_script()
                # print(tmp_obj.get_insert_script())

        sql_script_string = sql_script_string \
            + "/* ************************* tzq880808 日志系统表 " + now_date_string \
            + " end ************************************ */\n\n"
        # return sql_script_string

        all_scripts = sql_script_string
        # print(all_scripts)
        # 追加写入文件
        CommonTool.append_file_content_f(output_file_i, all_scripts)


if __name__ == '__main__':
    excel_path = "D:\\tzq\\excel_all\\" \
                 "20240509-1045-TZQ配置信息表.xlsx"
    plan_item_sheet = "3.tzqLog-配置"
    # out_scripts_file = r"d:\test\11_tzq_log_880808.sql"
    out_scripts_file = "d:\\test\\" + DateTool.get_now_date_string("%Y%m%d-%H%M") + "-tzqLog-配置.sql"

    # 清空文件内容
    CommonTool.clear_file_content_f(out_scripts_file)

    # insert 脚本写入 SQL 文件
    TzqLogService.write_to_file_f(excel_path, plan_item_sheet, out_scripts_file)

五、主类MainService.py

根目录创建主类MainService.py:

#!/usr/bin/python
# -*- coding: UTF-8 -*-
from TzqService.TzqLogService import *
from TzqService.TzqLogConfigService import *


class MainService:
    """
        主服务
        get_all_script_f() : 生成所有脚本
    """

    def __init__(self):
        pass

    @staticmethod
    def get_all_script_f(excel_path_i: str, out_scripts_file_i: str):
        # 清空文件内容
        CommonTool.clear_file_content_f(out_scripts_file_i)
        # 追加文件内容
        CommonTool.append_file_content_f(
            out_scripts_file_i,
            "set app.current_system = '666666';\n")

        # 计划项
        # sheet_name_tmp: str = "1.tzq_log"
        # TzqLogService.write_to_file_f(excel_path_i, sheet_name_tmp, out_scripts_file_i)

        # 计划类型
        sheet_name_tmp: str = "1.tzq_log_config"
        TzqLogConfigService.write_to_file_f(excel_path_i, sheet_name_tmp, out_scripts_file_i)


if __name__ == '__main__':
    excel_path = "D:\\tzq\\excel_all\\" \
                 "20240419-1153-TZQ日志配置表.xlsx"
    out_scripts_file = "d:\\test\\" + DateTool.get_now_date_string("%Y%m%d-%H%M") + "-tzqLog-配置-总线.sql"

    MainService.get_all_script_f(excel_path, out_scripts_file)

  • 4
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
在现代办公室环境中,处理Excel文件是日常工作的一部分。Python作为一种功能强大的编程语言,提供了许多库和工具来简化这一任务。特别是,当我们需要从Excel文件中提取包含中文字符的字符串时,Python可以提供快速有效的解决方案。Python通过pandas库可以轻松读取Excel文件,并使用正则表达式来匹配和提取所需的字符串。以下是一个简单的步骤说明:导入必要的库:首先,我们需要导入pandas库来读取Excel文件,以及re库来使用正则表达式。读取Excel文件:使用pandas的read_excel函数,我们可以加载Excel文件并将其转换为DataFrame对象。提取字符串:接下来,我们可以使用str.extract方法和正则表达式来提取包含中文字符的字符串。中文字符通常可以使用Unicode范围来匹配。保存结果:最后,我们可以将提取的字符串保存到新的Excel文件中,或者根据需要进行进一步处理。这个Python自动化办公源码可以帮助用户节省大量的时间和精力,避免了手动查找和提取字符串的繁琐工作。通过简单的代码,我们可以轻松地完成这项任务,提高工作效率,确保数据的准确性和一致性。总之,Python提供了一种快速、高效的方法来提取Excel文件中的中文字符串。无论是数据分析、文本处理还是其他办公任务,Python都可以成为您的强大助手,帮助您更好地应对日常办公挑战。重新回答||

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Tzq@2018

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值