【无标题】

最新推荐文章于 2024-07-30 17:23:09 发布

镜子里的角落

最新推荐文章于 2024-07-30 17:23:09 发布

阅读量89

点赞数

文章标签： python 开发语言 excel

本文链接：https://blog.csdn.net/meetingjie/article/details/132595059

版权

一些python处理excel的程序
Excel_compare_repeat

import openpyxl
from openpyxl.styles import PatternFill
import tkinter as tk
from tkinter import filedialog,messagebox
#检测Excel1中每个元素在Excel2中是否有重复，有就标颜色
def draw_grid_styple(file_path_1,file_path_2,out_file,fill_color,sheet_name):
    # df=pd.read_excel(file_path,sheet_name=sheet_name,header=0)
    # df.style.applymap(lambda x:'background-color:%s'%fill_color if x=='FALSE' else '')
    # df.to_excel(file_path,index=False)
    wb_1=openpyxl.load_workbook(file_path_1)
    ws_1=wb_1[sheet_name]
    wb_2=openpyxl.load_workbook(file_path_2)
    ws_2=wb_2[sheet_name]
    color_fill=PatternFill(start_color=fill_color, end_color=fill_color, fill_type='solid')
    color_fill_1 = PatternFill(start_color='FFFFFFFF', end_color='FFFFFFFF', fill_type='solid')
    #这个bgcolor完全不起作用
    # color_fill = PatternFill(bgColor=fill_color)
    # 这个也不起作用
    # rule=CellIsRule(operator='equal',formula=['False'],fill=color_fill)
#此程序用于检查excel1中元素在excel2中是否存在，存在赋为红色，需要注意，如果存在那么后面不循环Excel2中后面的内容，但是break只能跳出当前循环
    # 故需要标志位，先跳出当前循环并把标志位赋为真，再在外循环中检测标志位，跳出外循环。
    for row in ws_1.iter_rows():
        for cell in row:
            Flag = False
            for row_1 in ws_2.iter_rows():
                for cell_2 in row_1:
                    if cell.value == cell_2.value:
                        cell.fill = color_fill
                        Flag=True
                        break
                    # else:
                    #     cell.fill=color_fill_1
                if Flag==True:
                    break
    wb_1.save(out_file)
if __name__=='__main__':
              #使用文件夹打开文件及文件夹,
    #创建根窗口并隐藏
    root=tk.Tk()
    root.withdraw()
    #消息框提示
    messagebox.showinfo('提示','请选择第一个Excel文件')
    file_path_1=filedialog.askopenfilename()
    messagebox.showinfo('提示', '请选择第二个Excel文件')
    file_path_2=filedialog.askopenfilename()
    messagebox.showinfo('提示','请选择程序运行后输出的文件夹')
    folder_path=filedialog.askdirectory()
    total_output_file = folder_path + '/compare_result.xlsx'
    # file_path_1='E:/xls_data/1.xlsx'
    # file_path_2 = 'E:/xls_data/2.xlsx'
    draw_grid_styple(file_path_1=file_path_1, file_path_2=file_path_2, out_file=total_output_file,fill_color='FF0000', sheet_name='Sheet1')
    messagebox.showinfo('提示', '最后输出的文件为compare_result.xlsx,请查看')

故障列表_落地转换规则


import os.path

import pandas
from pypinyin import lazy_pinyin, Style
import pandas as pd
import test
from pypinyin import lazy_pinyin, Style
import pandas as pd
from openpyxl import load_workbook
# 将一个df数据类型直接写入一张表中，可检测文件名，sheet名是否存在，存在则覆盖，不存在则新建
def write_df_to_file(out_file_name,output_sheet_name,new_df):
    if os.path.exists(out_file_name):
        book=load_workbook(out_file_name)
        ef = pd.ExcelFile(out_file_name)
        sheet_name_list = ef.sheet_names
        # 查看sheet是否存在，存在进行覆盖，不存在进行model=append添加
        if output_sheet_name not in sheet_name_list:
            with pd.ExcelWriter(out_file_name, engine='openpyxl',mode='a') as writer:
                writer.book = book
                new_df.to_excel(writer, sheet_name=output_sheet_name, index=False)
        else:
            with pd.ExcelWriter(out_file_name, engine='openpyxl') as writer:
                writer.book = book
                new_df.to_excel(writer, sheet_name=output_sheet_name, index=False)
        # writer._save()
    else:
        new_df.to_excel(out_file_name,sheet_name=output_sheet_name,index=False)
# 将一张表中几列全部写入另一张表中
def write_somet_col_to_anthor_sheet(input_file,input_sheet,input_col_list,header,
                                    output_col_list,output_sheet,output_file,return_str):
    df=pd.read_excel(input_file,sheet_name=input_sheet,header=header,
                         skiprows=0,keep_default_na=False)
    df_new=pd.read_excel(output_file,sheet_name=output_sheet,header=header,
                         skiprows=0,keep_default_na=False)

    item_list=[]
    new_target_data = df_new.copy()
    # 将要写入的sheet行扩展为源sheet的行数，必须是这个数，要不后面写入有问题，写不进入，搞了一下午，尼玛头疼！！！！
    if(df_new.shape[0]<df.shape[0]):
        new_target_data = new_target_data.append([df_new] * (df.shape[0]-1), ignore_index=True)
    for i in range(len(input_col_list)):
        # df_new_new=df_new.assign(output_col_list[i]=df[input_col_list[i]])

        # df_new=df_new.head(len(df[input_col_list[i]]))
        new_target_data[output_col_list[i]]=df[input_col_list[i]]
    # new_target_data.to_excel(output_file,sheet_name=output_sheet, index=False)
    write_df_to_file(new_df=new_target_data, output_sheet_name=output_sheet, out_file_name=output_file)
    # 返回变量名主键
    return df[return_str].tolist()
    # for i,(item,title) in enumerate(zip(item_list,output_col_list)):
    #     df_new = pd.DataFrame({title: item})
    #     write_df_to_file(new_df=df_new,output_sheet_name=output_sheet,out_file_name=output_file)
# 定义一个从表中抽出一列作为key值，另外几列数据作为value，他们组成一个列表，{key:[v1,v2,v3]}
def extract_file_some_cols_to_dict(file_path,sheet_name,key_col_name,value_cols_name,header):
    print('this sheet name is ',sheet_name)
    df = pd.read_excel(file_path, sheet_name=sheet_name, header=header,keep_default_na=False)
    key_list = df[key_col_name].tolist()
    for i in range(len(value_cols_name)):
        locals()[f'a_{i}']=df[value_cols_name[i]].tolist()
    des_value=list(zip(*(v for k,v in locals().items() if k.startswith('a'))))
    key_value_dict = {k: v for k, v in zip(key_list, des_value)}

    return key_value_dict
# 定义 根据一个字典，找到excel中某列需要写入的内容，并写入该列,file_key_col为你要写入的该列对应的唯一id
#
def from_dict_to_excel_col_content(dict,input_file_name,header,sheet_name,file_key,
                                   file_value_col, out_file_name,file_key_col_connect_str='_',skiprows=[]
                                   ):
    write_file_list = []
    pin_yin_list=[]
    df=pd.read_excel(input_file_name, sheet_name=sheet_name, header=header,dtype=str,keep_default_na=False,
                     skiprows=skiprows)

    for ele in file_key:
        if ele in dict.keys():
            write_file_list.append(dict[ele])

        else:
            write_file_list.append((' ',)*len(dict[next(iter(dict))]))#在另一张表中没找到与之对应的键ele，就将value设置成空




    new_df = df.copy()
    for i in range(len(file_value_col)):
        a=list(zip(*write_file_list))
        new_df[file_value_col[i]]=list(zip(*write_file_list))[i]

    write_df_to_file(new_df=new_df, output_sheet_name=sheet_name, out_file_name=out_file_name)

if __name__=='__main__':
    input_file='C:/Users/15487/Desktop/新建文件夹/1/故障列表.xlsx'
    input_sheet_name=['CCU','TCU1','TCU2','BCU','BCC_DC','BMS1','BMS2','BMS3','BMS4']
    output_file='C:/Users/15487/Desktop/新建文件夹/1/1.xlsx'
    output_sheet_name=['CCU故障代码','TCU1故障代码','TCU2故障代码','BCU故障代码','DCDC故障代码','BMS1故障代码','BMS2故障代码','BMS3故障代码','BMS4故障代码']
    input_col_list=['十六进制代码','优先级','故障描述','故障提示','故障处理',]
    output_col_list=['故障码(十六进制)','故障等级','故障名称','故障说明','故障处理措施']
    return_str = '变量名'

    input_file_2 ='C:/Users/15487/Desktop/新建文件夹/1/落地数据转换规则定义表.xlsx'
    input_sheet_name_2=['CCU_DDU','TCU1_CCU','TCU2_CCU','BCU_CCU','DCDC_CCU','BMS12_CCU','BMS12_CCU','BMS34_CCU','BMS34_CCU']
    key_col_name=('附加信息','信号名')
    value_cols_name=[('落地项点定义','落地数据项点'),('解析规则定义','字节偏移'),('解析规则定义','位偏移')]
    output_file_total = 'C:/Users/15487/Desktop/新建文件夹/1/2.xlsx'
    need_write_col_name=['落地数据项点','字节偏移','位偏移']

    # key_total_list = write_somet_col_to_anthor_sheet(input_file=input_file, input_sheet=input_sheet_name[0],
    #                                                  input_col_list=input_col_list, header=0,
    #                                                  output_col_list=output_col_list, output_sheet=output_sheet_name[0],
    #                                                  output_file=output_file, return_str=return_str)
    # key_value_dict = extract_file_some_cols_to_dict(file_path=input_file_2, sheet_name=input_sheet_name_2[0],
    #                                                 key_col_name=key_col_name, value_cols_name=value_cols_name,
    #                                                 header=[0, 1])
    # from_dict_to_excel_col_content(dict=key_value_dict,input_file_name=output_file,header=0,
    #                                sheet_name=output_sheet_name[0],file_key=key_total_list,
    #                                file_value_col=need_write_col_name,out_file_name=output_file)
    # print(key_value_dict)

    for i in range(len(input_sheet_name)):

        key_total_list=write_somet_col_to_anthor_sheet(input_file=input_file,input_sheet=input_sheet_name[i],
                                    input_col_list=input_col_list,header=0,
                                    output_col_list=output_col_list,output_sheet=output_sheet_name[i],
                                    output_file=output_file,return_str=return_str)
        key_value_dict=extract_file_some_cols_to_dict(file_path=input_file_2,sheet_name=input_sheet_name_2[i],
                                                      key_col_name=key_col_name,value_cols_name=value_cols_name,
                                                      header=[0,1])
        from_dict_to_excel_col_content(dict=key_value_dict,input_file_name=output_file,header=0,
                                       sheet_name=output_sheet_name[i],file_key=key_total_list,
                                       file_value_col=need_write_col_name,out_file_name=output_file)
    # print(a)

find_different_two_col_and_draw_color

#
# data/time:2023.8.10. 10:23
# author：DH
#
#function：对比excel中两列同一行数据是否一致，不一致则标红其中一个单元格
import openpyxl
import pandas as pd
from openpyxl import load_workbook
import os
from openpyxl.styles import PatternFill
import tkinter as tk
from tkinter import filedialog,messagebox
import re
# from test import draw_grid_style #引入test中给某个格子标红得函数
# 定义一个函数，实现找到一个sheet中两列对应行内容不同的单元格,输出不同项的列表
def find_different_two_cols_about_every_row(file_path,sheet_name,col1,col2):
    out_list_new=[]
    df=pd.read_excel(file_path,sheet_name=sheet_name,header=[0,1])
    max_col_length=max(df[col1].shape[0],df[col2].shape[0])
    for i in range(max_col_length):
        a=str(df[col1][i])
        b=str(df[col2][i])
        if(a!=b):
            out_list_new.append(i+3)
    return out_list_new



# 定义单元格样式：給单元格内容为参数label的单元格标红和字体颜色,并对文件覆盖
def draw_grid_style(file_path,fill_color,col_name,sheet_name,label):
    wb=openpyxl.load_workbook(file_path)
    ws=wb[sheet_name]
    color_fill=PatternFill(start_color=fill_color, end_color=fill_color, fill_type='solid')
    #这个bgcolor完全不起作用
    # color_fill = PatternFill(bgColor=fill_color)
    # 这个也不起作用
    # rule=CellIsRule(operator='equal',formula=['False'],fill=color_fill)
    column_titles = [cell.value for cell in ws[2]]
    target_column_index = column_titles.index(col_name) + 1
    for row_index in label:
        cell=ws.cell(row_index,target_column_index)
        cell.fill=color_fill
    # for row in ws.iter_rows(min_row=3):
    #
    #     cell=row[target_column_index-1]
    #     if type(label)==str:
    #         if cell.value==label:
    #             cell.fill=color_fill
    #     if type(label)==list:
    #         for l in label:
    #             if cell.value == l:
    #                 cell.fill = color_fill
    #                 break
    wb.save(file_path)










if __name__=='__main__':
    file_path='C:/Users/15487/Desktop/phm/old.xlsx'
    read_sheet_name_file='C:/Users/15487/Desktop/phm/new.xlsx'
    # get all sheet name
    sheet_name_list=list(pd.read_excel(read_sheet_name_file,sheet_name=None))
    sheet_head = pd.read_excel(file_path, sheet_name=sheet_name_list[1], header=[0, 1]).columns
    flag=('附加信息','范围')in sheet_head
    col1=[('附加信息','描述'),('对比信息','描述对比列')]
    col2=[('附加信息','范围'),('对比信息','范围对比列')]
    i=0
    for sheet_name in sheet_name_list:
        label_list=find_different_two_cols_about_every_row(file_path=file_path,sheet_name=sheet_name,col1=col1[0],col2=col1[1])
        draw_grid_style(file_path=file_path,fill_color='FF0000',col_name='描述对比列',sheet_name=sheet_name,label=label_list)
        sheet_head=pd.read_excel(file_path,sheet_name=sheet_name,header=[0,1]).columns
        if ('附加信息','范围')in sheet_head:
            label_list = find_different_two_cols_about_every_row(file_path=file_path, sheet_name=sheet_name,
                                                                 col1=col2[0], col2=col2[1])
            draw_grid_style(file_path=file_path, fill_color='FF0000', col_name='范围对比列', sheet_name=sheet_name,
                            label=label_list)
        i+=1
        print('the total number  of  needing label sheet is %d,now finish sheet number is %d '%(len(sheet_name_list),i))

test

# title:compare Excel
#data:2023.04.15
#author：DH
#


#note：
# 列表相加为新列表，该列表中元素没有子列表，不同于append
import openpyxl
import pandas as pd
from openpyxl import load_workbook
import os
from openpyxl.styles import PatternFill
import tkinter as tk
from tkinter import filedialog,messagebox
import re
from openpyxl.formatting.rule import ColorScaleRule,CellIsRule,FormulaRule,Rule
from openpyxl import Workbook

# 对原表进行两列字符串相加，生成一张中间表，如果在原表改会破坏原表，下次运行就还会再加一次，header=0表示第一行为表头
def row_string_add(file_path,sheet_name,column_name_1,column_name_2,file_middle_path,input_sheet_name,skip_rows,str1='',str2=''):
    df=pd.read_excel(file_path,sheet_name=input_sheet_name,header=0,skiprows=skip_rows)
    df[column_name_2]=df[column_name_2].astype(str)+str1+df[column_name_1].astype(str)+str2
    df.to_excel(file_middle_path,index=False,sheet_name=sheet_name)

# 将中间表按列进行操作，每列中每隔两行（step=2）进行相加，并形成两个list，一个list中元素为每列中应该填什么，一个为表头list
def colume_upper_bottom_add_list(file_path,sheet_name,step):
    # 包含所有列的总列表
    total_list=[]
    title_list=[]
    # 读取Excel文件,header=0表示读取第一行
    df = pd.read_excel(file_path, sheet_name=sheet_name,header=0)
    for col in df.columns[:2]:
        num_rows_01=df[col].shape[0]//2
        new_list_01=[]
        for i in range(num_rows_01):
            new_list_01.append(df.iloc[i*2][col])
        total_list.append(new_list_01)
        title_list.append(col)

    for col in df.columns[2:]:
        # print(index）
        df[col+'new']=df[col].astype(str)+' '+df[col].shift(-1).astype(str)
        num_rows_old=df[col+'new'].shape[0]
        num_rows_new=(num_rows_old)//step
        # 新建一个存一个列的列表
        new_list=[]
        # new_list.append(col)
        for i in range(num_rows_new):
            new_list.append(df.iloc[i*step][col+'new'])
        total_list.append(new_list)
        title_list.append(col)
    return total_list,title_list
# 判断表是否存在，并将表头和列内容写进excel
def write_title_and_items_to_excel(items_list,title_list,file_out_path,sheet_name):
    if os.path.exists(file_out_path):
        book = load_workbook(file_out_path)
        # sheet=book['CMD故障定义']
        writer = pd.ExcelWriter(file_out_path,engine='openpyxl')
        writer.book = book
        # 将表头和列内容写进excel
        for i,(item,title) in enumerate(zip(items_list,title_list)):
            df_output = pd.DataFrame({title:item})
            # df_output.drop('偏移',axis=1,inplace=True)
            df_output.to_excel(writer, sheet_name=sheet_name, startcol=i,index=False)
        writer._save()
    else:
        writer = pd.ExcelWriter(file_out_path,engine='openpyxl')
        for i, (item, title) in enumerate(zip(items_list, title_list)):
            df_output = pd.DataFrame({title: item})
            # df_output.drop('偏移',axis=1,inplace=True)
            df_output.to_excel(writer, sheet_name=sheet_name, startcol=i, index=False)
        writer._save()
# 删除第一列，同时把“类型”列的nan字符元素删除，操作在原表进行更改
def delete_something(file_path,sheet_name,column_str):
    df=pd.read_excel(file_path,sheet_name=sheet_name,header=0)
    df.drop(column_str,axis=1,inplace=True)
    items=[]
    for item in df['类型']:
        item=item[:-2]
        items.append(item)
    df['类型']=items
    df.to_excel(file_path,sheet_name=sheet_name,index=False)

# 总的生成excel，只是一张表的更改。其中中间是为了将list写进EXCEl中
def write_single_file(file_input_path,file_middle_path,file_out_path,sheet_name,input_sheet_name,skip_rows):
    row_string_add(file_path=file_input_path,input_sheet_name=input_sheet_name,sheet_name=sheet_name,column_name_1='HDLC偏移',column_name_2='类型',
                   file_middle_path=file_middle_path,skip_rows=skip_rows)
    total_list,title_list=colume_upper_bottom_add_list(file_path=file_middle_path,step=2,sheet_name=sheet_name)
    write_title_and_items_to_excel(items_list=total_list,title_list=title_list,file_out_path=file_out_path,
                                   sheet_name=sheet_name)
    delete_something(file_out_path,sheet_name=sheet_name,column_str='HDLC偏移')
# 定义单元格样式：給单元格内容为参数label的单元格标红和字体颜色,并对文件覆盖
def draw_grid_style(file_path,fill_color,sheet_name,label):
    wb=openpyxl.load_workbook(file_path)
    ws=wb[sheet_name]
    color_fill=PatternFill(start_color=fill_color, end_color=fill_color, fill_type='solid')
    #这个bgcolor完全不起作用
    # color_fill = PatternFill(bgColor=fill_color)
    # 这个也不起作用
    # rule=CellIsRule(operator='equal',formula=['False'],fill=color_fill)
    for row in ws.iter_rows():
        for cell in row:
            if type(label)==str:
                if cell.value==label:
                    cell.fill=color_fill
            if type(label)==list:
                for l in label:
                    if cell.value == l:
                        cell.fill = color_fill
                        break
    wb.save(file_path)
#     删除文件夹下产生的中间文件，仅保留最后文件（reserve_file_str）
def delete_some_excel_file(file_path,reserve_file_str):
    for file in os.listdir(file_path):
        if file.endswith("xlsx" or "xls"):
            if file==reserve_file_str:
                continue
            else:
                os.remove(file_path+'/'+file)
# 将两列进行对比，返回主列中与对比列相比没有的元素列表、主列表头以及将不同元素置于最后的新列表
def col_to_col_compare_different_get_newList(file_name,sheet_name,main_col,compared_col,):
    df_1 = pd.read_excel(file_name, header=0,sheet_name=sheet_name)
    red_list=[]
    red_label_col=df_1[main_col]
    no_label_col=df_1[compared_col]
    for ele_1 in red_label_col:
        for i,ele_2 in enumerate(no_label_col):
            #排除两个字符串仅因为中文括号而导致的不相同，即如果是仅中文括号的差别，那么认为他们一样
            re_ele_1=re.sub(r"（(.*?)）",r"\1",str(ele_1))
            re_ele_2= re.sub(r"（(.*?)）", r"\1", str(ele_2))
            # 假如一个字符串和另一个字符串就差中间空格，那么认为它们一样
            if ''.join(str(re_ele_1).split(' '))==''.join(str(re_ele_2).split(' ')):
                break
            if (''.join(str(re_ele_1).split(' '))!=''.join(str(re_ele_2).split(' '))) and (i==no_label_col.shape[0]-1) :
                red_list.append(ele_1)
    # new_col_list=[]
    new_col_list=[elem for elem  in red_label_col if elem not in red_list]+red_list
    return new_col_list,main_col,red_list



    # draw_grid_style(file_path=file_name,fill_color='FF0000',sheet_name=sheet_name,label=red_list)
# 这个函数我觉得没什么太大作用，但又不得不写，烦死了！！！！！！！！！！
# 好吧，还是有点作用的，作用就是，跳过标题为skip_heads_row_list，同时读取标题为level_label_list的Excel内容
# 注意，它可能是一级、二级或者三级标题，反正就是得到最后标题下的内容,level_label_list是个列表【一级标题，二级标题，三级标题】
# 最多三级标题，太多觉得太麻烦，我本来也不想写
def read_excel_skip_head(file_path,skip_heads_row_list,sheet_name,level_label_list):
    df=pd.read_excel(file_path,header=skip_heads_row_list,sheet_name=sheet_name,index_col=None)
    if len(level_label_list)==1:
        df=df[level_label_list[0]]
    if len(level_label_list)==2:
        df=df[level_label_list[0]][level_label_list[1]]
    if len(level_label_list)==3:
        df = df[level_label_list[0]][level_label_list[1]][level_label_list[2]]
    return df
# 这个函数我也不想写，主要是写在action里面太难看，
# 作用就是，先找到表格中A节车数据属性写到哪一行，再在这些行中遍历故障代码，同时把故障代码下的三级标题内容用字符串连在一起
# 存在一起，注意我们判定出含有空值（nan）就把他剔除,skip_heads_row_list_1,skip_heads_row_list_2是需要跳过的标题行
# 有的两行（合并行也算1行），有的三行,fault_project为故障项，如部件故障数据，如有部件故障数据1-24。
def extract_A_fault_code_to_list(file_path,skip_heads_row_list_1,skip_heads_row_list_2,
                                 sheet_name,level_label_list_1,fault_project,level_label_list_2):
    df1=read_excel_skip_head(file_path=file_path,skip_heads_row_list=skip_heads_row_list_1,
                             sheet_name=sheet_name,level_label_list=level_label_list_1)
    df2=read_excel_skip_head(file_path=file_path,skip_heads_row_list=skip_heads_row_list_2,
                             sheet_name=sheet_name,level_label_list=level_label_list_2)
    big_name = [fault_project + str(i + 1) for i in range(24)]
    # print(big_name)
    a = []
    count=0
    for index in range(df1.shape[0]):
        if str(df1[index]) == 'B':
            count = index
            break

    for name in big_name:
        for index in range(count):
            if str(df2[name]['数据类型'][index]) == 'nan':
                continue
            else:
                a.append((str(df2[name]['故障名称'][index]) + ' ' + str(
                    df2[name]['故障代码'][index])))

    repeat_list = []
    no_repeat_list = list(set(a))

    for aa in no_repeat_list:
        if a.count(aa) > 1:
            repeat_list.append(aa)

    return no_repeat_list,repeat_list
# 对位表对比事件
def button1_action():
    messagebox.showinfo('提示','请选择第一个Excel文件')
    file_path_1=filedialog.askopenfilename()
    # 得到文件名并不输出xls
    str1=file_path_1.split('/')[-1].split('.')[0]
    messagebox.showinfo('提示', '请选择第二个Excel文件')
    file_path_2=filedialog.askopenfilename()
    str2 = file_path_2.split('/')[-1].split('.')[0]
    messagebox.showinfo('提示','请选择程序运行后输出的文件夹')
    folder_path=filedialog.askdirectory()

    middle_file_path_1 = folder_path + '/middle_1.xlsx'
    output_file_1 = folder_path + '/output_1.xlsx'

    middle_file_path_2 = folder_path + '/middle_2.xlsx'
    output_file_2 = folder_path + '/output_2.xlsx'

    sheet_name='CMD故障定义对位表对比'
    input_sheet_name='CMD故障定义'
    skip_rows=0


    total_output_file=folder_path+'/compare_two_option.xlsx'

    write_single_file(file_input_path=file_path_1,file_middle_path=middle_file_path_1,file_out_path=output_file_1,
                      input_sheet_name=input_sheet_name,sheet_name=sheet_name,skip_rows=skip_rows)
    write_single_file(file_input_path=file_path_2, file_middle_path=middle_file_path_2, file_out_path=output_file_2,
                      input_sheet_name=input_sheet_name,sheet_name=sheet_name,skip_rows=skip_rows)
    df1=pd.read_excel(output_file_1,sheet_name=sheet_name,header=0)
    df2=pd.read_excel(output_file_2,sheet_name=sheet_name,header=0)

    out_list=[]
    out_title_list=[]

    for i,(col1,col2) in enumerate(zip(df1.columns,df2.columns)):
        labels = []
        for (element1,element2) in zip(df1[col1],df2[col2]):

            if element2==element1:
                label='True'
            else:
                label='False'
            labels.append(label)
        for _ in range(min(df2[col2].shape[0],df1[col1].shape[0]),max(df2[col2].shape[0],df1[col1].shape[0])):
            label='False'
            labels.append(label)


        out_list.append(df1[col1])
        out_title_list.append(col1+'_'+str1)
        out_list.append(df2[col2])
        out_title_list.append(col2+'_'+str2)
        out_list.append(labels)
        out_title_list.append('label'+str(i))
    write_title_and_items_to_excel(items_list=out_list,title_list=out_title_list,file_out_path=total_output_file,sheet_name=sheet_name)
    draw_grid_style(file_path=total_output_file,fill_color='FF0000',sheet_name=sheet_name,label='False')
    # delete_some_excel_file(file_path=folder_path,reserve_file_str='compare_two_option.xlsx')
    messagebox.showinfo('提示', '最后输出的文件为compare_two_option.xlsx,请查看')
# 对位表与分级表对比
def button2_action():
    messagebox.showinfo('提示','请选择CMD故障对位表')
    file_path_1=filedialog.askopenfilename()
    # 得到文件名并不输出xls
    str1=file_path_1.split('/')[-1].split('.')[0]
    messagebox.showinfo('提示', '请选择分级表文件')
    file_path_2=filedialog.askopenfilename()
    str2 = file_path_2.split('/')[-1].split('.')[0]
    messagebox.showinfo('提示','请选择程序运行后输出的文件夹')
    folder_path=filedialog.askdirectory()

    middle_file_path_1 = folder_path + '/middle_1.xlsx'
    output_file_1 = folder_path + '/output_1.xlsx'

    middle_file_path_2 = folder_path + '/middle_2.xlsx'
    # output_file_2 = folder_path + '/output_2.xlsx'

    sheet_name='对位表与分级表对比'
    input_sheet_name_1 = 'CMD故障定义'
    input_sheet_name_2 =  '故障定义'

    middle_output_file = folder_path + '/compare_option_level_middle.xlsx'
    total_output_file=folder_path+'/compare_option_level.xlsx'

    write_single_file(file_input_path=file_path_1,file_middle_path=middle_file_path_1,file_out_path=output_file_1,
                      input_sheet_name=input_sheet_name_1,sheet_name=sheet_name,skip_rows=0)
    row_string_add(file_path=file_path_2,input_sheet_name=input_sheet_name_2,sheet_name=sheet_name,
                   column_name_1='DDU故障代码',column_name_2='DDU故障名称',skip_rows=1,file_middle_path=middle_file_path_2,str1=' ',str2='')

    df1=pd.read_excel(output_file_1,sheet_name=sheet_name,header=0)
    df2=pd.read_excel(middle_file_path_2,sheet_name=sheet_name,header=0)

    out_list=[]
    out_title_list=[]
    data1_list = []
    for i,col in enumerate(df1.columns):

        if col=='类型':
            continue #排除第一列
        else:#将后续所有列的元素集合在一个列表，并写在输出文件的一列
            for ele in df1[col]:
                if ('nan' not in ele)and('预留' not in ele):#排除源文件中空值和‘预留’值，那些不要

                    data1_list.append(ele)
    title_1='对位表故障代码'
    # 对列表进行字符串排序后再写入EXCEl文件中
    out_list.append(sorted(data1_list,key=str.lower))
    out_title_list.append(title_1)
    data2_list=[]
    for ele in df2['DDU故障名称']:
        if ('预留' not in ele) and ('nan' not in ele):#去掉小数点及其后面小数位，同时把字符串里面的（）都给去掉
            data2_list.append(ele.split('.')[0])
    out_list.append(sorted(data2_list,key=str.lower))
    out_title_list.append('分级表故障代码')

    write_title_and_items_to_excel(items_list=out_list,title_list=out_title_list,file_out_path=middle_output_file,sheet_name=sheet_name)
    # 分别把两列中不同都找出来
    new_list1,new_title_1,red_label_1=col_to_col_compare_different_get_newList(file_name=middle_output_file, sheet_name=sheet_name, main_col='对位表故障代码',
                                 compared_col='分级表故障代码')
    new_list2,new_title_2,red_label_2=col_to_col_compare_different_get_newList(file_name=middle_output_file,sheet_name=sheet_name,main_col='分级表故障代码',
                                 compared_col='对位表故障代码')

    new_list=[]
    new_list.append(new_list1)
    new_list.append(new_list2)
    new_title=[new_title_1,new_title_2]
    red_label=red_label_1+red_label_2
    write_title_and_items_to_excel(file_out_path=total_output_file,sheet_name=sheet_name,items_list=new_list,title_list=new_title)
    draw_grid_style(file_path=total_output_file,fill_color='FF0000',sheet_name=sheet_name,label=red_label)
    delete_some_excel_file(file_path=folder_path,reserve_file_str='compare_option_level.xlsx')
    messagebox.showinfo('提示', '最后输出的文件为compare_option_level.xlsx,请查看')

def button3_action():
    messagebox.showinfo('提示','请选择CMD故障对位表')
    file_path_1=filedialog.askopenfilename()
    input_sheet_name_1 = 'CMD故障定义'

    messagebox.showinfo('提示','请选择数据字典文件')
    file_path_2=filedialog.askopenfilename()
    sheet_name_dictionary='HXD2'

    messagebox.showinfo('提示','请选择程序运行后输出的文件夹')
    folder_path=filedialog.askdirectory()

    middle_file_path_1 = folder_path + '/middle_1.xlsx'
    output_file_1 = folder_path + '/output_1.xlsx'
    sheet_name = '对位表与数据字典对比'

    write_single_file(file_input_path=file_path_1, file_middle_path=middle_file_path_1, file_out_path=output_file_1,
                      input_sheet_name=input_sheet_name_1, sheet_name=sheet_name, skip_rows=0)
    df1 = pd.read_excel(output_file_1, sheet_name=sheet_name, header=0)
    out_list = []
    out_title_list = []
    data1_list = []
    for i, col in enumerate(df1.columns):

        if col == '类型':
            continue  # 排除第一列
        else:  # 将后续所有列的元素集合在一个列表，并写在输出文件的一列
            for ele in df1[col]:
                if ('nan' not in ele) and ('预留' not in ele):  # 排除源文件中空值和‘预留’值，那些不要
                    data1_list.append(ele)
    title_1 = '对位表故障代码'
    # 对列表进行字符串排序后再写入EXCEl文件中
    out_list.append(sorted(data1_list, key=str.lower))
    out_title_list.append(title_1)

    output_file_1 = folder_path + '/extract.xlsx'
    output_file = folder_path + '/compare_fault_code_to_A_datadictionary.xlsx'
    no_repeat_list,repeat_list=extract_A_fault_code_to_list(file_path=file_path_2,skip_heads_row_list_1=[0,1],skip_heads_row_list_2=[0,1,2],
                                 sheet_name=sheet_name_dictionary,level_label_list_1=['资产目录控制区','AB节'],
                                 level_label_list_2=['数据属性监控区'],fault_project='部件故障数据'
                                 )
    print(len(no_repeat_list))
    title = '字典故障代码'
    # 测试，测试显示，单个list不能写入Excel文件的一列，必须两个list，写两列数据，不明所以。

    out_list.append(sorted(no_repeat_list, key=str.lower))
    out_title_list.append(title)


    write_title_and_items_to_excel(file_out_path=output_file_1,items_list=out_list,
                                   title_list=out_title_list,sheet_name=sheet_name)
    # 分别把两列中不同都找出来
    new_list1, new_title_1, red_label_1 = col_to_col_compare_different_get_newList(file_name=output_file_1,
                                                                                   sheet_name=sheet_name,
                                                                                   main_col='对位表故障代码',
                                                                                   compared_col='字典故障代码')
    new_list2, new_title_2, red_label_2 = col_to_col_compare_different_get_newList(file_name=output_file_1,
                                                                                   sheet_name=sheet_name,
                                                                                   main_col='字典故障代码',
                                                                                   compared_col='对位表故障代码')

    new_list = []
    new_list.append(new_list1)
    new_list.append(new_list2)
    new_title = [new_title_1, new_title_2]
    red_label = red_label_1 + red_label_2
    write_title_and_items_to_excel(file_out_path=output_file, sheet_name=sheet_name, items_list=new_list,
                                   title_list=new_title)
    draw_grid_style(file_path=output_file, fill_color='FF0000', sheet_name=sheet_name, label=red_label)
    # delete_some_excel_file(file_path=folder_path, reserve_file_str='compare_fault_code_to_A_datadictionary.xlsx')
    messagebox.showinfo('提示', '最后输出的文件为compare_fault_code_to_A_datadictionary.xlsx,请查看')



if __name__=='__main__':
                 #使用文件夹打开文件及文件夹,


    #创建根窗口
    root=tk.Tk()
    root.geometry("400x300")
    button1=tk.Button(root,text='对位表对比',command=button1_action)
    button2=tk.Button(root,text='对位表与分级表对比',command=button2_action)
    button3=tk.Button(root,text='数据字典的数据绑定',command=button3_action)
    button1.pack(expand=True)
    button2.pack(expand=True)
    button3.pack(expand=True)
    # mainloop必须在最后，否则关闭窗口时会有报错：application has been destroyed，因为后面如果还有程序
    # 他会检测到窗口关闭了，但没办法执行后面程序。
    root.mainloop()



    #消息框提示


    #隐藏窗口
    # root.withdraw()

write_string_to_excel

import os.path

from pypinyin import lazy_pinyin, Style
import pandas as pd
import test
from pypinyin import lazy_pinyin, Style
import pandas as pd
from openpyxl import load_workbook
# 定义 两列字符相加函数,得到对应元素字符串相加后的列表
def row_string_add(file_path,column_name_1,column_name_2,input_sheet_name,skip_rows,str1='',str2=''):

    df=pd.read_excel(file_path,sheet_name=input_sheet_name,header=0,skiprows=skip_rows,keep_default_na=False)
    a=(df[column_name_1].astype(str)+str1+df[column_name_2].astype(str)+str2).tolist()
    return a
    # df.to_excel(file_middle_path,index=False,sheet_name=sheet_name)
# 定义 两个列表对应形成键值对的字典，这两个列表可以是一个excel的两列相对应，也可以是一个excel中
# 几个列加在一起形成的唯一id构成的列表
def excel_list_to_list_dict(file_path,sheet_name,value_col,key_list):
    df = pd.read_excel(file_path, sheet_name=sheet_name, header=0,keep_default_na=False)
    des_value = df[value_col].tolist()
    key_value_dict = {k: v for k, v in zip(key_list, des_value)}
    return key_value_dict

# 定义 根据一个字典，找到excel中某列需要写入的内容，并写入该列,file_key_col为你要写入的该列对应的唯一id
# 列，可以是两列加在一起的字符串，必须写成list形式，以区别是用一列作为的id，还是两列加在一起的id
def from_dict_to_excel_col_content(dict,input_file_name,header,sheet_name,file_key_col,
                                   file_value_col, out_file_name,file_key_col_connect_str='_',
                                   ):
    write_file_list = []
    pin_yin_list=[]
    df=pd.read_excel(input_file_name, sheet_name=sheet_name, header=header,dtype=str,keep_default_na=False)
    if len(file_key_col)==1:
        for ele in df[file_key_col]:
            if ele in dict.keys():
                write_file_list.append(dict[ele])
                pin_yin_s = lazy_pinyin(str(dict[ele]), style=Style.FIRST_LETTER)
                first_char = "".join(pin_yin_s)
                pin_yin_list.append(first_char)
            else:
                write_file_list.append('')
                pin_yin_list.append('')
    if len(file_key_col)==2:
        for ele1,ele2 in zip(df[file_key_col[0]],df[file_key_col[1]]):
            ele=str(ele1)+file_key_col_connect_str+str(ele2)
            if ele in dict.keys():
                write_file_list.append(dict[ele])
                pin_yin_s = lazy_pinyin(str(dict[ele]), style=Style.FIRST_LETTER)
                first_char = "".join(pin_yin_s).upper()
                pin_yin_list.append(first_char)
            else:
                write_file_list.append('')
                pin_yin_list.append('')

    new_df = df.copy()
    new_df[file_value_col[0]]=write_file_list
    new_df[file_value_col[1]]=pin_yin_list
    # 查看文件是否存在，存在的话需要writer进行写
    if os.path.exists(out_file_name):
        book=load_workbook(out_file_name)
        ef = pd.ExcelFile(out_file_name)
        sheet_name_list = ef.sheet_names
        # 查看sheet是否存在，存在进行覆盖，不存在进行model=append添加
        if sheet_name not in sheet_name_list:
            with pd.ExcelWriter(out_file_name, engine='openpyxl',mode='a') as writer:
                writer.book = book
                new_df.to_excel(writer, sheet_name=sheet_name, index=True)
        else:
            with pd.ExcelWriter(out_file_name, engine='openpyxl') as writer:
                writer.book = book
                new_df.to_excel(writer, sheet_name=sheet_name, index=True)
        # writer._save()
    else:
        new_df.to_excel(out_file_name,sheet_name=sheet_name,index=True)





    # 创建一个ExcelWriter对象，用于操作Excel文件

        # 将DataFrame写入到新的工作表中
        # df.to_excel(writer, sheet_name='new_sheet', index=False)




if __name__=='__main__':
    file_1 = 'D:/xls_data/phm/criterion.xlsx'
    file_2 = 'D:/xls_data/phm/need_write_1.xlsx'
    df = pd.ExcelFile(file_1)
    sheet_name_1_list = df.sheet_names
    df = pd.ExcelFile(file_2)
    sheet_name_2_list = df.sheet_names
    dict_sheet_name=sheet_name_1_list[1:-2]
    write_sheet_name=sheet_name_2_list[3:-5]


    dict_file_path='D:/xls_data/phm/criterion.xlsx'
    # dict_sheet_name='RIOM1->CCU'
    # dict_sheet_name='CCU->RIOM1'
    dict_key_col_1='字节偏移ByteOffset'
    dict_key_col_2='位偏移BitOffset'
    # dict_value_col='描述Signal Description'

    write_value_file_path='D:/xls_data/phm/need_write_1.xlsx'
    output_file_name_path='D:/xls_data/phm/validate.xlsx'
    # sheet_name_1='RIOM1_CCU'
    # sheet_name_1='CCU_RIOM1'
    write_file_key_col=[('解析规则定义','字节偏移'),('解析规则定义','位偏移')]
    write_file_value_col=[('落地项点定义', '中文名称'),('落地项点定义', '落地数据项点')]


    # df=pd.ExcelFile(write_value_file_path)
    # sheet_name=df.sheet_names
    # print(sheet_name)
    for i in range(len(dict_sheet_name)):
        if i in list(range(4,12)):
            dict_value_col = '变量名VariableName'
        elif i in list(range(12,15)):
            dict_value_col='信号名SignalName'
        else:
            dict_value_col = '描述Signal Description'
        print('测试：',dict_sheet_name[i])
        key=row_string_add(file_path=dict_file_path,column_name_1=dict_key_col_1,
                       column_name_2=dict_key_col_2,input_sheet_name=dict_sheet_name[i],
                           str1='_',skip_rows=0)
        dict=excel_list_to_list_dict(file_path=dict_file_path,sheet_name=dict_sheet_name[i],
                                     value_col=dict_value_col,key_list=key)

          # 将列表中的所有字母拼接成一个字符串
        from_dict_to_excel_col_content(dict=dict,input_file_name=write_value_file_path,
                                       header=[0,1],sheet_name=write_sheet_name[i],
                                       file_key_col=write_file_key_col,
                                       file_value_col=write_file_value_col,
                                       out_file_name=output_file_name_path
                                       )
    # 对单独的一个sheet进行读写，只要是因为他跟前面的sheet不连续

    dict_value_col = '变量名VariableName'
    dict_sheet_name = sheet_name_1_list[-1]
    write_sheet_name = sheet_name_2_list[-4]
    key = row_string_add(file_path=dict_file_path, column_name_1=dict_key_col_1,
                         column_name_2=dict_key_col_2, input_sheet_name=dict_sheet_name,
                         str1='_', skip_rows=0)
    dict = excel_list_to_list_dict(file_path=dict_file_path, sheet_name=dict_sheet_name,
                                   value_col=dict_value_col, key_list=key)

    # 将列表中的所有字母拼接成一个字符串
    from_dict_to_excel_col_content(dict=dict, input_file_name=write_value_file_path,
                                   header=[0, 1], sheet_name=write_sheet_name,
                                   file_key_col=write_file_key_col,
                                   file_value_col=write_file_value_col,
                                   out_file_name=output_file_name_path
                                   )

    dict_value_col = '变量名VariableName'
    dict_sheet_name = sheet_name_1_list[-2]
    write_sheet_name = sheet_name_2_list[-5]
    key = row_string_add(file_path=dict_file_path, column_name_1=dict_key_col_1,
                         column_name_2=dict_key_col_2, input_sheet_name=dict_sheet_name,
                         str1='_', skip_rows=0)
    dict = excel_list_to_list_dict(file_path=dict_file_path, sheet_name=dict_sheet_name,
                                   value_col=dict_value_col, key_list=key)

    # 将列表中的所有字母拼接成一个字符串
    from_dict_to_excel_col_content(dict=dict, input_file_name=write_value_file_path,
                                   header=[0, 1], sheet_name=write_sheet_name,
                                   file_key_col=write_file_key_col,
                                   file_value_col=write_file_value_col,
                                   out_file_name=output_file_name_path
                                   )


# 读取Excel文件






# l=row_string_add(file_path=file_path,column_name_1=col_1,
#                column_name_2=col_2,input_sheet_name=sheet_name,skip_rows=0,str1='_')
# df = pd.read_excel('D:/xls_data/phm/criterion.xlsx', sheet_name=sheet_name,header=0)
# des_value=df['描述Signal Description'].tolist()
# key_value_dict={k: v for k, v in zip(l, des_value)}
# print(key_value_dict)
# df1=pd.read_excel(write_file_path, sheet_name=sheet_name_1,header=[0,1],dtype=str)
# write_file_list=[]
# for i,(ele1,ele2) in enumerate(zip(df1['解析规则定义']['字节偏移'],df1['解析规则定义']['位偏移'])):
#     write_file_key=str(ele1)+'_'+str(ele2)
#     if write_file_key in key_value_dict.keys():
#         write_file_value=key_value_dict[write_file_key]
#         write_file_list.append(write_file_value)
#     else:
#         write_file_list.append('nan')
# print('this is:',write_file_list)
#
# multiindex = ('落地项点定义', '中文名称')
# df1[multiindex]=write_file_list
#
# df1.to_excel(write_file_path, sheet_name=sheet_name_1,index=True)