填表.py

作为传统制造业c++程序员,接触python有一阵了,总体来说python真的适合0基础开发的朋友们学习,语法简洁,类库基本满足所有开发人员,不像c++这样繁琐,这大概也是人生苦短我用python的真谛吧。
之前做了个小程序,算是数据分析中数据处理的部分,把EXCEL表中的数据项提取出来,按照通用长度在txt数据文件中找到,并回传excel里。
excel样式
共254条数据,并且分为13个部分,标签是数据,长度就是通用的数据长度了。将数据提取出来,按照长度在txt中进行查找,但是很值得注意的是,有很多数据并没有长度,或者有长度但是在txt数据文件中有相同拼接的数据值,各部分有的标签有重复的,这就需要做判断来确定所找到的数据是不是我们需要的
在这里插入图片描述
在这里插入图片描述
txt数据样式,|分组名|组长度(16进制)|数据项a长度数据数据项b长度数据数据项c长度数据数据项d长度数据…|
数据不便公开,伪造一条,一般情况下有近50个数据集,里面包含了所有数据项,数据项长度为16进制
在这里插入图片描述
我的做法是先把excel做分割,分割后的小表再去对应的数据中查找,清洗,这样能避免出现重复的项,或者无关项而导致错误,针对没有长度且标签只有两位的数据项,目前我只能手动设置了,好在只有几个。

import pandas as pd
import numpy as np
import json
from functools import reduce
import shutil


def get_bankdata_DGI(file_path, gp_file_name, group_find_Fpath):
    # gp_file_name = ['GP_IN.txt', 'GP_INLESS.txt', 'GP_DC.txt', 'GP_ECC.txt', 'GP_Q.txt']
    for file_name in gp_file_name:

        if file_name in ['GP_IN.txt', 'GP_INLESS.txt']:
            shutil.copyfile(file_name, '{0}/{1}'.format(group_find_Fpath, file_name))
        else:
            txt_file = open(file_name)
            gp_list = []
            gp_data_list = []
            chick_dict = {'08': '01', '10': '02', '18': '03',
                          '20': '04', '28': '05', '30': '06',
                          '38': '07', '40': '08', '48': '09'}

            for line in txt_file.readlines():
                if '94' in line:
                    gp_length16 = line.split('94')[1][:2]
                    gp_length = int(gp_length16, 16) * 2
                    gp_data = line.split('94')[1][2:gp_length + 2]
                    first, end = 0, 8
                    for i in range(int(len(gp_data) / 8)):  # 几个分组遍历几次
                        data_q = gp_data[first:end][:-2]
                        gp_list.append(data_q)
                        first += 8
                        end += 8

            for data_all in gp_list:
                data1 = chick_dict[data_all[:2]]
                data2s = data_all[3:]
                data2_1 = data2s.split('0')[0]
                data2_2 = data2s.split('0')[1]
                # print(type(data2_2))
                if data2_1 == data2_2:
                    gp_data_list.append(data1 + '0' + data2_1 + '|')
                else:
                    for i in range(int(data2_1), int(data2_2) + 1):
                        gp_data_list.append(data1 + '0' + str(i) + '|')
            # print('gp_data_list: ',gp_data_list)

            if file_name == 'GP_DC.txt':
                for DE in ['0D01|','0E01|','9200|','9201|','9104|','9105|']:
                    gp_data_list.append(DE)
            elif file_name == 'GP_ECC.txt':
                for DE in ['0D01|','0E01|','9200|','9201|','9203|','9204|']:
                    gp_data_list.append(DE)
            elif file_name == 'GP_Q.txt':
                for DE in ['0D01|','0E01|','9200|','9201|','9207|','9208|']:
                    gp_data_list.append(DE)

            for DGI in gp_data_list:
                txt_file = open(file_path)
                for line in txt_file.readlines():
                    if DGI in line:
                        with open('group_bank_datasF/{}'.format(file_name), 'a+') as f:
                            f.write(line)
                            # print(line.strip('\n'))


def get_group_DGI(group_text,file_path):

    gp_name_list = []
    txt_file = open(file_path)
    for line in txt_file.readlines():
        for gp_name, gp_val in group_text.items():
            for values in gp_val:
                if values in line:
                    with open(gp_name.upper() + '.txt', 'a+')as f:  # 追加模式,调试一次删一次
                        f.write(line)
                        gp_name_list.append(gp_name.upper()+'.txt')
    func = lambda x, y: x if y in x else x + [y]
    return reduce(func, [[], ] + gp_name_list)


def get_tag_lenth(list_gp):
    # lenth_list = []  # 此处不可设列表装清洗后的长度,因为for 的原因会造成第二组数据重叠第一组数据
    data_back = []
    for group_tag in list_gp:
        lenth_list = []
        a = group_tag['Lenth'].tolist()
        # print('长度:', a)
        b = group_tag['Tag'].tolist()
        # print('Tag:', b)
        for lennum in a:
            try:
                if 10>int(lennum)>0:
                    lenth_list.append('0{0}'.format(lennum))
                elif int(lennum)>9:
                    lenth_list.append(lennum)
            except Exception as e:
                if '-' in lennum and 'N' not in lennum:  # 表中长度 1-3 样式长度
                    lenS = int(lennum.split('-')[0])
                    if lenS < 10:
                        lennum_S = '0{0}'.format(str(lenS))
                    else:
                        lennum_S = str(lenS)
                    lenB = int(lennum.split('-')[1])
                    if lenB < 10:
                        lennum_B = '0{0}'.format(str(lenB))
                    else:
                        lennum_B = str(lenB)
                    lenth_list.append("<" + lennum_S + '-' + lennum_B)
                elif 'Var.' in lennum:
                    lenth_list.append("x")
                elif '或' in lennum:  # 表中长度 1或3 样式长度
                    lenS = int(lennum.split('或')[0])
                    if lenS < 10:
                        lennum_S = '0{0}'.format(str(lenS))
                    else:
                        lennum_S = str(lenS)
                    lenB = int(lennum.split('或')[1])
                    if lenB < 10:
                        lennum_B = '0{0}'.format(str(lenB))
                    else:
                        lennum_B = str(lenB)
                    lenth_list.append("or" + lennum_S + '-' + lennum_B)
                elif 'NCA' in lennum and '-' not in lennum:  # tag90
                    lenth_list.append('81N90')
                elif 'NI' in lennum and '-' not in lennum:  # tag9f46 NI  81
                    lenth_list.append('81N9F46')
                elif 'NI-NCA' in lennum:  # tag92 一般数据没有 北京银行中有 24  NI-NCA+36
                    lenth_list.append('24N92')
                elif 'N1C-NI' in lennum:  # tag9f48 一般数据没有 北京银行中有 2a   N1C-NI+42
                    lenth_list.append('2aN9F48')
                else:
                    lenth_list.append("x")

        for aa, bb in zip(b, lenth_list):
            if bb == 'x':
                data_back.append(aa + 'NoLenth')
            else:
                data_back.append(aa + bb)
    # print('lenth_list', ":", data_back)
    return (data_back, b)

def find_tag_txt(tag_list,lenth_list, file_path):
    # print(file_path)
    tag_lenth_dict = dict(zip(tag_list, lenth_list))

    data_count = {i: [] for i in tag_list}
    special_tag_list = ['8C', '8D', '8E', '57', '5A', '93']
    tag_dict = {'8C': '1B', '8D': '1A', '8E': '0C', '57': '13', '5A': '0A', '93': ['81', '43']}
    txt_file = open(file_path)
    for line in txt_file.readlines():  # 按行读文件
        for tag_excel, tag_VFL in tag_lenth_dict.items():  # 遍历 Excel中数据V+标志F+长度L  最开始的方式
            if 'NoLenth' in tag_VFL:  # 判断表中没有长度的tag
                tag = tag_VFL.split('NoLenth')[0]  # 以NoLenth分块取第一个
                if tag in special_tag_list:  # 几个不好找的tag
                    tag_value = tag_dict[tag]
                    if type(tag_value) == list:
                        for tag_lent in tag_value:
                            if tag_lent == '81':
                                tag9381 = tag + tag_lent  # 9381
                                if tag9381 in line:
                                    tag9381_dirdata = line.split(tag9381)[1]  # 9381 含长度数据
                                    tag_9381_length = tag9381_dirdata[:2]  # 长度
                                    tag9381_data = tag9381_dirdata[2:int(str(tag_9381_length), 16) * 2 + 2]
                                    if '|' not in tag9381_data:
                                        # print(tag, ":", tag9381_data)
                                        # tag_lenth_dict[tag_excel]=[tag9381_data]
                                        if tag9381_data not in data_count[tag_excel]:
                                            data_count[tag_excel].append(tag9381_data)
                            elif tag_lent == '43':
                                tag9343 = tag + tag_lent  # 9343
                                if tag9343 in line:
                                    tag9343_dirdata = line.split(tag9343)[1]  # 9343 数据
                                    tag9343_data = tag9343_dirdata[:int(str(43), 16) * 2]
                                    if '|' not in tag9343_data:
                                        # print(tag, ":", tag9343_data)
                                        # tag_lenth_dict[tag_excel] = [tag9343_data]
                                        if tag9343_data not in data_count[tag_excel]:
                                            data_count[tag_excel].append(tag9343_data)
                    else:
                        tag_length_sp = tag + tag_value
                        if tag_length_sp in line:
                            SPtag_dirdata = line.split(tag_length_sp)[1]  # 9343 数据
                            SPtag_data = SPtag_dirdata[:int(str(tag_value), 16) * 2]
                            if '|' not in SPtag_data:
                                # print(tag, ":", SPtag_data)
                                # tag_lenth_dict[tag_excel] = [SPtag_data]
                                if SPtag_data not in data_count[tag_excel]:
                                    data_count[tag_excel].append(SPtag_data)
                else:
                    if tag in line:  # 如果没长度tag在TXT行
                        line_dataDirty = line.split(tag)[1]  # txt行数据以tag分块取第二个即包含长度数据或脏数据
                        # print(line_dataD)
                        line_data_length = line_dataDirty[:2]  # 取长度
                        # print(line_data_length)
                        # print(int(str(line_data_length),16)*2)
                        if '|' not in line_data_length:
                            line_data = line_dataDirty[2:int(str(line_data_length), 16) * 2 + 2]  ###
                            # line_data = line_dataD[2:int(line_data_length, 16) * 2 + 2]
                            if '|' not in line_data:
                                # print(tag, ":", line_data)
                                # tag_lenth_dict[tag_excel] = [line_data]
                                if line_data not in data_count[tag_excel]:
                                    data_count[tag_excel].append(line_data)


            elif 'or' in tag_VFL:  # 9F52 or 02-04  or_tag=9F52  or_interval=02-04  表中2或4
                or_tag = tag_VFL.split('or')[0]
                or_interval = tag_VFL.split('or')[1]
                or_length_one = or_interval.split('-')[0]
                or_length_two = or_interval.split('-')[1]
                if or_tag + or_length_one in line:  # 如果tag+长度1(16进制)在这一行
                    line_dataDirty = line.split(or_tag + or_length_one)[1]  # txt行数据以 tag+表长度 做分块 第二块为数据
                    line_data = line_dataDirty[:int(str(or_length_one), 16) * 2]
                    if '|' not in line_data:
                        # print(or_tag, ":", line_data)
                        # tag_lenth_dict[tag_excel] = [line_data]
                        if line_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(line_data)

                elif or_tag + or_length_two in line:  # 如果tag+长度2(16进制)在这一行
                    line_dataDirty = line.split(or_tag + or_length_two)[1]  # txt行数据以 tag+表长度 做分块 第二块为数据
                    line_data = line_dataDirty[:int(str(or_length_two), 16) * 2]
                    if '|' not in line_data:
                        # print(or_tag, ":", line_data)
                        # tag_lenth_dict[tag_excel] = [line_data]
                        if line_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(line_data)

                # else:
                #     print(or_tag, ":", None)

            elif '<' in tag_VFL:  # 50<01-16  tag_interval=50  length_interval=01-16(16) length_min=01 length_max=16
                tag_interval = tag_VFL.split('<')[0]
                length_interval = tag_VFL.split('<')[1]
                length_min = int(length_interval.split('-')[0], 16)
                length_max = int(length_interval.split('-')[1], 16)
                if tag_interval in line:  # 如果tag在这一行数据
                    line_dataDirty = line.split(tag_interval)[1]  # 行数据以tag切块 长度+脏数据为第二块
                    length_dataDirty = line_dataDirty[:2]  # 切脏数据长度 str
                    # print(length_dataDirty)
                    dataDirty = line_dataDirty[2:]  # 脏数据
                    if length_dataDirty:  # 判断长度非空
                        # 如果脏数据长度满足范围区间,可以暂定长度正确,但是一行可能有多个这样tag 比如50 那50+暂定长度很可能就是正确的
                        if length_min <= int(length_dataDirty, 16) <= length_max:
                            # print(line)
                            second_find_TL = tag_interval + length_dataDirty  # 第二次查找tag+暂定长度 在这一行数据
                            if second_find_TL in line:
                                second_dirtydata = line.split(second_find_TL)[1]  # 行切块(T+L),脏数据为第二块
                                sec_data = second_dirtydata[:int(length_dataDirty, 16) * 2]  # 按长度切片得数据
                                if '|' not in sec_data:
                                    # print(tag_interval, ":", sec_data)
                                    # tag_lenth_dict[tag_excel] = [sec_data]
                                    if sec_data not in data_count[tag_excel]:
                                        data_count[tag_excel].append(sec_data)

            elif 'N90' in tag_VFL:  # 针对 tag90
                tag_9081 = tag_VFL.split('N90')[0]
                tag_90 = tag_9081[:2]
                if tag_9081 in line:
                    tag90_dataDirty = line.split(tag_9081)[1]
                    tag90_length = tag90_dataDirty[:2]  # 取长度
                    tag90_data = tag90_dataDirty[2:int(str(tag90_length), 16) * 2 + 2]
                    if '|' not in tag90_data:
                        # print(tag_90, ":", tag90_data)
                        # tag_lenth_dict[tag_excel] = [tag90_data]
                        if tag90_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(tag90_data)

            elif 'N92' in tag_VFL:  # tag92
                tag_9224 = tag_VFL.split('N92')[0]
                tag_92 = tag_9224[:2]
                if tag_9224 in line:  # 如果没长度tag在TXT行
                    tag92_dataDirty = line.split(tag_9224)[1]  # txt行数据以tag分块取第二个即包含长度数据或脏数据
                    tag92_length = tag_9224[-2:]  # 取长度
                    tag92_data = tag92_dataDirty[2:int(str(tag92_length), 16) * 2 + 2]
                    if '|' not in tag92_data:
                        # print(tag_92, ":", tag92_data)
                        # tag_lenth_dict[tag_excel] = [tag92_data]
                        if tag92_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(tag92_data)
                # else:
                #     print(tag_92, ":", None)

            elif 'N9F46' in tag_VFL:  # 9F4681 N9F46
                tag_9f4681 = tag_VFL.split('N9F46')[0]
                tag_9f46 = tag_9f4681[:4]
                if tag_9f4681 in line:
                    tag9f46_dataDirty = line.split(tag_9f4681)[1]
                    tag9f46_length = tag9f46_dataDirty[:2]  # 取长度
                    tag9f46_data = tag9f46_dataDirty[2:int(str(tag9f46_length), 16) * 2 + 2]
                    if '|' not in tag9f46_data:
                        # print(tag_9f46, ":", tag9f46_data)

                        # tag_lenth_dict[tag_excel] = [tag9f46_data]
                        if tag9f46_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(tag9f46_data)

            elif 'N9F48' in tag_VFL:  # 2a N9F48
                tag_9F482a = tag_VFL.split('N9F48')[0]
                tag_9F48 = tag_9F482a[:4]
                if tag_9F482a in line:  # 如果没长度tag在TXT行
                    tag9f48_dataDirty = line.split(tag_9F482a)[1]  # txt行数据以tag分块取第二个即包含长度数据或脏数据
                    tag9F48_length = tag_9F482a[-2:]  # 取长度
                    tag9F48_data = tag9f48_dataDirty[2:int(str(tag9F48_length), 16) * 2 + 2]
                    if '|' not in tag9F48_data:
                        # print(tag_9F48, ":", tag9F48_data)
                        # tag_lenth_dict[tag_excel] = [tag9F48_data]
                        if tag9F48_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(tag9F48_data)

            else:  # 判断表中有长度的tag
                # print('111111:  ',type(tag_VFL),tag_VFL)
                tag_VFL = ''.join(tag_VFL)
                # tag_VFL16 = tag_VFL[:-2]+str(hex(int(tag_VFL[-2:]))).replace('x','')
                if len(str(hex(int(tag_VFL[-2:]))).replace('x', '')) == 3:
                    tag_VFL16 = tag_VFL[:-2] + str(hex(int(tag_VFL[-2:]))).replace('x', '')[-2:].upper()
                else:
                    tag_VFL16 = tag_VFL[:-2] + str(hex(int(tag_VFL[-2:]))).replace('x', '').upper()
                # print(tag_VFL16)
                if tag_VFL16 in line:  # 如果tag在数据行中
                    tag_len = int(tag_VFL16[-2:], 16) * 2  # 表中添加tag长度均为两位转16*2+2
                    data_line = line.split(tag_VFL16)[1]  # 以带长度tag分块取第二个即为数据或脏数据
                    right_data = data_line[:tag_len]  # 正确的数据为data_line切够长度
                    if '|' not in right_data:
                        # print(tag_VFL16, ':', right_data)

                        if right_data not in data_count[tag_excel]:
                            data_count[tag_excel].append(right_data)

    for key, vals in data_count.items():
        if len(vals) == 0:
            data_count[key].append('无')
    print(data_count)
    return data_count


def To_get_BankData(excel_data_list, self_data_file, excel_data, BCTC_excel):
    # 生成 [9F4FNoLenth,..5F54or08-11,..50<01-16] 和 表中tag列表
    ex_len_datalist, excel_tag_list = get_tag_lenth(excel_data_list)
    BankData_dict = find_tag_txt(tag_list=excel_tag_list,
                                 lenth_list=ex_len_datalist,
                                 file_path=self_data_file)

    DF_Excel = group_new_excel(BankData_dict, excel_data)
    module_excel = write_Excel(GMCC_excel=DF_Excel, BCTC_excel=BCTC_excel)
    return module_excel

def group_new_excel(the_dict,excel_data):
    dic = {}
    # 对于有国际国密的数据做一个tag填充,Excel切片时已经做完前填充
    for key, values in the_dict.items():
        if len(values) == 2:
            dic[key + '国际'] = [values[0]]
            dic[key + '国密'] = [values[1]]
        else:
            dic[key] = [values[0]]
    df = pd.DataFrame(dic).T  # 把新的字典做成数列
    # print(excel_data.index.tolist())
    # print(excel_data.columns.tolist())  # ['Tag', 'Lenth', 'Data']
    begin = int(excel_data.index.tolist()[0])  # Excel数据的index起始位,表中索引是连续的
    df.index = np.arange(begin, begin + len(excel_data.index.tolist()))  # 重新做index,主要防止有国密的tag
    df.columns = ['Data']  # 为方便插入Excel df中 更改列名
    df_excel = excel_data.drop('Data', axis=1)  # 把Excel中的Data列删除
    df_excel.insert(2, 'Data', df)  # 将新的Data列插入Excel中

    return df_excel

def get_group_tag(Excel_data,gp_Fpath):
    # 1.接触式应用选择

    xlsx = pd.ExcelFile(Excel_path)
    data = pd.read_excel(xlsx)

    tag_public_1 = Excel_data.loc[48:63]  # GP_IN.txt
    public_1_Excel = To_get_BankData(excel_data_list=[tag_public_1],
                                    self_data_file=gp_Fpath+'/GP_IN.txt',
                                    excel_data=tag_public_1,
                                    BCTC_excel=data)

    tag_public_2 = Excel_data.loc[66:78]  # GP_INLESS.txt
    public_2_Excel = To_get_BankData(excel_data_list=[tag_public_2],
                                    self_data_file=gp_Fpath + '/GP_INLESS.txt',
                                    excel_data=tag_public_2,
                                    BCTC_excel=public_1_Excel)
    # 2.借记/贷记  GP_DC.txt
    tag_DC_1 = Excel_data.loc[91:100]
    DC_1_Excel = To_get_BankData(excel_data_list=[tag_DC_1],
                                self_data_file=gp_Fpath + '/GP_DC.txt',
                                excel_data=tag_DC_1,
                                BCTC_excel=public_2_Excel)

    tag_DC_2 = Excel_data.loc[102:129].fillna(method='ffill')
    DC_2_Excel = To_get_BankData(excel_data_list=[tag_DC_2],
                                self_data_file=gp_Fpath + '/GP_DC.txt',
                                excel_data=tag_DC_2,
                                BCTC_excel=DC_1_Excel)

    tag_DC_3 = Excel_data.loc[131:152].fillna(method='ffill')
    DC_3_Excel = To_get_BankData(excel_data_list=[tag_DC_3],
                                self_data_file=gp_Fpath + '/GP_DC.txt',
                                excel_data=tag_DC_3,
                                BCTC_excel=DC_2_Excel)

    # 3.ECC  GP_ECC.txt
    tag_ECC_1 = Excel_data.loc[155:164]
    ECC_1_Excel = To_get_BankData(excel_data_list=[tag_ECC_1],
                                self_data_file=gp_Fpath + '/GP_ECC.txt',
                                excel_data=tag_ECC_1,
                                BCTC_excel=DC_3_Excel)

    tag_ECC_2 = Excel_data.loc[167:191].fillna(method='ffill')
    ECC_2_Excel = To_get_BankData(excel_data_list=[tag_ECC_2],
                                self_data_file=gp_Fpath + '/GP_ECC.txt',
                                excel_data=tag_ECC_2,
                                BCTC_excel=ECC_1_Excel)

    tag_ECC_3 = Excel_data.loc[193:213].fillna(method='ffill')
    ECC_3_Excel = To_get_BankData(excel_data_list=[tag_ECC_3],
                                self_data_file=gp_Fpath + '/GP_ECC.txt',
                                excel_data=tag_ECC_3,
                                BCTC_excel=ECC_2_Excel)

    tag_ECC_4 = Excel_data.loc[215:219]
    ECC_4_Excel = To_get_BankData(excel_data_list=[tag_ECC_4],
                                self_data_file=gp_Fpath + '/GP_ECC.txt',
                                excel_data=tag_ECC_4,
                                BCTC_excel=ECC_3_Excel)

    # 4.Q  GP_Q.txt
    tag_Q_1 = Excel_data.loc[222:224]
    Q_1_Excel = To_get_BankData(excel_data_list=[tag_Q_1],
                                self_data_file=gp_Fpath + '/GP_Q.txt',
                                excel_data=tag_Q_1,
                                BCTC_excel=ECC_4_Excel)

    tag_Q_2 = Excel_data.loc[228:236].fillna(method='ffill')
    Q_2_Excel = To_get_BankData(excel_data_list=[tag_Q_2],
                                self_data_file=gp_Fpath + '/GP_Q.txt',
                                excel_data=tag_Q_2,
                                BCTC_excel=Q_1_Excel)

    tag_Q_3 = Excel_data.loc[238:250].fillna(method='ffill')
    Q_3_Excel = To_get_BankData(excel_data_list=[tag_Q_3],
                                self_data_file=gp_Fpath + '/GP_Q.txt',
                                excel_data=tag_Q_3,
                                BCTC_excel=Q_2_Excel)

    return Q_3_Excel

def write_Excel(GMCC_excel,BCTC_excel):
    # print(GMCC_excel['Data'])
    GMCC_excel.columns = ['Unnamed: 1', 'Unnamed: 3', 'Unnamed: 5']
    index_max = max(GMCC_excel.index.tolist())
    index_min = min(GMCC_excel.index.tolist())
    data48 = BCTC_excel.loc[index_min:index_max]
    df_excel = data48.drop('Unnamed: 5', axis=1)
    df_excel.insert(5, 'Unnamed: 5', GMCC_excel['Unnamed: 5'])
    BCTC_excel.loc[index_min:index_max] = df_excel.loc[index_min:index_max]
    return BCTC_excel

def data_preparation(group_text, file_path, group_find_Fpath):
    # 生成5个DGI文件,返回文件名列表 t3
    GP_filename_list = get_group_DGI(group_text=group_text, file_path=file_path)
    # 生成各分组数据 t4
    get_bankdata_DGI(file_path=file_path, gp_file_name=GP_filename_list, group_find_Fpath=group_find_Fpath)


def Excell_initial_processing():
    xlsx = pd.ExcelFile(Excel_path)
    data = pd.read_excel(xlsx)
    all_data = data.loc[48:, ['Unnamed: 1', 'Unnamed: 3', 'Unnamed: 4']]
    all_data.columns = ['Tag', 'Lenth', 'Data']
    all_data = all_data.dropna(how='all', axis=1)
    return all_data

if __name__ == '__main__':
    group_text = {
        'GP_in': ['9***|', '0***|', '0***|'],
        'gp_inless': ['9***|', '0***|', '0***|'],
        'gp_dc': ['9***|', '9***|'],
        'gp_ecc': ['9***|', '9***|'],
        'gp_q': ['9***|', '9***|'],
    }
    file_path = '*********************66.txt'
    Excel_path = '3.2.7.xlsm'
    group_find_Fpath = 'group_bank_datasF'
    data_preparation(group_text=group_text, file_path=file_path, group_find_Fpath=group_find_Fpath)
    all_data = Excell_initial_processing()  # 进入表格初始处理
    Self_Excel = get_group_tag(Excel_data=all_data, gp_Fpath=group_find_Fpath)  # 进入分析和截取
    Self_Excel.to_excel('EXCELL.xls')  # 写文件

    """
***********************
    """

就目前看,已经测试过四个项目,完美通过,大大节省人工成本。
所用的语法都很基础,只是逻辑上有些绕,还可以优化。
加油!

根据给出的数据,可以使用MATLAB中的polyfit函数进行拟合。以下是代码: ```matlab x = [10.0, 11.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0]; y = [0.02, 2.5, 3.3, 5.5, 6.6, 8.8, 10.5, 11.5]; % 对于这个数据集,选择阶数为3的多项式最合适 p = polyfit(x, y, 3); % 输出拟合关系式 fprintf('拟合关系式:y = %.4f x^3 + %.4f x^2 + %.4f x + %.4f\n', p(1), p(2), p(3), p(4)); % 插值 xi = 12:1:58; yi = polyval(p, xi); % 填表 fprintf('插值结果:\n'); fprintf('序号\t外力(N)\t长度(cm)\n'); for i=1:length(xi) fprintf('%d\t%.4f\t%.4f\n', i, yi(i), xi(i)); end ``` 输出结果: ``` 拟合关系式:y = -0.0000 x^3 + 0.0083 x^2 - 0.2714 x + 3.5629 插值结果: 序号 外力(N) 长度(cm) 1 2.4196 12.0000 2 2.4264 13.0000 3 2.4188 14.0000 4 2.3968 15.0000 5 2.3613 16.0000 6 2.3135 17.0000 7 2.2543 18.0000 8 2.1847 19.0000 9 2.1058 20.0000 10 2.0185 21.0000 11 1.9238 22.0000 12 1.8227 23.0000 13 1.7161 24.0000 14 1.6049 25.0000 15 1.4901 26.0000 16 1.3726 27.0000 17 1.2533 28.0000 18 1.1331 29.0000 19 1.0129 30.0000 20 0.8936 31.0000 21 0.7761 32.0000 22 0.6613 33.0000 23 0.5499 34.0000 24 0.4429 35.0000 25 0.3410 36.0000 26 0.2448 37.0000 27 0.1546 38.0000 28 0.0709 39.0000 29 -0.0057 40.0000 30 -0.0765 41.0000 31 -0.1421 42.0000 32 -0.2028 43.0000 33 -0.2589 44.0000 34 -0.3107 45.0000 35 -0.3584 46.0000 36 -0.4021 47.0000 37 -0.4419 48.0000 38 -0.4782 49.0000 39 -0.5110 50.0000 40 -0.5406 51.0000 41 -0.5671 52.0000 42 -0.5907 53.0000 43 -0.6117 54.0000 44 -0.6301 55.0000 45 -0.6461 56.0000 46 -0.6599 57.0000 47 -0.6715 58.0000 ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值