工具功能说明
- 根据Excel的替换模板将,内容批零替换到文件里面并追加
- 实现批零替换和固定模板的配置输出
工具使用说明
- 配置替换数据模板 # 只处理第一个sheet name
- 替换模板表头不可以更改
- 替换组可以自己定义 但是需要和 下面的参数表表名字一致
- 替换组的表名为配置模板里面的参数(主要参数的唯一性 建议
在这里插入代码片
加上{})
工具代码
代码示例
from pandas import read_excel
from chardet import detect
def find_encoding(path):
'''查找编码的形式
{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}'''
with open(path, 'rb') as f:
encode = detect(f.read())
# print(encode)
return encode['encoding']
def get_new_tabe_title(title):
'''
:param title: 表头部分
:return: list
'''
title.fillna(method='ffill', axis=1, inplace=True)
title_columns_num = len(title.columns)
title_columns = []
for i in range(title_columns_num):
val = [str(x) for x in title.iloc[:, i].values]
title_columns.append('//'.join(val))
return title_columns
def get_targeted_table(df, table_head, table_head_num=1):
'''
:param df: 传入的大表
:param table_head: 找的新表的名称
:param table_head_num: 新表头部的数量
:return: 一个信的表
'''
start_line_num = False
##找到开始位置
for i in range(len(df)):
df_line = df.iloc[i]
if table_head == df_line.iloc[0]:
start_line_num = i
break
if start_line_num: # 找到开始位置了
title = df.iloc[start_line_num + 1:start_line_num + 1 + table_head_num, :].dropna(how='all', axis=1).reset_index(drop=True, )
title_columns = get_new_tabe_title(title)
title_columns_num = len(title.columns)
start_df = df.iloc[start_line_num + 1 + table_head_num:, :title_columns_num]
start_df.reset_index(drop=True, inplace=True)
end_line_num = False
for i in range(len(start_df)):
df_line = start_df.iloc[i]
# print(df_line.isnull().values.tolist())
if False not in df_line.isnull().values.tolist():
end_line_num = i
break
if end_line_num:
new_table = start_df.iloc[:end_line_num]
else:
new_table = start_df
new_table.columns = title_columns
new_table.reset_index(drop=True, inplace=True)
new_table = new_table.copy()
new_table.fillna(method='ffill', inplace=True)
return new_table
else:
return None
def get_config(df, mould_path, output_path, replace_group):
with open(mould_path, 'r', encoding=find_encoding(mould_path)) as f:
mould1 = f.read()
replace_df = get_targeted_table(df, replace_group, table_head_num=1)
for i in range(len(replace_df)):
mould = mould1
df_line=replace_df.iloc[i]
for columns in replace_df.columns:
mould = mould.replace(str(columns), str(df_line[columns]))
# print(output_path)
with open(output_path, 'a', encoding='utf-8') as f1:
f1.write(mould + '\n')
def read_replace_mode(df):
# print(df)
replace_df = get_targeted_table(df, '替换模块', table_head_num=1)
for i in range(len(replace_df)):
df_line = replace_df.iloc[i]
mould_path = df_line[0]
output_path = df_line[1]
replace_group = df_line[2]
get_config(df, mould_path, output_path, replace_group)
path = input('excel的路径:')
df = read_excel(path)
read_replace_mode(df)
input('完成!!!')
Excel表格
红色部分不可以更改