1、提取 Word 文档内所有表格的内容,写入 Excel
需要先安装 python-docx 和 xlwt(pip install python-docx xlwt);注意 python-docx 只能读取 .docx 格式的文档
# coding=utf-8
from docx import Document
from xlwt import Workbook
# Initialise the input and output paths.
xl_path = "./path/excel.xls"
# NOTE(review): python-docx reads the .docx format only; a legacy binary
# .doc file has to be converted to .docx first -- confirm the real input.
word_path = "./path/word.doc"

# Read the Word document: one list of rows per table, and one list of cell
# texts per row.
doc = Document(word_path)
tables = [
    [[cell.text for cell in row.cells] for row in table.rows]
    for table in doc.tables
]
# Drop tables that contain no rows at all.
tables_2 = list(filter(None, tables))

# Write every remaining table to its own worksheet: sheet0, sheet1, ...
Sheet_index = 0
workbook = Workbook(encoding='utf-8')
for table in tables_2:
    worksheet = workbook.add_sheet('sheet' + str(Sheet_index), cell_overwrite_ok=True)
    Sheet_index = Sheet_index + 1
    # enumerate() gives the real row/column position. Looking the position
    # up with list.index() returns the FIRST equal element, so rows or cells
    # with repeated content would be written to the wrong coordinates.
    for r, rows in enumerate(table):
        for c, cell in enumerate(rows):
            worksheet.write(r, c, cell)
workbook.save(xl_path)
2、将多个字段名相同的 Excel 文件合并成一个
import os

import pandas as pd

# NOTE(review): os.walk() expects a directory; for a path that is not a
# directory it yields nothing and nothing is merged. Confirm that the real
# value points at the folder holding the workbooks.
xl_path = r'D:\path\excel.xls'

# Load every file found under xl_path (sub-folders included) as a DataFrame.
frames = []
for parent, dirnames, filenames in os.walk(xl_path):
    for filename in filenames:
        frames.append(pd.read_excel(os.path.join(parent, filename)))

# Stack all frames into one table with a fresh 0..n-1 index. pd.concat()
# rejects an empty list, so fall back to an empty frame when no file was found.
res = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# The `encoding` keyword is not passed: current pandas releases no longer
# accept it in to_excel().
# NOTE(review): writing the legacy .xls format needs the xlwt engine, which
# recent pandas versions no longer support -- switch the target to .xlsx if
# this call fails.
res.to_excel(r'D:\path\new_excel.xls')
3、将数字转换成 Excel 列标(例如 1 → A,27 → AA)
def convert_to_title(n):
    """Convert a 1-based column number to its spreadsheet column title.

    The title is a bijective base-26 number written with the letters A-Z:
    1 -> "A", 26 -> "Z", 27 -> "AA", 703 -> "AAA".

    Args:
        n: Column number. 0 yields an empty string.

    Returns:
        The column title as an upper-case string.

    Raises:
        ValueError: If ``n`` is negative. Floor division never brings a
            negative value to 0, so the loop would otherwise never end.
    """
    if n < 0:
        raise ValueError(f"column number must not be negative, got {n!r}")

    letters = []
    while n != 0:
        # Shift to 0-based before dividing: there is no zero digit, so 26
        # has to map to "Z" instead of carrying over to "A0".
        n, remainder = divmod(n - 1, 26)
        letters.append(chr(remainder + 65))  # 65 == ord("A")
    # Digits were produced least-significant first.
    return "".join(reversed(letters))