适合单表头和多级表头,注意关键词判断
from openpyxl import load_workbook
from openpyxl.cell import MergedCell
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.merge import MergedCellRange
file = "工资表_双层表头.xlsx"
file="2022年1月工资表.xlsx"
file="工资表_单层表头.xlsx"
#path="/root/PycharmProjects1/python/excel/{}".format(file)
book = load_workbook(file)
ws = book.active
def get_header():
# df = pd.read_excel(file, header=[0, 1], engine='openpyxl')
# header = df.columns.to_list()
# print(header)
# return
# df.loc[1].to_csv('test.csv')
row1 = ws[1]
row2 = ws[2]
merged_cells = ws.merged_cells.ranges
letters = []
for i in range(1, ws.max_column + 1):
letter = get_column_letter(i)
letters.append(letter)
print(letters)
is_two_header=False
for letter in letters:
coord = '{}1:{}2'.format(letter, letter)
if MergedCellRange(ws, coord) in merged_cells:
print('是二级', coord)
is_two_header=True
break
headers2 = []
for i, e in enumerate(row1):
# if isinstance(e,MergedCell) or isinstance(row2[i],MergedCell):
if e.value is not None:
headers2.append({'key': e.value, 'index': i})
if is_two_header:
if row2[i].value is not None:
if 'child' in headers2[-1]:
headers2[-1]['child'].append({'key': row2[i].value, 'index': i})
else:
headers2[-1]['child'] = [{'key': row2[i].value, 'index': i}]
print(headers2)
return headers2
#header = get_header()
def get_value(row):
"""
get row value
:param ws:
:param row:
:return:
"""
value = []
for i in header:
if 'child' in i:
for j in i['child']:
v = row[j['index']].value
value.append(v)
else:
v = row[i['index']].value
value.append(v)
print(value)
# for i,e in enumerate(row):
# print(e.value)
def get_headers2():
merged_cells = ws.merged_cells.ranges
is_two_header = False
name_cell_row_start = None
name_cell_row_end=None
row1=None
for merge_cell in merged_cells:
cell = ws.cell(merge_cell.min_row, merge_cell.min_col)
#print(cell.value)
if cell.value and isinstance(cell.value,str) and '姓名' in cell.value:
is_two_header=True
name_cell_row_start=merge_cell.min_row
name_cell_row_end=merge_cell.max_row
row1 = ws[name_cell_row_start]
row2 = ws[name_cell_row_end]
break
if not is_two_header:
for row in ws.iter_rows(max_row=100):
for cell in row:
if cell.value and isinstance(cell.value,str) and '姓名' in cell.value:
row1 = ws[cell.row]
break
if not row1:
print('没有姓名')
return
headers2 = []
for i, e in enumerate(row1):
# if isinstance(e,MergedCell) or isinstance(row2[i],MergedCell):
if e.value is not None:
headers2.append({'key': e.value, 'index': i})
if is_two_header:
if row2[i].value is not None:
if 'child' in headers2[-1]:
headers2[-1]['child'].append({'key': row2[i].value, 'index': i})
else:
headers2[-1]['child'] = [{'key': row2[i].value, 'index': i}]
print(headers2)
return headers2
get_headers2()