python读取xlsx,包含合并单元格
import xlrd
import pandas as pd
def read_excel(inputpath,outputpath):
workbook = xlrd.open_workbook(inputpath)
print('打印所有sheet:', workbook.sheet_names())
sheet2 = workbook.sheet_by_index(0)
rows_num = sheet2.nrows
cols_num = sheet2.ncols
merged = get_merged_cells(sheet2)
data = []
for r in range(rows_num):
entity_dict = {}
for c in range(cols_num):
cell_value = sheet2.row_values(r)[c]
if (cell_value is None or cell_value == ''):
cell_value = (get_merged_cells_value(sheet2, merged, r, c))
the_key = 'column' + str(c + 1);
entity_dict[the_key] = cell_value
data.append(entity_dict)
df = pd.DataFrame(data)
df.to_csv(outputpath, index=False, header=False)
def get_merged_cells(sheet):
"""
获取所有的合并单元格,格式如下:
[(4, 5, 2, 4), (5, 6, 2, 4), (1, 4, 3, 4)]
(4, 5, 2, 4) 的含义为:行 从下标4开始,到下标5(不包含) 列 从下标2开始,到下标4(不包含),为合并单元格
:param sheet:
:return:
"""
return sheet.merged_cells
def get_merged_cells_value(sheet, merged, row_index, col_index):
"""
先判断给定的单元格,是否属于合并单元格;
如果是合并单元格,就返回合并单元格的内容
:return:
"""
for (rlow, rhigh, clow, chigh) in merged:
if (row_index >= rlow and row_index < rhigh):
if (col_index >= clow and col_index < chigh):
cell_value = sheet.cell_value(rlow, clow)
return cell_value
break
return None
if __name__ == "__main__":
read_excel("in.xlsx",'out.csv')