目录
Excel转Html
示例
函数
import openpyxl
from openpyxl.styles import Border
from openpyxl.styles import Font
from openpyxl.styles import Side
from openpyxl.styles import Alignment
from openpyxl.styles import PatternFill
from openpyxl.cell import MergedCell
# 模式:adaptive / normal
def transform_xlsx_to_html(m_sheet, m_mode):
# sheet.min_row的值在后续数据获取过程中会改变,原因不明
m_min_row = m_sheet.min_row
m_max_row = m_sheet.max_row
m_min_col = m_sheet.min_column
m_max_col = m_sheet.max_column
if m_min_row == m_max_row == m_min_col == m_max_col and not m_sheet.cell(m_min_row, m_min_col).value:
return ''
m_merged_cell_dict = {} # 用于储存 所有合并单元格的左上单元格对象
m_width_dict = {} # 用于储存 所有列的列宽,px
m_height_dict = {} # 用于储存 所有列的行高,px
# 查询行高
for m_i in range(m_min_row, m_max_row + 1):
m_height = 13.5
if m_sheet.row_dimensions[m_i].height:
m_height = m_sheet.row_dimensions[m_i].height
m_height_dict[str(m_i)] = m_height
# 查询列宽
for m_i in range(m_min_col, m_max_col + 1):
m_col_name = openpyxl.utils.get_column_letter(m_i)
m_width = m_sheet.column_dimensions[m_col_name].width
if m_mode == 'normal':
m_width = m_sheet.column_dimensions[m_col_name].width * 6
m_width_dict[m_col_name] = m_width
# 找出所有合并区域的行高,列宽,向右合并距离,向下合并距离
for m_merged_range in m_sheet.merged_cells.ranges:
m_width = 0 # 定义列宽
m_height = 0 # 定义行高
for m_i in range(m_merged_range.min_row, m_merged_range.max_row + 1):
m_height += m_height_dict[str(m_i)]
for m_i in range(m_merged_range.min_col, m_merged_range.max_col + 1):
m_col_name = openpyxl.utils.get_column_letter(m_i)
m_width += m_width_dict[m_col_name]
m_cell = m_sheet.cell(row=m_merged_range.min_row, column=m_merged_range.min_col) # 选择合并区域左上单元格
m_colspan = m_merged_range.max_col - m_merged_range.min_col + 1 # 向右合并长度
m_rowspan = m_merged_range.max_row - m_merged_range.min_row + 1 # 向下合并长度
m_merged_cell_dict[m_cell] = (m_height, m_width, m_colspan, m_rowspan)
# 开始写入数据到table标签
m_html = '''<table style="border: 1px solid #A9A9A9">'''
for m_i in range(m_min_row, m_max_row + 1):
tr = '''<tr>'''
for m_j in range(m_min_col, m_max_col + 1):
m_cell = m_sheet.cell(m_i, m_j)
m_col_name = openpyxl.utils.get_column_letter(m_j)
# html 字体大小
m_font_size = str(int(m_cell.font.size) + 3) if m_cell.font.size else 10
# html 字体加粗
m_font_weight = '700' if m_cell.font.b else '400'
# html 边框格式
m_border = 'border: 1px solid #A9A9A9' if m_cell.value or m_cell.value == 0 else 'border: 0'
# 水平位置
m_align_horizontal = 'text-align: {}'.format(m_cell.alignment.horizontal) if m_cell.alignment.horizontal else ''
# 垂直位置
m_align_vertical = 'vertical-align: {}'.format(m_cell.alignment.vertical) if m_cell.alignment.vertical else ''
# 字体颜色 颜色未设置时有时会存在 Values must be of type <class 'str'> 输出
if m_cell.font.color and 'str' not in str(m_cell.font.color.rgb):
m_font_color = 'color: #{}'.format(m_cell.font.color.rgb[2:])
else:
m_font_color = 'color: #000000'
# 背景颜色 颜色未设置时有时会存在 Values must be of type <class 'str'> 输出
if m_cell.fill.fgColor and 'str' not in str(m_cell.fill.fgColor.rgb) and m_cell.fill.fgColor.rgb[2:] != '000000':
m_fill_color = 'background-color: #{}'.format(m_cell.fill.fgColor.rgb[2:])
else:
m_fill_color = 'background-color: #FFFFFF'
# 单元格格式
style = f'''{m_fill_color}; {m_border}; {m_font_color};
font-style: normal;
font-family: 微软雅黑;
font-size: {m_font_size}px;
font-weight: {m_font_weight};
{m_align_horizontal}; {m_align_vertical}; '''
if m_mode == 'normal':
if m_cell in m_merged_cell_dict.keys():
style += f'''height: {m_merged_cell_dict[m_cell][0]}px; width: {m_merged_cell_dict[m_cell][1]}px; '''
else:
style += f'''height: {m_height_dict[str(m_i)]}px; width: {m_width_dict[m_col_name]}px; '''
else:
style += f'''word-break: keep-all; word-wrap:break-word; white-space: nowrap'''
text = m_cell.value if m_cell.value or m_cell.value == 0 else ''
# 文本处理:小数转为百分比展示
text = '{:.2f}%'.format(text * 100) if text and type(text) == float else text
# 文本处理:字符串换行展示
text = text.replace('\n', '<br/>') if text and type(text) == str else text
td = ""
if m_cell in m_merged_cell_dict.keys():
td = f'''<td colspan="{m_merged_cell_dict[m_cell][2]}" rowspan="{m_merged_cell_dict[m_cell][3]}" style="{style}">{text}</td>'''
elif not isinstance(m_cell, MergedCell):
td = f'''<td style="{style}">{text}</td>'''
tr = tr + td
tr = tr + '''</tr>'''
m_html += tr
m_html += '''</table>'''
return m_html
Openpyxl知识点
函数 | 功能 |
workbook.sheetnames | 返回一个列表,记录所有Sheet页名称 |
workbook[sheetname] | 定位Sheet页 |
workbook.remove(sheetname) | 删除Sheet页 |
sheet.min_row sheet.max_row sheet.min_column sheet.max_column | 获取有效数据首行(有时会改变,原因不明) 获取有效数据末行 获取有效数据首列 获取有效数据末列 |
sheet.views.sheetView[0].showGridLines | 设置网格线 |
sheet.row_dimensions[row_index].height sheet.column_dimensions[column_name].width openpyxl.utils.get_column_letter(column_index) | 获取、设置行高 获取、设置列宽 获取下标对应的列名(从1开始) |
cell.value cell.alignment cell.number_format | 获取、设置文本 获取、设置对齐 获取、设置数字格式 |
cell.border = Border() cell.border.left.style cell.border.top.style cell.border.right.style cell.border.bottom.style | 设置边框格式 获取边框格式-左(不能直接获取) 获取边框格式-上(不能直接获取) 获取边框格式-右(不能直接获取) 获取边框格式-下(不能直接获取) |
cell.font cell.font.size cell.font.color.rgb | 设置字体格式 获取字体信息-尺寸 获取字体信息-粗细 获取字体颜色(不能直接获取) |
cell.fill cell.fill.fgColor.rgb | 设置填充颜色 获取填充颜色(不能直接获取) |
cell.coordinate cell.alignment.vertical cell.alignment.horizontal | 获取坐标信息 获取对齐信息-竖直对齐 获取对齐信息-水平对齐 |
sheet.merge_cells() | 合并单元格 |
isinstance(cell, MergedCell) | 判断是否为合并单元格 |
sheet.merged_cells.ranges sheet.merged_cells.ranges[index].min_row sheet.merged_cells.ranges[index]..max_row sheet.merged_cells.ranges[index]..min_col sheet.merged_cells.ranges[index]..max_col | 获取合并单元格信息 |
示例代码:
import openpyxl
from openpyxl.styles import Border
from openpyxl.styles import Font
from openpyxl.styles import Side
from openpyxl.styles import Alignment
from openpyxl.styles import PatternFill
from openpyxl.cell import MergedCell
# 打开Excel
workbook = openpyxl.load_workbook(filename='D:\Project\test.xlsx')
print(workbook.sheetnames)
# 创建删除sheet
workbook.create_sheet('test')
workbook.remove('test')
# 行数列数
sheet = workbook.sheetnames[0]
print(sheet.min_row)
print(sheet.max_row)
print(sheet.min_column)
print(sheet.max_column)
# 行高列宽
print(sheet.row_dimensions[1].height )
print(sheet.column_dimensions['A'].width)
# 设置行高列宽
sheet.row_dimensions[1].height = 10
sheet.column_dimensions['A'].width = 10
# 获取下标对应的列名(从1开始)
column_name = openpyxl.utils.get_column_letter(1)
# 设置不显示网格线
sheet.views.sheetView[0].showGridLines = False
# 合并单元格
sheet.merge_cells(start_row=1, end_row=2, start_column=3, end_column=4)
# 设置数字格式
sheet.cell(row=2, column=2, value=100).number_format = '0'
sheet.cell(row=3, column=3, value=100).number_format = '0.00%'
# 设置单元格
sheet.cell(row=1, column=1, value='test')
# 设置填充颜色
fill = PatternFill('solid', fgColor='123456')
# 设置字体格式
font = Font(u'微软雅黑', size=16, bold=True, italic=False, strike=False, color='000000')
# 设置文本对齐
alignment = Alignment(wrap_text=True, horizontal='center', vertical='center')
# 方式一
sheet.cell(row=1, column=1, fill=fill, font=font, alignment=alignment, value='test')
# 方式二
sheet.cell(row=1, column=1).value = 'test'
sheet.cell(row=1, column=1).fill = fill
sheet.cell(row=1, column=1).font = font
sheet.cell(row=1, column=1).alignment = alignment
# 设置边框 方式一 合并单元格时右侧边框设置会不成功
border = Border()
border.left = Side(border_style='double', color='000000')
border.top = Side(border_style='double', color='000000')
border.right = Side(border_style='double', color='000000')
border.bottom = Side(border_style='double', color='000000')
# 设置边框 方式二 合并单元格时右侧边框设置会不成功
border = Border(left=Side(border_style='double', color='000000'),
top=Side(border_style='double', color='000000'),
right=Side(border_style='double', color='000000'),
bottom=Side(border_style='double', color='000000'))
# 使用下标索引时从0开始
cell = sheet[0][0]
cell = sheet.cell(row=1, column=1)
# 获取单元格信息
print(cell.value)
print(cell.coordinate)
# 获取字体信息
print(cell.font.size)
print(cell.font.b)
# 获取对齐信息
print(cell.alignment.vertical)
print(cell.alignment.horizontal)
# 判断是否为合并单元格
print(isinstance(cell, MergedCell))
# 获取边框信息 未初始化时不可直接获取
left = cell.border.left.style if cell.border.left else ''
top = cell.border.top.style if cell.border.top else ''
right = cell.border.right.style if cell.border.right else ''
bottom = cell.border.bottom.style if cell.border.bottom else ''
# 获取颜色信息 未初始化时不可直接获取
font_color = cell.font.color.rgb if 'str' not in str(cell.font.color.rgb) else ''
fill_color = cell.fill.fgColor.rgb if 'str' not in str(cell.fill.fgColor.rgb) else ''
# 获取合并单元格信息
merged_list = sheet.merged_cells.ranges
merged_cell = sheet.merged_cells.ranges[0]
print(merged_cell.min_row)
print(merged_cell.max_row)
print(merged_cell.min_col)
print(merged_cell.max_col)
其他Excel解析库问题
库名 | 说明 |
pandas | 支持数据处理,不支持格式获取 |
xlsx2html | 支持简单表格,不支持合并单元格 |
xlwings | 支持Windows和MacOS系统,Linux系统需要使用xlwings pro |
参考资料:
python将excel转html的table标签(含合并单元格)_python xlsx转html_weixin_50702169的博客-CSDN博客