目前通过c#实现excel转pdf无非是用通过office组件或者Spire.XLS ,其中office组件需要单独下载安装,而Spire.XLS 并非开源转换效率也算很高,基于以上原因本文基于轻量级方法高效实现excel转pdf。
本文实现excel转pdf文件引用了NPOI类库来读取excel文件数据,用itextsharp类库来实现动态生成pdf文档,话不多说直接贴代码,如下是转换的核心代码:
/// <summary>
/// 生成pdf文件
/// </summary>
/// <param name="excelContent">excel文件的字节流</param>
/// <returns></returns>
public byte[] Render(byte[] excelContent)
{
if (excelContent == null)
return null;
byte[] result = null;
MemoryStream stream = new MemoryStream(excelContent);
HSSFWorkbook hw = new HSSFWorkbook(stream);//创建excel操作对象
//创建pdf文档对象,设置pdf文档的纸张大小
Document doc;
if (_isLandscape)
{
doc = new Document(_pageSize.Rotate());//pdf文档设置为横向
}
else
{
doc = new Document(_pageSize);
}
doc.SetMargins(0, 0, _marginTop, _marginBottom);//设置文档的页边距
try
{
ISheet sheet = hw.GetSheetAt(0);//获取excel中的第一个sheet,如果excel中有多个sheet,此处需要进行循环
stream = new MemoryStream();
PdfWriter pdfWriter = PdfWriter.GetInstance(doc, stream);
BaseFont bsFont = BaseFont.CreateFont(_fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);//创建pdf文档字体
doc.Open();
float[] widths = GetColWidth(sheet);//获取excel中每列的宽度
PdfPTable table = new PdfPTable(widths);//设置pdf中表格每列的宽度
table.WidthPercentage = _widthPercent;
int colCount = widths.Length;
//通过循环读取excel内容,并将读取的数据写入pdf文档中
for (int r = sheet.FirstRowNum; r < sheet.PhysicalNumberOfRows; r++)
{
IRow row = sheet.GetRow(r);
if (row != null)
{
for (int c = row.FirstCellNum; (c < row.PhysicalNumberOfCells || c < colCount) && c > -1; c++)
{
if (c >= row.PhysicalNumberOfCells)
{
PdfPCell cell = new PdfPCell(new Phrase(""));
cell.Border = 0;
table.AddCell(cell);
continue;
}
ICell excelCell = row.Cells[c];
string value = "";
string horAlign = excelCell.CellStyle.Alignment.ToString();
string verAlign = excelCell.CellStyle.VerticalAlignment.ToString();
if (excelCell != null)
{
value = excelCell.ToString().Trim();
if (!string.IsNullOrEmpty(value))
{
string dataFormat = excelCell.CellStyle.GetDataFormatString();
if (dataFormat != "General" && dataFormat != "@")//数据不为常规或者文本
{
try
{
string numStyle = GetNumStyle(dataFormat);
value = string.Format("{0:" + numStyle + "}", excelCell.NumericCellValue);//如果解析不成功则按字符串处理
}
catch { }
}
}
}
IFont excelFont = excelCell.CellStyle.GetFont(hw);
HSSFPalette palette = hw.GetCustomPalette();
HSSFColor color = null;
Color ftColor = Color.BLACK;
short ft = excelFont.Color;
color = palette.GetColor(ft);
if (color != null && ft != 64)
{
byte[] ftRGB = color.RGB;
ftColor = new Color(ftRGB[0], ftRGB[1], ftRGB[2]);
}
bool isBorder = HasBorder(excelCell);
Font pdfFont = new Font(bsFont, excelFont.FontHeightInPoints, excelFont.IsBold ? 1 : 0, ftColor);
PdfPCell pdfCell = new PdfPCell(new Phrase(value, pdfFont));
List<PicturesInfo> info = sheet.GetAllPictureInfos(r, r, c, c, true);//判断单元格中是否有图片,不支持图片跨单元格
if (info.Count > 0)
{
pdfCell = new PdfPCell(Image.GetInstance(info[0].PictureData));
}
short bg = excelCell.CellStyle.FillForegroundColor;
color = palette.GetColor(bg);
if (color != null && bg != 64)
{
byte[] bgRGB = color.RGB;
pdfCell.BackgroundColor = new Color(bgRGB[0], bgRGB[1], bgRGB[2]);
}
if (!isBorder)
{
pdfCell.Border = 0;
}
else
{
short bd = excelCell.CellStyle.TopBorderColor;
color = palette.GetColor(bd);
if (color != null && bd != 64)
{
byte[] bdRGB = color.RGB;
pdfCell.BorderColor = new Color(bdRGB[0], bdRGB[1], bdRGB[2]);
}
}
pdfCell.MinimumHeight = row.HeightInPoints;
pdfCell.HorizontalAlignment = GetCellHorAlign(horAlign);
pdfCell.VerticalAlignment = GetCellVerAlign(verAlign);
if (excelCell.IsMergedCell)//合并单元格
{
int[] span = GetMergeCellSpan(sheet, r, c);
if (span[0] == 1 && span[1] == 1)//合并过的单元直接跳过
continue;
pdfCell.Rowspan = span[0];
pdfCell.Colspan = span[1];
c = c + span[1] - 1;//直接跳过合并过的单元格
}
table.AddCell(pdfCell);
}
}
else
{//空行
PdfPCell pdfCell = new PdfPCell(new Phrase(""));
pdfCell.Border = 0;
pdfCell.MinimumHeight = 13;
table.AddCell(pdfCell);
}
}
doc.Add(table);
doc.Close();
result = stream.ToArray();
}
finally
{
hw.Close();
stream.Close();
}
return result;
}
在将excel转为pdf时为了保持和excel的样式,需要读取excel的样式:
1、获取单元格数字格式:
/// <summary>
/// 获取单元格的数字格式
/// </summary>
/// <param name="style"></param>
/// <returns></returns>
private string GetNumStyle(string style)
{
if (string.IsNullOrEmpty(style))
{
throw new ArgumentException("");
}
if (style.IndexOf('%') > -1)
{
return style;
}
else
{
return style.Substring(0, style.Length - 2);
}
}
2、获取excel每列宽度的比例,在初始化pdf文档可以用到:
/// <summary>
/// 获取列的宽度比例
/// </summary>
/// <param name="sheet"></param>
/// <returns></returns>
private float[] GetColWidth(ISheet sheet)
{
int rowNum = GetMaxColRowNum(sheet);
IRow row = sheet.GetRow(rowNum);
int cellCount = row.PhysicalNumberOfCells;
int[] colWidths = new int[cellCount];
float[] colWidthPer = new float[cellCount];
int sum = 0;
for (int i = row.FirstCellNum; i < cellCount; i++)
{
ICell cell = row.Cells[i];
if (cell != null)
{
colWidths[i] = sheet.GetColumnWidth(i);
sum += sheet.GetColumnWidth(i);
}
}
for (int i = row.FirstCellNum; i < cellCount; i++)
{
colWidthPer[i] = (float)colWidths[i] / sum * 100;
}
return colWidthPer;
}
3、设置pdf中单元对齐方式以及边框:
/// <summary>
/// 单元格水平对齐方式
/// </summary>
/// <param name="align"></param>
/// <returns></returns>
private int GetCellHorAlign(string align)
{
switch (align)
{
case "Right":
return Element.ALIGN_RIGHT;
case "Center":
return Element.ALIGN_CENTER;
case "Left":
return Element.ALIGN_LEFT;
default:
return Element.ALIGN_LEFT;
}
}
/// <summary>
/// 单元格垂直对齐方式
/// </summary>
/// <param name="align"></param>
/// <returns></returns>
private int GetCellVerAlign(string align)
{
switch (align)
{
case "Center":
return Element.ALIGN_MIDDLE;
case "Top":
return Element.ALIGN_TOP;
case "Bottom":
return Element.ALIGN_BOTTOM;
default:
return Element.ALIGN_MIDDLE;
}
}
private bool HasBorder(ICell cell)
{
int bottom = cell.CellStyle.BorderBottom != 0 ? 1 : 0;
int top = cell.CellStyle.BorderTop != 0 ? 1 : 0;
int left = cell.CellStyle.BorderLeft != 0 ? 1 : 0;
int right = cell.CellStyle.BorderRight != 0 ? 1 : 0;
return (bottom + top + left + right) > 2;
}
/// <summary>
/// 合并单元格的rowspan、colspan
/// </summary>
/// <param name="sheet"></param>
/// <param name="rowNum"></param>
/// <param name="colNum"></param>
/// <returns></returns>
private int[] GetMergeCellSpan(ISheet sheet, int rowNum, int colNum)
{
int[] span = { 1, 1 };
int regionsCount = sheet.NumMergedRegions;
for (int i = 0; i < regionsCount; i++)
{
CellRangeAddress range = sheet.GetMergedRegion(i);
sheet.IsMergedRegion(range);
if (range.FirstRow == rowNum && range.FirstColumn == colNum)
{
span[0] = range.LastRow - range.FirstRow + 1;
span[1] = range.LastColumn - range.FirstColumn + 1;
break;
}
}
return span;
}
4、获取excel中单元的合并信息,以便在pdf中实现单元格的合并:
/// <summary>
/// 合并单元格的rowspan、colspan
/// </summary>
/// <param name="sheet"></param>
/// <param name="rowNum"></param>
/// <param name="colNum"></param>
/// <returns></returns>
private int[] GetMergeCellSpan(ISheet sheet, int rowNum, int colNum)
{
int[] span = { 1, 1 };
int regionsCount = sheet.NumMergedRegions;
for (int i = 0; i < regionsCount; i++)
{
CellRangeAddress range = sheet.GetMergedRegion(i);
sheet.IsMergedRegion(range);
if (range.FirstRow == rowNum && range.FirstColumn == colNum)
{
span[0] = range.LastRow - range.FirstRow + 1;
span[1] = range.LastColumn - range.FirstColumn + 1;
break;
}
}
return span;
}
如下是获取excel单元格中的图片信息代码(目前支持单元格内图片转换):
//单元格中图片信息类
public class PicturesInfo
{
public int MinRow { get; set; }
public int MaxRow { get; set; }
public int MinCol { get; set; }
public int MaxCol { get; set; }
public string Mime { get; set; }
public Byte[] PictureData { get; private set; }
public PicturesInfo(int minRow, int maxRow, int minCol, int maxCol, Byte[] pictureData, string mime)
{
this.MinRow = minRow;
this.MaxRow = maxRow;
this.MinCol = minCol;
this.MaxCol = maxCol;
this.PictureData = pictureData;
this.Mime= mime;
}
}
//NPOI扩展方法类
public static NPOIExtend
{
public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet)
{
return sheet.GetAllPictureInfos(null, null, null, null, true);
}
public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
{
if (sheet is HSSFSheet)
{
return GetAllPictureInfos((HSSFSheet)sheet, minRow, maxRow, minCol, maxCol, onlyInternal);
}
else if (sheet is XSSFSheet)
{
return GetAllPictureInfos((XSSFSheet)sheet, minRow, maxRow, minCol, maxCol, onlyInternal);
}
else
{
throw new Exception("未处理类型,没有为该类型添加:GetAllPicturesInfos()扩展方法!");
}
}
private static List<PicturesInfo> GetAllPictureInfos(HSSFSheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
{
List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
var shapeContainer = sheet.DrawingPatriarch as HSSFShapeContainer;
if (null != shapeContainer)
{
var shapeList = shapeContainer.Children;
foreach (var shape in shapeList)
{
if (shape is HSSFPicture && shape.Anchor is HSSFClientAnchor)
{
var picture = (HSSFPicture)shape;
var anchor = (HSSFClientAnchor)shape.Anchor;
if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
{
picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data, picture.PictureData.MimeType));
}
}
}
}
return picturesInfoList;
}
private static List<PicturesInfo> GetAllPictureInfos(XSSFSheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
{
List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
var documentPartList = sheet.GetRelations();
foreach (var documentPart in documentPartList)
{
if (documentPart is XSSFDrawing)
{
var drawing = (XSSFDrawing)documentPart;
var shapeList = drawing.GetShapes();
foreach (var shape in shapeList)
{
if (shape is XSSFPicture)
{
var picture = (XSSFPicture)shape;
var anchor = picture.GetPreferredSize();
if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
{
picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data, picture.PictureData.MimeType));
}
}
}
}
}
return picturesInfoList;
}
private static bool IsInternalOrIntersect(int? rangeMinRow, int? rangeMaxRow, int? rangeMinCol, int? rangeMaxCol,
int pictureMinRow, int pictureMaxRow, int pictureMinCol, int pictureMaxCol, bool onlyInternal)
{
int _rangeMinRow = rangeMinRow ?? pictureMinRow;
int _rangeMaxRow = rangeMaxRow ?? pictureMaxRow;
int _rangeMinCol = rangeMinCol ?? pictureMinCol;
int _rangeMaxCol = rangeMaxCol ?? pictureMaxCol;
if (onlyInternal)
{
return (_rangeMinRow <= pictureMinRow && _rangeMaxRow >= pictureMaxRow &&
_rangeMinCol <= pictureMinCol && _rangeMaxCol >= pictureMaxCol);
}
else
{
return ((Math.Abs(_rangeMaxRow - _rangeMinRow) + Math.Abs(pictureMaxRow - pictureMinRow) >= Math.Abs(_rangeMaxRow + _rangeMinRow - pictureMaxRow - pictureMinRow)) &&
(Math.Abs(_rangeMaxCol - _rangeMinCol) + Math.Abs(pictureMaxCol - pictureMinCol) >= Math.Abs(_rangeMaxCol + _rangeMinCol - pictureMaxCol - pictureMinCol)));
}
}
}
以下是完整 的代码:
using iTextSharp.text;
using iTextSharp.text.pdf;
using NPOI.HSSF.UserModel;
using NPOI.HSSF.Util;
using NPOI.SS.UserModel;
using NPOI.SS.Util;
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Excel2PDF
{
public class Excel2PDF
{
private float _widthPercent = 88;//设置pdf内容占文档的宽度比例
private bool _isLandscape = false;//设置pdf是否横向
private string _fontPath = @"C:\Windows\Fonts\simsun.ttc,0";//使itextsharp支持中文
private float _marginTop = 15;
private float _marginBottom = 15;
private Rectangle _pageSize = PageSize.A4;//设置pdf文档纸张大小
//以上设置均可在配置文件中进行设置,在此就不介绍
public RptRenderPDF(float widthPercent, bool isLandscape)
{
this._widthPercent = widthPercent;
this._isLandscape = isLandscape;
}
/// <summary>
/// 生成pdf文件
/// </summary>
/// <param name="excelContent"></param>
/// <returns></returns>
public byte[] Render(byte[] excelContent)
{
if (excelContent == null)
return null;
byte[] result = null;
MemoryStream stream = new MemoryStream(excelContent);
HSSFWorkbook hw = new HSSFWorkbook(stream);
Document doc;
if (_isLandscape)
{
doc = new Document(_pageSize.Rotate());
}
else
{
doc = new Document(_pageSize);
}
doc.SetMargins(0, 0, _marginTop, _marginBottom);
try
{
ISheet sheet = hw.GetSheetAt(0);
stream = new MemoryStream();
PdfWriter pdfWriter = PdfWriter.GetInstance(doc, stream);
BaseFont bsFont = BaseFont.CreateFont(_fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
doc.Open();
float[] widths = GetColWidth(sheet);
PdfPTable table = new PdfPTable(widths);
table.WidthPercentage = _widthPercent;
int colCount = widths.Length;
for (int r = sheet.FirstRowNum; r < sheet.PhysicalNumberOfRows; r++)
{
IRow row = sheet.GetRow(r);
if (row != null)
{
for (int c = row.FirstCellNum; (c < row.PhysicalNumberOfCells || c < colCount) && c > -1; c++)
{
if (c >= row.PhysicalNumberOfCells)
{
PdfPCell cell = new PdfPCell(new Phrase(""));
cell.Border = 0;
table.AddCell(cell);
continue;
}
ICell excelCell = row.Cells[c];
string value = "";
string horAlign = excelCell.CellStyle.Alignment.ToString();
string verAlign = excelCell.CellStyle.VerticalAlignment.ToString();
if (excelCell != null)
{
value = excelCell.ToString().Trim();
if (!string.IsNullOrEmpty(value))
{
string dataFormat = excelCell.CellStyle.GetDataFormatString();
if (dataFormat != "General" && dataFormat != "@")//数据不为常规或者文本
{
try
{
string numStyle = GetNumStyle(dataFormat);
value = string.Format("{0:" + numStyle + "}", excelCell.NumericCellValue);//如果解析不成功则按字符串处理
}
catch { }
}
}
}
IFont excelFont = excelCell.CellStyle.GetFont(hw);
HSSFPalette palette = hw.GetCustomPalette();
HSSFColor color = null;
Color ftColor = Color.BLACK;
short ft = excelFont.Color;
color = palette.GetColor(ft);
if (color != null && ft != 64)
{
byte[] ftRGB = color.RGB;
ftColor = new Color(ftRGB[0], ftRGB[1], ftRGB[2]);
}
bool isBorder = HasBorder(excelCell);
Font pdfFont = new Font(bsFont, excelFont.FontHeightInPoints, excelFont.IsBold ? 1 : 0, ftColor);
PdfPCell pdfCell = new PdfPCell(new Phrase(value, pdfFont));
List<PicturesInfo> info = sheet.GetAllPictureInfos(r, r, c, c, true);//判断单元格中是否有图片,不支持图片跨单元格
if (info.Count > 0)
{
pdfCell = new PdfPCell(Image.GetInstance(info[0].PictureData));
}
short bg = excelCell.CellStyle.FillForegroundColor;
color = palette.GetColor(bg);
if (color != null && bg != 64)
{
byte[] bgRGB = color.RGB;
pdfCell.BackgroundColor = new Color(bgRGB[0], bgRGB[1], bgRGB[2]);
}
if (!isBorder)
{
pdfCell.Border = 0;
}
else
{
short bd = excelCell.CellStyle.TopBorderColor;
color = palette.GetColor(bd);
if (color != null && bd != 64)
{
byte[] bdRGB = color.RGB;
pdfCell.BorderColor = new Color(bdRGB[0], bdRGB[1], bdRGB[2]);
}
}
pdfCell.MinimumHeight = row.HeightInPoints;
pdfCell.HorizontalAlignment = GetCellHorAlign(horAlign);
pdfCell.VerticalAlignment = GetCellVerAlign(verAlign);
if (excelCell.IsMergedCell)
{
int[] span = GetMergeCellSpan(sheet, r, c);
if (span[0] == 1 && span[1] == 1)//合并过的单元直接跳过
continue;
pdfCell.Rowspan = span[0];
pdfCell.Colspan = span[1];
c = c + span[1] - 1;//直接跳过合并过的单元格
}
table.AddCell(pdfCell);
}
}
else
{//空行
PdfPCell pdfCell = new PdfPCell(new Phrase(""));
pdfCell.Border = 0;
pdfCell.MinimumHeight = 13;
table.AddCell(pdfCell);
}
}
doc.Add(table);
doc.Close();
result = stream.ToArray();
}
finally
{
hw.Close();
stream.Close();
}
return result;
}
/// <summary>
/// 获取单元格的数字格式
/// </summary>
/// <param name="style"></param>
/// <returns></returns>
private string GetNumStyle(string style)
{
if (string.IsNullOrEmpty(style))
{
throw new ArgumentException("");
}
if (style.IndexOf('%') > -1)
{
return style;
}
else
{
return style.Substring(0, style.Length - 2);
}
}
/// <summary>
/// 获取列的宽度比例
/// </summary>
/// <param name="sheet"></param>
/// <returns></returns>
private float[] GetColWidth(ISheet sheet)
{
int rowNum = GetMaxColRowNum(sheet);
IRow row = sheet.GetRow(rowNum);
int cellCount = row.PhysicalNumberOfCells;
int[] colWidths = new int[cellCount];
float[] colWidthPer = new float[cellCount];
int sum = 0;
for (int i = row.FirstCellNum; i < cellCount; i++)
{
ICell cell = row.Cells[i];
if (cell != null)
{
colWidths[i] = sheet.GetColumnWidth(i);
sum += sheet.GetColumnWidth(i);
}
}
for (int i = row.FirstCellNum; i < cellCount; i++)
{
colWidthPer[i] = (float)colWidths[i] / sum * 100;
}
return colWidthPer;
}
/// <summary>
/// 取EXCEL中列数最大的行
/// </summary>
/// <param name="sheet"></param>
/// <returns></returns>
private int GetMaxColRowNum(ISheet sheet)
{
int rowNum = 0;
int maxCol = 0;
for (int r = sheet.FirstRowNum; r < sheet.PhysicalNumberOfRows; r++)
{
IRow row = sheet.GetRow(r);
if (row != null && maxCol < row.PhysicalNumberOfCells)
{
maxCol = row.PhysicalNumberOfCells;
rowNum = r;
}
}
return rowNum;
}
/// <summary>
/// 单元格水平对齐方式
/// </summary>
/// <param name="align"></param>
/// <returns></returns>
private int GetCellHorAlign(string align)
{
switch (align)
{
case "Right":
return Element.ALIGN_RIGHT;
case "Center":
return Element.ALIGN_CENTER;
case "Left":
return Element.ALIGN_LEFT;
default:
return Element.ALIGN_LEFT;
}
}
/// <summary>
/// 单元格垂直对齐方式
/// </summary>
/// <param name="align"></param>
/// <returns></returns>
private int GetCellVerAlign(string align)
{
switch (align)
{
case "Center":
return Element.ALIGN_MIDDLE;
case "Top":
return Element.ALIGN_TOP;
case "Bottom":
return Element.ALIGN_BOTTOM;
default:
return Element.ALIGN_MIDDLE;
}
}
private bool HasBorder(ICell cell)
{
int bottom = cell.CellStyle.BorderBottom != 0 ? 1 : 0;
int top = cell.CellStyle.BorderTop != 0 ? 1 : 0;
int left = cell.CellStyle.BorderLeft != 0 ? 1 : 0;
int right = cell.CellStyle.BorderRight != 0 ? 1 : 0;
return (bottom + top + left + right) > 2;
}
}
}