C#在excel中读取图片 获取图片的单元格位置信息(仅限于xlsx)
主要的代码逻辑如下:
- 将excel使用zip直接打开
- 压缩包内图片信息主要存放在两个文件夹中(xl/media,xl/drawings)前者是图片本身,后者是图片的主要信息
- 在drawings文件夹下会存在若干以drawing开头的.xml文件,一个sheet页对应一个drawing.xml后缀按照数字序号递增(例如:第一页为drawing1.xml,第二页为drawing2.xml以此类推)
- 打开drawing.xml后,读取文档节点,节点含义对照表在末尾
- 在pic标签中获取到图片的rid后通过该id在同级目录下的_rels文件夹下找到对应的drawing.xml.rels中读取图片的存放地址(例如drawing1.xml对应drawing1.xml.rels)
twoCellAnchor | 图片信息的节点 节点下包含了图片的起始单元格位置 图片的id即[r:embed]的内容 存在多少个[twoCellAnchor]即存在多少个图片 |
twoCellAnchor/from | 图片左上角开始位置单元格信息 |
twoCellAnchor/to | 图片右下角结束位置单元格信息 (当前需求需要尽量保证一张图片存在一个单元格中,不允许跨单元格 因为不会对其做其他的处理) |
twoCellAnchor/pic | 图片的文件信息 主要读取 [blipFill->blip->r:embed]此信息为图片的id信息可以读取到文件在media文件夹中存放的位置 |
twoCellAnchor/xfrm | 待确定该信息,主要猜想为图片的大小? |
twoCellAnchor/prstGeom | 图片的插入方式 |
以下是封装完成的代码
public class ExcelImgHelper
{
#region 常量
/// <summary>
/// 文件id与文件路径文件夹路径
/// </summary>
private const string DrawingRels = "xl/drawings/_rels/drawing_id_.xml.rels";
/// <summary>
/// 图片信息文件
/// </summary>
private const string Drawing = "xl/drawings/drawing_id_.xml";
#region 图片信息主要标签
private const string twoCellAnchor = "twoCellAnchor";
private const string embed = "embed";
private const string link = "link";
private const string prst = "prst";
#endregion 图片信息主要标签
#endregion 常量
#region 路径信息
/// <summary>
/// excel文件地址
/// </summary>
private string ExcelPath { get; }
/// <summary>
/// 解压的文件夹
/// </summary>
private string ExcelZipPath { get; }
/// <summary>
/// 压缩包 *注意 与上方需要区分开当前路劲是压缩文件包.zip
/// </summary>
private string ExcelZipFilePath { get; }
#endregion 路径信息
private List<ExcelImgInfo> ExcelImgInfos = new List<ExcelImgInfo>();
public ExcelImgHelper(string filePath)
{
if (string.IsNullOrEmpty(filePath))
{
throw new ArgumentNullException(nameof(filePath));
}
//解压后文件夹存放的位置 与源文件同目录
var dir = Path.GetDirectoryName(filePath);
//获取文件名
var fileName = Path.GetFileNameWithoutExtension(filePath);
//压缩包路径
var zipFilePath = dir + "\\" + fileName + ".zip";
//复制为压缩包
File.Copy(filePath, zipFilePath);
//解压文件
if (UnZipFile(filePath, out string UnZipFilePath))
{
ExcelPath = filePath;
ExcelZipPath = UnZipFilePath;
ExcelZipFilePath = zipFilePath;
ExcelImgInfos = Analysis();
}
else
{
throw new Exception("解压失败");
}
}
/// <summary>
/// 解析excel中的图片
/// </summary>
/// <returns></returns>
private List<ExcelImgInfo> Analysis()
{
List<ExcelImgPathAndId> imgs = new List<ExcelImgPathAndId>();
List<ExcelImgInfo> excelImgInfos = new List<ExcelImgInfo>();
//读取所有图片以及位置信息
FindPicPathByID(ref imgs);
//默认命名空间
XNamespace xdr_namespace = "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing";
XNamespace a_namespace = "http://schemas.openxmlformats.org/drawingml/2006/main";
XNamespace r_namespace = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
//加载图片信息文档xml(替换的文件名与页数对应 此处可以优化为传入指定id)
XDocument xdoc = XDocument.Load(ExcelZipPath + Drawing.Replace("_id_", "1"));
//加载文档中的默认命名空间
var root = xdoc.Root;
foreach (var item in root.Attributes())
{
if (item.Name.LocalName == "xdr")
{
xdr_namespace = item.Value;
}
else if (item.Name.LocalName == "a")
{
a_namespace = item.Value;
}
}
//读取twoCellAnchor标签中的内容 **核心部分**
foreach (var node in xdoc.Descendants(xdr_namespace + twoCellAnchor))
{
//twoCellAnchor标签中子标签内容顺序永远为:from->to->pic
//所以此处顺序读取即可
var NodeFrom = (XElement)node.FirstNode;
var NodeTo = (XElement)NodeFrom.NextNode;
var NodePic = (XElement)NodeTo.NextNode;
//找到blipFill节点,并找到r节点的命名空间
var blipFill = (XElement)(((XElement)NodePic.FirstNode.NextNode).FirstNode);
r_namespace = blipFill.FirstAttribute.IsNamespaceDeclaration ? blipFill.FirstAttribute.Value : r_namespace;
//找到spPr节点
var spPr = (XElement)NodePic.FirstNode.NextNode.NextNode;
//获取图片Id
var ImgId = (blipFill.Attribute(r_namespace + embed) != null ? blipFill.Attribute(r_namespace + embed) : blipFill.Attribute(r_namespace + link)).Value.ToString();
//获取from
var From = new Position()
{
Col = int.Parse(((XElement)NodeFrom.FirstNode).Value),
ColOff = int.Parse(((XElement)NodeFrom.FirstNode.NextNode).Value),
Row = int.Parse(((XElement)NodeFrom.FirstNode.NextNode.NextNode).Value),
RowOff = int.Parse(((XElement)NodeFrom.FirstNode.NextNode.NextNode.NextNode).Value)
};
//获取to
var To = new Position()
{
Col = int.Parse(((XElement)NodeTo.FirstNode).Value),
ColOff = int.Parse(((XElement)NodeTo.FirstNode.NextNode).Value),
Row = int.Parse(((XElement)NodeTo.FirstNode.NextNode.NextNode).Value),
RowOff = int.Parse(((XElement)NodeTo.FirstNode.NextNode.NextNode.NextNode).Value)
};
//获取图片插入方式
var PrstGeom = ((XElement)spPr.FirstNode.NextNode).Attribute(prst).Value.ToString();
//
var xfrm = ((XElement)spPr.FirstNode);
var xfrm_off = ((XElement)xfrm.FirstNode);
var xfrm_ext = ((XElement)xfrm.FirstNode.NextNode);
List<int> xfrm_offData = new List<int>
{
int.Parse(xfrm_off.Attribute("x").Value.ToString()),
int.Parse(xfrm_off.Attribute("y").Value.ToString())
};
List<int> xfrm_extData = new List<int>
{
int.Parse(xfrm_ext.Attribute("cx").Value.ToString()),
int.Parse(xfrm_ext.Attribute("cy").Value.ToString())
};
//获取图片实际位置
var PathOfPicture = imgs.FirstOrDefault(e => e.Id == ImgId)?.Path;
//此处图片为相对位置需要处理成为绝对路径
PathOfPicture = PathOfPicture.Replace("../", ExcelZipPath + "xl\\").Replace("/", "\\");
//至此 所有需要使用的节点全部取出 开始组装数据
ExcelImgInfo excelImgInfo = new ExcelImgInfo(
imgId: ImgId,
from: From,
to: To,
prstGeom: PrstGeom,
xfrm_off: xfrm_offData,
xfrm_ext: xfrm_extData,
pathOfPicture: PathOfPicture);
excelImgInfos.Add(excelImgInfo);
}
//Dispose();
return excelImgInfos;
}
/// <summary>
/// 解压文件
/// </summary>
/// <param name="zipFilePath">压缩文件路径</param>
/// <param name="path">返回压缩文件夹路径</param>
/// <param name="unZipDir">解压文件存放路径,为空时默认与压缩文件同一级目录下,跟压缩文件同名的文件夹</param>
/// <returns></returns>
private bool UnZipFile(string zipFilePath, out string path, string unZipDir = null)
{
if (zipFilePath == string.Empty)
{
path = null;
return false;
}
if (!System.IO.File.Exists(zipFilePath))
{
path = null;
return false;
}
//解压文件夹为空时默认与压缩文件同一级目录下,跟压缩文件同名的文件夹
if (string.IsNullOrWhiteSpace(unZipDir))
unZipDir = zipFilePath.Replace(Path.GetFileName(zipFilePath), Path.GetFileNameWithoutExtension(zipFilePath));
if (!unZipDir.EndsWith("\\"))
unZipDir += "\\";
if (!Directory.Exists(unZipDir))
Directory.CreateDirectory(unZipDir);
try
{
using (ZipInputStream s = new ZipInputStream(System.IO.File.OpenRead(zipFilePath)))
{
ZipEntry theEntry;
while ((theEntry = s.GetNextEntry()) != null)
{
string directoryName = Path.GetDirectoryName(theEntry.Name);
string fileName = Path.GetFileName(theEntry.Name);
if (directoryName.Length > 0)
{
Directory.CreateDirectory(unZipDir + directoryName);
}
if (!directoryName.EndsWith("\\"))
directoryName += "\\";
if (fileName != String.Empty)
{
using (FileStream streamWriter = System.IO.File.Create(unZipDir + theEntry.Name))
{
int size = 2048;
byte[] data = new byte[2048];
while (true)
{
size = s.Read(data, 0, data.Length);
if (size > 0)
{
streamWriter.Write(data, 0, size);
}
else
{
break;
}
}
}
}
}
}
}
catch
{
path = null;
return false;
}
path = unZipDir;
return true;
}
/// <summary>
/// 获取全部文件id与路径
/// </summary>
/// <param name="imgs"></param>
/// <param name="_id"></param>
private void FindPicPathByID(ref List<ExcelImgPathAndId> imgs, int _id = 1)
{
string _file = Path.Combine(ExcelZipPath + DrawingRels.Replace("_id_", _id.ToString()));
if (!File.Exists(_file))
{
throw new DirectoryNotFoundException(_file);
}
XDocument xDoc = XDocument.Load(_file);
var root = xDoc.Root;
foreach (XElement node in root.Nodes())
{
var attrs = node.Attributes();
string Id = "";
string Target = "";
foreach (var attr in attrs)
{
if (attr.Name == "Id")
Id = attr.Value.ToString();
else if (attr.Name == "Target")
Target = attr.Value.ToString();
}
imgs.Add(new ExcelImgPathAndId()
{
Id = Id,
Path = Target
});
}
}
/// <summary>
/// 获取excel图片以及位置信息
/// </summary>
/// <returns></returns>
public List<ExcelImgInfo> GetAllImgs()
{
return ExcelImgInfos;
}
/// <summary>
/// 删除解压的文件
/// </summary>
public void Dispose()
{
File.Delete(ExcelZipFilePath);
DirectoryInfo di = new DirectoryInfo(ExcelZipPath);
di.Delete(true);
}
}
需要用到的辅助类
/// <summary>
/// 提取出来的图片信息类
/// </summary>
public class ExcelImgInfo
{
public ExcelImgInfo()
{
}
public ExcelImgInfo(string imgId, Position from, Position to, string prstGeom, List<int> xfrm_off, List<int> xfrm_ext, string pathOfPicture)
{
try
{
ImgId = imgId;
From = from;
To = to;
PrstGeom = prstGeom;
this.xfrm_off = xfrm_off;
this.xfrm_ext = xfrm_ext;
PathOfPicture = pathOfPicture;
if (File.Exists(PathOfPicture))
{
//将图片读取到内存中并且不锁定文件
FileStream fileStream = new FileStream(PathOfPicture, FileMode.Open, FileAccess.Read);
int byteLength = (int)fileStream.Length;
byte[] fileBytes = new byte[byteLength];
fileStream.Read(fileBytes, 0, byteLength);
fileStream.Close();
using (MemoryStream ms = new MemoryStream(fileBytes))
{
ms.Write(fileBytes, 0, fileBytes.Length);
ExcelImage = Image.FromStream(ms, true);
}
imgByteArray = fileBytes;
}
else
{
throw new FileNotFoundException("图片位置错误");
}
}
catch (Exception e)
{
throw new Exception($" 图片对象初始化时错误:[{imgId}]\n{e.Message}\n{e.StackTrace}");
}
}
/// <summary>
/// 图片Id
/// </summary>
public string ImgId { get; protected set; }
/// <summary>
/// 开始的单元格
/// </summary>
public Position From { get; protected set; }
/// <summary>
/// 结束的单元格
/// </summary>
public Position To { get; protected set; }
/// <summary>
/// 图片插入方式
/// </summary>
public string PrstGeom { get; protected set; }
/// <summary>
/// [0]:x
/// [1]:y
/// </summary>
public List<int> xfrm_off { get; protected set; } = new List<int>();
/// <summary>
/// [0]:cx
/// [1]:cy
/// </summary>
public List<int> xfrm_ext { get; protected set; } = new List<int>();
/// <summary>
/// 图片地址
/// </summary>
public string PathOfPicture { get; protected set; }
/// <summary>
/// 图片数据
/// </summary>
public Image ExcelImage { get; protected set; }
/// <summary>
/// 图片数组
/// </summary>
public byte[] imgByteArray { get; protected set; }
public void Dispose()
{
//ExcelImage.Dispose();
}
}
/// <summary>
/// 文件Id与路径
/// </summary>
public class ExcelImgPathAndId
{
public string Id { get; set; }
public string Path { get; set; }
}
/// <summary>
/// 位置信息
/// </summary>
public class Position
{
public int Col { get; set; }
public int ColOff { get; set; }
public int Row { get; set; }
public int RowOff { get; set; }
}