C# Net Core openxml 提取 提出 取 word 文本 图片 Html Text Drawing
C# Net Core openxml 提取 提出 取 word 文本 图片 Html Text Drawing
注:只支持内嵌,不支持公式
------------------------------------------------
---------------文章最后为效果------------
------------------------------------------------
加入包:OpenXml
创建文件:Read.cs
复制下面全部代码到文件 Read.cs
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Xml;
using System.Xml.Xsl;
namespace YCBX.Office.WordXml
{
public class WordRead
{
public static List ReadToHtml(string wordPathStr)
{
return ReadToHtml(new FileStream(wordPathStr, FileMode.Open));
}
public static List ReadToHtml(Stream wordStream)
{
using (WordprocessingDocument doc = WordprocessingDocument.Open(wordStream, false))
{
//XmlWriterSettings settings = new XmlWriterSettings() { OmitXmlDeclaration = true, ConformanceLevel = ConformanceLevel.Auto,DoNotEscapeUriAttributes=true};
List paragraphHtmls = new List();
MainDocumentPart mainPart = doc.MainDocumentPart;
Body body = doc.MainDocumentPart.Document.Body;
//段落
foreach (var paragraph in body.Elements())
{
StringBuilder paragraphHtml = new StringBuilder();
//块
foreach (var run in paragraph.ChildElements)
{
if (run is Run)
{
foreach (OpenXmlElement openXmlElement