利用OpenXml SDK获取Word段落样式和文本

该小例子演示了如何使用 System.IO.Packaging,根据 RelationshipType 定位 Word 文档的主文档部件和样式部件,进而获取每个段落的样式和文本。

1、代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.IO.Packaging;
using System.IO;
using System.Xml;

namespace PackageRelationshipsGetParagraphs
{
    /// <summary>
    /// Extension methods that flatten a sequence into a single string.
    /// </summary>
    public static class LocalExtensions
    {
        /// <summary>
        /// Joins every string of <paramref name="source"/> back to back, with nothing in between.
        /// </summary>
        /// <param name="source">The strings to join, in enumeration order.</param>
        /// <returns>The concatenation of all items; an empty string for an empty sequence.</returns>
        public static string StringConcatenate(this IEnumerable<string> source)
        {
            // Identity projection: each item is appended exactly as it is.
            return StringConcatenate<string>(source, text => text);
        }

        /// <summary>
        /// Projects each item of <paramref name="source"/> to a string with
        /// <paramref name="func"/> and joins the results back to back.
        /// </summary>
        /// <param name="source">The items to project, in enumeration order.</param>
        /// <param name="func">Converts one item into the text to append.</param>
        /// <returns>The concatenation of all projected strings.</returns>
        public static string StringConcatenate<T>(this IEnumerable<T> source,
            Func<T, string> func)
        {
            StringBuilder buffer = new StringBuilder();
            foreach (T element in source)
            {
                string piece = func(element);
                buffer.Append(piece);
            }
            return buffer.ToString();
        }

        /// <summary>
        /// Joins every string of <paramref name="source"/>, writing
        /// <paramref name="separator"/> after each item (including the last one).
        /// </summary>
        /// <param name="source">The strings to join, in enumeration order.</param>
        /// <param name="separator">Text appended after every item.</param>
        /// <returns>The joined text; note that it ends with a trailing separator.</returns>
        public static string StringConcatenate(this IEnumerable<string> source, string separator)
        {
            // Identity projection: each item is appended exactly as it is.
            return StringConcatenate<string>(source, text => text, separator);
        }

        /// <summary>
        /// Projects each item of <paramref name="source"/> to a string with
        /// <paramref name="func"/> and joins the results, writing
        /// <paramref name="separator"/> after each one (including the last one).
        /// </summary>
        /// <param name="source">The items to project, in enumeration order.</param>
        /// <param name="func">Converts one item into the text to append.</param>
        /// <param name="separator">Text appended after every projected item.</param>
        /// <returns>The joined text; note that it ends with a trailing separator.</returns>
        public static string StringConcatenate<T>(this IEnumerable<T> source,
            Func<T, string> func, string separator)
        {
            StringBuilder buffer = new StringBuilder();
            foreach (T element in source)
            {
                string piece = func(element);
                buffer.Append(piece);
                buffer.Append(separator);
            }
            return buffer.ToString();
        }
    }

    /// <summary>
    /// Console sample: opens a .docx package, locates the main document part and its
    /// styles part through their relationship types, and prints the style id and text
    /// of every paragraph in the document body.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Returns the text of a paragraph element by concatenating the values of the
        /// "t" elements found under its direct "r" children.
        /// </summary>
        /// <param name="e">The paragraph element; its own namespace is used to look up "r" and "t".</param>
        /// <returns>The concatenated text; an empty string when no matching elements exist.</returns>
        public static string ParagraphText(XElement e)
        {
            XNamespace w = e.Name.Namespace;
            return e
                    .Elements(w + "r")
                    .Elements(w + "t")
                    .StringConcatenate(element => (string)element);
        }

        /// <summary>
        /// Loads the XML content of a package part into an <see cref="XDocument"/>,
        /// disposing the part stream and the reader once loading is complete.
        /// </summary>
        /// <param name="part">The package part whose content is XML.</param>
        /// <returns>The parsed document.</returns>
        private static XDocument LoadPartXml(PackagePart part)
        {
            using (Stream partStream = part.GetStream())
            using (XmlReader reader = XmlReader.Create(partStream))
            {
                return XDocument.Load(reader);
            }
        }

        /// <summary>
        /// Entry point. Reads "Test.docx" from the current directory and writes one
        /// line per paragraph to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string fileName = "Test.docx";

            const string documentRelationshipType =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
            const string stylesRelationshipType =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
            const string wordmlNamespace =
                "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
            XNamespace w = wordmlNamespace;

            XDocument xDoc = null;
            XDocument styleDoc = null;

            using (Package wdPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
            {
                PackageRelationship docPackageRelationship =
                    wdPackage.GetRelationshipsByType(documentRelationshipType).FirstOrDefault();

                if (docPackageRelationship != null)
                {
                    // Resolve the relationship target URI against the package root
                    // so the document part can be retrieved (expected /word/document.xml).
                    Uri documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative),
                        docPackageRelationship.TargetUri);
                    PackagePart documentPart = wdPackage.GetPart(documentUri);

                    // Load the main document XML from the part into an XDocument instance.
                    xDoc = LoadPartXml(documentPart);

                    // Find the style part; its relationship is owned by the document part,
                    // so the target URI is resolved relative to the document URI.
                    PackageRelationship styleRelation =
                        documentPart.GetRelationshipsByType(stylesRelationshipType)
                        .FirstOrDefault();
                    if (null != styleRelation)
                    {
                        Uri styleUri = PackUriHelper.ResolvePartUri(documentUri, styleRelation.TargetUri);
                        PackagePart stylePart = wdPackage.GetPart(styleUri);

                        // Load the style XML from the part into an XDocument instance.
                        styleDoc = LoadPartXml(stylePart);
                    }
                }
            }

            // Without a main document part there is nothing to enumerate.
            if (xDoc == null)
            {
                Console.Error.WriteLine("No main document part was found in {0}.", fileName);
                return;
            }

            // <w:style w:type="paragraph" w:default="1" w:styleId="a">
            //  <w:name w:val="Normal" />
            //  <w:qFormat />
            // </w:style>
            // Get the styleId of the default paragraph style ("a" in the sample above).
            // It stays null when the package has no styles part or no default
            // paragraph style is declared.
            string defaultStyle = null;
            if (styleDoc != null)
            {
                XElement defaultStyleNode = styleDoc
                    .Root
                    .Elements(w + "style")
                    .FirstOrDefault(style =>
                        (string)style.Attribute(w + "type") == "paragraph" &&
                        (string)style.Attribute(w + "default") == "1");
                if (defaultStyleNode != null)
                {
                    defaultStyle = (string)defaultStyleNode.Attribute(w + "styleId");
                }
            }

            // Find all paragraphs in the document. Elements(...) rather than Element(...)
            // yields an empty sequence, not a null dereference, when the body is missing.
            var paragraphs =
                from para in xDoc
                             .Root
                             .Elements(w + "body")
                             .Descendants(w + "p")
                let styleNode = para
                                .Elements(w + "pPr")
                                .Elements(w + "pStyle")
                                .FirstOrDefault()
                select new
                {
                    ParagraphNode = para,
                    // Paragraphs without an explicit pStyle fall back to the default style id.
                    StyleName = styleNode != null ? (string)styleNode.Attribute(w + "val") : defaultStyle
                };

            // Retrieve the text of each paragraph.
            var paraWithText =
                from para in paragraphs
                select new
                {
                    ParagraphNode = para.ParagraphNode,
                    StyleName = para.StyleName,
                    Text = ParagraphText(para.ParagraphNode)
                };
            foreach (var p in paraWithText)
            {
                Console.WriteLine("StyleName:{0}, Text:{1}", p.StyleName, p.Text);
            }

            Console.ReadKey();
        }
    }
}

2、运行结果:


  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值