在使用HtmlToOpenXml库将html转换成word的时候有一些特殊标签是没有解析的。
比如我的html源中使用的锚链接是<a filepos=000456>link</a>,而目标是<a id="filepos456"></a>
该库对所有a标签的解析都写在HtmlConverterProcessTag.cs的ProcessLink方法里
代码如下
#region ProcessLink
/// <summary>
/// Processes an <c>&lt;a&gt;</c> tag. The <c>href</c> attribute is turned into a
/// <see cref="Hyperlink"/> (internal anchor or external relationship) and the processed
/// content of the tag is attached to it. When no usable hyperlink can be built, the
/// content of the tag is processed as ordinary chunks.
/// </summary>
/// <param name="en">Enumerator whose <c>Attributes</c> are read for <c>href</c>; it is
/// then handed to the chunk processors to consume everything up to <c>&lt;/a&gt;</c>.</param>
private void ProcessLink(HtmlEnumerator en)
{
    String att = en.Attributes["href"];
    Hyperlink h = null;
    Uri uri = null;

    if (!String.IsNullOrEmpty(att))
    {
        // is it an anchor?
        if (att[0] == '#' && att.Length > 1)
        {
            // Always accept _top anchor
            if (!this.ExcludeLinkAnchor || att == "#_top")
            {
                h = new Hyperlink() { History = true, Anchor = att.Substring(1) };
            }
        }
        // ensure the links does not start with javascript:
        else if (Uri.TryCreate(att, UriKind.Absolute, out uri) && uri.Scheme != "javascript")
        {
            HyperlinkRelationship extLink = mainPart.AddHyperlinkRelationship(uri, true);
            h = new Hyperlink() { History = true, Id = extLink.Id };
        }
    }

    if (h == null)
    {
        // link to a broken url, simply process the content of the tag
        ProcessHtmlChunks(en, "</a>");
        return;
    }

    AlternateProcessHtmlChunks(en, "</a>");

    if (elements.Count > 0)
    {
        // Let's see whether the link tag include an image inside its body.
        // If so, the Hyperlink OpenXmlElement is lost and we'll keep only the images
        // and applied a HyperlinkOnClick attribute.
        List<OpenXmlElement> imageInLink = elements.FindAll(e => e.HasChild<Drawing>());
        if (imageInLink.Count != 0)
        {
            for (int i = 0; i < imageInLink.Count; i++)
            {
                // Retrieves the "alt" attribute of the image and apply it as the link's tooltip.
                // Only the first descendant is read; foreach also disposes the enumerator.
                Drawing d = imageInLink[i].GetFirstChild<Drawing>();
                String alt = null;
                foreach (pic.NonVisualDrawingProperties props in d.Descendants<pic.NonVisualDrawingProperties>())
                {
                    alt = props.Description;
                    break;
                }

                d.InsertInDocProperties(
                    new a.HyperlinkOnClick() { Id = h.Id ?? h.Anchor, Tooltip = alt });
            }

            CompleteCurrentParagraph();
            AddParagraph(currentParagraph = htmlStyles.Paragraph.NewParagraph());
        }
        else
        {
            // Append the processed elements and put them to the Run of the Hyperlink
            h.Append(elements);

            if (!htmlStyles.DoesStyleExists("Hyperlink"))
            {
                htmlStyles.AddStyle("Hyperlink", new Style(
                    new StyleName() { Val = "Hyperlink" },
                    new UnhideWhenUsed(),
                    new StyleRunProperties(
                        new DocumentFormat.OpenXml.Wordprocessing.Color() { Val = "0000FF", ThemeColor = ThemeColorValues.Hyperlink },
                        new Underline() { Val = UnderlineValues.Single }
                    )
                ) { Type = StyleValues.Character, StyleId = "Hyperlink" });
            }

            // The processed content is not guaranteed to contain a Run; GetFirstChild
            // returns null in that case, so only style the run when there is one.
            Run firstRun = h.GetFirstChild<Run>();
            if (firstRun != null)
            {
                firstRun.InsertInProperties(
                    new RunStyle() { Val = htmlStyles.GetStyle("Hyperlink", StyleValues.Character) });
            }

            this.elements.Clear();
            // Append the hyperlink
            elements.Add(h);
        }
    }
}
#endregion
实际上该方法只解析了href属性,那么对于特殊文件的特殊标签该怎么处理呢?在不改变大框架的前提下,只能在该方法内部添加我们需要的代码
由于我的链接是<a filepos=000456>link</a>,锚点是<a id="filepos456"></a>,所以我只修改了att变量的取值方式
// Resolve the link target. "href" wins; without it, a "filepos" attribute is
// rewritten as an in-document anchor, and failing that the "id" attribute is used.
String att = en.Attributes["href"];
if (att == null)
{
    String filepos = en.Attributes["filepos"];
    if (filepos == null)
    {
        att = en.Attributes["id"];
    }
    else
    {
        // The anchor differs from the link value: it has no leading zeros and is
        // prefixed with "filepos", so the link value is rewritten to match.
        att = "#filepos" + filepos.TrimStart('0');
    }
}
添加定义
// Start/end markers of the bookmark for an anchor target.
// Both stay null until a later step assigns them.
BookmarkStart bs = null;
BookmarkEnd be = null;
赋值
// Turn an anchor target such as id="filepos456" into a bookmark named "filepos456".
// The whole prefix is compared: testing only att[0] == 'f' would also match hrefs
// such as "ftp://..." or "file://...", and indexing att[0] throws when att is null
// or empty (a tag carrying none of href / filepos / id).
const string bookmarkPrefix = "filepos";
if (att != null && att.StartsWith(bookmarkPrefix, StringComparison.Ordinal))
{
    // The w:id attribute of bookmarkStart/bookmarkEnd is a decimal number in the
    // WordprocessingML schema, so the numeric part after the prefix is used as the id;
    // the full text remains the bookmark name that hyperlinks refer to.
    string bookmarkId = att.Substring(bookmarkPrefix.Length);
    bool isDecimal = bookmarkId.Length > 0;
    foreach (char digit in bookmarkId)
    {
        if (digit < '0' || digit > '9')
        {
            isDecimal = false;
            break;
        }
    }

    if (isDecimal)
    {
        // NOTE(review): bookmark ids must be unique within the document; confirm these
        // cannot collide with ids of bookmarks created elsewhere by the converter.
        bs = new BookmarkStart();
        bs.Name = att;
        bs.Id = bookmarkId;
        be = new BookmarkEnd();
        be.Id = bookmarkId;
    }
}
最后将节点添加到文档中即可(注意:这段代码要放在 if (h == null) 分支的 return 之前,因为只有id的锚点标签不会生成Hyperlink,方法会在那里提前返回)
// Emit the bookmark pair, start marker first, when a bookmark was created.
if (bs != null)
{
    elements.AddRange(new OpenXmlElement[] { bs, be });
}
转换成功后文档已正确添加书签
祝贺该项目又解决掉一个bug!!!!!!!!