HTML2Word
目前在 HTML 与 Word 之间转换有很多方案,这里只说明 Windows 下的几种:
- 使用 HtmlToOpenXml 库直接转换。问题是只支持基础内容,不支持 CSS(效果一般)。
- 使用 Xceed 收费版的 InsertContent 方法,它自带 RTF/HTML 转换功能(没用过)。
- 使用 Office 自带的“打开”和“另存为”功能间接转换(效果最接近)。
参考代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Xceed.Words.NET;
using Xceed.Document.NET;
using System.IO;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;
using MSWord = Microsoft.Office.Interop.Word;
namespace docxdemo
{
/// <summary>
/// Demo form with three buttons, each showing a different way of producing a Word document:
/// merging .docx files with DocX, converting HTML with HtmlToOpenXml, and converting HTML by
/// letting Word open the page and save it as .docx.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Inserts "file2.docx" into "file1.docx" three times, saves the result as "demo.docx"
    /// and opens it with the shell-associated application.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // Merge one document into another.
        using (var document = DocX.Load("file1.docx"))
        {
            for (var i = 0; i < 3; i++)
            {
                // The source document is reloaded for every insertion.
                using (var document2 = DocX.Load("file2.docx"))
                {
                    // Second argument: true appends at the end of the target document,
                    // false inserts at the beginning.
                    document.InsertDocument(document2, true, false, MergingMode.Local);
                }
            }

            // Save the merged result under a new name and show it.
            document.SaveAs("demo.docx");
            System.Diagnostics.Process.Start("demo.docx");
        }
    }

    /// <summary>
    /// Converts the HTML typed into <c>textBox1</c> to "file2.docx" using the HtmlToOpenXml
    /// library, then opens the generated file.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        // Convert HTML to Word through a third-party library.
        const string filename = "file2.docx";
        string html = textBox1.Text;

        if (File.Exists(filename))
        {
            File.Delete(filename);
        }

        // The package is built in memory and flushed to disk only after it has been closed.
        using (MemoryStream generatedDocument = new MemoryStream())
        {
            using (WordprocessingDocument package = WordprocessingDocument.Create(generatedDocument, WordprocessingDocumentType.Document))
            {
                MainDocumentPart mainPart = package.MainDocumentPart;
                if (mainPart == null)
                {
                    // Create the main part with an empty body for the converter to work on.
                    mainPart = package.AddMainDocumentPart();
                    new DocumentFormat.OpenXml.Wordprocessing.Document(new Body()).Save(mainPart);
                }

                // NOTE(review): ParseHtml is presumably the call that adds the converted
                // content to the document body - confirm against the library version in use.
                HtmlConverter converter = new HtmlConverter(mainPart);
                converter.ParseHtml(html);
                mainPart.Document.Save();
            }

            File.WriteAllBytes(filename, generatedDocument.ToArray());
        }

        System.Diagnostics.Process.Start(filename);
    }

    /// <summary>
    /// Converts the HTML typed into <c>textBox1</c> to "msdemo.docx" by writing it to
    /// "demo.htm", letting Word open that file as a web page and save it in the default
    /// document format, then opens the result.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        // Convert HTML to Word through Office's own open / save-as functionality.
        // Word is handed absolute paths, so both are resolved once up front.
        string source = Path.GetFullPath("demo.htm");
        string target = Path.GetFullPath("./msdemo.docx");

        // WriteAllBytes creates or truncates the file, so a shorter payload never leaves
        // stale bytes from an earlier run behind (FileMode.OpenOrCreate did not truncate).
        var txt = $"<html>{textBox1.Text}</html>";
        File.WriteAllBytes(source, Encoding.UTF8.GetBytes(txt));

        ConvertHtmlFileToDocxWithWord(source, target);

        System.Diagnostics.Process.Start(target);
    }

    /// <summary>
    /// Starts a Word instance, opens <paramref name="sourcePath"/> as a UTF-8 web page and
    /// saves it to <paramref name="targetPath"/> in Word's default document format.
    /// The document is closed, Word is quit and the COM references are released even when
    /// opening or saving throws.
    /// </summary>
    /// <param name="sourcePath">Absolute path of the HTML file to open.</param>
    /// <param name="targetPath">Absolute path of the document to write.</param>
    private static void ConvertHtmlFileToDocxWithWord(string sourcePath, string targetPath)
    {
        MSWord._Application newApp = new MSWord.Application();
        try
        {
            MSWord.Documents documents = newApp.Documents;
            try
            {
                // Arguments that are not named here are left at their COM defaults.
                MSWord.Document opened = documents.Open(
                    FileName: sourcePath,
                    ConfirmConversions: false,
                    Format: MSWord.WdOpenFormat.wdOpenFormatWebPages,
                    Encoding: Microsoft.Office.Core.MsoEncoding.msoEncodingUTF8);
                try
                {
                    opened.SaveAs2(
                        FileName: targetPath,
                        FileFormat: MSWord.WdSaveFormat.wdFormatDocumentDefault);
                }
                finally
                {
                    opened.Close(MSWord.WdSaveOptions.wdDoNotSaveChanges);
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(opened);
                }
            }
            finally
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(documents);
            }
        }
        finally
        {
            // Always quit; otherwise a failed conversion leaves a hidden Word process running.
            newApp.Quit(MSWord.WdSaveOptions.wdDoNotSaveChanges);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(newApp);
        }
    }
}
}