C# 将Word文档转换为HTML
日常生活中,我们经常在Word中编辑文字,它不仅能够保存纯文本,还可以保存文本的格式等信息。那么,如果要将一个Word文档中的内容展示在网页上,该怎么做呢?这里我提供了一个小工具,可以将Word文档转换为Html;需要显示时,直接访问生成的Html文件即可。废话不多说,下面看代码。
页面代码:
- <SPAN style="FONT-SIZE: 18px"><div>
- <input id="File1" type="file" runat="server"/>
- <asp:Button ID="btnConvert" runat="server" Text="转换" OnClick="btnConvert_Click" />
- </div></SPAN>
<div>
<input id="File1" type="file" runat="server"/>
<asp:Button ID="btnConvert" runat="server" Text="转换" OnClick="btnConvert_Click" />
</div>
C#代码:
- <SPAN style="FONT-SIZE: 18px">using System;
- using System.Data;
- using System.Configuration;
- using System.Collections;
- using System.Collections.Generic;
- using System.Linq;
- using System.Web;
- using System.Web.Security;
- using System.Web.UI;
- using System.Web.UI.WebControls;
- using System.Web.UI.WebControls.WebParts;
- using System.Web.UI.HtmlControls;
- using System.IO;
- protected void Page_Load(object sender, EventArgs e)
- {
- }
- /// <summary>
- /// 将word转换为Html
- /// </summary>
- /// <param name="sender"></param>
- /// <param name="e"></param>
- protected void btnConvert_Click(object sender, EventArgs e)
- {
- try
- {
- //上传
- //uploadWord(File1);
- //转换
- wordToHtml(File1);
- }
- catch (Exception ex)
- {
- throw ex;
- }
- finally
- {
- Response.Write("恭喜,转换成功!");
- }
- }
- //上传文件并转换为html wordToHtml(wordFilePath)
- ///<summary>
- ///上传文件并转存为html
- ///</summary>
- ///<param name="wordFilePath">word文档在客户机的位置</param>
- ///<returns>上传的html文件的地址</returns>
- public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
- {
- Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
- Type wordType = word.GetType();
- Microsoft.Office.Interop.Word.Documents docs = word.Documents;
- // 打开文件
- Type docsType = docs.GetType();
- //应当先把文件上传至服务器然后再解析文件为html
- string filePath = uploadWord(wordFilePath);
- //判断是否上传文件成功
- if (filePath == "0")
- return "0";
- //判断是否为word文件
- if (filePath == "1")
- return "1";
- object fileName = filePath;
- Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
- System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
- // 转换格式,另存为html
- Type docType = doc.GetType();
- string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +
- System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();
- // 判断指定目录下是否存在文件夹,如果不存在,则创建
- if (!Directory.Exists(Server.MapPath("~\\html")))
- {
- // 创建up文件夹
- Directory.CreateDirectory(Server.MapPath("~\\html"));
- }
- //被转换的html文档保存的位置
- string ConfigPath = HttpContext.Current.Server.MapPath("html/" + filename + ".html");
- object saveFileName = ConfigPath;
- /*下面是Microsoft Word 9 Object Library的写法,如果是10,可能写成:
- * docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
- * null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML});
- * 其它格式:
- * wdFormatHTML
- * wdFormatDocument
- * wdFormatDOSText
- * wdFormatDOSTextLineBreaks
- * wdFormatEncodedText
- * wdFormatRTF
- * wdFormatTemplate
- * wdFormatText
- * wdFormatTextLineBreaks
- * wdFormatUnicodeText
- */
- docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
- null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
- //关闭文档
- docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
- null, doc, new object[] { null, null, null });
- // 退出 Word
- wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
- //转到新生成的页面
- return ("/" + filename + ".html");
- }
- public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
- {
- if (uploadFiles.PostedFile != null)
- {
- string fileName = uploadFiles.PostedFile.FileName;
- int extendNameIndex = fileName.LastIndexOf(".");
- string extendName = fileName.Substring(extendNameIndex);
- string newName = "";
- try
- {
- //验证是否为word格式
- if (extendName == ".doc" || extendName == ".docx")
- {
- DateTime now = DateTime.Now;
- newName = now.DayOfYear.ToString() + uploadFiles.PostedFile.ContentLength.ToString();
- // 判断指定目录下是否存在文件夹,如果不存在,则创建
- if (!Directory.Exists(Server.MapPath("~\\wordTmp")))
- {
- // 创建up文件夹
- Directory.CreateDirectory(Server.MapPath("~\\wordTmp"));
- }
- //上传路径 指当前上传页面的同一级的目录下面的wordTmp路径
- uploadFiles.PostedFile.SaveAs(System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName));
- }
- else
- {
- return "1";
- }
- }
- catch
- {
- return "0";
- }
- //return "http://" + HttpContext.Current.Request.Url.Host + HttpContext.Current.Request.ApplicationPath + "/wordTmp/" + newName + extendName;
- return System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName);
- }
- else
- {
- return "0";
- }
- }</SPAN>
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
/// <summary>
/// Page load handler. It currently performs no work.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
protected void Page_Load(object sender, EventArgs e)
{
}
/// <summary>
/// Click handler of the convert button: uploads the file posted through File1,
/// converts it to HTML via wordToHtml, and writes a status message to the response.
/// </summary>
/// <remarks>
/// The success message is written only when wordToHtml returns something other than
/// its "0" (upload failed) or "1" (not a Word file) status codes. Exceptions thrown
/// during conversion are not caught here, so they propagate with their original
/// stack trace instead of being followed by a misleading success message.
/// </remarks>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
protected void btnConvert_Click(object sender, EventArgs e)
{
    string result = wordToHtml(File1);

    if (result == "0")
    {
        // wordToHtml passes through the upload failure code from uploadWord.
        Response.Write("文件上传失败,请重新选择文件后再试!");
        return;
    }

    if (result == "1")
    {
        // The posted file did not have a .doc / .docx extension.
        Response.Write("请选择Word文档(.doc 或 .docx)!");
        return;
    }

    Response.Write("恭喜,转换成功!");
}
/// <summary>
/// Uploads the posted Word document to the server (via uploadWord) and saves a
/// filtered-HTML copy of it in the application's "html" folder using Word automation.
/// </summary>
/// <param name="wordFilePath">File input control that carries the posted Word document.</param>
/// <returns>
/// "0" when uploadWord reports an upload failure, "1" when it reports a non-Word file;
/// otherwise "/" + generated file name + ".html".
/// NOTE(review): the HTML file itself is written under the "html" folder, so the returned
/// value does not include that folder; it is kept unchanged for existing callers.
/// </returns>
public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
{
    // Upload and validate first, so Word is never started for a request that
    // is going to be rejected (the early returns used to leave Word running).
    string filePath = uploadWord(wordFilePath);
    if (filePath == "0" || filePath == "1")
    {
        return filePath;
    }

    // Fixed-width timestamp: unpadded components are ambiguous (1/11 vs 11/1).
    string filename = System.DateTime.Now.ToString(
        "yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);

    // Resolve the output folder once, relative to the application root, so the
    // folder that is created is the same folder the file is saved into.
    string htmlDirectory = Server.MapPath("~/html");
    Directory.CreateDirectory(htmlDirectory); // no-op when the folder already exists

    object fileName = filePath;
    object saveFileName = Path.Combine(htmlDirectory, filename + ".html");

    Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        Microsoft.Office.Interop.Word.Documents docs = word.Documents;
        Type docsType = docs.GetType();

        // Late-bound Open(FileName, ConfirmConversions, ReadOnly).
        Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember(
            "Open",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            docs,
            new Object[] { fileName, true, true });
        Type docType = doc.GetType();
        try
        {
            // Save as filtered HTML. Other WdSaveFormat members (wdFormatHTML,
            // wdFormatRTF, wdFormatText, ...) select other output formats.
            docType.InvokeMember(
                "SaveAs",
                System.Reflection.BindingFlags.InvokeMethod,
                null,
                doc,
                new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
        }
        finally
        {
            // Close the document even when SaveAs fails.
            docType.InvokeMember(
                "Close",
                System.Reflection.BindingFlags.InvokeMethod,
                null,
                doc,
                new object[] { null, null, null });
        }
    }
    finally
    {
        // Always quit Word; otherwise a failed conversion leaves the process running.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
    }

    return "/" + filename + ".html";
}
/// <summary>
/// Saves the posted Word document into the application's "wordTmp" folder
/// under a newly generated file name.
/// </summary>
/// <param name="uploadFiles">File input control that carries the posted file.</param>
/// <returns>
/// The physical path of the saved file on success; "0" when nothing was posted or
/// saving failed; "1" when the posted file's extension is not .doc or .docx.
/// </returns>
public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
{
    if (uploadFiles == null || uploadFiles.PostedFile == null)
    {
        return "0";
    }

    try
    {
        string postedName = uploadFiles.PostedFile.FileName;
        if (string.IsNullOrEmpty(postedName))
        {
            // The form was submitted without choosing a file.
            return "0";
        }

        // Path.GetExtension returns an empty string for names without a dot,
        // where Substring(LastIndexOf(".")) used to throw.
        string extendName = Path.GetExtension(postedName);

        // Case-insensitive so that ".DOC" / ".Docx" are accepted as well.
        bool isWord = string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extendName, ".docx", StringComparison.OrdinalIgnoreCase);
        if (!isWord)
        {
            return "1";
        }

        // Resolve the folder once, relative to the application root, so the folder
        // that is created is the same folder the file is saved into.
        string uploadDirectory = Server.MapPath("~/wordTmp");
        Directory.CreateDirectory(uploadDirectory); // no-op when the folder already exists

        // A GUID keeps same-sized uploads on the same day from overwriting each other.
        string savedPath = Path.Combine(uploadDirectory, Guid.NewGuid().ToString("N") + extendName);
        uploadFiles.PostedFile.SaveAs(savedPath);
        return savedPath;
    }
    catch (Exception)
    {
        // Callers compare against "0" to detect any upload failure.
        return "0";
    }
}
效果图:
转换后的Html文件
这样就可以简单地在Html中展示Word文档中的内容,而不需要再自己进行编辑了。当然,如果有需要的话,可以将转换后的Html路径存入数据库,根据不同的条件直接进行访问。