原文出处:http://www.cnblogs.com/cdz-sky/p/4266471.html
Excel文件转HTML,返回相对路径:
这里可能会遇到一个问题:转换生成的HTML文件的编码可能导致浏览器无法正确解析(出现乱码),所以需要转码。转换代码如下(不同Word文档转换成HTML后,编码可能不同,这里统一改成GB2312编码):
已通过测试,很好用,谢谢博主的分享,以下是正文内容:
Word文件转html,返回相对路径:
/// <summary>
/// Converts a Word document to filtered HTML through Word automation, normalises the
/// generated page's charset, and returns the relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Word document to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative path
/// "../html/yyyyMMdd/&lt;timestamp&gt;.html" of the generated page.
/// </returns>
private string GetPathByDocToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Take ONE clock snapshot and format it with zero padding. Concatenating unpadded
    // parts from several DateTime.Now reads produced ambiguous names (1/11 vs 11/1)
    // and could straddle a second/minute boundary.
    DateTime dt = DateTime.Now;
    string filename = dt.ToString("yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);

    // Output goes to ../html/<yyyyMMdd>/ ; CreateDirectory is a no-op when it already exists.
    string strFilePath = "../html/" + dt.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture) + "/";
    Directory.CreateDirectory(Server.MapPath(strFilePath));

    // Physical location of the generated HTML document.
    string ConfigPath = Server.MapPath(strFilePath + filename + ".html");

    const System.Reflection.BindingFlags invoke = System.Reflection.BindingFlags.InvokeMethod;
    Microsoft.Office.Interop.Word.ApplicationClass word = null;
    Microsoft.Office.Interop.Word.Documents docs = null;
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        word = new Microsoft.Office.Interop.Word.ApplicationClass();
        docs = word.Documents;

        // Late-bound calls keep the code independent of the exact interop signature.
        // Arguments: FileName, ConfirmConversions, ReadOnly. ConfirmConversions is false
        // so Word never shows a "Convert File" dialog on an unattended server.
        doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
            "Open", invoke, null, docs, new object[] { strFile, false, true });

        // Save as filtered HTML. Other WdSaveFormat values include wdFormatHTML,
        // wdFormatDocument, wdFormatRTF, wdFormatText and wdFormatUnicodeText.
        doc.GetType().InvokeMember(
            "SaveAs", invoke, null, doc,
            new object[] { ConfigPath, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
    }
    finally
    {
        // Always close the document and quit Word, even when Open/SaveAs failed;
        // otherwise every failed request leaves a WINWORD.EXE process behind.
        try
        {
            if (doc != null)
            {
                doc.GetType().InvokeMember(
                    "Close", invoke, null, doc,
                    new object[] { Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges, null, null });
            }
        }
        finally
        {
            try
            {
                if (word != null)
                {
                    word.GetType().InvokeMember("Quit", invoke, null, word, null);
                }
            }
            finally
            {
                // Drop the COM references so the Word process can actually terminate.
                if (doc != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                }
                if (docs != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                }
                if (word != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
                }
            }
        }
    }

    // Normalise the charset declaration/encoding of the generated page.
    TransHTMLEncoding(ConfigPath);
    return strFilePath + filename + ".html";
}
Excel文件转HTML,返回相对路径:
/// <summary>
/// Converts an Excel workbook to HTML through Excel automation and returns the
/// relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Excel workbook to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative path
/// "../html/yyyyMMdd/&lt;timestamp&gt;.html" of the generated page.
/// </returns>
private string GetPathByXlsToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Take ONE clock snapshot and format it with zero padding. Concatenating unpadded
    // parts from several DateTime.Now reads produced ambiguous names (1/11 vs 11/1).
    DateTime dt = DateTime.Now;
    string filename = dt.ToString("yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);

    // Output goes to ../html/<yyyyMMdd>/ ; CreateDirectory is a no-op when it already exists.
    string strFilePath = "../html/" + dt.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture) + "/";
    Directory.CreateDirectory(Server.MapPath(strFilePath));

    string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
    object savefilename = ConfigPath;
    object ofmt = Microsoft.Office.Interop.Excel.XlFileFormat.xlHtml;

    Microsoft.Office.Interop.Excel.Application repExcel = null;
    Microsoft.Office.Interop.Excel.Workbooks workbooks = null;
    Microsoft.Office.Interop.Excel.Workbook workbook = null;
    uint excelPid = 0;
    try
    {
        repExcel = new Microsoft.Office.Interop.Excel.Application();

        // Remember the process id of THIS Excel instance (via its main window handle)
        // so that cleanup can never touch an Excel process owned by anyone else.
        GetExcelProcessIdFromWindow(new System.IntPtr(repExcel.Hwnd), out excelPid);

        // Hold the Workbooks collection in a local so it can be released explicitly;
        // chained access (app.Application.Workbooks) leaks intermediate COM wrappers.
        workbooks = repExcel.Workbooks;
        workbook = workbooks.Open(strFile, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);

        // Save the workbook as HTML.
        workbook.SaveAs(savefilename, ofmt, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);
    }
    finally
    {
        // Tear everything down even when Open/SaveAs failed, so a failed request
        // does not leave an EXCEL.EXE process behind.
        try
        {
            if (workbook != null)
            {
                workbook.Close(false, Type.Missing, Type.Missing);
            }
        }
        finally
        {
            try
            {
                if (repExcel != null)
                {
                    repExcel.Quit();
                }
            }
            finally
            {
                if (workbook != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                }
                if (workbooks != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbooks);
                }
                if (repExcel != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(repExcel);
                }

                // Safety net: if our own Excel instance is still alive shortly after Quit,
                // terminate it. The previous time-based check compared only the
                // seconds-of-minute component and could kill unrelated Excel processes.
                if (excelPid != 0)
                {
                    try
                    {
                        using (System.Diagnostics.Process own = System.Diagnostics.Process.GetProcessById((int)excelPid))
                        {
                            if (!own.WaitForExit(2000))
                            {
                                own.Kill();
                            }
                        }
                    }
                    catch (System.ArgumentException)
                    {
                        // Process already gone: nothing to clean up.
                    }
                    catch (System.InvalidOperationException)
                    {
                        // Process exited between the check and the kill.
                    }
                    catch (System.ComponentModel.Win32Exception)
                    {
                        // Process is terminating or cannot be accessed; best effort only.
                    }
                }
            }
        }
    }

    return strFilePath + filename + ".html";
}

/// <summary>
/// Win32 GetWindowThreadProcessId: yields the id of the process that owns a window.
/// </summary>
[System.Runtime.InteropServices.DllImport("user32.dll", EntryPoint = "GetWindowThreadProcessId")]
private static extern uint GetExcelProcessIdFromWindow(System.IntPtr hWnd, out uint processId);
这里可能会遇到一个问题:转换生成的HTML文件的编码可能导致浏览器无法正确解析(出现乱码),所以需要转码。转换代码如下(不同Word文档转换成HTML后,编码可能不同,这里统一改成GB2312编码):
/// <summary>
/// Rewrites an HTML file in place so that its Content-Type meta tag declares gb2312
/// and its bytes are actually encoded as gb2312.
/// </summary>
/// <param name="strFilePath">Physical path of the HTML file to rewrite.</param>
/// <remarks>
/// Failures are not propagated; the error message is shown to the user through a
/// client-side alert registered on the page.
/// </remarks>
private void TransHTMLEncoding(string strFilePath)
{
    try
    {
        byte[] raw = System.IO.File.ReadAllBytes(strFilePath);

        // Decode using the charset the file itself declares, falling back to the
        // system ANSI code page (the previous behaviour) when none can be determined.
        Encoding sourceEncoding = Encoding.GetEncoding(0);
        string head = System.Text.Encoding.ASCII.GetString(raw, 0, System.Math.Min(raw.Length, 4096));
        System.Text.RegularExpressions.Match declared = System.Text.RegularExpressions.Regex.Match(
            head,
            @"<meta[^>]*charset\s*=\s*[""']?\s*([A-Za-z0-9_\-]+)",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        if (declared.Success)
        {
            try
            {
                sourceEncoding = Encoding.GetEncoding(declared.Groups[1].Value);
            }
            catch (System.ArgumentException)
            {
                // Unknown charset name: keep the fallback encoding.
            }
            catch (System.NotSupportedException)
            {
                // Charset not available on this platform: keep the fallback encoding.
            }
        }

        string html;
        // A byte-order mark, when present, takes precedence over the sniffed charset.
        using (System.IO.StreamReader sr = new System.IO.StreamReader(new System.IO.MemoryStream(raw), sourceEncoding, true))
        {
            html = sr.ReadToEnd();
        }

        // Replace only the Content-Type declaration (quoted or unquoted). Rewriting
        // every <meta> tag would destroy unrelated tags and duplicate this one.
        html = System.Text.RegularExpressions.Regex.Replace(
            html,
            @"<meta\s+http-equiv\s*=\s*[""']?Content-Type[""']?[^>]*>",
            "<meta http-equiv=Content-Type content='text/html; charset=gb2312'>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        // Write real gb2312 bytes so the content matches the declared charset even
        // when the server's default code page is something else.
        using (System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.GetEncoding("gb2312")))
        {
            sw.Write(html);
        }
    }
    catch (Exception ex)
    {
        // Escape the message for use inside a single-quoted JavaScript string; raw
        // messages often contain quotes, backslashes (file paths) or line breaks.
        string safeMessage = ex.Message
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n")
            .Replace("</", "<\\/");
        Page.RegisterStartupScript("alt", "<script>alert('" + safeMessage + "')</script>");
    }
}
以下是已验证可正常使用的Word转HTML方法:
/// <summary>
/// Converts a Word document to HTML through Word automation and then normalises the
/// generated page's charset.
/// </summary>
/// <param name="sourcefile">Path of the source Word document.</param>
/// <param name="destfile">Path of the HTML file to generate.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="sourcefile"/> or <paramref name="destfile"/> is null or empty.
/// </exception>
private void ConvertWordToHtml(string sourcefile, string destfile)
{
    // Fail fast instead of starting Word only to get an opaque COM error.
    if (string.IsNullOrEmpty(sourcefile))
    {
        throw new System.ArgumentException("Source file path must not be null or empty.", "sourcefile");
    }
    if (string.IsNullOrEmpty(destfile))
    {
        throw new System.ArgumentException("Destination file path must not be null or empty.", "destfile");
    }

    const System.Reflection.BindingFlags invoke = System.Reflection.BindingFlags.InvokeMethod;
    Microsoft.Office.Interop.Word.Application word = null;
    Microsoft.Office.Interop.Word.Documents docs = null;
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        word = new Microsoft.Office.Interop.Word.ApplicationClass();
        docs = word.Documents;

        // Late-bound calls keep the code independent of the exact interop signature.
        // Arguments: FileName, ConfirmConversions, ReadOnly. ConfirmConversions is false
        // so Word never shows a "Convert File" dialog on an unattended server.
        doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
            "Open", invoke, null, docs, new object[] { sourcefile, false, true });

        // Save as HTML. Other WdSaveFormat values include wdFormatFilteredHTML,
        // wdFormatDocument, wdFormatRTF, wdFormatText and wdFormatUnicodeText.
        doc.GetType().InvokeMember(
            "SaveAs", invoke, null, doc,
            new object[] { destfile, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatHTML });
    }
    finally
    {
        // Always close the document and quit Word, even when Open/SaveAs failed;
        // otherwise every failed call leaves a WINWORD.EXE process behind.
        try
        {
            if (doc != null)
            {
                doc.GetType().InvokeMember(
                    "Close", invoke, null, doc,
                    new object[] { Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges, null, null });
            }
        }
        finally
        {
            try
            {
                if (word != null)
                {
                    word.GetType().InvokeMember("Quit", invoke, null, word, null);
                }
            }
            finally
            {
                // Drop the COM references so the Word process can actually terminate.
                if (doc != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                }
                if (docs != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                }
                if (word != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
                }
            }
        }
    }

    // Normalise the generated page to gb2312.
    TransHTMLEncoding(destfile);
}