C#.NET实现Word或Excel文件转为HTML文件

原文出处:http://www.cnblogs.com/cdz-sky/p/4266471.html

已通过测试,很好用,谢谢博主的分享,以下是正文内容:


Word文件转html,返回相对路径:

/// <summary>
/// Converts a Word document to filtered HTML, stores it under "../html/yyyyMMdd/" and
/// returns the relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Word document to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative path
/// ("../html/yyyyMMdd/yyyyMMddHHmmss.html") of the generated HTML file.
/// </returns>
private string GetPathByDocToHTML(string strFile)
    {
        if (string.IsNullOrEmpty(strFile))
        {
            return "0"; // no file supplied
        }

        // Read the clock exactly once so the folder name and the file name can never disagree,
        // and zero-pad every field so that e.g. Jan 11 and Nov 1 do not produce the same name.
        DateTime dt = DateTime.Now;
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        string filename = dt.ToString("yyyyMMddHHmmss", invariant);
        string strFilePath = "../html/" + dt.ToString("yyyyMMdd", invariant) + "/";

        // Create the dated output folder on first use.
        string targetFolder = Server.MapPath(strFilePath);
        if (!Directory.Exists(targetFolder))
        {
            Directory.CreateDirectory(targetFolder);
        }

        // Physical location of the generated HTML document.
        string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
        object saveFileName = ConfigPath;
        object fileName = strFile;

        Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
        Microsoft.Office.Interop.Word.Documents docs = null;
        Microsoft.Office.Interop.Word.Document doc = null;
        try
        {
            docs = word.Documents;

            // Late-bound call so only the leading arguments have to be supplied.
            doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember("Open",
                System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });

            // Save as filtered HTML. Other WdSaveFormat members (wdFormatHTML, wdFormatRTF,
            // wdFormatText, ...) can be passed here to produce different output formats.
            doc.GetType().InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
        }
        finally
        {
            // Always shut Word down, even when Open/SaveAs failed; otherwise every failed
            // conversion leaves a WINWORD.EXE process behind on the server.
            try
            {
                if (doc != null)
                {
                    doc.GetType().InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { null, null, null });
                }
            }
            finally
            {
                try
                {
                    word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
                }
                finally
                {
                    // Drop the runtime callable wrappers so the Word process can actually exit.
                    if (doc != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                    }
                    if (docs != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                    }
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
                }
            }
        }

        // Normalize the charset declaration/encoding of the generated page.
        TransHTMLEncoding(ConfigPath);

        return (strFilePath + filename + ".html");
    }

Excel文件转HTML,返回相对路径:
/// <summary>
/// Converts an Excel workbook to HTML, stores it under "../html/yyyyMMdd/" and returns the
/// relative path of the generated file.
/// </summary>
/// <param name="strFile">Path of the Excel workbook to convert.</param>
/// <returns>
/// "0" when <paramref name="strFile"/> is null or empty; otherwise the relative path
/// ("../html/yyyyMMdd/yyyyMMddHHmmss.html") of the generated HTML file.
/// </returns>
private string GetPathByXlsToHTML(string strFile)
    {
        if (string.IsNullOrEmpty(strFile))
        {
            return "0"; // no file supplied
        }

        // Read the clock exactly once so the folder name and the file name can never disagree,
        // and zero-pad every field so that e.g. Jan 11 and Nov 1 do not produce the same name.
        DateTime dt = DateTime.Now;
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        string filename = dt.ToString("yyyyMMddHHmmss", invariant);
        string strFilePath = "../html/" + dt.ToString("yyyyMMdd", invariant) + "/";

        // Create the dated output folder on first use.
        string targetFolder = Server.MapPath(strFilePath);
        if (!Directory.Exists(targetFolder))
        {
            Directory.CreateDirectory(targetFolder);
        }

        string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
        object savefilename = (object)ConfigPath;
        object ofmt = Microsoft.Office.Interop.Excel.XlFileFormat.xlHtml;
        object osave = false;

        Microsoft.Office.Interop.Excel.Application repExcel = new Microsoft.Office.Interop.Excel.Application();
        Microsoft.Office.Interop.Excel.Workbooks workbooks = null;
        Microsoft.Office.Interop.Excel.Workbook workbook = null;
        try
        {
            // Keep the Workbooks collection in a local so its COM wrapper can be released later.
            workbooks = repExcel.Workbooks;
            workbook = workbooks.Open(strFile, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);

            // Save a copy of the workbook in HTML format.
            workbook.SaveAs(savefilename, ofmt, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);
        }
        finally
        {
            // Always shut Excel down, even when Open/SaveAs failed, and release every COM
            // wrapper that was obtained so the EXCEL.EXE process is able to exit.
            try
            {
                if (workbook != null)
                {
                    workbook.Close(osave, Type.Missing, Type.Missing);
                }
            }
            finally
            {
                try
                {
                    repExcel.Quit();
                }
                finally
                {
                    if (workbook != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                    }
                    if (workbooks != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(workbooks);
                    }
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(repExcel);

                    // Sweep up any wrappers created implicitly by the interop layer.
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                }
            }
        }

        // Best-effort cleanup: terminate EXCEL processes that were started less than five
        // seconds ago and are still running. The age is computed from the full timestamps,
        // not from the seconds-of-minute component, so a long-running Excel instance that
        // merely started at a similar second is not killed.
        // NOTE(review): this is still a heuristic and can hit another Excel instance that
        // was launched within the same five-second window.
        DateTime now = DateTime.Now;
        foreach (System.Diagnostics.Process p in System.Diagnostics.Process.GetProcessesByName("EXCEL"))
        {
            try
            {
                double ageSeconds = (now - p.StartTime).TotalSeconds;
                if (ageSeconds > 0 && ageSeconds < 5)
                {
                    p.Kill();
                }
            }
            catch (InvalidOperationException)
            {
                // The process exited on its own in the meantime; nothing left to do.
            }
            catch (System.ComponentModel.Win32Exception)
            {
                // The process belongs to another account or is already terminating; skip it
                // rather than fail a conversion that has already succeeded.
            }
            finally
            {
                p.Dispose();
            }
        }

        return (strFilePath + filename + ".html");
    }

这里可能会遇到一个问题:转换生成的HTML文件所声明的页面编码可能导致浏览器无法正确解读,所以需要转码。转换代码如下(不同Word文档转换成HTML后,编码格式会不同,这里统一改成GB2312编码):

/// <summary>
/// Rewrites a generated HTML file in place so that it declares charset=gb2312 and is
/// actually stored in that encoding.
/// </summary>
/// <param name="strFilePath">Physical path of the HTML file to rewrite.</param>
/// <remarks>
/// Failures are not propagated; the exception message is shown to the user through a
/// client-side alert registered on the page.
/// </remarks>
private void TransHTMLEncoding(string strFilePath)
     {
         const string charsetMeta = "<meta http-equiv=Content-Type content='text/html; charset=gb2312'>";

         try
         {
             // Code page 0 selects the system default ANSI code page for reading.
             // "using" guarantees the file handle is released even if reading fails,
             // so the file is not left locked for the subsequent write.
             string html;
             using (System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath, Encoding.GetEncoding(0)))
             {
                 html = sr.ReadToEnd();
             }

             // Replace only the Content-Type declaration. Replacing every <meta> tag would
             // destroy unrelated metadata and emit the charset declaration several times.
             System.Text.RegularExpressions.Regex contentTypeMeta = new System.Text.RegularExpressions.Regex(
                 @"<meta\s+http-equiv\s*=\s*[""']?Content-Type[""']?[^>]*>",
                 System.Text.RegularExpressions.RegexOptions.IgnoreCase);

             if (contentTypeMeta.IsMatch(html))
             {
                 html = contentTypeMeta.Replace(html, charsetMeta);
             }
             else
             {
                 // No declaration present: add one right after the opening <head> tag.
                 System.Text.RegularExpressions.Regex headTag = new System.Text.RegularExpressions.Regex(
                     @"<head(\s[^>]*)?>",
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                 html = headTag.Replace(html, "$0" + charsetMeta, 1);
             }

             // Write with the encoding that the page now declares, so the bytes match the
             // declaration regardless of the server's default code page.
             using (System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.GetEncoding("gb2312")))
             {
                 sw.Write(html);
             }
         }
         catch (Exception ex)
         {
             // Escape the message before embedding it in a single-quoted JavaScript string:
             // file paths contain backslashes and messages may contain quotes or line breaks,
             // any of which would otherwise break (or inject into) the generated script.
             string message = ex.Message
                 .Replace("\\", "\\\\")
                 .Replace("'", "\\'")
                 .Replace("\r", "\\r")
                 .Replace("\n", "\\n")
                 .Replace("<", "\\x3c");
             Page.RegisterStartupScript("alt", "<script>alert('" + message + "')</script>");
         }
     }

以下是已验证、可正常使用的Word转HTML方法:

 /// <summary>
 /// Converts a Word document to HTML and then normalizes the charset of the result.
 /// </summary>
 /// <param name="sourcefile">Path of the source Word document.</param>
 /// <param name="destfile">Path of the HTML file to generate.</param>
 private void ConvertWordToHtml(string sourcefile, string destfile)
{
        object fileName = sourcefile;
        object saveFileName = destfile;

        Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.ApplicationClass();
        Microsoft.Office.Interop.Word.Documents docs = null;
        Microsoft.Office.Interop.Word.Document doc = null;
        try
        {
            docs = word.Documents;

            // Late-bound call so only the leading arguments have to be supplied.
            doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember("Open",
                System.Reflection.BindingFlags.InvokeMethod, null, (object)docs, new Object[] { fileName, true, true });

            // Save as HTML. Other WdSaveFormat members (wdFormatFilteredHTML, wdFormatRTF,
            // wdFormatText, ...) can be passed here to produce different output formats.
            doc.GetType().InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatHTML });
        }
        finally
        {
            // Always shut Word down, even when Open/SaveAs failed; otherwise every failed
            // conversion leaves a WINWORD.EXE process behind.
            try
            {
                if (doc != null)
                {
                    doc.GetType().InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { null, null, null });
                }
            }
            finally
            {
                try
                {
                    word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod,
                        null, word, null);
                }
                finally
                {
                    // Drop the runtime callable wrappers so the Word process can actually exit.
                    if (doc != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                    }
                    if (docs != null)
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                    }
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
                }
            }
        }

        // Normalize the generated HTML to gb2312.
        TransHTMLEncoding(destfile);
    }



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值