mht 转 htm：实际项目开发中使用，亲测可用，分享给大家。

最新推荐文章于 2024-08-09 14:29:13 发布

qq_39977900

最新推荐文章于 2024-08-09 14:29:13 发布

阅读量1.9k

点赞数

文章标签： mht转htm 文件上传 htm解析查看 java mht 转htm

本文链接：https://blog.csdn.net/qq_39977900/article/details/81382249

版权

我的想法是：先把文件上传到一个目录下，点击查看文件详情时再解析并展示。每次解析前，先清空资源目录下的文件，并清空存放转换结果的详情页。

这是根据你的 mht 文件地址把 mht 转换成 htm。因为每个 mht 里面资源文件的路径可能不一样，所以我每次转换时都在页面末尾加上一段 jQuery 脚本，让 img 图片的路径固定指向同一个目录。详情请看代码：

package audit.action.Document_Data;

import javax.activation.DataHandler;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimePartDataSource;

import java.io.*;
import java.util.Enumeration;

/**
 * Converts an MHT (MIME HTML archive) file into an HTML page plus extracted resource files.
 *
 * <p>The archive is parsed with JavaMail as a MIME message. The first body part is treated as
 * the HTML document; every further part that carries a {@code Content-Location} header is
 * written to the resource directory {@code <base>doc/file8807.files}. A small script is appended
 * to the generated page so that every {@code <img>} points into that directory.
 *
 * <p>Errors are reported on {@code System.err} / via {@code printStackTrace()} and signalled
 * through return values; none of the public methods declares a checked exception.
 */
public class Mht2HtmlUtil {

    /** Name of the directory, located under {@code <base>doc}, that receives extracted resources. */
    private static final String RESOURCE_DIR_NAME = "file8807.files";

    /**
     * Script block appended to every page written by {@link #SaveHtml}. On document-ready it
     * rewrites the {@code src} of each {@code <img>} to {@code file8807.files/} followed by the
     * text after the last {@code '/'} of the old {@code src}.
     */
    private static final String IMG_REWRITE_SCRIPT =
            " <script src=\"jquery-3.3.1.min.js\"></script>\n" +
            "\n" +
            "<script type=\"text/javascript\">\n" +
            " $(function () {\n" +
            "\n" +
            " var imglist=$(\"img\");\n" +
            " for(var i=0;i<imglist.length;i++){\n" +
            "\n" +
            " var oldsrc= $(\"img:eq(\"+i+\")\").attr(\"src\");\n" +
            " var newsrc=\"file8807.files/\"+oldsrc.substr(oldsrc.lastIndexOf(\"/\")+1);\n" +
            " $(\"img:eq(\"+i+\")\").attr(\"src\",newsrc);\n" +
            " }\n" +
            " })\n" +
            "</script>";

    /**
     * Writes {@code content} to {@code fileName}, creating the file if necessary and replacing
     * any previous content. The platform default charset is used.
     *
     * @param fileName path of the file to write
     * @param content  text to write; {@code null} is written as an empty file
     */
    public static void addhtm(String fileName, String content) {
        File file = new File(fileName);
        // FileWriter creates a missing file and truncates an existing one; try-with-resources
        // guarantees the handle is released even when the write fails.
        try (FileWriter fileWriter = new FileWriter(file)) {
            fileWriter.write(content == null ? "" : content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively deletes everything inside {@code dataBasePath}; the directory itself is kept.
     * Does nothing when the path does not exist or is not a directory.
     *
     * @param dataBasePath directory whose content is to be removed
     */
    public static void deleteDir(String dataBasePath) {
        File dir = new File(dataBasePath);
        String[] names = dir.list();
        if (names == null) {
            // File.list() returns null for a missing path, a plain file or an I/O error.
            return;
        }
        for (String name : names) {
            File entry = new File(dir, name);
            if (entry.isDirectory()) {
                // A directory can only be deleted once it is empty.
                deleteDir(entry.getAbsolutePath());
            }
            if (!entry.delete()) {
                System.err.println("Failed to delete " + name);
            }
        }
    }

    /**
     * Truncates {@code fileName} to zero length, creating it when it does not exist.
     *
     * @param fileName path of the file to empty
     */
    public static void deletehtm(String fileName) {
        addhtm(fileName, "");
    }

    /**
     * Converts the MHT file {@code s_SrcMht} into the HTML file {@code s_DescHtml}.
     *
     * <p>Resources contained in the archive are written to {@code file + "doc"} /
     * {@code file8807.files}; occurrences of their original URL inside the HTML text are replaced
     * by the absolute path of the extracted file.
     *
     * @param s_SrcMht   path of the MHT file to read
     * @param s_DescHtml path of the HTML file to write
     * @param file       base path that is concatenated with {@code "doc"} to locate the resource
     *                   directory; it is expected to end with a path separator
     * @return {@code ""} when the multipart archive was processed, and also when an exception
     *         occurred, the HTML text could not be read or the resource directory could not be
     *         created; {@code "error"} when the first part declares no charset; otherwise (the
     *         message is not multipart) {@code toString()} of the message content
     */
    public static String mht2html(String s_SrcMht, String s_DescHtml, String file) {
        // MimeMessage parses the whole stream in its constructor, so the file handle can be
        // released as soon as this block is left.
        try (InputStream fis = new FileInputStream(s_SrcMht)) {
            Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
            MimeMessage msg = new MimeMessage(mailSession, fis);
            Object content = msg.getContent();
            if (!(content instanceof Multipart)) {
                return content.toString();
            }

            Multipart mp = (Multipart) content;
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);

            // Charset declared by the HTML part, then the decoded HTML text itself.
            String strEncoding = getEncoding(htmlPart);
            String strText = getHtmlText(htmlPart, strEncoding);
            if (strText == null) {
                return "";
            }
            if (strText.equals("error")) {
                return "error";
            }

            // The resources go into their own directory. The HTML destination path itself must
            // never be turned into a directory, otherwise the page could not be written.
            int partCount = mp.getCount();
            File resourceDir = new File(file + "doc", RESOURCE_DIR_NAME);
            if (partCount > 1 && !resourceDir.isDirectory() && !resourceDir.mkdirs()) {
                return "";
            }

            // Extract every resource part and point the HTML at the local copy.
            for (int i = 1; i < partCount; ++i) {
                MimeBodyPart part = (MimeBodyPart) mp.getBodyPart(i);
                // Original location of the resource, e.g. http://xxx.com/abc.jpg
                String strUrl = getResourcesUrl(part);
                if (strUrl == null || strUrl.length() == 0) {
                    continue;
                }
                String resourceName = getName(strUrl, i);
                if (resourceName.length() == 0
                        || resourceName.equals(".") || resourceName.equals("..")) {
                    // Nothing usable as a file name inside the resource directory.
                    continue;
                }
                File resource = new File(resourceDir, resourceName);
                if (SaveResourcesFile(resource, part.getInputStream())) {
                    strText = strText.replace(strUrl, resource.getAbsolutePath());
                }
            }

            // Finally write the HTML page.
            File htmlParent = new File(s_DescHtml).getAbsoluteFile().getParentFile();
            if (htmlParent != null && !htmlParent.isDirectory()) {
                htmlParent.mkdirs();
            }
            if (!SaveHtml(strText, s_DescHtml, strEncoding)) {
                System.err.println("Failed to save " + s_DescHtml);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Derives a file name from a resource location: the text after the last {@code '/'} or
     * {@code '\'} (whichever comes later), with line breaks removed. A location without any
     * separator is returned as a whole.
     *
     * @param strName resource location taken from the archive
     * @param ID      index of the body part; currently not used
     * @return the file name, or {@code ""} when {@code strName} is {@code null} or ends with a
     *         separator
     */
    public static String getName(String strName, int ID) {
        if (strName == null) {
            return "";
        }
        // Folded header values may contain line breaks.
        String cleaned = strName.replace("\r", "").replace("\n", "");
        // Cutting after the last separator of either kind keeps the result free of path
        // separators, so it cannot address anything outside the directory it is placed in.
        int cut = Math.max(cleaned.lastIndexOf('/'), cleaned.lastIndexOf('\\'));
        return cleaned.substring(cut + 1);
    }

    /**
     * Writes the HTML text followed by the image-path rewrite script to {@code s_HtmlPath},
     * replacing any existing file.
     *
     * @param s_HtmlTxt  HTML text to store
     * @param s_HtmlPath destination file
     * @param s_Encode   name of the charset used for writing
     * @return {@code true} when the file was written, {@code false} on any exception
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath,
            String s_Encode) {
        // The stream is declared on its own so it is closed even when the charset name is
        // rejected by the OutputStreamWriter constructor.
        try (FileOutputStream fos = new FileOutputStream(s_HtmlPath, false);
                Writer out = new OutputStreamWriter(fos, s_Encode)) {
            out.write(s_HtmlTxt + "" + IMG_REWRITE_SCRIPT);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Copies {@code inputStream} into {@code SrcFile} (script, image, style sheet, ...). The input
     * stream is closed once copying has been attempted.
     *
     * @param SrcFile     destination file
     * @param inputStream content to store
     * @return {@code true} when the content was written completely, {@code false} when an
     *         argument is {@code null} or an exception occurred
     */
    private static boolean SaveResourcesFile(File SrcFile,
            InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            return false;
        }
        try (InputStream in = new BufferedInputStream(inputStream);
                OutputStream out = new BufferedOutputStream(new FileOutputStream(SrcFile))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Returns the value of the part's {@code Content-Location} header. The header name is
     * matched case-insensitively.
     *
     * @param bp body part to inspect
     * @return the header value, or {@code null} when {@code bp} is {@code null}, the header is
     *         missing or the headers cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            Enumeration<?> headers = bp.getAllHeaders();
            while (headers.hasMoreElements()) {
                javax.mail.Header head = (javax.mail.Header) headers.nextElement();
                if (head.getName().equalsIgnoreCase("Content-Location")) {
                    return head.getValue();
                }
            }
            return null;
        } catch (MessagingException e) {
            return null;
        }
    }

    /**
     * Reads the decoded content of {@code bp} as text. Every line is terminated with
     * {@code "\r\n"} in the result.
     *
     * @param bp          body part holding the HTML document
     * @param strEncoding name of the charset used to decode the content
     * @return the text; the literal {@code "error"} when {@code strEncoding} is {@code null};
     *         {@code null} when reading fails
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        if (strEncoding == null) {
            return "error";
        }
        // The raw stream is a resource of its own so it is closed even when the charset name is
        // rejected by the InputStreamReader constructor.
        try (InputStream textStream = bp.getInputStream();
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new BufferedInputStream(textStream), strEncoding))) {
            StringBuilder strHtml = new StringBuilder();
            String strLine;
            while ((strLine = br.readLine()) != null) {
                strHtml.append(strLine).append("\r\n");
            }
            return strHtml.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Determines the charset declared in the {@code Content-Type} header of {@code bp}.
     *
     * @param bp body part to inspect
     * @return the charset name ({@code "gbk"} is returned in place of {@code gb2312}), or
     *         {@code null} when {@code bp} is {@code null}, no charset is declared or the
     *         headers cannot be read
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            Enumeration<?> headers = bp.getAllHeaders();
            while (headers.hasMoreElements()) {
                javax.mail.Header head = (javax.mail.Header) headers.nextElement();
                if (!head.getName().equalsIgnoreCase("Content-Type")) {
                    continue;
                }
                String strEncoding = extractCharset(head.getValue());
                if (strEncoding != null) {
                    return strEncoding;
                }
            }
        } catch (MessagingException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Extracts the {@code charset} parameter from a {@code Content-Type} header value. The
     * parameter name is matched case-insensitively; parameters following the charset,
     * surrounding whitespace and one pair of quotes are removed.
     *
     * @return the charset name with {@code gb2312} mapped to {@code "gbk"}, or {@code null}
     *         when the value holds no non-empty charset
     */
    private static String extractCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        final String key = "charset=";
        int pos = indexOfIgnoreCase(contentType, key);
        if (pos < 0) {
            return null;
        }
        String value = contentType.substring(pos + key.length());
        int end = value.indexOf(';');
        if (end >= 0) {
            // Drop any parameter that follows the charset.
            value = value.substring(0, end);
        }
        value = value.trim();
        if (value.startsWith("\"") || value.startsWith("'")) {
            value = value.substring(1);
        }
        if (value.endsWith("\"") || value.endsWith("'")) {
            value = value.substring(0, value.length() - 1);
        }
        value = value.trim();
        if (value.length() == 0) {
            return null;
        }
        if (value.equalsIgnoreCase("gb2312")) {
            return "gbk";
        }
        return value;
    }

    /** Returns the index of the first case-insensitive match of {@code needle}, or -1. */
    private static int indexOfIgnoreCase(String text, String needle) {
        int last = text.length() - needle.length();
        for (int i = 0; i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }
}

// The code below (taken from the action class) calls the converter class above.

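/**
   * Converts the requested MHT file and writes the result to doc/detail.htm.
   * @param request  request carrying the "fileName" parameter
   * @param response response whose writer receives the (empty) result text
   * @throws UnsupportedEncodingException if the "utf-8" encoding is not supported when decoding the file name
   */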
   public void queryDetail(HttpServletRequest request,HttpServletResponse response) throws UnsupportedEncodingException
   {

       //获取文件名称
       String fileName = URLDecoder.decode( MyStringUtil.showNull(request.getParameter("fileName")),"utf-8");

       //获取当前项目路径
       String dataBasePath = request.getSession().getServletContext().getRealPath("/");
       //文件保存地址
               String new_file_root_path="";
               new_file_root_path =dataBasePath.substring(0,dataBasePath.indexOf('\\')+1);

               file_root_path=new_file_root_path+"\\upload\\doc\\";


       //文件存放路径
       String filePath = dataBasePath + "doc/"+fileName;
       //新文件路径
       String newFilePath=dataBasePath+"doc/"+"detail.htm";
       //将mht 文件转换成html 文件 param1 需要转换对的文件路径，param2 转换后文件的路径,param3 绝对路径

       //每次查看文档详情清空查看详情页面和资源文件目录
       Mht2HtmlUtil.deleteDir(dataBasePath+"doc/file8807.files/");

       //清空htm 内容

       Mht2HtmlUtil.deletehtm(newFilePath);

   PrintWriter print = null;
       String result="";
       try {
           print=response.getWriter();

           // 转换
       String content= Mht2HtmlUtil.mht2html(file_root_path+""+fileName, newFilePath,dataBasePath);
       if(!content.equals(""))
       {//写入内容写去js 是因为这个mht解析出来 img 是放在固定的地方。所有洗个js 让页面上的img src也在固定的地方
           String jqueryhtml=" <script src=\"jquery-3.3.1.min.js\"></script>\n" +
       "\n" +
       "<script type=\"text/javascript\">\n" +
       " $(function () {\n" +
       "\n" +
       " var imglist=$(\"img\");\n" +
       " for(var i=0;i<imglist.length;i++){\n" +
       "\n" +
       " var oldsrc= $(\"img:eq(\"+i+\")\").attr(\"src\");\n" +
       " var newsrc=\"file8807.files/\"+oldsrc.substr(oldsrc.lastIndexOf(\"/\")+1);\n" +
       " $(\"img:eq(\"+i+\")\").attr(\"src\",newsrc);\n" +
       " }\n" +
       " })\n" +
       "</script>";
           Mht2HtmlUtil.addhtm(newFilePath,content+""+jqueryhtml);
       }
       } catch (Exception e) {
           e.printStackTrace();
       }finally{
           print.print(result);
           print.close();
       }

   }

// This is the file-upload handler. The upload helper it uses comes from the company's in-house framework and is not shown here.

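/**
   * Handles a document upload.
   * @param request  request carrying the uploaded file and the "type_id" parameter
   * @param response response whose writer receives the result text
   */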
   public void addupload(HttpServletRequest request,HttpServletResponse response)
   {
       response.setContentType("text/html;charset=utf-8");
//       获取类型id
       String type_id = MyStringUtil.showNull(request.getParameter("type_id"));

       String dataBasePath = request.getSession().getServletContext().getRealPath("/");

//       文件保存地址根目录
       String new_file_root_path="";
       new_file_root_path =dataBasePath.substring(0,dataBasePath.indexOf('\\')+1);

       file_root_path=new_file_root_path+"\\upload\\doc\\";

       PrintWriter print = null;
       String result="文件解析失败";
       try {
           print=response.getWriter();
           FileUpload myfileUpload = new FileUpload(request);


           Map<String,Object> parameters = myfileUpload.UploadFile(".mht", file_root_path);
           try {
               //解析后的地址
               String newFilePath=dataBasePath+"doc/"+"detail.htm";
               // 解析上传的文件
           String op=   Mht2HtmlUtil.mht2html(file_root_path+""+parameters.get("real_fileName").toString(), newFilePath,dataBasePath);
               if(op.equals("error"))
               {
                   result="error";
               }else
               {//解析成功写入数据库
                   String sub=parameters.get("fileName").toString();
                   String newsub=sub.substring(0,sub.lastIndexOf('.'));
                   result=   service.addupload(type_id,newsub ,parameters.get("real_fileName").toString());
               }
           } catch (Exception e) {
               // TODO: handle exception
               result="error";
               return;
           }
           //       将数据读入表中
       } catch (Exception e) {
           e.printStackTrace();
       }finally{
           print.print(result);
           print.close();
       }

   }