java mht转html文件

最新推荐文章于 2024-05-21 12:27:17 发布
一只努力xx的程序媛
最新推荐文章于 2024-05-21 12:27:17 发布
阅读量877
点赞数
分类专栏： java、- - 文档转换　文章标签： java
原文链接：https://blog.csdn.net/qq_23888451
版权
java 同时被 2 个专栏收录
55 篇文章 1 订阅
订阅专栏
- - 文档转换
2 篇文章 0 订阅
订阅专栏
mht文件转换成 html文件
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import javax.activation.DataHandler;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimePartDataSource;
  
/**
 * Converts an MHT (MIME HTML web archive) file into a plain HTML file.
 *
 * <p>The first MIME body part is treated as the HTML document. Every further
 * body part that carries a {@code Content-Location} header is written into a
 * sibling directory named {@code <target>.files}, and occurrences of its
 * original URL inside the HTML are replaced with the path of the saved copy.
 */
public class HtmlApplication {

    /** Charset used when the HTML part declares none, or one this JVM does not support. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Length of the literal {@code "charset="}. */
    private static final int CHARSET_KEY_LENGTH = 8;

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the source MHT path and
     *             {@code args[1]} the target HTML path; the historical
     *             hard-coded paths are used for whichever one is missing
     */
    public static void main(String[] args) {
        String source = (args != null && args.length > 0) ? args[0] : "D:\\xx.mht";
        String target = (args != null && args.length > 1) ? args[1] : "D:\\xx.html";
        HtmlApplication.mht2html(source, target);
    }

    /**
     * Converts an MHT file into an HTML file plus a directory of resources.
     *
     * <p>Failures are reported on standard error; no exception is propagated.
     *
     * @param s_SrcMht   path of the MHT file to read
     * @param s_DescHtml path of the HTML file to write
     */
    public static void mht2html(String s_SrcMht, String s_DescHtml) {
        InputStream fis = null;
        try {
            fis = new FileInputStream(s_SrcMht);
            // getInstance() creates a private session instead of reusing (or installing)
            // the JVM-wide default one.
            Session mailSession = Session.getInstance(System.getProperties(), null);
            MimeMessage msg = new MimeMessage(mailSession, fis);
            Object content = msg.getContent();
            if (!(content instanceof Multipart)) {
                System.err.println("Not a multipart MHT file, nothing converted: " + s_SrcMht);
                return;
            }

            Multipart mp = (Multipart) content;
            if (mp.getCount() == 0) {
                System.err.println("MHT file contains no body parts: " + s_SrcMht);
                return;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);

            // Charset declared for the HTML part (never null, see getEncoding).
            String strEncoding = getEncoding(htmlPart);

            // Decoded HTML text of the first part.
            String strText = getHtmlText(htmlPart, strEncoding);
            if (strText == null) {
                return;
            }

            // Directory named after the target file; it receives the resource files.
            File parent = null;
            if (mp.getCount() > 1) {
                parent = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
                parent.mkdirs();
                if (!parent.isDirectory()) {
                    System.err.println("Cannot create resource directory: " + parent);
                    return;
                }
            }

            // Lower-cased names already written, so equally named resources do not
            // overwrite each other (also on case-insensitive file systems).
            Set<String> usedNames = new HashSet<String>();

            // Save each resource and point the HTML at the saved copy.
            for (int i = 1; i < mp.getCount(); ++i) {
                MimeBodyPart bp = (MimeBodyPart) mp.getBodyPart(i);
                // Original location of the resource, e.g. http://xxx.com/abc.jpg
                String strUrl = getResourcesUrl(bp);
                if (strUrl == null || strUrl.length() == 0) {
                    continue;
                }

                String fileName = getName(strUrl, i);
                while (!usedNames.add(fileName.toLowerCase())) {
                    fileName = i + "_" + fileName;
                }
                File resources = new File(parent, fileName);

                if (SaveResourcesFile(resources, bp.getInputStream())) {
                    // Replace the remote address (image, JS, CSS, ...) with the local one.
                    strText = strText.replace(strUrl, resources.getAbsolutePath());
                }
            }

            // Finally write the HTML document itself.
            if (!SaveHtml(strText, s_DescHtml, strEncoding)) {
                System.err.println("Failed to write HTML file: " + s_DescHtml);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(fis);
        }
    }

    /**
     * Derives a local file name for a resource from its URL.
     *
     * <p>Line breaks, the query string and the fragment are dropped, the part
     * after the last {@code /} or {@code \} is taken, and characters that are
     * not allowed in Windows file names are replaced with {@code _}.
     *
     * @param strName URL (or path) of the resource
     * @param ID      index of the resource; used to build a fallback name when
     *                the URL yields no usable file name
     * @return a non-empty file name without directory separators
     */
    public static String getName(String strName, int ID) {
        String fallback = "resource_" + ID;
        if (strName == null) {
            return fallback;
        }
        String name = strName.replaceAll("[\r\n]", "").trim();

        int query = name.indexOf('?');
        if (query >= 0) {
            name = name.substring(0, query);
        }
        int fragment = name.indexOf('#');
        if (fragment >= 0) {
            name = name.substring(0, fragment);
        }

        int lastSeparator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        name = name.substring(lastSeparator + 1);

        // e.g. "cid:image001.png" contains ':' which Windows rejects in file names.
        name = name.replaceAll("[<>:\"|*\\x00-\\x1f]", "_").trim();

        if (name.length() == 0 || ".".equals(name) || "..".equals(name)) {
            return fallback;
        }
        return name;
    }

    /**
     * Writes the extracted HTML text to a file, replacing any existing file.
     *
     * @param s_HtmlTxt  HTML text to write
     * @param s_HtmlPath path of the file to write
     * @param s_Encode   charset name used to encode the text; {@code null}
     *                   selects UTF-8
     * @return {@code true} if the file was written, {@code false} on any error
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath, String s_Encode) {
        String encoding = (s_Encode == null) ? DEFAULT_ENCODING : s_Encode;
        FileOutputStream fos = null;
        Writer out = null;
        try {
            fos = new FileOutputStream(s_HtmlPath, false);
            out = new OutputStreamWriter(fos, encoding);
            out.write(s_HtmlTxt);
            // Flush here so that write errors are reported, not lost in the quiet close.
            out.flush();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(fos);
        }
    }

    /**
     * Saves one resource (script, image, style sheet, ...) to disk.
     * The input stream is always closed.
     *
     * @param SrcFile     file to write
     * @param inputStream content of the resource
     * @return {@code true} if the whole stream was written, {@code false} otherwise
     */
    private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            closeQuietly(inputStream);
            return false;
        }

        FileOutputStream out = null;
        try {
            out = new FileOutputStream(SrcFile);
            byte[] buffer = new byte[8192];
            int read;
            while ((read = inputStream.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(inputStream);
        }
    }

    /**
     * Reads the {@code Content-Location} header of a body part.
     *
     * @param bp body part of a resource; may be {@code null}
     * @return the header value with line folding removed, or {@code null} if
     *         the part is {@code null}, has no such header, or cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            String value = bp.getHeader("Content-Location", null);
            if (value == null) {
                return null;
            }
            // A long URL may be folded over several header lines; join them again so
            // the value can match the URL as it appears in the HTML text.
            return value.replaceAll("\r?\n[ \t]*", "").trim();
        } catch (MessagingException e) {
            return null;
        }
    }

    /**
     * Reads and decodes the HTML text of a body part.
     * Line terminators are normalised to CRLF.
     *
     * @param bp          body part holding the HTML document
     * @param strEncoding charset name used to decode the content
     * @return the decoded text, or {@code null} if it could not be read
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        InputStream textStream = null;
        BufferedReader reader = null;
        try {
            textStream = bp.getInputStream();
            reader = new BufferedReader(new InputStreamReader(textStream, strEncoding));
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\r\n");
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(reader);
            closeQuietly(textStream);
        }
    }

    /**
     * Determines the charset of a body part from its {@code Content-Type} header.
     *
     * @param bp body part holding the HTML document; may be {@code null}
     * @return a charset name supported by this JVM; UTF-8 when the header is
     *         missing, declares no charset, or declares an unsupported one
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return DEFAULT_ENCODING;
        }
        String contentType = null;
        try {
            contentType = bp.getHeader("Content-Type", null);
        } catch (MessagingException e) {
            e.printStackTrace();
        }
        return parseCharset(contentType);
    }

    /**
     * Extracts the {@code charset} parameter from a Content-Type header value.
     *
     * @param contentType raw header value, may be {@code null}
     * @return a supported charset name, or UTF-8 if none can be determined
     */
    private static String parseCharset(String contentType) {
        if (contentType == null) {
            return DEFAULT_ENCODING;
        }

        // Case-insensitive search for "charset=".
        int pos = -1;
        for (int i = 0; i + CHARSET_KEY_LENGTH <= contentType.length(); i++) {
            if (contentType.regionMatches(true, i, "charset=", 0, CHARSET_KEY_LENGTH)) {
                pos = i;
                break;
            }
        }
        if (pos < 0) {
            return DEFAULT_ENCODING;
        }

        String encoding = contentType.substring(pos + CHARSET_KEY_LENGTH);
        // Drop any parameters that follow, e.g. charset="utf-8"; format=flowed
        int end = encoding.indexOf(';');
        if (end >= 0) {
            encoding = encoding.substring(0, end);
        }
        encoding = encoding.trim();
        if (encoding.startsWith("\"") || encoding.startsWith("'")) {
            encoding = encoding.substring(1);
        }
        if (encoding.endsWith("\"") || encoding.endsWith("'")) {
            encoding = encoding.substring(0, encoding.length() - 1);
        }
        encoding = encoding.trim();

        // Decode gb2312 as gbk, as the original implementation did.
        if ("gb2312".equalsIgnoreCase(encoding)) {
            encoding = "gbk";
        }

        if (encoding.length() == 0) {
            return DEFAULT_ENCODING;
        }
        try {
            return Charset.isSupported(encoding) ? encoding : DEFAULT_ENCODING;
        } catch (IllegalArgumentException e) {
            // Syntactically illegal charset name.
            return DEFAULT_ENCODING;
        }
    }

    /**
     * Closes a resource, ignoring {@code null} and any failure of {@code close()}.
     *
     * @param resource resource to close, may be {@code null}
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done; output streams are flushed before this point.
        }
    }
}
一只努力xx的程序媛
关注
0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
java mht转html文件

mht文件转换成 html文件import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.File; import java.io.F...
复制链接

扫一扫
专栏目录