java mht转html文件

mht文件转换成 html文件

import java.io.BufferedInputStream;  
import java.io.BufferedOutputStream;  
import java.io.BufferedReader;  
import java.io.DataOutputStream;  
import java.io.File;  
import java.io.FileInputStream;  
import java.io.FileOutputStream;  
import java.io.InputStream;  
import java.io.InputStreamReader;  
import java.io.OutputStreamWriter;  
import java.io.Reader;  
import java.io.Writer;  
import java.util.Enumeration;  
import javax.activation.DataHandler;  
import javax.mail.MessagingException;    
import javax.mail.Multipart;    
import javax.mail.Session;    
import javax.mail.internet.MimeBodyPart;    
import javax.mail.internet.MimeMessage;    
import javax.mail.internet.MimeMultipart;    
import javax.mail.internet.MimePartDataSource;   
  
/**
 * Converts an MHT (MHTML web archive) file into a plain HTML file.
 *
 * <p>The first MIME part of the archive is treated as the HTML document. Every
 * following part that carries a {@code Content-Location} header is written into a
 * sibling directory named {@code <html file>.files}, and each occurrence of that
 * location inside the HTML text is replaced by the absolute path of the saved file.
 */
public class HtmlApplication{
    /** Charset used when the HTML part does not declare one in its Content-Type header. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the source .mht path and {@code args[1]}
     *             the target .html path; the original hard-coded sample paths are used
     *             for whichever argument is missing
     */
    public static void main(String[] args){
        String source = (args != null && args.length > 0) ? args[0] : "D:\\xx.mht";
        String target = (args != null && args.length > 1) ? args[1] : "D:\\xx.html";
        HtmlApplication.mht2html(source, target);
    }

    /**
     * Converts an MHT file into an HTML file plus a directory of extracted resources.
     *
     * <p>Failures are printed to standard error and never propagated to the caller.
     * Nothing is written when the message content is not multipart.
     *
     * @param s_SrcMht   path of the MHT file to read
     * @param s_DescHtml path of the HTML file to write
     */
    public static void mht2html(String s_SrcMht, String s_DescHtml) {
        InputStream fis = null;
        try {
            fis = new FileInputStream(s_SrcMht);
            Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
            MimeMessage msg = new MimeMessage(mailSession, fis);
            Object content = msg.getContent();
            if (!(content instanceof Multipart)) {
                return;
            }

            // Work against the Multipart base type: that is what the instanceof check guarantees.
            Multipart mp = (Multipart) content;
            int partCount = mp.getCount();
            if (partCount == 0) {
                return;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);

            // Charset of the HTML part; fall back to a default instead of failing on null.
            String strEncodng = getEncoding(htmlPart);
            if (strEncodng == null) {
                strEncodng = DEFAULT_ENCODING;
            }

            // Decoded HTML source of the page.
            String strText = getHtmlText(htmlPart, strEncodng);
            if (strText == null) {
                return;
            }

            // Directory "<html file>.files" that receives the resource files.
            File parent = null;
            if (partCount > 1) {
                parent = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
                parent.mkdirs();
                if (!parent.isDirectory()) {
                    // The directory could not be created, so resources cannot be stored.
                    return;
                }
            }

            // Save every resource part and point the HTML at the local copy.
            for (int i = 1; i < partCount; ++i) {
                MimeBodyPart bp = (MimeBodyPart) mp.getBodyPart(i);
                // Original location of the resource, e.g. http://xxx.com/abc.jpg
                String strUrl = getResourcesUrl(bp);
                if (strUrl == null || strUrl.length() == 0) {
                    continue;
                }

                File resources = new File(parent, getName(strUrl, i));
                if (SaveResourcesFile(resources, bp.getInputStream())) {
                    // Replace the remote address (image, JS, CSS, ...) with the local path.
                    strText = strText.replace(strUrl, resources.getAbsolutePath());
                }
            }

            // Finally write the HTML document itself.
            SaveHtml(strText, s_DescHtml, strEncodng);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(fis);
        }
    }

    /**
     * Derives a local file name for a resource from its location string.
     *
     * <p>The text after the last {@code '/'} or {@code '\\'} is used, with CR/LF removed
     * and characters that are unsafe in file names or local URLs replaced by {@code '_'}.
     * When that leaves nothing usable (null input, trailing separator, {@code "."} or
     * {@code ".."}), {@code "resource_" + ID} is returned so the caller always gets a
     * non-empty name that stays inside the target directory.
     *
     * @param strName resource location, for example a URL or a file path
     * @param ID      index of the resource, used only to build the fallback name
     * @return a non-empty file name without path separators
     */
    public static String getName(String strName, int ID) {
        String fallback = "resource_" + ID;
        if (strName == null) {
            return fallback;
        }

        // Header values may contain line breaks; drop them.
        String name = strName.replace("\r", "").replace("\n", "");

        // Cut at whichever separator comes last so neither kind survives in the result.
        int lastSeparator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        if (lastSeparator >= 0) {
            name = name.substring(lastSeparator + 1);
        }

        // Neutralise characters that are illegal on common file systems or that would be
        // read as query/fragment/escape markers when the HTML references the local file.
        name = name.replaceAll("[:*?\"<>|#%\\p{Cntrl}]", "_").trim();

        if (name.length() == 0 || name.equals(".") || name.equals("..")) {
            return fallback;
        }
        return name;
    }


    /**
     * Writes the extracted HTML text to the target path, overwriting an existing file.
     *
     * @param s_HtmlTxt  HTML text to write
     * @param s_HtmlPath path of the file to create
     * @param s_Encode   name of the charset used to encode the text
     * @return {@code true} when the text was written, {@code false} on any failure
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath , String s_Encode) {
        FileOutputStream fos = null;
        Writer out = null;
        try {
            // Opened separately so the file handle is released even when the charset is rejected.
            fos = new FileOutputStream(s_HtmlPath, false);
            out = new OutputStreamWriter(fos, s_Encode);
            out.write(s_HtmlTxt);
            out.flush();
            return true;
        } catch (Exception e) {
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(fos);
        }
    }


    /**
     * Saves one resource (JS, image, CSS, ...) of the page to disk.
     *
     * <p>When both arguments are non-null the input stream is always closed before
     * returning.
     *
     * @param SrcFile     file to write the resource to
     * @param inputStream stream providing the resource bytes
     * @return {@code true} when all bytes were copied, {@code false} otherwise
     */
    private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            return false;
        }

        BufferedInputStream in = null;
        FileOutputStream fio = null;
        BufferedOutputStream out = null;
        try {
            in = new BufferedInputStream(inputStream);
            fio = new FileOutputStream(SrcFile);
            out = new BufferedOutputStream(fio);
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // Each close is independent so one failure cannot leak the other handles.
            closeQuietly(out);
            closeQuietly(fio);
            closeQuietly(in);
            closeQuietly(inputStream);
        }
    }


    /**
     * Reads the original location of a resource from its {@code Content-Location} header.
     *
     * @param bp body part of the resource, may be {@code null}
     * @return the header value, or {@code null} when the part is null, has no such
     *         header, or the header cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            // A null delimiter asks for the first header with this name only.
            return bp.getHeader("Content-Location", null);
        } catch (MessagingException e) {
            return null;
        }
    }


    /**
     * Reads the whole body part as text.
     *
     * <p>Line terminators are normalised: every line read is followed by CR LF.
     *
     * @param bp          body part holding the HTML document
     * @param strEncoding name of the charset used to decode the bytes
     * @return the decoded text, or {@code null} when reading or decoding fails
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        InputStream textStream = null;
        BufferedReader br = null;
        try {
            textStream = bp.getInputStream();
            br = new BufferedReader(new InputStreamReader(textStream, strEncoding));
            StringBuilder strHtml = new StringBuilder();
            String strLine;
            while ((strLine = br.readLine()) != null) {
                strHtml.append(strLine).append("\r\n");
            }
            return strHtml.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(br);
            closeQuietly(textStream);
        }
    }

    /**
     * Extracts the charset declared in the {@code Content-Type} header of a body part.
     *
     * <p>Surrounding quotes and any parameters following the charset are removed, and
     * {@code gb2312} is reported as {@code gbk}.
     *
     * @param bp body part to inspect, may be {@code null}
     * @return the charset name, or {@code null} when the part is null or declares none
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }

        String strType;
        try {
            strType = bp.getHeader("Content-Type", null);
        } catch (MessagingException e) {
            e.printStackTrace();
            return null;
        }
        if (strType == null) {
            return null;
        }

        // Locate "charset=" without regard to case.
        final String key = "charset=";
        int pos = -1;
        for (int i = 0; i + key.length() <= strType.length(); i++) {
            if (strType.regionMatches(true, i, key, 0, key.length())) {
                pos = i;
                break;
            }
        }
        if (pos < 0) {
            return null;
        }

        String strEncoding = strType.substring(pos + key.length());
        // Drop parameters that follow the charset, e.g. "utf-8; format=flowed".
        int end = strEncoding.indexOf(';');
        if (end >= 0) {
            strEncoding = strEncoding.substring(0, end);
        }
        strEncoding = strEncoding.trim();

        if (strEncoding.startsWith("\"") || strEncoding.startsWith("\'")) {
            strEncoding = strEncoding.substring(1);
        }
        if (strEncoding.endsWith("\"") || strEncoding.endsWith("\'")) {
            strEncoding = strEncoding.substring(0, strEncoding.length() - 1);
        }
        strEncoding = strEncoding.trim();
        if (strEncoding.length() == 0) {
            return null;
        }

        if (strEncoding.equalsIgnoreCase("gb2312")) {
            strEncoding = "gbk";
        }
        return strEncoding;
    }

    /**
     * Closes a resource, ignoring {@code null} and any I/O error raised by the close.
     *
     * @param resource resource to close, may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException ignored) {
            // Best-effort cleanup: a failed close must not mask the primary outcome.
        }
    }
}  
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
要将MHT文件转换为HTML文件,可以使用以下代码:

```java
import java.io.*;

public class Mht2Html {
    public static void main(String[] args) throws Exception {
        String mhtFile = "path/to/mht/file.mht";
        String htmlFile = "path/to/html/file.html";
        String line;
        BufferedReader reader = new BufferedReader(new FileReader(mhtFile));
        BufferedWriter writer = new BufferedWriter(new FileWriter(htmlFile));
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("Content-Type:")) {
                String contentType = line.substring("Content-Type:".length()).trim();
                if (contentType.equalsIgnoreCase("text/html")) {
                    writer.write("<html>\n");
                    writer.write("<head>\n");
                    writer.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n");
                    writer.write("</head>\n");
                    writer.write("<body>\n");
                }
            } else if (line.startsWith("Content-Transfer-Encoding:")) {
                String encoding = line.substring("Content-Transfer-Encoding:".length()).trim();
                if (encoding.equalsIgnoreCase("base64")) {
                    reader.readLine(); // skip empty line
                    String base64 = "";
                    while (!(line = reader.readLine()).equals("==")) {
                        base64 += line;
                    }
                    byte[] bytes = Base64.getDecoder().decode(base64);
                    String text = new String(bytes, "UTF-8");
                    writer.write(text);
                }
            }
            if (line.equals("") || line.equals("--")) {
                writer.write("</body>\n");
                writer.write("</html>\n");
            }
        }
        reader.close();
        writer.close();
    }
}
```

这个代码会解析MHT文件,将其中的HTML部分提取出来,并转换为HTML文件。你需要将代码中的`path/to/mht/file.mht`和`path/to/html/file.html`替换为你自己的文件路径。需要注意的是,这个代码中使用了`java.util.Base64`类,如果你的Java版本低于8,需要使用其他的Base64库替换。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值