Java解析qq邮箱的html文件

这周接到一个很坑的需求:平时解析的邮件都是标准 eml 格式,这次第一次需要解析 html 格式的 QQ 邮箱邮件。网上没有找到现成的解决办法,只好自己写了一个按标签匹配内容的解析类,代码如下:
package com.email_monitor.until;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.text.ParseException;
import java.util.*;

public class PraseMimeMessage {
/**
* 读取文件内容
*
* @param fileName String 如 c:\1.txt 绝对路径
* @return boolean
*/
public static String readFile(String fileName, String charset) {
int index = charset.indexOf(“charset=”);
String fileContent = “”;

    try {
        File f = new File(fileName);
        if (f.isFile() && f.exists()) {
            InputStreamReader read = new InputStreamReader(new FileInputStream(f), charset.substring(index + 8).replaceAll(" ", ""));
            BufferedReader reader = new BufferedReader(read);
            String line;
            while ((line = reader.readLine()) != null) {
                fileContent += line;
            }
            read.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return fileContent;
}


/**
 * @param html 解析qq邮件html
 * @return
 */
public static Map<Object, Object> readQQEmailHtml(String html) {
    Map<Object, Object> map = new HashMap<>();
    Document doc = Jsoup.parseBodyFragment(html);
    //发件人
    String Efrom = "";
    for (Element element : doc.select("div[class=tcolor qm_left txtflow]").select("b[class=grn]")) {
        Efrom = element.text();
        break;
    }
    Elements elements = doc.select("div[class=clear]").select("div[class=qm_left txtflow graytext]");
    for (Element element : elements) {
        Efrom = Efrom + "<" + element.text() + ">";
        map.put("fjrAddress", Efrom);
        break;
    }
    //收件人,抄送人,是否有附件
    Elements elements1 = doc.select("td[class=settingtable txt_left]").select("div[class=addrtitle nowrap]");
    for (Element element : elements1) {
        if (element.text().contains("收件人")) {
            element = element.nextElementSibling();
            if (map.containsKey("sjrAddressList") && map.containsKey("sjrAddressList")) {
                map.put("sjrAddressList", map.get("sjrAddressList") + element.text());
            } else {
                map.put("sjrAddressList", element.text());
            }
        } else if (element.text().contains("抄 送")) {
            element = element.nextElementSibling();
            if (map.containsKey("ccMailAddr") && map.containsKey("ccMailAddr")) {
                map.put("ccMailAddr", map.get("ccMailAddr") + element.text());
            } else {
                map.put("ccMailAddr", element.text());
            }
        } else if (element.text().contains("附 件")) {
            map.put("existFile", true);
        }
    }
    //发送时间
    Elements elements2 = doc.select("td[class=settingtable txt_left]").select("span[class=addrtitle]");
    try {
        for (Element element : elements2) {
            if (element.text().contains("时 间")) {
                element = element.nextElementSibling();
                map.put("sendTime",PraseMimeMessage.switchDate(element.text()));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    //正文
    Elements elements4 = doc.select("div[id=contentDiv]");
    StringBuilder content = new StringBuilder();
    for (Element element : elements4) {
        content.append(element.text() + "  ");
        //解析正文中的图片
        Elements element11 = element.getElementsByTag("img");
        for(Element element2 : element11) {
            String imgSrc=element2.attr("src"); //获取src属性的值
            content.append(imgSrc + "  ");
        }
    }
    map.put("content",content.toString());

    Elements elements3 = doc.select("div[class=qm_left]").select("span[class=sub_title]");
    for (Element element : elements3) {
        map.put("subject",element.text());
    }
    return map;
}
public static String switchDate(String date) throws ParseException {
    String date3 = date.substring(0, date.indexOf("(")) + date.substring(date.indexOf(")") + 1);

    if (date3.contains("上午")) {
        //10:26
        StringBuffer str = new StringBuffer();
        for (int i = 0; i < date3.length(); i++) {
            if (date3.charAt(i) != '上' && date3.charAt(i) != '午') {
                str.append(date3.charAt(i));
            }
        }
        return str.toString();
    } else if(date3.contains("下午")){
        Integer time = Integer.parseInt(PraseMimeMessage.subjectStr(date3, "下午").substring(0, PraseMimeMessage.subjectStr(date3, "下午").indexOf(":"))) + 12;
        Integer time1 = Integer.parseInt(PraseMimeMessage.subjectStr(date3, "下午").substring(PraseMimeMessage.subjectStr(date3, "下午").indexOf(":") + 1));
        date.substring(0, date.indexOf("(")).substring(0, date.indexOf("("));
        String datee = date.substring(0, date.indexOf("(")) + " " + time.toString() + ":" + time1.toString();
        return datee;
    } else if(date3.contains("中午")){
        //10:26
        StringBuffer str = new StringBuffer();
        for (int i = 0; i < date3.length(); i++) {
            if (date3.charAt(i) != '中' && date3.charAt(i) != '午') {
                str.append(date3.charAt(i));
            }
        }
        return str.toString();
    }else if(date3.contains("晚上")){
        Integer time = Integer.parseInt(PraseMimeMessage.subjectStr(date3, "晚上").substring(0, PraseMimeMessage.subjectStr(date3, "晚上").indexOf(":"))) + 12;
        Integer time1 = Integer.parseInt(PraseMimeMessage.subjectStr(date3, "晚上").substring(PraseMimeMessage.subjectStr(date3, "晚上").indexOf(":") + 1));
        date.substring(0, date.indexOf("(")).substring(0, date.indexOf("("));
        String datee = date.substring(0, date.indexOf("(")) + " " + time.toString() + ":" + time1.toString();
        return datee;
    }
    return null;
}
public static String subjectStr(String str, String subStr) {
    return str.substring(str.indexOf(subStr) + subStr.length());
}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值