java解析outlook的msg邮件(outlook-message-parser)

如果本地没有安装Outlook,那么如何打开msg附件呢?
方法1:让发邮件的人通过outlook导出html或者eml等通用的格式给你
方法2:利用在线转换工具转换为eml(点击这里),这种方式比较方便,推荐
方法3:使用转换软件转换为eml等格式,比如SysTools MSG to EML Converter
方法4:利用java手动解析为html,利用的是outlook-message-parser这个依赖。
在底层,它使用Apache POI - POIFS库来解析使用OLE 2复合文档格式的消息文件。因此,它只是一个涵盖.msg文件细节的便利库。该实现基于fileformat.info提供的信息。
开源地址:
https://github.com/bbottema/outlook-message-parser
码云的镜像:
https://gitee.com/mirrors_bbottema/outlook-message-parser/tree/master

本次介绍的就是第4种方法;

依赖:

        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.10</version>
        </dependency>

        <dependency>
            <groupId>org.simplejavamail</groupId>
            <artifactId>outlook-message-parser</artifactId>
            <version>1.7.7</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.8.3</version>
        </dependency>

代码:

/**
 * Mutable value object describing a single file: its name, size, location on disk,
 * a description and its suffix.
 *
 * <p>All properties start out unset ({@code null}, or {@code 0} for the length) and are
 * populated through the setters.
 */
public class FileVo {

    /** Name of the file; also what {@link #toString()} returns. */
    private String fileName;

    /** Suffix of the file name. */
    private String suffix;

    /** Description text; within this file {@code MsgUtil} stores the attachment content id here. */
    private String desc;

    /** Location of the file. */
    private String filePath;

    /** Length of the file. */
    private long fileLength;

    /** @return the file name, or {@code null} if it has not been set */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the stored file length, {@code 0} if it has not been set */
    public long getFileLength() {
        return this.fileLength;
    }

    /** @param fileLength the file length to store */
    public void setFileLength(long fileLength) {
        this.fileLength = fileLength;
    }

    /** @return the file path, or {@code null} if it has not been set */
    public String getFilePath() {
        return this.filePath;
    }

    /** @param filePath the file path to store */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    /** @return the description, or {@code null} if it has not been set */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the suffix, or {@code null} if it has not been set */
    public String getSuffix() {
        return this.suffix;
    }

    /** @param suffix the suffix to store */
    public void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    /**
     * Renders this object as its bare file name.
     *
     * @return the file name exactly as stored (may be {@code null})
     */
    @Override
    public String toString() {
        return this.fileName;
    }
}


/**
 * Mutable value object holding the data needed to preview one e-mail: header fields,
 * body content, the name of the source file and the list of attachments.
 *
 * <p>All properties start out {@code null} and are populated through the setters.
 */
public class EmailPreviewVo {

    /** Identifier of the e-mail. */
    private Long id;

    /** Sender. */
    private String from;

    /** Carbon-copy recipients. */
    private String cc;

    /** Primary recipients. */
    private String to;

    /** Subject line. */
    private String subject;

    /** Sent date, already formatted as text. */
    private String sentDate;

    /** Body content. */
    private String content;

    /** Name of the file the e-mail was read from. */
    private String fileName;

    /** Attachments of the e-mail. */
    private List<FileVo> attachments;

    /** @return the identifier, or {@code null} if it has not been set */
    public Long getId() {
        return id;
    }

    /** @param id the identifier to store */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the sender, or {@code null} if it has not been set */
    public String getFrom() {
        return from;
    }

    /** @param from the sender to store */
    public void setFrom(String from) {
        this.from = from;
    }

    /** @return the carbon-copy recipients, or {@code null} if they have not been set */
    public String getCc() {
        return cc;
    }

    /** @param cc the carbon-copy recipients to store */
    public void setCc(String cc) {
        this.cc = cc;
    }

    /** @return the primary recipients, or {@code null} if they have not been set */
    public String getTo() {
        return to;
    }

    /** @param to the primary recipients to store */
    public void setTo(String to) {
        this.to = to;
    }

    /** @return the subject, or {@code null} if it has not been set */
    public String getSubject() {
        return subject;
    }

    /** @param subject the subject to store */
    public void setSubject(String subject) {
        this.subject = subject;
    }

    /** @return the formatted sent date, or {@code null} if it has not been set */
    public String getSentDate() {
        return sentDate;
    }

    /** @param sentDate the formatted sent date to store */
    public void setSentDate(String sentDate) {
        this.sentDate = sentDate;
    }

    /** @return the body content, or {@code null} if it has not been set */
    public String getContent() {
        return content;
    }

    /** @param content the body content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the attachment list exactly as stored (may be {@code null}) */
    public List<FileVo> getAttachments() {
        return attachments;
    }

    /** @param attachments the attachment list to store; the reference is kept, not copied */
    public void setAttachments(List<FileVo> attachments) {
        this.attachments = attachments;
    }

    /** @return the source file name, or {@code null} if it has not been set */
    public String getFileName() {
        return fileName;
    }

    /** @param fileName the source file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }
}

package com.xxx.tpi.dmp.util;

import com.cntaiping.tpi.dmp.bean.dto.EmailPreviewVo;
import com.cntaiping.tpi.dmp.bean.dto.FileVo;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.simplejavamail.outlookmessageparser.OutlookMessageParser;
import org.simplejavamail.outlookmessageparser.model.OutlookAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookFileAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookMessage;
import org.simplejavamail.outlookmessageparser.model.OutlookMsgAttachment;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Converts Outlook {@code .msg} files into a simple HTML preview.
 *
 * <p>A message is parsed with outlook-message-parser, its file attachments are written to a
 * {@code temp} directory below the working directory, inline {@code cid:} images are embedded
 * as base64 data URIs, and the result can be written out as an HTML file.
 *
 * Created by yusy02 on 2020/10/10 13:29
 */
public class MsgUtil {

    private static final Logger LOG = Logger.getLogger(MsgUtil.class.getName());

    /** Name of the directory (below {@code user.dir}) that receives all generated files. */
    private static final String TEMP_DIR_NAME = "temp";

    /** Scheme prefix of an {@code <img src>} that refers to an inline attachment. */
    private static final String CID_PREFIX = "cid:";

    /** Characters that must not appear in a file name taken from an e-mail. */
    private static final Pattern UNSAFE_FILE_NAME_CHARS = Pattern.compile("[:*?\"<>|\\p{Cntrl}]");

    /** Sample message used by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_SAMPLE_PATH =
            "e:\\用户配置文件勿删\\yusy02\\Desktop\\GIS见费出单Email Template\\Email Template\\forward with attachments and embedded images.msg";

    /**
     * Parses a {@code .msg} file that is available as a classpath resource.
     *
     * @param msgPath resource name, resolved through the class loader of {@link OutlookMessageParser}
     * @return the parsed message
     * @throws FileNotFoundException if no such resource exists
     * @throws IOException           if the resource cannot be read or parsed
     */
    private static OutlookMessage parseMsgFile(String msgPath)
            throws IOException {
        try (InputStream resource = OutlookMessageParser.class.getClassLoader().getResourceAsStream(msgPath)) {
            if (resource == null) {
                throw new FileNotFoundException("MSG resource not found on classpath: " + msgPath);
            }
            return new OutlookMessageParser().parseMsg(resource);
        }
    }

    /**
     * Returns the lower-cased suffix of a file name, including the leading dot.
     *
     * @param fileName file name to inspect; may be {@code null}
     * @return the text from the last {@code '.'} onwards in lower case, or {@code ""} when the
     *         name is {@code null} or contains no dot
     */
    public static String getSuffix(String fileName) {
        if (fileName == null) {
            return "";
        }
        int lastDot = fileName.lastIndexOf('.');
        if (lastDot < 0) {
            return "";
        }
        // Locale.ROOT keeps the result stable regardless of the default locale (e.g. Turkish 'I').
        return fileName.substring(lastDot).toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the {@code temp} directory below the current working directory, creating it if needed.
     *
     * @return the directory; if it could not be created, the failure surfaces as an
     *         {@link IOException} from the first attempt to create a file inside it
     */
    public static File getTmpDir() {
        File dir = new File(System.getProperty("user.dir") + File.separator + TEMP_DIR_NAME);
        if (!dir.exists() && !dir.mkdirs()) {
            LOG.log(Level.WARNING, "Could not create temp directory {0}", dir.getAbsolutePath());
        }
        return dir;
    }

    /**
     * Parses one {@code .msg} file and writes its HTML preview into the temp directory.
     *
     * @param args optional; {@code args[0]} is the path of the message to convert, otherwise a
     *             built-in sample path is used
     * @throws IOException if the message cannot be read or the preview cannot be written
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : DEFAULT_SAMPLE_PATH;
        EmailPreviewVo emailPreviewVo = msgParseToPreview(new File(path));
        writeHtmlFile(emailPreviewVo);
    }

    /**
     * Creates (if absent) a file with the given name inside the temp directory.
     *
     * @param fileName name of the file, relative to the temp directory
     * @return the file, which exists on return
     * @throws IOException if the name resolves to a location outside the temp directory or the
     *                     file cannot be created
     */
    public static File createTmpFileWithName(String fileName) throws IOException {
        File dir = getTmpDir();
        File file = new File(dir, fileName);
        // Names may originate from e-mail content; refuse anything like "../../x" that escapes the directory.
        if (!file.getCanonicalFile().toPath().startsWith(dir.getCanonicalFile().toPath())) {
            throw new IOException("File name escapes the temp directory: " + fileName);
        }
        // createNewFile is atomic and simply returns false when the file is already there.
        file.createNewFile();
        return file;
    }

    /**
     * Writes the preview as an HTML file into the temp directory.
     *
     * <p>The file is named after {@link EmailPreviewVo#getFileName()} with its extension replaced
     * by {@code .html}. Header values are HTML-escaped; the body content is written as-is
     * because it already is HTML (it is NOT sanitized, so only open previews of trusted mails).
     *
     * @param email the preview to write
     * @return absolute path of the generated HTML file
     * @throws IOException if the file cannot be created or written
     */
    public static String writeHtmlFile(EmailPreviewVo email) throws IOException {
        String name = email.getFileName();
        if (StringUtils.isBlank(name)) {
            name = UUID.randomUUID().toString().replace("-", "");
        }
        // Replace only the trailing extension; a name without one just gets ".html" appended.
        int lastDot = name.lastIndexOf('.');
        String htmlName = (lastDot < 0 ? name : name.substring(0, lastDot)) + ".html";
        File file = createTmpFileWithName(htmlName);

        List<FileVo> attachments = email.getAttachments();
        String cont = "<meta charset=\"UTF-8\">" +
                "发送时间:" + escapeHtml(email.getSentDate()) + "<br/>" +
                "发件人:" + escapeHtml(email.getFrom()) + "<br/>" +
                "抄送:" + escapeHtml(email.getCc()) + "<br/>" +
                "收件人:" + escapeHtml(email.getTo()) + "<br/>" +
                "主题:" + escapeHtml(email.getSubject()) + "<br/>" +
                "附件:" + escapeHtml(attachments == null ? "" : attachments.toString()) + "<br/>" +
                StringUtils.defaultString(email.getContent());
        // Explicit UTF-8 matches the charset declared in the generated markup.
        Files.write(file.toPath(), cont.getBytes(StandardCharsets.UTF_8), StandardOpenOption.TRUNCATE_EXISTING);
        return file.getAbsolutePath();
    }

    /**
     * Parses an MSG mail so that it can be displayed as HTML.
     *
     * <p>File attachments are extracted into the temp directory and listed in the result
     * (including images that are additionally inlined into the body). Attachments that are
     * themselves messages ({@link OutlookMsgAttachment}) are skipped.
     *
     * @param file the {@code .msg} file to parse
     * @return the preview data; {@code content} is never {@code null}
     * @throws IOException if the message cannot be parsed or an attachment cannot be written
     */
    public static EmailPreviewVo msgParseToPreview(File file) throws IOException {
        EmailPreviewVo vo = new EmailPreviewVo();
        vo.setFileName(file.getName());
        OutlookMessage msg = new OutlookMessageParser().parseMsg(file.getAbsolutePath());

        List<FileVo> attachList = saveAttachments(msg);
        vo.setAttachments(attachList);
        vo.setContent(renderBody(msg, attachList));

        // Header information.
        if (msg.getClientSubmitTime() != null) {
            // Same default date-time format Date.toLocaleString() produced, without the deprecated call.
            vo.setSentDate(DateFormat.getDateTimeInstance().format(msg.getClientSubmitTime()));
        }
        vo.setFrom(msg.getFromEmail());
        vo.setTo(StringUtils.trim(msg.getDisplayTo()));
        vo.setCc(StringUtils.trim(msg.getDisplayCc()));
        vo.setSubject(msg.getSubject());
        return vo;
    }

    /** Writes every file attachment of the message to the temp directory and describes it as a {@link FileVo}. */
    private static List<FileVo> saveAttachments(OutlookMessage msg) throws IOException {
        List<FileVo> saved = new ArrayList<>();
        for (OutlookAttachment candidate : msg.getOutlookAttachments()) {
            // Only OutlookFileAttachment carries raw bytes; nested .msg attachments are ignored for now.
            if (!(candidate instanceof OutlookFileAttachment)) {
                continue;
            }
            OutlookFileAttachment attachment = (OutlookFileAttachment) candidate;

            // Prefer the long name; the plain file name is the truncated short (8.3) variant.
            String attachName = attachment.getLongFilename();
            if (StringUtils.isBlank(attachName)) {
                attachName = attachment.getFilename();
            }
            // Some attachments carry no name at all.
            if (StringUtils.isBlank(attachName)) {
                attachName = UUID.randomUUID().toString().replace("-", "");
            }

            File target = createTmpFileWithName(sanitizeFileName(attachName));
            byte[] data = attachment.getData();
            try (InputStream in = new ByteArrayInputStream(data == null ? new byte[0] : data)) {
                Files.copy(in, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
            }

            FileVo fileVo = new FileVo();
            fileVo.setFileName(attachName);
            // Empty when the name has no extension.
            fileVo.setSuffix(getSuffix(attachName));
            // The content id appears to be set only for inline images.
            fileVo.setDesc(attachment.getContentId());
            fileVo.setFileLength(target.length());
            fileVo.setFilePath(target.getAbsolutePath());
            saved.add(fileVo);
        }
        return saved;
    }

    /** Reduces a name taken from an e-mail to a single safe path component. */
    private static String sanitizeFileName(String rawName) {
        int lastSeparator = Math.max(rawName.lastIndexOf('/'), rawName.lastIndexOf('\\'));
        String leaf = rawName.substring(lastSeparator + 1);
        String safe = UNSAFE_FILE_NAME_CHARS.matcher(leaf).replaceAll("_").trim();
        if (safe.isEmpty() || ".".equals(safe) || "..".equals(safe)) {
            return UUID.randomUUID().toString().replace("-", "");
        }
        return safe;
    }

    /**
     * Builds the HTML shown as mail content: the inner HTML of the body element with inline
     * images embedded. Only the body is kept so that styles from the document head are dropped.
     */
    private static String renderBody(OutlookMessage msg, List<FileVo> attachments) {
        // The converted HTML exists only for RTF bodies; fall back to the native HTML body, then to plain text.
        String html = msg.getConvertedBodyHTML();
        if (StringUtils.isBlank(html)) {
            html = msg.getBodyHTML();
        }
        if (StringUtils.isBlank(html)) {
            String text = msg.getBodyText();
            return StringUtils.isEmpty(text) ? "" : "<pre>" + escapeHtml(text) + "</pre>";
        }

        Document doc = Jsoup.parse(html);
        inlineCidImages(doc, attachments);
        Element body = doc.body();
        return body == null ? "" : body.html();
    }

    /** Replaces {@code cid:} image sources with base64 data URIs built from the saved attachments. */
    private static void inlineCidImages(Document doc, List<FileVo> attachments) {
        Elements images = doc.select("img");
        for (Element image : images) {
            String src = image.attr("src");
            if (!StringUtils.startsWithIgnoreCase(src, CID_PREFIX)) {
                continue;
            }
            FileVo match = findInlineAttachment(src.substring(CID_PREFIX.length()), attachments);
            if (match == null) {
                continue;
            }
            byte[] bytes;
            try {
                bytes = Files.readAllBytes(new File(match.getFilePath()).toPath());
            } catch (IOException e) {
                // Best effort: the preview is still usable with the original cid reference left in place.
                LOG.log(Level.WARNING, "Could not inline image " + match.getFilePath(), e);
                continue;
            }
            if (bytes.length == 0) {
                continue;
            }
            String mimeType = mimeTypeForSuffix(getSuffix(match.getFileName()));
            image.attr("src", "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(bytes));
        }
    }

    /** Finds the attachment a content id refers to: exact content-id match first, then by file name. */
    private static FileVo findInlineAttachment(String contentId, List<FileVo> attachments) {
        for (FileVo candidate : attachments) {
            if (contentId.equals(StringUtils.strip(candidate.getDesc(), "<>"))) {
                return candidate;
            }
        }
        for (FileVo candidate : attachments) {
            String name = candidate.getFileName();
            if (StringUtils.isNotEmpty(name) && contentId.contains(name)) {
                return candidate;
            }
        }
        return null;
    }

    /** Maps a lower-case file suffix to an image MIME type; unknown suffixes default to PNG. */
    private static String mimeTypeForSuffix(String suffix) {
        switch (suffix) {
            case ".jpg":
            case ".jpeg":
                return "image/jpeg";
            case ".gif":
                return "image/gif";
            case ".bmp":
                return "image/bmp";
            case ".svg":
                return "image/svg+xml";
            case ".webp":
                return "image/webp";
            default:
                return "image/png";
        }
    }

    /** Escapes the characters that are significant in HTML text and attribute values; {@code null} becomes {@code ""}. */
    private static String escapeHtml(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

}




目前测试了中文,含图片的,没有问题,其他的暂时还没有测试…
这个主要是参考了如下的文章,在这个基础上,把代码稍微完善了一下
https://blog.csdn.net/qq_32793985/article/details/105767705

然后测试的邮件msg例子,可以点击这里获取

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值