outlook导出邮件(.msg)读取和解析

@TOC

读取由 Outlook 导出的 .msg 邮件文件,解析邮件内容并提取其中的有效数据。

引入必要依赖

    <dependency>
        <groupId>org.simplejavamail</groupId>
        <artifactId>outlook-message-parser</artifactId>
        <version>1.7.7</version>
    </dependency>

实现代码

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import org.apache.tomcat.util.http.fileupload.FileUtils;
import org.hibernate.validator.constraints.Email;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.reflections.util.Utils;
import org.simplejavamail.outlookmessageparser.OutlookMessageParser;
import org.simplejavamail.outlookmessageparser.model.OutlookFileAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookMessage;
import org.simplejavamail.outlookmessageparser.model.OutlookMsgAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookRecipient;
import org.springframework.util.StringUtils;

import cn.spj.spr_parent.TestRead.EmailPreviewVo;


public class TestRead {

	/**
	 * Command-line entry point: parses one Outlook {@code .msg} file and prints its summary.
	 *
	 * @param args optional; {@code args[0]} is the path of the {@code .msg} file to parse.
	 *             When no argument is given, the original sample path is used.
	 * @throws IOException if the file cannot be read or parsed
	 */
	public static void main(String[] args) throws IOException {
		// Keep the original sample location as the default so existing usage still works.
		String path = (args != null && args.length > 0)
				? args[0]
				: "C:\\Users\\os-lusd\\Desktop\\邮件\\内部研究,严禁外传.msg";
		msgParseToPreview(new File(path));
	}
	
	
    /**
     * Parses an Outlook {@code .msg} file and prints a one-line summary of it to standard
     * output: sender, CC, recipients, subject, client submit time and the first characters
     * of the HTML body.
     *
     * <p>Attachments are not processed here. {@code OutlookAttachment} is an interface with
     * two implementations: {@code OutlookFileAttachment}, which carries the real file bytes,
     * and {@code OutlookMsgAttachment}, a nested {@code .msg} that is parsed recursively and
     * for which there is currently no good way to recover the original file.
     *
     * @param file the {@code .msg} file to parse
     * @throws IOException if the file cannot be read or parsed
     */
    public static  void msgParseToPreview(File file) throws IOException {
        final int previewLength = 100;

        OutlookMessageParser msgp = new OutlookMessageParser();
        OutlookMessage msg = msgp.parseMsg(file.getAbsolutePath());

        // Use only the inner HTML of <body>; otherwise the converted document carries a
        // Microsoft YaHei style that does not match the original mail.
        String context = "";
        // NOTE(review): the converted HTML is presumably null when the message has no
        // convertible body — guard so Jsoup.parse is never handed null.
        String html = msg.getConvertedBodyHTML();
        if (html != null) {
            Document doc = Jsoup.parse(html);
            Elements bodyList = doc.select("body");
            if (bodyList.size() > 0) {
                Element bodyEle = bodyList.first();
                if (bodyEle != null) {
                    context = bodyEle.html();
                }
            }
        }

        // Header information. Display fields are null-guarded before trimming, and the
        // body preview is clamped so short or empty bodies no longer throw.
        System.out.println("EmailPreviewVo [ from=" + msg.getFromEmail()
                + ", cc=" + getMailUser(msg, trimToEmpty(msg.getDisplayCc()))
                + ", to=" + getMailUser(msg, trimToEmpty(msg.getDisplayTo()))
                + ", subject=" + msg.getSubject()
                + ", sentDate=" + msg.getClientSubmitTime()
                + ", content=" + truncate(context, previewLength) + "]");
    }

    /** Returns {@code value} trimmed, or an empty string when {@code value} is null. */
    private static String trimToEmpty(String value) {
        return value == null ? "" : value.trim();
    }

    /** Returns at most the first {@code maxLength} characters of {@code text}. */
    private static String truncate(String text, int maxLength) {
        return text.length() <= maxLength ? text : text.substring(0, maxLength);
    }
    
    
    /**
     * Resolves the display names of a To/CC header to {@code Name <address>} entries using
     * the recipients stored in the message.
     *
     * <p>{@code parm} may hold one name or several names separated by {@code ';'}. Each name
     * is trimmed and empty entries are skipped. A name that matches no recipient is emitted
     * as the bare display name instead of failing. Entries are joined with {@code ','}.
     *
     * @param msg  the parsed message whose recipients are searched; may be null
     * @param parm the display-name string (for example the message's display To or CC value)
     * @return the resolved entries joined by commas, or an empty string when {@code parm}
     *         has no text, {@code msg} is null, or the message has no recipients
     */
    private static String getMailUser(OutlookMessage msg, String parm) {
        if (!StringUtils.hasText(parm) || msg == null || msg.getRecipients().isEmpty()) {
            return "";
        }
        List<OutlookRecipient> recipients = msg.getRecipients();
        return Arrays.stream(parm.split(";"))
                .map(String::trim)
                .filter(name -> !name.isEmpty())
                .map(name -> formatRecipient(recipients, name))
                .collect(Collectors.joining(","));
    }

    /** Formats the first recipient named {@code name} as {@code Name <address>}, or returns {@code name} if none matches. */
    private static String formatRecipient(List<OutlookRecipient> recipients, String name) {
        for (OutlookRecipient candidate : recipients) {
            // Compare from the known non-null side: a recipient may have no name.
            if (name.equals(candidate.getName())) {
                return candidate.getName() + " <" + candidate.getAddress() + ">";
            }
        }
        return name;
    }

    

     /**
      * Mutable value object holding the fields of a mail preview: id, sender, CC,
      * recipients, subject, sent date, body content and attachment names.
      */
     class EmailPreviewVo {

        private Long id;
        private String from;
        private String cc;
        private String to;
        private String subject;
        private String sentDate;
        private String content;
        private List<String> attachments;

        /** Creates an instance with every field unset (null). */
        public EmailPreviewVo() {
        }

        public Long getId() { return id; }

        public void setId(Long id) { this.id = id; }

        public String getFrom() { return from; }

        public void setFrom(String from) { this.from = from; }

        public String getCc() { return cc; }

        public void setCc(String cc) { this.cc = cc; }

        public String getTo() { return to; }

        public void setTo(String to) { this.to = to; }

        public String getSubject() { return subject; }

        public void setSubject(String subject) { this.subject = subject; }

        public String getSentDate() { return sentDate; }

        public void setSentDate(String sentDate) { this.sentDate = sentDate; }

        public String getContent() { return content; }

        public void setContent(String content) { this.content = content; }

        public List<String> getAttachments() { return attachments; }

        public void setAttachments(List<String> attachments) { this.attachments = attachments; }

        /** Renders all fields as {@code EmailPreviewVo [name=value, ...]}; unset fields print as {@code null}. */
        @Override
        public String toString() {
            String fields = String.join(", ",
                    "id=" + id,
                    "from=" + from,
                    "cc=" + cc,
                    "to=" + to,
                    "subject=" + subject,
                    "sentDate=" + sentDate,
                    "content=" + content,
                    "attachments=" + attachments);
            return "EmailPreviewVo [" + fields + "]";
        }
    }
    /**
     * Placeholder type for a mail attachment; it currently declares no fields or methods.
     */
    public class FileVo {
    	
    	
    }


}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值