@TOC
读取由 Outlook 导出的 .msg 邮件文件,解析邮件内容并提取有效数据
引入必要依赖
<dependency>
<groupId>org.simplejavamail</groupId>
<artifactId>outlook-message-parser</artifactId>
<version>1.7.7</version>
</dependency>
实现代码
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.apache.tomcat.util.http.fileupload.FileUtils;
import org.hibernate.validator.constraints.Email;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.reflections.util.Utils;
import org.simplejavamail.outlookmessageparser.OutlookMessageParser;
import org.simplejavamail.outlookmessageparser.model.OutlookFileAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookMessage;
import org.simplejavamail.outlookmessageparser.model.OutlookMsgAttachment;
import org.simplejavamail.outlookmessageparser.model.OutlookRecipient;
import org.springframework.util.StringUtils;
import cn.spj.spr_parent.TestRead.EmailPreviewVo;
public class TestRead {
public static void main(String[] args) throws IOException {
msgParseToPreview(new File("C:\\Users\\os-lusd\\Desktop\\邮件\\内部研究,严禁外传.msg"));
}
/**
* 解析MSG邮件,可以将邮件以HTML展示。
* @param file MSG格式邮件的全路径
* @return vo
* @throws IOException IO异常
*/
public static void msgParseToPreview(File file) throws IOException {
OutlookMessageParser msgp = new OutlookMessageParser();
OutlookMessage msg = msgp.parseMsg(file.getAbsolutePath());
List<FileVo> attachList = new ArrayList<>();
for(int i=0; i < msg.getOutlookAttachments().size(); i++) {
/** TODO 注意:OutlookAttachment 是个接口有两个实现类,
* 1)、OutlookFileAttachment 存在真实文件字节数据集
* 2)、OutlookMsgAttachment 为.msg格式文件再次被递归解析
* 目前没有好办法去获取到邮件附件为.msg格式真实文件,
*/
// .msg格式附件暂时忽略
if (msg.getOutlookAttachments().get(i) instanceof OutlookMsgAttachment) {
continue;
}
}
// 内容 要处理下不然他会带有微软雅黑的样式,与原邮件样式不符
/**
*org.jsoup.nodes.Document
*org.jsoup.Jsoup
*/
Document doc = Jsoup.parse(msg.getConvertedBodyHTML());
List<FileVo> newAttachList = new ArrayList<>();
newAttachList.addAll(attachList);
// 对邮件中图片进行处理
String context = null;
// 内容
Elements bodyList = doc.select("body");
if (bodyList.size() > 0) {
Element bodyEle = bodyList.first();
if (bodyEle.html().length() > 0) {
context=bodyEle.html();
}
}
// 消息头信息
System.out.println( "EmailPreviewVo [ from=" + msg.getFromEmail() +
", cc=" + getMailUser(msg, msg.getDisplayCc().trim()) + ","
+ " to=" + getMailUser(msg, msg.getDisplayTo().trim()) + ","
+ " subject=" + msg.getSubject() + ", sentDate="
+ msg.getClientSubmitTime()
+ ", content=" + context.substring(0,100) + "]");
}
/**
* MSG 以名称获取真实收发抄邮件地址
* @param msg OutlookMessage
* @param parm 人员成名
* @return 展示名称
*/
private static String getMailUser(OutlookMessage msg, String parm) {
List<String> parmList = null;
OutlookRecipient recipient = null;
StringBuffer sb = new StringBuffer();
if(StringUtils.hasText(parm)){
if (parm.contains(";")) {
parmList = Arrays.asList(parm.split(";")).
stream().map(s -> s.trim()).collect(Collectors.toList());
for (int i = 0; i < parmList.size(); i++) {
String value = parmList.get(i);
if (msg != null && msg.getRecipients().size() > 0) {
recipient = msg.getRecipients().stream().filter(e -> e.getName().equals(value)).collect(Collectors.toList()).get(0);
sb.append(recipient.getName());
sb.append(" <" + recipient.getAddress() + ">");
if (i != (parmList.size() - 1)) {
sb.append(",");
}
}
}
} else {
recipient = msg.getRecipients().stream().filter(e -> e.getName().equals(parm)).collect(Collectors.toList()).get(0);
sb.append(recipient.getName());
sb.append(" <" + recipient.getAddress() + ">");
}
}
return sb.toString();
}
class EmailPreviewVo {
public EmailPreviewVo() {
super();
}
private Long id;
private String from;
private String cc;
private String to;
private String subject;
private String sentDate;
private String content;
private List<String> attachments;
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getFrom() {
return from;
}
public void setFrom(String from) {
this.from = from;
}
public String getCc() {
return cc;
}
public void setCc(String cc) {
this.cc = cc;
}
public String getTo() {
return to;
}
public void setTo(String to) {
this.to = to;
}
public String getSubject() {
return subject;
}
public void setSubject(String subject) {
this.subject = subject;
}
public String getSentDate() {
return sentDate;
}
public void setSentDate(String sentDate) {
this.sentDate = sentDate;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public List<String> getAttachments() {
return attachments;
}
public void setAttachments(List<String> attachments) {
this.attachments = attachments;
}
@Override
public String toString() {
return "EmailPreviewVo [id=" + id + ", from=" + from + ", cc=" + cc + ", to=" + to + ", subject=" + subject + ", sentDate=" + sentDate
+ ", content=" + content + ", attachments=" + attachments + "]";
}
}
public class FileVo {
}
}