Java登录QQ邮箱,整理邮件中的58同城简历
实习期间给公司做的第一个小工具,只需下载jxl.jar和mail.jar这两个第三方类库,就可以着手敲代码了:
- 1、此次是使用POP3协议登录的邮箱,使用了手机获取的账号配置码
- 2、遍历收件箱的所有邮件,判断邮件类型,得到邮件的内容
- 3、解析每一封邮件的内容,parseMessage(Message …messages)
- 4、找到符合主题的邮件,如包含(58.com)……,getSubject(MimeMessage msg)
- 5、针对特定邮件解析出相应内容,并创建excel表,一条条的存入其中
- 6、所有简历的部分信息先放在Excel表中集合,关闭表
登录邮箱
网上普遍可寻找到遵循POP3协议及IMAP协议的登录方式,下面我用的是POP3协议,缺陷是不能给邮件做标记,只能读,不可改变邮件的状态及属性,后面会有IMAP协议的登录的方式的博客进行补充…….
另外,敲代码前先到邮箱的“设置”→“账户”里开启POP3服务,并用手机发送短信获取登录码。这样很方便,可以避免因修改了密码或邮箱设置了独立密码而带来的层层麻烦。我把登录过程写成了一个方法,代码如下:
/**
 * Connects to the QQ mailbox over POP3, prints the INBOX message count and passes
 * every message in the INBOX to {@code parseMessage}.
 *
 * <p>Failures while fetching or parsing the messages, and failures while closing the
 * folder or the store, are printed and swallowed (best effort). Failures while
 * connecting or opening the folder propagate to the caller. The folder and the store
 * are always closed, whichever path is taken.
 *
 * @throws Exception if the store cannot be obtained or connected, or the INBOX cannot be opened
 */
public static void receive() throws Exception {
    // Session properties describing the POP3 server.
    Properties props = new Properties();
    props.setProperty("mail.pop3.port", "110"); // port
    props.setProperty("mail.pop3.host", "pop.qq.com"); // POP3 server
    // SSL connection parameters.
    props.setProperty("mail.pop3.socketFactory.class", "javax.net.ssl.SSLSocketFactory");
    // NOTE(review): with fallback enabled a failed SSL socket may be replaced by a plain
    // socket - confirm this is acceptable before using the tool outside a trusted network.
    props.setProperty("mail.pop3.socketFactory.fallback", "true");
    props.setProperty("mail.pop3.socketFactory.port", "995");

    Session session = Session.getInstance(props);
    Store store = session.getStore("pop3");
    Folder folder = null;
    try {
        // First the mailbox address, then the login code obtained for this account.
        // NOTE(review): credentials are hard-coded; consider reading them from configuration.
        store.connect("pop.qq.com","2627178143@qq.com", "......");

        folder = store.getFolder("INBOX");
        // Folder.READ_ONLY  : read-only access
        // Folder.READ_WRITE : read/write access (message state may be modified)
        folder.open(Folder.READ_WRITE);

        // POP3 cannot report message state, so only the total message count is printed.
        System.out.println("邮件总数: " + folder.getMessageCount());

        try {
            // Fetch every message in the INBOX and parse it.
            Message[] messages = folder.getMessages();
            parseMessage(messages);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("receive内部异常");
        }
    } finally {
        // Release the connection even when connecting, opening or parsing failed.
        try {
            if (folder != null && folder.isOpen()) {
                folder.close(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("receive内部异常");
        }
        try {
            store.close();
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("receive内部异常");
        }
    }
}
解析邮件
判断主题里是否有58.com,并判断该邮件是否是当天的,整理当天的邮件信息存入Excel表中去:
/**
 * Scans the given messages for 58.com resume mails inside the accepted time window and
 * writes the fields extracted from each matching mail into one row of a dated Excel file.
 *
 * <p>A mail is processed when its subject contains {@code 58.com} and
 * {@code judgeDate} accepts its sent date. The HTML body is matched against four
 * regular expressions; their captures are written to columns 0-5 of the current row
 * and echoed to standard output as a comma-separated line.
 *
 * <p>I/O, mail and spreadsheet errors raised while processing are printed and swallowed;
 * the workbook is closed in every case.
 *
 * @param messages the messages to scan
 * @throws MessagingException if {@code messages} is {@code null} or empty
 * @throws IOException declared for compatibility; I/O errors are reported on the console
 */
public static void parseMessage(Message ...messages) throws MessagingException, IOException {
    if (messages == null || messages.length < 1) {
        throw new MessagingException("未找到要解析的邮件!");
    }

    // Patterns for 58.com resume mails. According to the original author they extract
    // name, gender, age, phone, e-mail and experience.
    Pattern p1 = Pattern.compile("<h3.*?>([\\s\\S]*)<span.*?>([\\s\\S]*)</span></h3>");
    Pattern p2 = Pattern.compile("<label.*?>([\\s\\S]*)<span.*?><span.*?>([\\s\\S]*)</span></span></label>");
    Pattern p3 = Pattern.compile("<ul.*?>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?<li.*?>([^\n]*)</li>[\\s]*?</ul>");
    Pattern p4 = Pattern.compile("<label.*?>([\\s\\S]*)<span.*?>([\\s\\S]*)</span></label>");

    // 24-hour clock (HH), the same layout getSentDate produces by default.
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss");
    String nowDate = dateFormat.format(new Date());
    System.out.println(nowDate);

    Calendar c = Calendar.getInstance();
    int year = c.get(Calendar.YEAR);
    int month = c.get(Calendar.MONTH)+1; // Calendar months are zero-based
    int date = c.get(Calendar.DATE);

    // NOTE(review): jxl writes the Excel 97-2003 (.xls) format; the ".xlsx" suffix is kept
    // only so existing consumers of this path keep working - confirm and rename if possible.
    String path="C:\\resume\\58Resume"+year+"."+month+"."+date+".xlsx";

    WritableWorkbook wb = null;
    try {
        File target = new File(path);
        File parent = target.getParentFile();
        if (parent != null && !parent.exists()) {
            // Create the output directory so a fresh machine does not fail immediately.
            parent.mkdirs();
        }
        wb = Workbook.createWorkbook(target);
        WritableSheet ws = wb.createSheet("Sheet1", 0);

        int row = 0;
        // Walk every message; the date check below filters out old mails.
        for (int i = 0; i < messages.length; i++) {
            MimeMessage msg = (MimeMessage) messages[i];
            String subject = getSubject(msg);
            // Skip mails without a usable subject.
            if (subject == null || "".equals(subject) || "null".equals(subject)) {
                continue;
            }
            System.out.println("第"+i+"封邮件主题是: " + subject);
            String sent = getSentDate(msg, null);
            System.out.println("------发送时间:" +sent);

            // contains() also accepts subjects that start with "58.com".
            if (!subject.contains("58.com") || !judgeDate(nowDate, sent)) {
                continue;
            }

            StringBuffer content = new StringBuffer(300);
            getMailTextContent(msg, content);

            Matcher m = p1.matcher(content);
            Matcher n = p2.matcher(content);
            Matcher p = p3.matcher(content);
            Matcher q = p4.matcher(content);
            StringBuilder sb = new StringBuilder();
            while (m.find()) {
                sb.append(m.group(1)+","+m.group(2));
                ws.addCell(new Label(0,row,m.group(1)));
                // Fixed character positions inside the second capture (presumably gender
                // and age); clipped so that a short capture cannot abort the whole run.
                ws.addCell(new Label(1,row,safeSubstring(m.group(2), 1, 2)));
                ws.addCell(new Label(2,row,safeSubstring(m.group(2), 3, 5)));
                sb.append(",");
            }
            while (n.find()) {
                sb.append(n.group(2));
                ws.addCell(new Label(3,row,n.group(2)));
                sb.append(",");
            }
            while (p.find()) {
                sb.append(p.group(2));
                ws.addCell(new Label(4,row,p.group(2)));
                sb.append(",");
            }
            while (q.find()) {
                sb.append(q.group(2));
                ws.addCell(new Label(5,row,q.group(2)));
                sb.append(",");
            }
            row++;
            System.out.println(sb);
        }
        wb.write();
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("parseMessage内部1");
    } catch (MessagingException e) {
        e.printStackTrace();
        System.out.println("parseMessage内部2");
    } catch (WriteException e) {
        // Also covers RowsExceededException, which is a WriteException.
        e.printStackTrace();
    } finally {
        // Always release the output file, even when processing failed half-way.
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("parseMessage内部1");
            } catch (WriteException e) {
                e.printStackTrace();
            }
        }
    }
}

/** Returns {@code s.substring(begin, end)} clipped to the string length; empty when out of range. */
private static String safeSubstring(String s, int begin, int end) {
    if (s == null || begin >= s.length()) {
        return "";
    }
    return s.substring(begin, Math.min(end, s.length()));
}
/**
 * Reads the subject of a mail and decodes it with {@code MimeUtility.decodeText}.
 *
 * @param msg the mail to read; may be {@code null}
 * @return the decoded subject, or {@code null} when the message or its subject is
 *         {@code null}, or when reading or decoding fails
 */
public static String getSubject(MimeMessage msg) throws UnsupportedEncodingException, MessagingException {
    if (msg == null) {
        return null;
    }
    String decoded = null;
    try {
        String raw = msg.getSubject();
        if (raw != null) {
            decoded = MimeUtility.decodeText(raw);
        }
    } catch (UnsupportedEncodingException ignored) {
        // Undecodable subject: reported to the caller as "no subject".
    } catch (MessagingException ignored) {
        // Unreadable header: reported to the caller as "no subject".
    }
    return decoded;
}
/**
 * Formats the sent date of a mail.
 *
 * @param msg the mail to read
 * @param pattern a {@code SimpleDateFormat} pattern; when {@code null} or empty,
 *        {@code yyyy.MM.dd_HH:mm:ss} is used
 * @return the formatted sent date, an empty string when the mail has no sent date,
 *         or {@code null} when the date cannot be read from the mail
 * @throws MessagingException declared for compatibility; read failures are reported
 *         on the console and answered with {@code null}
 */
public static String getSentDate(MimeMessage msg, String pattern) throws MessagingException {
    Date sentDate;
    try {
        sentDate = msg.getSentDate();
    } catch (MessagingException e) {
        System.out.println("获取时间内部异常");
        return null;
    }
    if (sentDate == null) {
        return "";
    }
    boolean useDefault = (pattern == null) || "".equals(pattern);
    String effectivePattern = useDefault ? "yyyy.MM.dd_HH:mm:ss" : pattern;
    SimpleDateFormat formatter = new SimpleDateFormat(effectivePattern);
    return formatter.format(sentDate);
}
/**
 * Tells whether a mail timestamp lies between yesterday 12:00:00 and today 11:59:59,
 * both ends included (for example 2017-05-01 12:00:00 ~ 2017-05-02 11:59:59).
 *
 * <p>"Today" is taken from the system clock at the time of the call. The window
 * boundaries are printed to standard output on every call.
 *
 * @param s1 unused; kept so existing callers keep compiling
 * @param resume_time_str the mail timestamp formatted as {@code yyyy.MM.dd_HH:mm:ss}
 * @return {@code true} when the timestamp is inside the window; {@code false} when it is
 *         outside, {@code null}, or not parseable
 */
public static boolean judgeDate(String s1,String resume_time_str){
    // Build both boundaries from one clock reading so they always belong to the same day.
    Calendar windowEnd = Calendar.getInstance();
    windowEnd.set(Calendar.HOUR_OF_DAY, 11);
    windowEnd.set(Calendar.MINUTE, 59);
    windowEnd.set(Calendar.SECOND, 59);
    windowEnd.set(Calendar.MILLISECOND, 0);

    Calendar windowStart = (Calendar) windowEnd.clone();
    // Calendar arithmetic instead of "minus 24 hours" keeps the date right on days
    // that are not exactly 24 hours long.
    windowStart.add(Calendar.DAY_OF_MONTH, -1);
    windowStart.set(Calendar.HOUR_OF_DAY, 12);
    windowStart.set(Calendar.MINUTE, 0);
    windowStart.set(Calendar.SECOND, 0);

    SimpleDateFormat sdf_hms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    System.out.println("格式化后的日期:" + sdf_hms.format(windowStart.getTime()));
    System.out.println("格式化后的日期:" + sdf_hms.format(windowEnd.getTime()));

    if (resume_time_str == null) {
        System.out.println("get date error!!");
        return false;
    }

    SimpleDateFormat resume_sdf = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss");
    // Reject impossible dates such as month 13 instead of silently rolling them over.
    resume_sdf.setLenient(false);
    try {
        long resumeTime = resume_sdf.parse(resume_time_str).getTime();
        // Inclusive on both ends, matching the documented window.
        return windowStart.getTimeInMillis() <= resumeTime
                && resumeTime <= windowEnd.getTimeInMillis();
    } catch (java.text.ParseException e) {
        System.out.println("get date error!!");
        return false;
    }
}
/**
 * Collects the text of a mail part into {@code content}, descending into multipart
 * containers and nested {@code message/rfc822} parts.
 *
 * <p>A {@code text/html} or {@code text/plain} part is appended unless its content type
 * mentions {@code name}, which the original author treats as the mark of a text
 * attachment. Inside a multipart container that has a {@code text/html} child, the
 * {@code text/plain} children are skipped. Text parts whose content is not delivered as
 * a {@code String} are ignored.
 *
 * @param part the mail part to read
 * @param content the buffer the text is appended to
 * @throws MessagingException if the part cannot be read
 * @throws IOException if the part content cannot be loaded
 */
public static void getMailTextContent(Part part,StringBuffer content) throws MessagingException, IOException {
    // getContentType() may return null when the type cannot be determined.
    String contentType = part.getContentType();
    boolean namedAttachment = contentType != null && contentType.indexOf("name") != -1;

    boolean isText = part.isMimeType("text/html") || part.isMimeType("text/plain");
    if (isText && !namedAttachment) {
        Object body = part.getContent();
        // Guard the cast: a blind cast would throw an unchecked exception that no
        // caller handles before the whole run is abandoned.
        if (body instanceof String) {
            content.append((String) body);
        }
    } else if (part.isMimeType("multipart/*")) {
        Multipart multipart = (Multipart) part.getContent();
        int count = multipart.getCount();
        // When an HTML child exists, the plain-text children are not wanted.
        boolean hasHtml = checkHasHtml(multipart);
        for (int index = 0; index < count; index++) {
            Part child = multipart.getBodyPart(index);
            if (hasHtml && child.isMimeType("text/plain")) {
                continue;
            }
            getMailTextContent(child, content);
        }
    } else if (part.isMimeType("message/rfc822")) {
        getMailTextContent((Part) part.getContent(), content);
    }
}
/**
 * Tells whether any direct child of the given multipart is a {@code text/html} part.
 *
 * @param part the multipart container to inspect
 * @return {@code true} as soon as a {@code text/html} child is found, {@code false} otherwise
 * @throws MessagingException if a child cannot be read
 * @throws IOException declared for compatibility with existing callers
 */
public static boolean checkHasHtml(Multipart part) throws MessagingException, IOException{
    int total = part.getCount();
    int position = 0;
    while (position < total) {
        Part child = part.getBodyPart(position);
        if (child.isMimeType("text/html")) {
            return true;
        }
        position++;
    }
    return false;
}
主函数的调用
前面的异常处理里都添加了输出语句,可要可不要,另外对于程序中提示要添加的类,一定要看仔细了,经常会出现jar包名字不一样却有相同名字的类,添加错了,程序就出了问题,之前浪费了我不少的时间找bug。。。以下是我的程序里需要添加的类及主函数:
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Flags;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.Store;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimeUtility;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
/**
 * Program entry point: runs {@code receive} and reports any failure on the console.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) throws Exception {
    try {
        receive();
    } catch (Exception failure) {
        failure.printStackTrace();
        System.out.println("receive异常");
    }
}
这个Java程序的类名可以自己随意取,它只是一个外壳,对内部程序没有影响。
有什么问题希望大家可以留言一起交流哈,集思广益。加油加油再加油,勤能补拙,希望自己的技能日益精进!
下面是jar包的下载的链接:
http://download.csdn.net/detail/yixiesuifeng/9831640