EL-ADMIN完善获取简历定时任务

前言

前面已经写过了如何获取简历以及如何配置定时任务,这篇是做个综合,配合eladmin将定时任务完善好。

当然,所有代码这篇都会有所调整,调整后的代码也会提供。之前的几篇:

Java代码读取电子邮件(含附件)并解析简历文件:https://blog.csdn.net/m0_58095675/article/details/124223353
Java如何使用IP代理:https://blog.csdn.net/m0_58095675/article/details/124229740
EL-ADMIN单模块操作:https://blog.csdn.net/m0_58095675/article/details/124656816
EL-ADMIN配置定时任务:https://blog.csdn.net/m0_58095675/article/details/124663885

辅助类

引入包

在自己的module中引入如下:

		<!-- Aliyun core SDK. NOTE(review): no class from this artifact is referenced in the code shown in this article; presumably a prerequisite of other Aliyun modules (confirm). -->
		<dependency>
			<groupId>com.aliyun</groupId>
			<artifactId>aliyun-java-sdk-core</artifactId>
			<version>4.4.0</version>
		</dependency>
		<!-- Aliyun OSS client: supplies com.aliyun.oss.OSS / OSSClientBuilder used by AliyunUtil. -->
		<dependency>
			<groupId>com.aliyun.oss</groupId>
			<artifactId>aliyun-sdk-oss</artifactId>
			<version>3.10.2</version>
		</dependency>
		<!-- jsoup: used by DistinguishUtil to call the proxy-allocation endpoint. -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.12.1</version>
		</dependency>
		<!-- Multipart upload support (org.apache.http.entity.mime) used by DistinguishUtil. -->
		<!-- No version is declared here; presumably it is managed by the parent POM (confirm). -->
		<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpmime</artifactId>
		</dependency>
日期处理

计算前一天日期、字符和时间转化等。

package me.zhengjie.xingchenlie.util;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Small date helper that formats and parses timestamps using the fixed
 * pattern {@code yyyy-MM-dd HH:mm:ss} in the JVM default time zone.
 *
 * <p>{@link SimpleDateFormat} is not safe for concurrent use, so every thread
 * gets its own formatter instance instead of sharing a single static one.
 */
public class Dates {

	/** Pattern used by every method of this class. */
	private static final String PATTERN = "yyyy-MM-dd HH:mm:ss";

	/** One formatter per thread; avoids corrupted output when called from several threads at once. */
	private static final ThreadLocal<SimpleDateFormat> FORMAT = new ThreadLocal<SimpleDateFormat>() {
		@Override
		protected SimpleDateFormat initialValue() {
			return new SimpleDateFormat(PATTERN);
		}
	};

	/**
	 * @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss}
	 */
	public static String now() {
		return FORMAT.get().format(new Date());
	}

	/**
	 * Returns the instant exactly 24 hours before now, formatted as
	 * {@code yyyy-MM-dd HH:mm:ss}. (The method name keeps its original
	 * spelling because callers depend on it.)
	 *
	 * @return the formatted timestamp of "now minus 24 hours"
	 */
	public static String yestoday() {
		// The pattern has second precision, so subtracting in milliseconds
		// yields the same text as the previous seconds-based arithmetic.
		long twentyFourHoursAgo = System.currentTimeMillis() - 24L * 3600L * 1000L;
		return FORMAT.get().format(new Date(twentyFourHoursAgo));
	}

	/**
	 * Parses a {@code yyyy-MM-dd HH:mm:ss} string.
	 *
	 * @param dateString text to parse
	 * @return the parsed date, or {@code null} when the text does not match the pattern
	 *         (the parse error is printed to standard error)
	 */
	public static Date parse(String dateString) {
		try {
			return FORMAT.get().parse(dateString);
		} catch (ParseException e) {
			e.printStackTrace();
			return null;
		}
	}

}

阿里云OSS文件上传

邮件附件中的简历下载后,先上传到阿里云OSS并记录存储路径,再进行解析。eladmin支持七牛云,此处先不用,如果后面用到了再单独写一篇。

package me.zhengjie.xingchenlie.util;

import java.io.InputStream;
import java.net.URL;
import java.util.Date;

import org.springframework.stereotype.Component;

import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;

/**
 * Thin wrapper around the Aliyun OSS client for uploading resume files and
 * producing time-limited download links.
 *
 * <p>NOTE(review): the access key and secret are hard-coded placeholders here;
 * real credentials should come from external configuration rather than source code.
 */
@Component
public class AliyunUtil {

	private static final String ACCESS_KEY_ID = "阿里云后台的 ACCESS_KEY";
	private static final String SECRET = "阿里云后台的 ACCESS_SECRET ";

	/** OSS endpoint used for every request. */
	private static final String ENDPOINT = "http://oss-cn-beijing.aliyuncs.com";
	/** Bucket that stores the uploaded files. */
	private static final String BUCKET = "xingchenlie";
	/** Lifetime of a presigned URL: three hours. */
	private static final long URL_TTL_MILLIS = 3L * 3600L * 1000L;

	/**
	 * Builds a presigned URL for an object in the bucket; the URL expires three hours from now.
	 *
	 * @param filePathName object key inside the bucket
	 * @return the presigned URL as a string
	 */
	public static String getOssUrl(String filePathName) {
		OSS ossClient = new OSSClientBuilder().build(ENDPOINT, ACCESS_KEY_ID, SECRET);
		try {
			Date expiration = new Date(System.currentTimeMillis() + URL_TTL_MILLIS);
			URL url = ossClient.generatePresignedUrl(BUCKET, filePathName, expiration);
			return url.toString();
		} finally {
			// Release the client even when the request throws.
			ossClient.shutdown();
		}
	}

	/**
	 * Uploads a stream to the bucket under the given object key.
	 *
	 * @param fileName    object key inside the bucket
	 * @param inputStream content to upload
	 * @return always {@code true}; failures surface as exceptions thrown by the OSS client
	 */
	public static boolean uploadOssFile(String fileName, InputStream inputStream) {
		OSS ossClient = new OSSClientBuilder().build(ENDPOINT, ACCESS_KEY_ID, SECRET);
		try {
			ossClient.putObject(BUCKET, fileName, inputStream);
			return true;
		} finally {
			// Release the client even when the upload throws.
			ossClient.shutdown();
		}
	}

}
简历解析辅助类

简历上传到某个网站,将结果保存,涉及的是哪个网站不写了。因为邮件比较多,所以挂上代理(代理账号使用自己的替换),避免被封。

package me.zhengjie.xingchenlie.util;

import java.io.InputStream;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLContextBuilder;
import org.apache.http.conn.ssl.TrustStrategy;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.jsoup.Connection.Method;
import org.jsoup.Jsoup;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

@SuppressWarnings("deprecation")
public class DistinguishUtil {

	private static final String HTTP = "http";
	private static final String HTTPS = "https";
	private static SSLConnectionSocketFactory sslsf = null;
	private static PoolingHttpClientConnectionManager cm = null;
	private static SSLContextBuilder builder = null;
	static {
		System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2");
		try {
			builder = new SSLContextBuilder();
			builder.loadTrustMaterial(null, new TrustStrategy() {
				@Override
				public boolean isTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException {
					return true;
				}
			});
			sslsf = new SSLConnectionSocketFactory(builder.build(),
					new String[] { "SSLv2Hello", "SSLv3", "TLSv1", "TLSv1.2" }, null, NoopHostnameVerifier.INSTANCE);
			Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
					.register(HTTP, new PlainConnectionSocketFactory()).register(HTTPS, sslsf).build();
			cm = new PoolingHttpClientConnectionManager(registry);
			cm.setMaxTotal(200);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	public static String getResume(InputStream is, String fileName) throws Exception {
		try {
			CookieStore store = new BasicCookieStore();
			CredentialsProvider credsProvider = new BasicCredentialsProvider();
			credsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials("代理用户名", "代理密码"));
			CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(sslsf).setConnectionManager(cm)
					.setConnectionManagerShared(true).setDefaultCookieStore(store)
					.setDefaultCredentialsProvider(credsProvider).build();
			String url = Jsoup.connect("https://proxy.qg.net/allocate?Key=72EAFBDM").ignoreContentType(true)
					.method(Method.GET).execute().body();
			JSONObject json = JSON.parseObject(url);
			JSONObject obj = json.getJSONArray("Data").getJSONObject(0);
			String ip = obj.getString("IP");
			int port = obj.getIntValue("port");
			System.out.println(Dates.now() + "代理IP " + ip + ":" + port);
			HttpHost myProxy = new HttpHost(ip, port);
			RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).setProxy(myProxy).build();
			MultipartEntityBuilder builder = MultipartEntityBuilder.create();
			builder.addBinaryBody("resume-file", is, ContentType.APPLICATION_OCTET_STREAM, fileName);
			HttpEntity multipart = builder.build();
			
			HttpPost post = new HttpPost("识别简历网站的网址");
			post.setConfig(requestConfig);
			post.setEntity(multipart);
			System.out.println(Dates.now() + "文件解析开始上传了");
			CloseableHttpResponse response = httpclient.execute(post);
			System.out.println(Dates.now() + "上传完成了");
			byte[] bResultXml = EntityUtils.toByteArray(response.getEntity());
			String result = new String(bResultXml, "utf-8");
			return result;
		} catch(Exception e) {
			e.printStackTrace();
		}
		return null;
	}

}

邮件辅助类

读取邮件,满足条件的返回邮件列表。

package me.zhengjie.xingchenlie.util;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Flags;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimeUtility;

import com.sun.mail.imap.IMAPStore;

import me.zhengjie.xingchenlie.basedata.service.dto.EmailAccountDto;
import me.zhengjie.xingchenlie.basedata.service.dto.EmailVo;

/**
 * Reads messages from an IMAP inbox and converts them into {@link EmailVo} objects,
 * including the text content and the first extractable attachment.
 */
public class EmailUtil {

	/**
	 * Scans the account's INBOX from the highest message index down to the lowest and
	 * returns the messages that pass the filters.
	 *
	 * <p>Filtering rules:
	 * <ul>
	 * <li>A message whose Message-ID is contained in {@code messageIds} is skipped.</li>
	 * <li>When {@code date} is non-empty, only messages whose formatted send time starts
	 * with {@code date} are returned; messages with a later send time are skipped, and
	 * the scan stops at the first message whose send time sorts before {@code date}.</li>
	 * </ul>
	 *
	 * <p>The folder is opened read-write and closed with expunge enabled; folder and
	 * store are closed even when reading fails.
	 *
	 * @param account    connection settings (property keys, host, port, protocol, credentials)
	 * @param date       {@code null} or empty to read every message, otherwise a day in {@code yyyy-MM-dd} form
	 * @param messageIds Message-IDs that are already known and must not be returned again; may be {@code null}
	 * @return the matching messages, newest index first
	 * @throws Exception when connecting to the mailbox or reading a message fails
	 */
	public static List<EmailVo> getEmailList(EmailAccountDto account, String date, List<String> messageIds) throws Exception {
		List<EmailVo> list = new ArrayList<>();
		// Work on a copy so that per-account settings do not leak into the JVM-wide system properties.
		Properties props = new Properties();
		props.putAll(System.getProperties());
		props.setProperty(account.getHostKey(), account.getHost());
		props.setProperty(account.getPortKey(), account.getPort());
		props.setProperty(account.getProtocolKey(), account.getProtocol());
		props.setProperty("mail.imap.partialfetch", "false");
		props.setProperty("mail.imaps.partialfetch", "false");
		Session session = Session.getInstance(props);
		IMAPStore store = (IMAPStore) session.getStore(account.getProtocol());
		Folder folder = null;
		try {
			store.connect(account.getAccount(), account.getPassword());
			folder = store.getFolder("INBOX");
			// READ_WRITE allows message flags to be changed; READ_ONLY would not.
			folder.open(Folder.READ_WRITE);
			SimpleDateFormat sendTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
			boolean filterByDate = date != null && date.length() > 0;
			Message[] messages = folder.getMessages();
			for (int i = messages.length - 1; i >= 0; i--) {
				// Bounce messages and mails from some platforms fail with
				// "javax.mail.MessagingException: Unable to load BODYSTRUCTURE" when read
				// directly, so each message is copied and the copy is inspected instead.
				MimeMessage serverMessage = (MimeMessage) messages[i];
				MimeMessage msg = new MimeMessage(serverMessage);
				String messageId = msg.getMessageID();
				if (messageIds != null && messageIds.contains(messageId)) {
					System.out.println(i + "无需获取,邮件Id:" + messageId);
					continue;
				}
				// The Date header is optional; fall back to the server-side received date.
				java.util.Date sent = msg.getSentDate();
				if (sent == null) {
					sent = serverMessage.getReceivedDate();
				}
				if (sent == null) {
					System.out.println(i + ":no sent or received date, message skipped, id " + messageId);
					continue;
				}
				String sendTime = sendTimeFormat.format(sent);
				if (filterByDate) {
					if (sendTime.compareTo(date) < 0) {
						// Older than the requested day: stop scanning.
						break;
					}
					if (!sendTime.startsWith(date)) {
						// Newer than the requested day: ignore but keep scanning.
						continue;
					}
				}
				System.out.println(i + ":" + sendTime);

				EmailVo vo = new EmailVo();
				vo.setMessageId(messageId);
				vo.setSendTime(sendTime);
				vo.setSubject(msg.getSubject());
				Address[] froms = msg.getFrom();
				if (froms != null && froms.length > 0) {
					InternetAddress sender = (InternetAddress) froms[0];
					vo.setFromEmail(sender.getAddress());
					vo.setFromName(sender.getPersonal());
				}
				Address[] recipients = msg.getAllRecipients();
				if (recipients != null && recipients.length > 0) {
					vo.setReceive(joinRecipients(recipients));
				}
				vo.setRead(msg.getFlags().contains(Flags.Flag.SEEN));

				StringBuffer content = new StringBuffer(100);
				getMailTextContent(msg, content);
				vo.setContent(content.toString());

				boolean hasAttachment = isContainAttachment(msg);
				if (hasAttachment) {
					StringBuffer fileName = new StringBuffer(100);
					InputStream is = getAttachment(msg, fileName);
					System.out.println(fileName);
					if (is == null) {
						// Detection and extraction use different rules; without a readable
						// stream the message is reported as having no attachment.
						hasAttachment = false;
					} else {
						vo.setAttachmentFile(readFully(is));
						vo.setFileName(fileName.toString());
					}
				}
				vo.setContainerAttachment(hasAttachment);
				list.add(vo);
			}
		} finally {
			try {
				if (folder != null && folder.isOpen()) {
					folder.close(true);
				}
			} finally {
				store.close();
			}
		}
		return list;
	}

	/** Joins the Unicode form of all recipient addresses with commas. */
	private static String joinRecipients(Address[] recipients) {
		StringBuilder joined = new StringBuilder();
		for (Address recipient : recipients) {
			if (joined.length() > 0) {
				joined.append(",");
			}
			joined.append(((InternetAddress) recipient).toUnicodeString());
		}
		return joined.toString();
	}

	/** Reads the stream to its end, closes it and returns the bytes that were read. */
	private static byte[] readFully(InputStream in) throws IOException {
		try (InputStream source = in) {
			ByteArrayOutputStream collected = new ByteArrayOutputStream();
			byte[] chunk = new byte[2048];
			int len;
			while ((len = source.read(chunk)) != -1) {
				collected.write(chunk, 0, len);
			}
			return collected.toByteArray();
		}
	}

	/**
	 * Finds the first attachment of the part and returns its content stream.
	 *
	 * <p>A body part counts as an attachment when its disposition is attachment or inline,
	 * or when its content type contains "name" or "application". Nested multiparts and
	 * embedded messages are searched recursively. Candidates without a file name are skipped.
	 *
	 * @param part     message or body part to search
	 * @param fileName receives the decoded file name of the attachment that is returned
	 * @return the attachment stream, or {@code null} when no attachment was found
	 */
	private static InputStream getAttachment(Part part, StringBuffer fileName)
			throws UnsupportedEncodingException, MessagingException, FileNotFoundException, IOException {
		if (part.isMimeType("multipart/*")) {
			Multipart multipart = (Multipart) part.getContent();
			int partCount = multipart.getCount();
			// The scan starts at index 1, so the first body part is never returned.
			// NOTE(review): presumably because part 0 carries the message text; confirm.
			for (int i = 1; i < partCount; i++) {
				BodyPart bodyPart = multipart.getBodyPart(i);
				if (bodyPart == null) {
					continue;
				}
				String disp = bodyPart.getDisposition();
				boolean candidate;
				if (disp != null && (disp.equalsIgnoreCase(Part.ATTACHMENT) || disp.equalsIgnoreCase(Part.INLINE))) {
					candidate = true;
				} else if (bodyPart.isMimeType("multipart/*")) {
					InputStream nested = getAttachment(bodyPart, fileName);
					if (nested != null) {
						return nested;
					}
					// Nothing inside this nested multipart: keep looking at the remaining siblings.
					continue;
				} else {
					String contentType = bodyPart.getContentType();
					candidate = contentType.indexOf("name") != -1 || contentType.indexOf("application") != -1;
				}
				if (!candidate) {
					continue;
				}
				String rawName = bodyPart.getFileName();
				if (rawName == null) {
					// Without a file name the part cannot be stored under a name; try the next one.
					continue;
				}
				fileName.append(MimeUtility.decodeText(rawName));
				return bodyPart.getInputStream();
			}
		} else if (part.isMimeType("message/rfc822")) {
			return getAttachment((Part) part.getContent(), fileName);
		}
		return null;
	}

	/**
	 * Appends the textual content of the part to {@code content}, walking through
	 * multiparts and embedded messages. Text parts whose content type carries a
	 * "name" marker are treated as attachments and left out.
	 *
	 * @param part    message or body part to read
	 * @param content buffer that receives the text
	 */
	private static void getMailTextContent(Part part, StringBuffer content) throws MessagingException, IOException {
		boolean isContainTextAttach = part.getContentType().indexOf("name") > 0;
		if (part.isMimeType("text/*") && !isContainTextAttach) {
			content.append(part.getContent().toString());
		} else if (part.isMimeType("message/rfc822")) {
			getMailTextContent((Part) part.getContent(), content);
		} else if (part.isMimeType("multipart/*")) {
			Multipart multipart = (Multipart) part.getContent();
			int partCount = multipart.getCount();
			for (int i = 0; i < partCount; i++) {
				getMailTextContent(multipart.getBodyPart(i), content);
			}
		}
	}

	/**
	 * Tells whether the part contains an attachment.
	 *
	 * <p>A body part counts as an attachment when its disposition is attachment or inline,
	 * or when its content type contains "application" or "name". Nested multiparts and
	 * embedded messages are checked recursively.
	 *
	 * @param part message or body part to inspect
	 * @return {@code true} as soon as one attachment is found
	 */
	private static boolean isContainAttachment(Part part) throws MessagingException, IOException {
		if (part.isMimeType("multipart/*")) {
			Multipart multipart = (Multipart) part.getContent();
			int partCount = multipart.getCount();
			for (int i = 0; i < partCount; i++) {
				BodyPart bodyPart = multipart.getBodyPart(i);
				String disp = bodyPart.getDisposition();
				if (disp != null && (disp.equalsIgnoreCase(Part.ATTACHMENT) || disp.equalsIgnoreCase(Part.INLINE))) {
					return true;
				}
				if (bodyPart.isMimeType("multipart/*")) {
					if (isContainAttachment(bodyPart)) {
						return true;
					}
					continue;
				}
				String contentType = bodyPart.getContentType();
				if (contentType.indexOf("application") != -1 || contentType.indexOf("name") != -1) {
					return true;
				}
			}
			return false;
		}
		if (part.isMimeType("message/rfc822")) {
			return isContainAttachment((Part) part.getContent());
		}
		return false;
	}

}

数据准备

先维护自己的邮箱账号数据,具体步骤可以看:https://blog.csdn.net/m0_58095675/article/details/124656816,定时任务获取的账号就是这里维护好的。

定时任务

定时任务位于eladmin-system中,包名是me.zhengjie.modules.quartz.task,这里新建的EmailTask代码如下

/*
 *  Copyright 2019-2020 Zheng Jie
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package me.zhengjie.modules.quartz.task;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import lombok.extern.slf4j.Slf4j;
import me.zhengjie.xingchenlie.basedata.service.EmailAccountService;
import me.zhengjie.xingchenlie.basedata.service.dto.EmailAccountDto;
import me.zhengjie.xingchenlie.basedata.service.dto.EmailAccountQueryCriteria;
import me.zhengjie.xingchenlie.basedata.service.dto.EmailVo;
import me.zhengjie.xingchenlie.business.domain.Resume;
import me.zhengjie.xingchenlie.business.service.ResumeService;
import me.zhengjie.xingchenlie.business.service.dto.ResumeDto;
import me.zhengjie.xingchenlie.business.service.dto.ResumeQueryCriteria;
import me.zhengjie.xingchenlie.util.AliyunUtil;
import me.zhengjie.xingchenlie.util.Dates;
import me.zhengjie.xingchenlie.util.DistinguishUtil;
import me.zhengjie.xingchenlie.util.EmailUtil;

/**
 * 定时获取邮件
 */
@Slf4j
@Async
@Component

public class EmailTask {

	@Autowired EmailAccountService emailAccountService;
	@Autowired ResumeService resumeService;
	
    public void run(){
        log.info("进入email定时任务");
        List<EmailAccountDto> accounts = emailAccountService.queryAll(new EmailAccountQueryCriteria());
        for(EmailAccountDto dto : accounts) {
        	try {
        		// 查询所有的简历,拿到邮件Id列表,避免重复插入
        		List<ResumeDto> orgResumeList = resumeService.queryAll(new ResumeQueryCriteria());
        		List<String> messageIds = new ArrayList<>();
				for (ResumeDto r : orgResumeList) {
					messageIds.add(r.getFileId());
				}
				// 获取全部邮件列表
				List<EmailVo> list = EmailUtil.getEmailList(dto, Dates.yestoday().substring(0,10), messageIds);
				// 解析并添加到数据库中
				for (EmailVo item : list) {
					log.info("开始解析:" + item.toString());
					if (!Objects.equals("cv@service.bosszhipin.com", item.getFromEmail())
							&& !Objects.equals("cv@service.zhipin.com", item.getFromEmail())) {
						log.info("发件人不是boss直聘,继续下一封解析。");
						continue;
					}
					if(!item.isContainerAttachment()) {
						log.info("没有附件,继续下一封解析。");
						continue;
					}
					byte[] b = item.getAttachmentFile();
					InputStream is = new ByteArrayInputStream(b);
					String fileName = item.getSendTime().substring(0, 10) + "/" + item.getFileName();
					AliyunUtil.uploadOssFile(fileName, is);
					log.info("附件名称:" + item.getFileName());
					InputStream is2 = new ByteArrayInputStream(b);
					log.info("开始上传文件解析");
					String resumeString = DistinguishUtil.getResume(is2, item.getFileName());
					if(resumeString == null) {
						log.info("解析结果异常,下个定时周期重试");
						continue;
					}
					if(!resumeString.startsWith("{")) {
						log.info("代理数据异常,下个定时周期重启,错误信息:" + resumeString);
						continue;
					}
					log.info("解析结果有了");
					JSONObject json = JSON.parseObject(resumeString);
					if(json.getIntValue("errorcode") != 0) {
						log.info("解析异常,继续下一封。" + resumeString);
						continue;
					}
					log.info("解析正常,开始保存。");
					
					Resume resume = new Resume();
					resume.setCreateTime(new Timestamp(System.currentTimeMillis()));
					resume.setUpdateTime(new Timestamp(Dates.parse(item.getSendTime()).getTime()));
					resume.setIsDelete(0);
					resume.setFileName(fileName);
					resume.setFileId(item.getMessageId());
					resume.setSource(item.getFromName());
					JSONObject dataObject = json.getJSONObject("parsing_result");
					if (!Objects.equals(json.getString("cv_language"), "zh")) {
						dataObject = json.getJSONObject("english_parsing_result");
					}
					resume.setLanguage(json.getString("cv_language")); // 简历语言
					JSONObject baseInfo = dataObject.getJSONObject("basic_info");
					resume.setName(baseInfo.getString("name"));
					resume.setGender(baseInfo.getString("gender"));
					String expectLocation = baseInfo.getString("expect_location"); // 期望工作城市或区域
					String currentLocation = baseInfo.getString("current_location"); // 所在地城市或区域
					resume.setAddress(StringUtils.isNotBlank(expectLocation) ? expectLocation : currentLocation);
					resume.setBrithDay(baseInfo.getString("date_of_birth")); // 生日 出生日期
					resume.setWorkStartYear(baseInfo.getString("work_start_year")); // 开始工作年份
					resume.setSchool(baseInfo.getString("school_name")); // 毕业院校
					resume.setDegree(baseInfo.getString("degree")); // 学历
					resume.setMajor(baseInfo.getString("major")); // 专业
					resume.setCurrentPosition(baseInfo.getString("current_position")); // 当前职位
					resume.setCurrentCompany(baseInfo.getString("current_company")); // 当前公司
					resume.setCurrentSalary(baseInfo.getString("current_salary")); // 当前工资或当前工资范围
					resume.setDesiredSalary(baseInfo.getString("desired_salary")); // 期望工资或期望工资范围
					JSONObject contactInfo = dataObject.getJSONObject("contact_info");
					resume.setPhoneNumber(contactInfo.getString("phone_number")); // 电话
					resume.setEmail(contactInfo.getString("email")); // 邮箱
					resume.setResumeRawtext(resumeString);
					resumeService.create(resume);
				}
        		
        	} catch(Exception e) {
        		e.printStackTrace();
        	}
        }
        
    }

}

执行

前端运行起来,左侧目录系统管理–任务调度,在对应的调用任务后面点击执行,观察控制台是否正常执行获取邮件并解析。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

lootaa

你的鼓励是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值