扫描简历PDF文件并提取基本信息
一、导入依赖（共三个）
<!-- PDFBox core: provides the org.apache.pdfbox classes (PDDocument, PDFTextStripper) imported by the utility class. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.21</version>
</dependency>
<!-- FontBox: declared explicitly here and pinned to the same version as pdfbox. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.21</version>
</dependency>
<!-- JempBox: NOTE(review) none of the Java code shown in this document imports it; confirm it is actually needed. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.13</version>
</dependency>
二.方法类实现
package com.csii.manage.common.util;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Extracts basic candidate information from the text of a resume PDF.
 *
 * <p>The approach is purely heuristic: the PDF text is extracted with PDFBox, the first
 * occurrence of each known spelling of a field label is rewritten to one canonical label,
 * colons are removed, and the run of characters that follows a canonical label (up to the
 * next whitespace or the end of the text) is taken as that field's value.
 */
public class ReadCharUtil {

    private static final Logger LOG = Logger.getLogger(ReadCharUtil.class.getName());

    /**
     * Label spellings and the canonical label each is rewritten to. The rules are applied
     * strictly in this order and each one touches only its first match, so the order is part
     * of the behaviour and must not be rearranged casually. The leading space in every
     * replacement guarantees that the previous value is terminated by whitespace.
     */
    private static final LabelRule[] LABEL_RULES = {
        new LabelRule(loose("姓名"), " 姓名"),
        new LabelRule(loose("性别"), " 性别"),
        new LabelRule(loose("年龄"), " 年龄"),
        new LabelRule(loose("邮箱"), " 邮箱"),
        new LabelRule(loose("电子邮箱"), " 邮箱"),
        new LabelRule(loose("邮箱地址"), " 邮箱"),
        new LabelRule(Pattern.compile("E-mail"), " 邮箱"),
        new LabelRule(Pattern.compile("E-Mail"), " 邮箱"),
        new LabelRule(loose("手机号"), " 手机号"),
        new LabelRule(loose("电话"), " 手机号"),
        new LabelRule(loose("联系电话"), " 手机号"),
        new LabelRule(loose("联系方式"), " 手机号"),
        new LabelRule(loose("学校"), " 毕业学校"),
        new LabelRule(loose("毕业学校"), " 毕业学校"),
        new LabelRule(loose("毕业院校"), " 毕业学校"),
        new LabelRule(loose("专业"), " 所学专业"),
        new LabelRule(loose("所学专业"), " 所学专业"),
        new LabelRule(loose("工作年限"), " 工作年限"),
        new LabelRule(loose("工作经验"), " 工作年限"),
        new LabelRule(loose("工龄"), " 工作年限"),
        new LabelRule(loose("工作时间"), " 工作年限"),
    };

    /**
     * A colon with any surrounding whitespace. Both the ASCII colon and the full-width colon
     * are covered; the original code ran the ASCII replacement twice, so a full-width colon
     * stayed glued to the front of every extracted value.
     */
    private static final Pattern COLON = Pattern.compile("\\s*[:：]\\s*");

    /** Result-map key paired with the canonical label whose following token is its value. */
    private static final FieldRule[] FIELD_RULES = {
        new FieldRule("Name", "姓名"),
        new FieldRule("Sex", "性别"),
        new FieldRule("Email", "邮箱"),
        new FieldRule("Phone", "手机号"),
        new FieldRule("School", "毕业学校"),
        new FieldRule("GraduateYear", "毕业时间"),
        new FieldRule("Major", "所学专业"),
        new FieldRule("WorkExp", "工作年限"),
        new FieldRule("TrainFlag", "是否培训学校"),
        new FieldRule("Skill", "技术类型"),
    };

    /**
     * Reads a resume PDF and returns the fields that could be recognised in its text.
     *
     * <p>The raw {@code Map} return type is kept for source compatibility with existing
     * callers; every key and every value in the returned map is a {@code String}.
     *
     * @param pdfpath the PDF file to read
     * @return a mutable map with the keys {@code Name}, {@code Sex}, {@code Email},
     *     {@code Phone}, {@code School}, {@code GraduateYear}, {@code Major},
     *     {@code WorkExp}, {@code TrainFlag} and {@code Skill}, each mapped to the extracted
     *     value or to an empty string when the label was not found; an empty map when the
     *     file could not be read (the failure is logged, never thrown)
     */
    public static Map readChar(File pdfpath) {
        Map<String, String> fields = new HashMap<>();
        String text;
        // try-with-resources: PDDocument is Closeable and must be closed after use.
        try (PDDocument doc = PDDocument.load(pdfpath)) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            text = stripper.getText(doc);
        } catch (Exception e) {
            // Best-effort contract preserved from the original: report and return nothing.
            LOG.log(Level.WARNING, "Could not extract text from resume PDF: " + pdfpath, e);
            return fields;
        }

        String normalized = normalizeLabels(text);
        for (FieldRule rule : FIELD_RULES) {
            fields.put(rule.key, rule.valueIn(normalized));
        }
        return fields;
    }

    /** Rewrites label spellings to their canonical form and strips colons. */
    private static String normalizeLabels(String text) {
        String result = text;
        for (LabelRule rule : LABEL_RULES) {
            result = rule.variant.matcher(result).replaceFirst(rule.canonical);
        }
        return COLON.matcher(result).replaceAll("");
    }

    /**
     * Builds a pattern matching {@code label} with optional whitespace after every character,
     * so that a label laid out as "姓  名" is still recognised.
     */
    private static Pattern loose(String label) {
        StringBuilder regex = new StringBuilder();
        for (int i = 0; i < label.length(); i++) {
            regex.append(Pattern.quote(String.valueOf(label.charAt(i)))).append("\\s*");
        }
        return Pattern.compile(regex.toString());
    }

    /** One label-normalisation step: a spelling to look for and its canonical replacement. */
    private static final class LabelRule {
        final Pattern variant;
        final String canonical;

        LabelRule(Pattern variant, String canonical) {
            this.variant = variant;
            this.canonical = canonical;
        }
    }

    /** One output field: its map key and the pattern that captures the value after its label. */
    private static final class FieldRule {
        final String key;
        final Pattern value;

        FieldRule(String key, String label) {
            this.key = key;
            // Shortest run after the label that is followed by whitespace; "|$" additionally
            // accepts a value that is the very last thing in the text.
            this.value = Pattern.compile("(?<=" + label + ").*?(?=\\s|$)");
        }

        /** Returns the value found in {@code text}, or an empty string when there is none. */
        String valueIn(String text) {
            Matcher m = value.matcher(text);
            return m.find() ? m.group() : "";
        }
    }
}
三、在控制器中接收前端上传的 PDF 文件并调用上述工具方法
/**
 * Receives an uploaded resume PDF, extracts its basic fields and returns them.
 *
 * <p>The upload is first written to a temporary file because
 * {@code ReadCharUtil.readChar} takes a {@link File}. The temporary file is removed again
 * once parsing has finished, whether or not it succeeded.
 *
 * @param uploadfile the multipart request part named {@code file}
 * @return a success result wrapping the map produced by {@code ReadCharUtil.readChar}
 * @throws IOException if the temporary file cannot be created or the upload cannot be
 *     written to it
 */
public JsonResult pdfGetData(@RequestParam("file") MultipartFile uploadfile) throws IOException {
    // A fixed prefix and suffix are used instead of the client-supplied file name: that name
    // may be null, may contain no '.', or may be shorter than the three characters that
    // File.createTempFile requires for its prefix, each of which made the original throw.
    File tempFile = File.createTempFile("resume-", ".pdf");
    try {
        uploadfile.transferTo(tempFile);
        Map map = ReadCharUtil.readChar(tempFile);
        return JsonResult.buildSuccess(map);
    } finally {
        // Resumes contain personal data; do not leave copies behind in the temp directory.
        if (!tempFile.delete()) {
            tempFile.deleteOnExit();
        }
    }
}