读取罗科仕简历以及对应的职位(简历)

注:罗科仕简历有三种格式,分别为doc(分为普通doc和xml)、docx
1.doc(word)抽取
package com.beagledata.mgc.lucas.jdcvmatch.openapi.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WordDocParse {
//读Doc表格型文档
public String readDocTableWord(String path) {
StringBuffer result = new StringBuffer();
try {
InputStream is = null;
HWPFDocument docm = null;
is = new FileInputStream(path); //读取文件
docm = new HWPFDocument(is); //转换doc文档is
Range range = docm.getRange(); //行
TableIterator it = new TableIterator(range); //表迭代器
while (it.hasNext()) {
Table tb = it.next(); ???
for (int i = 0; i < tb.numRows(); i++) { //小于行数
TableRow tr = tb.getRow(i);
for (int j = 0; j < tr.numCells(); j++) { //行内单元格不为空
TableCell td = tr.getCell(j);
for (int k = 0; k < td.numParagraphs(); k++) { //段落数
Paragraph para = td.getParagraph(k);
String s = para.text();
result.append(s);
}
}
}
}
} catch (IOException e) {
e.printStackTrace();
}

	String result1 = result.toString().replaceAll("\r", "@@@@"); //过滤 换行符
	String result2; //过滤空格
	result2 = result1.replaceAll("\\s", "##");
	return result2;

}
/**
 * Extracts the text that sits between a start marker and an end marker.
 *
 * <p>When {@code end} is non-null, {@code start} and {@code end} are both treated as
 * regular expressions and the pattern {@code start + ".*" + end} is searched. Callers
 * may prefix {@code end} with {@code ?} to make the {@code .*} lazy. If several matches
 * exist the last one wins. Every occurrence of the end pattern (without the leading
 * {@code ?}) and of the start pattern is removed from the matched text.
 *
 * <p>When {@code end} is null, {@code start} is treated as a literal and everything
 * after its first occurrence is returned.
 *
 * @param str1  text to search
 * @param start start marker (regex, or literal when {@code end} is null)
 * @param end   end marker regex, optionally prefixed with {@code ?}; may be null
 * @return the extracted text, or the empty string when nothing matches
 */
public String findString(String str1, String start, String end) {
    String result = "";
    if (end != null) {
        String regex = start + ".*" + end;
        // the leading '?' only modifies ".*" in the search regex; drop it for the clean-up
        String endPattern = end.replaceAll("^\\?", "");
        Matcher matcher = Pattern.compile(regex).matcher(str1);
        while (matcher.find()) {
            result = matcher.group().replaceAll(endPattern, "").replaceAll(start, "");
        }
    } else {
        // take everything after the start marker; the previous "+ 1" offset kept most of
        // the marker and returned the whole input when the marker was missing
        int idx = str1.indexOf(start);
        if (idx >= 0) {
            result = str1.substring(idx + start.length());
        }
    }
    return result;
}
/**
 * Parses a binary .doc resume into a JSON object.
 *
 * <p>The table text is read with {@link #readDocTableWord(String)} and the individual
 * fields are cut out of it with {@link #findString(String, String, String)}, relying on
 * the {@code @@@@} / {@code ##} markers and on the U+0007 characters present in the
 * extracted text. The collected values are handed to {@code DataUtils.parseCvParams}.
 *
 * @param path path of the .doc file
 * @return the JSON object filled by {@code DataUtils.parseCvParams}
 */
public static JSONObject parse(String path) {
    Map<String, Object> map = new HashMap<>();
    JSONObject jsonObject = new JSONObject();
    WordDocParse cwd = new WordDocParse();
    String str = cwd.readDocTableWord(path);

    // ---- basic information ----
    String name = cwd.findString(str, "\u0007", "?@@@@@@@@(基本信息|手机|邮箱)");
    String sex = cwd.findString(str, "性别@@@@", "?@@@@(婚姻状况)?").replaceAll("性别|婚姻状况","").replaceAll("@@@@.*", "");
    String marriage = cwd.findString(str, "婚姻状况@@@@", "?@@@@@@@@(出生年月)?").replaceAll("出生年月|婚姻状况","");
    String birth = cwd.findString(str, "出生年月@@@@", "?@@@@(现所在地)?").replaceAll("出生年月|现所在地","");
    String address = cwd.findString(str, "现所在地@@@@", "?@@@@@@@@");
    String assessment = cwd.findString(str, "顾问评价\u0007 \u0007", "?@@@@@@@@");

    // ---- education ----
    String jy = cwd.findString(str, "教育经历\u0007", "?@@@@@@@@\u0007\u0007").replaceAll("##-##", "-");
    String[] jys = jy.split("@@@@@@@@");
    JSONArray eduJson = new JSONArray();
    for (String key : jys) {
        Map<String, Object> map1 = new HashMap<>();
        JSONObject jsonObject1 = new JSONObject();
        // Strip parenthesised remarks (ASCII or full-width brackets). The previous pattern
        // "(.*)" was a capture group matching the whole entry, so every entry became empty.
        // NOTE(review): assumes the intent was to drop bracketed notes - confirm on real data.
        String[] ssss = key.replaceAll("[(\\uFF08][^)\\uFF09]*[)\\uFF09]", "").split("@@@@|##");
        if (ssss.length == 4) {
            // index 0 is skipped; indices 1..3 are school, degree, major
            map1.put("school", DataUtils.parseString(ssss[1]));
            map1.put("education_id", DataUtils.parseString(ssss[2]));
            map1.put("major", DataUtils.parseString(ssss[3]));
        } else if (ssss.length == 3) {
            // index 0 is skipped; indices 1..2 are school, major
            map1.put("school", DataUtils.parseString(ssss[1]));
            map1.put("major", DataUtils.parseString(ssss[2]));
        }
        DataUtils.parseCvParams(map1, jsonObject1);
        eduJson.add(jsonObject1);
    }

    // ---- work experience ----
    String gz = cwd.findString(str, "工作经历\u0007", "?@@@@@@@@\u0007").replaceAll("##~##", "~");
    String[] gzs = gz.split("@@@@@@@@");
    String te = "";
    List<String> ss = new ArrayList<>();
    for (String key : gzs) {
        if (key.startsWith("@@@@")) {
            // continuation of the current entry: glue it back on
            te = te + "@@@@@@@@" + key;
        } else {
            // a new entry starts; keep the finished one only if it begins with a digit
            if (StringUtils.isNotBlank(te) && te.matches("\\d.*")) {
                ss.add(te);
            }
            te = key;
        }
    }
    if (StringUtils.isNotBlank(te) && te.matches("\\d.*")) {
        ss.add(te);
    }
    JSONArray workJson = new JSONArray();
    for (String key : ss) {
        Map<String, Object> map1 = new HashMap<>();
        JSONObject jsonObject1 = new JSONObject();
        String company = DataUtils.parseString(cwd.findString(key, "@@@@", "@@@@@@@@@@@@"));
        String job = DataUtils.parseString(cwd.findString(key, "@@@@@@@@@@@@", "?工作职责"));
        String duty = DataUtils.parseString(cwd.findString(key, "工作职责", ""));

        map1.put("company_name", company);
        map1.put("job", job);
        map1.put("duty", duty);
        DataUtils.parseCvParams(map1, jsonObject1);
        workJson.add(jsonObject1);
    }

    // ---- project experience ----
    String xm = cwd.findString(str, "项目经历\u0007", "?@@@@@@@@\u0007").replaceAll("##~##", "~");
    String[] xms = xm.split("@@@@@@@@");
    String te1 = "";
    List<String> ss1 = new ArrayList<>();
    for (String key : xms) {
        if (key.startsWith("@@@@")) {
            te1 = te1 + "@@@@@@@@" + key;
        } else {
            if (StringUtils.isNotBlank(te1)) {
                ss1.add(te1);
            }
            te1 = key;
        }
    }
    if (StringUtils.isNotBlank(te1)) {
        ss1.add(te1);
    }
    JSONArray projectJson = new JSONArray();
    for (String key : ss1) {
        Map<String, Object> map1 = new HashMap<>();
        JSONObject jsonObject1 = new JSONObject();
        String projectName = DataUtils.parseString(cwd.findString(key, "@@@@", "?@@@@@@@@@@@@"));
        String description = DataUtils.parseString(cwd.findString(key, "@@@@@@@@@@@@(项目简介:)?", ""));

        map1.put("projectName", projectName);
        map1.put("projectDescription", description);
        DataUtils.parseCvParams(map1, jsonObject1);
        projectJson.add(jsonObject1);
    }

    // ---- salary and final assembly ----
    String salary = cwd.findString(str, "总薪酬:人民币税前约", "?万").replaceAll("万","");
    map.put("id", IdUtils.generateShortUuid());
    map.put("name", name);
    map.put("gender_id", sex);
    map.put("is_married", marriage);
    map.put("birth", birth);
    map.put("address", address);
    map.put("description", assessment);
    map.put("education_experience", eduJson);
    map.put("work_experience", workJson);
    map.put("project_experience", projectJson);
    map.put("salary_total", salary);
    DataUtils.parseCvParams(map, jsonObject);
    return jsonObject;
}

/**
 * Walks a hard-coded folder and dispatches every file to the matching parser:
 * {@code .doc} files go to {@link #parse(String)} and fall back to
 * {@code XmlDocParser.parseXml} when the failure message says the file is not an OLE2
 * document or is really XML; {@code .docx} files go to {@code DocxParser.parserByFilePath}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<String> paths = traverseFolder("E:\\简历-岗位\\罗科仕简历\\安卓和iOS岗\\ios");
    if (paths == null) {
        // guard against a null listing instead of failing on paths.size()
        paths = new ArrayList<>();
    }
    System.out.println("读取到的文件夹大小:" + paths.size());
    for (String path : paths) {
        if (path.endsWith("doc")) {
            try {
                parse(path);
            } catch (Exception e) {
                // the message may be null (e.g. for a NullPointerException)
                String message = e.getLocalizedMessage();
                boolean looksLikeXml = message != null
                        && (message.contains("Your file appears not to be a valid OLE2 document")
                        || message.contains("The document is really a XML file"));
                if (looksLikeXml) {
                    XmlDocParser.parseXml(path);
                } else {
                    // report instead of silently dropping the file
                    System.out.println("未解析文件路径:" + path);
                    e.printStackTrace();
                }
            }
        } else if (path.endsWith("docx")) {
            DocxParser.parserByFilePath(path);
        } else {
            System.out.println("未解析文件路径:" + path);
        }
    }
}

/**
 * Lists the absolute paths of the direct children of a folder.
 *
 * <p>Always returns a list: it is empty when the path does not exist, cannot be listed,
 * or has no children (a message is printed in each of those cases). Previously an empty
 * folder produced {@code null} while a missing one produced an empty list.
 *
 * @param path1 folder to list
 * @return absolute paths of the folder's direct children, never {@code null}
 */
public static List<String> traverseFolder(String path1) {
    List<String> fileNameList = new ArrayList<>();
    File file = new File(path1);
    if (!file.exists()) {
        System.out.println("文件不存在!");
        return fileNameList;
    }
    File[] files = file.listFiles();
    if (files == null || files.length == 0) {
        System.out.println("文件夹是空的!");
        return fileNameList;
    }
    for (File child : files) {
        fileNameList.add(child.getAbsolutePath());
    }
    return fileNameList;
}

}

2.doc(xml)抽取
package com.beagledata.mgc.lucas.jdcvmatch.openapi.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.lang3.StringUtils;

import java.io.File;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class XmlDocParser {
private Map<String, Map<String, Object>> orderMap = new HashMap<String, Map<String, Object>>(); //map data structure?? NOTE(review): not referenced by any method visible in this class - confirm it is still needed

/**
 * Extracts the text that sits between a start marker and an end marker.
 *
 * <p>When {@code end} is non-null, {@code start} and {@code end} are both treated as
 * regular expressions and the pattern {@code start + ".*" + end} is searched. Callers
 * may prefix {@code end} with {@code ?} to make the {@code .*} lazy. If several matches
 * exist the last one wins. Every occurrence of the end pattern (without the leading
 * {@code ?}) and of the start pattern is removed from the matched text.
 *
 * <p>When {@code end} is null, {@code start} is treated as a literal and everything
 * after its first occurrence is returned.
 *
 * @param str1  text to search
 * @param start start marker (regex, or literal when {@code end} is null)
 * @param end   end marker regex, optionally prefixed with {@code ?}; may be null
 * @return the extracted text, or the empty string when nothing matches
 */
public String findString(String str1, String start, String end) {
    String result = "";
    if (end != null) {
        String regex = start + ".*" + end;
        // the leading '?' only modifies ".*" in the search regex; drop it for the clean-up
        String endPattern = end.replaceAll("^\\?", "");
        Matcher matcher = Pattern.compile(regex).matcher(str1);
        while (matcher.find()) {
            result = matcher.group().replaceAll(endPattern, "").replaceAll(start, "");
        }
    } else {
        // take everything after the start marker; the previous "+ 1" offset kept most of
        // the marker and returned the whole input when the marker was missing
        int idx = str1.indexOf(start);
        if (idx >= 0) {
            result = str1.substring(idx + start.length());
        }
    }
    return result;
}


/**
 * Parses a resume stored as a Word XML package (a ".doc" file that is really XML).
 *
 * <p>The {@code pkg:part} named {@code /word/document.xml} is located, the second
 * {@code w:tbl} of its body is taken as the resume table, row 0 is parsed as basic
 * information and the following rows are read as (title row, content row) pairs.
 *
 * @param filePath path of the XML document
 * @return the JSON object filled by {@code DataUtils.parseCvParams}; it is returned
 *         without being filled when the file cannot be loaded or lacks the expected
 *         structure (a message is printed in that case)
 */
public static JSONObject parseXml(String filePath) {
    JSONObject jsonObject = new JSONObject();
    File file = new File(filePath);
    XMLConfiguration config = new XMLConfiguration();
    try {
        config.load(file);
    } catch (ConfigurationException e) {
        // nothing can be extracted from an unloaded file; previously execution went on
        // and failed later with a NullPointerException
        e.printStackTrace();
        return jsonObject;
    }

    HierarchicalConfiguration docConfig = null;
    for (HierarchicalConfiguration h : config.configurationsAt("pkg:part")) {
        if ("/word/document.xml".equals(h.getString("[@pkg:name]"))) {
            docConfig = h;
            break;
        }
    }
    if (docConfig == null) {
        System.out.println("不是罗科仕模板简历:" + filePath);
        return jsonObject;
    }

    List<HierarchicalConfiguration> tables = docConfig.configurationsAt("pkg:xmlData.w:document.w:body.w:tbl");
    if (tables.size() < 2) {
        // the resume content is read from the second table of the body
        System.out.println("不是罗科仕模板简历:" + filePath);
        return jsonObject;
    }
    List<HierarchicalConfiguration> trs = tables.get(1).configurationsAt("w:tr");

    Map<String, Object> map = new HashMap<>();
    // basic information lives in the first row
    try {
        getBaseData(trs.get(0), map, filePath);
    } catch (Exception e) {
        System.out.println(filePath);
    }

    // remaining rows come in pairs: a title row followed by its content row;
    // "i + 1 < size" keeps a dangling title row from overrunning the list
    for (int i = 1; i + 1 < trs.size(); i = i + 2) {
        List<HierarchicalConfiguration> titleCells = trs.get(i).configurationsAt("w:tc");
        if (titleCells.size() < 2) {
            continue;
        }
        String title = titleCells.get(1).getString("w:p.w:r.w:t");
        HierarchicalConfiguration content = trs.get(i + 1);
        if ("顾问评价".equals(title)) {
            parserEvaluation(content, map);
        } else if ("教育经历".equals(title)) {
            parserEdu(content, map);
        } else if ("工作经历".equals(title)) {
            parserWork(content, map);
        } else if ("项目经历".equals(title)) {
            parserProject(content, map);
        } else if ("目前薪资".equals(title)) {
            parserSalary(content, map);
        } else if ("其他资料".equals(title)) {
            parserOther(content, map);
        }
    }
    DataUtils.parseCvParams(map, jsonObject);
    return jsonObject;
}

/**
 * Walks the nested table of the "other information" section and concatenates every
 * {@code w:t} text fragment found in its cells.
 *
 * <p>NOTE(review): the concatenated text is not written to {@code map} or returned, so
 * the method currently has no visible effect - confirm whether a key is missing.
 *
 * @param hierarchicalConfiguration content row of the section
 * @param map                       target map (not modified here)
 */
private static void parserOther(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    HierarchicalConfiguration firstCell = hierarchicalConfiguration.configurationsAt("w:tc").get(0);
    SubnodeConfiguration innerTable = firstCell.configurationAt("w:tbl");
    List<HierarchicalConfiguration> rows = innerTable.configurationsAt("w:tr");
    StringBuilder text = new StringBuilder();
    for (HierarchicalConfiguration row : rows) {
        for (HierarchicalConfiguration cell : row.configurationsAt("w:tc")) {
            for (Object fragment : cell.getList("w:p.w:r.w:t")) {
                text.append(fragment);
            }
        }
    }
}

/**
 * Reads the salary text from the second cell of the first row of the section's nested
 * table and stores the amount found between "总薪酬:人民币税前约" and "万" under
 * {@code salary_total}.
 *
 * @param hierarchicalConfiguration content row of the salary section
 * @param map                       target map receiving {@code salary_total}
 */
private static void parserSalary(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    HierarchicalConfiguration firstCell = hierarchicalConfiguration.configurationsAt("w:tc").get(0);
    SubnodeConfiguration innerTable = firstCell.configurationAt("w:tbl");
    List<HierarchicalConfiguration> rows = innerTable.configurationsAt("w:tr");
    List<Object> fragments = rows.get(0).configurationsAt("w:tc").get(1).getList("w:p.w:r.w:t");

    StringBuilder text = new StringBuilder();
    for (Object fragment : fragments) {
        text.append(fragment);
    }

    XmlDocParser finder = new XmlDocParser();
    map.put("salary_total", finder.findString(text.toString(), "总薪酬:人民币税前约", "?万"));
}

/**
 * Parses the project section into a JSON array stored under {@code project_experience}.
 *
 * <p>A row whose first cell has text starts a new project and supplies its {@code name}
 * (second cell); rows with an empty first cell contribute description text.
 *
 * <p>Fixes over the previous version: the description buffer is reset when a project is
 * emitted (it used to accumulate across projects), and the project still pending when
 * the rows run out is emitted (it used to be lost).
 *
 * @param hierarchicalConfiguration content row of the project section
 * @param map                       target map receiving {@code project_experience}
 */
private static void parserProject(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    SubnodeConfiguration ss = hierarchicalConfiguration.configurationsAt("w:tc").get(0).configurationAt("w:tbl");
    List<HierarchicalConfiguration> trs = ss.configurationsAt("w:tr");
    JSONArray jsonArray = new JSONArray();
    JSONObject current = new JSONObject();
    StringBuilder other = new StringBuilder();
    for (HierarchicalConfiguration tr : trs) {
        List<HierarchicalConfiguration> tcs = tr.configurationsAt("w:tc");
        String date = tcs.get(0).getString("w:p.w:r.w:t");
        if (StringUtils.isNotBlank(date)) {
            // a new project begins: flush whatever was collected for the previous one
            if (!current.isEmpty() || StringUtils.isNotBlank(other.toString())) {
                if (StringUtils.isNotBlank(other.toString())) {
                    current.put("description", DataUtils.parseString(other.toString()));
                }
                jsonArray.add(current);
                current = new JSONObject();
                other.setLength(0);
            }
            current.put("name", DataUtils.parseString(tcs.get(1).getString("w:p.w:r.w:t")));
        } else {
            for (Object o : tcs.get(1).getList("w:p.w:r.w:t")) {
                other.append(o);
            }
        }
    }
    // emit the project that was still being collected when the rows ended
    if (StringUtils.isNotBlank(other.toString())) {
        current.put("description", DataUtils.parseString(other.toString()));
    }
    if (!current.isEmpty()) {
        jsonArray.add(current);
    }
    map.put("project_experience", jsonArray);
}

/**
 * Parses the work section into a JSON array stored under {@code work_experience}.
 *
 * <p>Rows are consumed in pairs: the second cell of the first row is the header
 * ("company | job", or just the company) and the second cell of the following row holds
 * the detail fragments. Fragments from index 2 onward are joined into {@code duty}, each
 * prefixed with "其他:".
 *
 * <p>Compared with the previous version, a dangling last row, a missing header, a header
 * with nothing around the "|" and a detail cell with fewer than two fragments no longer
 * throw; the affected field is left empty instead.
 *
 * @param hierarchicalConfiguration content row of the work section
 * @param map                       target map receiving {@code work_experience}
 */
private static void parserWork(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    SubnodeConfiguration ss = hierarchicalConfiguration.configurationsAt("w:tc").get(0).configurationAt("w:tbl");
    List<HierarchicalConfiguration> trs = ss.configurationsAt("w:tr");
    JSONArray jsonArray = new JSONArray();
    // "i + 1 < size": every entry needs both its header row and its detail row
    for (int i = 0; i + 1 < trs.size(); i = i + 2) {
        Map<String, Object> map1 = new HashMap<>();
        JSONObject jsonObject1 = new JSONObject();
        String data = trs.get(i).configurationsAt("w:tc").get(1).getString("w:p.w:r.w:t");
        List<Object> datas = trs.get(i + 1).configurationsAt("w:tc").get(1).getList("w:p.w:r.w:t");
        String company;
        String job;
        if (data != null && data.contains("|")) {
            // split once; String.split drops trailing empty parts, so check the length
            String[] parts = data.split("\\|");
            company = parts.length > 0
                    ? parts[0].replaceAll("\\s", "").replaceAll("((\\d+年)?(\\d+个月)?)", "")
                    : "";
            job = parts.length > 1 ? parts[1].replaceAll("\\s", "") : "";
        } else {
            company = data == null ? "" : data;
            job = datas.size() > 1 ? datas.get(1).toString() : "";
        }

        StringBuilder stringBuilder = new StringBuilder();
        for (int j = 2; j < datas.size(); j++) {
            stringBuilder.append("其他:").append(datas.get(j));
        }
        String duty = stringBuilder.toString();

        map1.put("company_name", company);
        map1.put("job", job);
        map1.put("duty", duty);
        DataUtils.parseCvParams(map1, jsonObject1);
        jsonArray.add(jsonObject1);
    }
    map.put("work_experience", jsonArray);
}

/**
 * Parses the education section into a JSON array stored under
 * {@code education_experience}.
 *
 * <p>For each row the text fragments of the second cell are read as school, degree and
 * major. A row with only two fragments is read as school and major (the same mapping
 * {@code WordDocParse.parse} applies when the degree is absent); rows with fewer
 * fragments are reported and skipped instead of throwing.
 *
 * @param hierarchicalConfiguration content row of the education section
 * @param map                       target map receiving {@code education_experience}
 */
private static void parserEdu(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    SubnodeConfiguration ss = hierarchicalConfiguration.configurationsAt("w:tc").get(0).configurationAt("w:tbl");
    List<HierarchicalConfiguration> trs = ss.configurationsAt("w:tr");
    JSONArray jsonArray = new JSONArray();
    for (HierarchicalConfiguration h : trs) {
        Map<String, Object> map1 = new HashMap<>();
        JSONObject jsonObject1 = new JSONObject();
        List<Object> data = h.configurationsAt("w:tc").get(1).getList("w:p.w:r.w:t");
        if (data.size() >= 3) {
            map1.put("school", data.get(0).toString());
            map1.put("education_id", data.get(1).toString());
            map1.put("major", data.get(2).toString());
        } else if (data.size() == 2) {
            map1.put("school", data.get(0).toString());
            map1.put("major", data.get(1).toString());
        } else {
            System.out.println("缺失信息:需要特殊处理");
            continue;
        }
        DataUtils.parseCvParams(map1, jsonObject1);
        jsonArray.add(jsonObject1);
    }
    map.put("education_experience", jsonArray);
}

/**
 * Joins every text fragment of the first row of the nested table found in the second
 * cell and stores the result under {@code description}.
 *
 * @param hierarchicalConfiguration content row of the evaluation section
 * @param map                       target map receiving {@code description}
 */
private static void parserEvaluation(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    HierarchicalConfiguration secondCell = hierarchicalConfiguration.configurationsAt("w:tc").get(1);
    SubnodeConfiguration innerTable = secondCell.configurationAt("w:tbl");
    HierarchicalConfiguration firstRow = innerTable.configurationsAt("w:tr").get(0);

    StringBuilder text = new StringBuilder();
    for (Object fragment : firstRow.getList("w:tc.w:p.w:r.w:t")) {
        text.append(fragment);
    }
    map.put("description", text.toString());
}


/**
 * Extracts the basic information (name, gender, marital status, birth date, address)
 * from the nested table in the second cell of the first resume row and writes it,
 * together with a generated id, into {@code map}.
 *
 * <p>When the file name contains "的简历" the candidate name is taken from the file name
 * instead of the document. Any exception is caught and its stack trace printed; in that
 * case nothing is written to {@code map}.
 *
 * @param hierarchicalConfiguration first row of the resume table
 * @param map                       target map
 * @param path                      path of the source file, used to derive the name
 */
private static void getBaseData(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map, String path) {
    try {
        SubnodeConfiguration ss = hierarchicalConfiguration.configurationsAt("w:tc").get(1).configurationAt("w:tbl");
        List<HierarchicalConfiguration> trs = ss.configurationsAt("w:tr");
        String name = trs.get(0).getString("w:tc.w:p.w:r.w:t");

        // accept both '\' and '/' as separator so the file name is isolated on any platform
        int lastSeparator = Math.max(path.lastIndexOf('\\'), path.lastIndexOf('/'));
        String fileName = path.substring(lastSeparator + 1);
        if (fileName.contains("的简历")) {
            if (fileName.contains("(年薪)")) {
                name = fileName.substring(0, fileName.indexOf("("));
            } else {
                name = fileName.substring(0, fileName.indexOf("的简历"));
            }
        }

        // rows 4 and 5 hold label/value cells: the values are in cells 1 and 3
        List<HierarchicalConfiguration> row4 = trs.get(4).configurationsAt("w:tc");
        List<HierarchicalConfiguration> row5 = trs.get(5).configurationsAt("w:tc");
        String gender = row4.get(1).getString("w:p.w:r.w:t");
        String marriage = row4.get(3).getString("w:p.w:r.w:t");
        String birth = row5.get(1).getString("w:p.w:r.w:t");
        String address = row5.get(3).getString("w:p.w:r.w:t");

        map.put("id", IdUtils.generateShortUuid());
        map.put("name", name);
        map.put("gender_id", gender);
        map.put("is_married", marriage);
        map.put("birth", birth);
        map.put("address", address);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

}

3.docx抽取
package com.beagledata.mgc.lucas.jdcvmatch.openapi.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DocxParser {
public static void main(String[] args) throws Exception {
String filePath = “C:\Users\zhoug\Desktop\data\part05-20190731\机器学习-Ivy\有信-政策研究”;
File[] files = new File(filePath).listFiles();
for (File file : files) {
if (file.getName().endsWith(“docx”)) {
parserByFilePath(file.getPath());
}
}
}

/**
 * Parses a .docx resume into a JSON object.
 *
 * <p>The document XML is loaded into an {@code XMLConfiguration}; the second
 * {@code w:tbl} of the body is taken as the resume table. Row 0 is parsed as basic
 * information and the following rows are read as (title row, content row) pairs.
 *
 * @param filePath path of the .docx file
 * @return the filled JSON object; {@code null} when the row count does not fit the
 *         template (even, or fewer than 5); a JSON object that was not filled when the
 *         document has fewer than two tables or an exception occurred
 */
public static JSONObject parserByFilePath(String filePath) {
    XWPFDocument docx = null;
    JSONObject jsonObject = new JSONObject();
    Map<String, Object> map = new HashMap<>();
    try {
        XMLConfiguration config = new XMLConfiguration();
        docx = new XWPFDocument(POIXMLDocument.openPackage(filePath));
        String xmlData = docx.getDocument().xmlText();
        // encode explicitly as UTF-8 rather than with the platform default charset
        // NOTE(review): assumes xmlText() carries no conflicting encoding declaration - confirm
        InputStream is = new ByteArrayInputStream(xmlData.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        config.load(is);
        List<HierarchicalConfiguration> ssss1 = config.configurationsAt("w:body.w:tbl");
        if (ssss1.size() < 2) {
            // explicit check instead of relying on the "Index: 1" exception message
            System.out.println("不是罗科仕模板简历:" + filePath);
            return jsonObject;
        }
        HierarchicalConfiguration hierarchicalConfiguration = ssss1.get(1);
        List<HierarchicalConfiguration> trs = hierarchicalConfiguration.configurationsAt("w:tr");
        int size = trs.size();
        // the template has one base row followed by title/content pairs => odd count, at least 5
        if (size % 2 == 0 || size < 5) {
            System.out.println("不是罗科仕简历:" + filePath);
            return null;
        }
        parserBaseData(trs.get(0), map);
        for (int i = 1; i < trs.size(); i = i + 2) {
            String title = trs.get(i).configurationsAt("w:tc").get(1).getString("w:p.w:r.w:t");
            if ("顾问评价".equals(title) || "SUMMARY".equals(title)) {
                parserEvaluation(trs.get(i + 1), map);
            } else if ("教育经历".equals(title) || "EDUCATION".equals(title)) {
                parserEdu(trs.get(i + 1), filePath, map);
            } else if ("工作经历".equals(title) || "EXPERIENCE".equals(title)) {
                parserWork(trs.get(i + 1), map);
            } else if ("项目经历".equals(title)) {
                parserProject(trs.get(i + 1), map);
            } else if ("目前薪资".equals(title) || "PACKAGE INFO".equals(title)) {
                parserSalary(trs.get(i + 1), map);
            } else if ("其他资料".equals(title) || "OTHER INFORMATION".equals(title) || "备注".equals(title)) {
                parserOther(trs.get(i + 1), map);
            }
        }
        DataUtils.parseCvParams(map, jsonObject);
    } catch (Exception e) {
        // the message can be null (e.g. NullPointerException); avoid failing inside the handler
        String localMsg = e.getLocalizedMessage();
        if (localMsg != null && localMsg.contains("Index: 1")) {
            System.out.println("不是罗科仕模板简历:" + filePath);
        } else {
            System.out.println(localMsg);
            System.out.println("错误:" + filePath + " =========" + (docx == null ? "" : docx.getDocument().xmlText()));
        }
    } finally {
        // release the package without writing anything back to the source file
        if (docx != null && docx.getPackage() != null) {
            docx.getPackage().revert();
        }
    }
    return jsonObject;
}

/**
 * Walks the nested table of the "other information" section and concatenates every
 * {@code w:t} text fragment found in its cells. Any exception is ignored.
 *
 * <p>NOTE(review): the concatenated text is not written to {@code map} or returned, so
 * the method currently has no visible effect - confirm whether a key is missing.
 *
 * @param hierarchicalConfiguration content row of the section
 * @param map                       target map (not modified here)
 */
private static void parserOther(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    try {
        HierarchicalConfiguration firstCell = hierarchicalConfiguration.configurationsAt("w:tc").get(0);
        SubnodeConfiguration innerTable = firstCell.configurationAt("w:tbl");
        List<HierarchicalConfiguration> rows = innerTable.configurationsAt("w:tr");
        StringBuilder text = new StringBuilder();
        for (HierarchicalConfiguration row : rows) {
            for (HierarchicalConfiguration cell : row.configurationsAt("w:tc")) {
                for (Object fragment : cell.getList("w:p.w:r.w:t")) {
                    text.append(fragment);
                }
            }
        }
    } catch (Exception ignored) {
        // best effort: a malformed section is skipped without reporting
    }
}

/**
 * Extracts the total salary from the second cell of the section's nested table and
 * stores it under {@code salary_total}.
 *
 * <p>Two phrasings are tried in order. After whitespace is removed, a value of five or
 * more characters is converted with {@code NumberUtils.toDouble} and divided by 10000;
 * shorter values are stored as they are.
 *
 * @param hierarchicalConfiguration content row of the salary section
 * @param map                       target map receiving {@code salary_total}
 */
private static void parserSalary(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    HierarchicalConfiguration firstCell = getHcsByKey(hierarchicalConfiguration, "w:tc").get(0);
    List<HierarchicalConfiguration> innerCells = getHcsByKey(firstCell, "w:tbl.w:tr.w:tc");
    String rawText = getTextByPRT(innerCells.get(1));

    String amount = findString(rawText, "全年薪(.*)人民币(约)?", "(税前)");
    if (StringUtils.isBlank(amount)) {
        amount = findString(rawText, "总薪酬:人民币税前(约)?", "?万");
    }
    amount = amount.replaceAll("\\s", "");

    String stored = amount.length() >= 5
            ? String.valueOf(NumberUtils.toDouble(amount) / 10000)
            : amount;
    map.put("salary_total", stored);
}

/**
 * Parses the project section into a JSON array stored under {@code project_experience}.
 *
 * <p>A row whose first cell has text starts a new project and supplies its name (second
 * cell); rows with an empty first cell append to the running description. A project is
 * emitted when the next one starts or when the rows end, and only if its description is
 * not blank.
 *
 * @param hierarchicalConfiguration content row of the project section
 * @param map                       target map receiving {@code project_experience}
 */
private static void parserProject(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    JSONArray projects = new JSONArray();
    List<HierarchicalConfiguration> rows = getHcsByKey(hierarchicalConfiguration, "w:tc.w:tbl.w:tr");
    String description = "";
    String projectName = "";
    JSONObject entry = new JSONObject();
    for (HierarchicalConfiguration row : rows) {
        String firstCellText = row.configurationsAt("w:tc").get(0).getString("w:p.w:r.w:t");
        if (StringUtils.isBlank(firstCellText)) {
            // continuation row: more description for the current project
            description = description + getTextByPRT(row.configurationsAt("w:tc").get(1));
            continue;
        }
        if (StringUtils.isNotBlank(description)) {
            // the previous project is complete: emit it and start a fresh object
            entry.put("description", DataUtils.parseString(description));
            entry.put("name", DataUtils.parseString(projectName));
            projects.add(entry);
            entry = new JSONObject();
            description = "";
        }
        projectName = getTextByPRT(row.configurationsAt("w:tc").get(1));
    }
    if (StringUtils.isNotBlank(description)) {
        // emit the project that was still open when the rows ended
        entry.put("description", DataUtils.parseString(description));
        entry.put("name", DataUtils.parseString(projectName));
        projects.add(entry);
    }
    map.put("project_experience", projects);
}

/**
 * Parses the work section into a JSON array stored under {@code work_experience}.
 *
 * <p>Rows of the nested table are scanned in order while the company name, job title and
 * duty text are accumulated; a record is emitted as soon as the duty text is non-blank.
 *
 * @param hierarchicalConfiguration content row of the work section
 * @param map                       target map receiving {@code work_experience}
 */
private static void parserWork(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
	List<HierarchicalConfiguration> trs = getHcsByKey(hierarchicalConfiguration, "w:tc.w:tbl.w:tr");

	// NOTE(review): date is assigned below but never read in this method
	String date = "";
	String comName = "";
	String other = "";
	String job = "";
	// JSON object for the record currently being collected
	JSONObject jsonObject1 = new JSONObject();
	JSONArray jsonArray = new JSONArray();
	for (int i = 0; i < trs.size(); i++) {
		List<HierarchicalConfiguration> tcs = getHcsByKey(trs.get(i), "w:tc");
		String date1 = getTextByPRT(tcs.get(0));
		if (StringUtils.isNotBlank(date1)) {
			date = date1;
		}
		List<HierarchicalConfiguration> p = getHcsByKey(tcs.get(1), "w:p");
		if (p.size() == 1 && StringUtils.isNotBlank(date1)) {
			// a single paragraph next to a non-blank first cell is read as the company name
			comName = getTextByPRT(tcs.get(1));
		} else {
			if (p.size() == 1) {
				// single paragraph: text before the first fragment containing "工作职责"
				// goes to job, that fragment and everything after it goes to other
				List<HierarchicalConfiguration> zws = p.get(0).configurationsAt("w:r");
				boolean falg = false;
				for (HierarchicalConfiguration pr : zws) {
					List<Object> objects = pr.getList("w:t");
					for (Object o : objects) {
						if (o.toString().contains("工作职责")) {
							falg = true;
						}
						if (falg) {
							other = other + o.toString();
						} else {
							job = job + o.toString();
						}
					}
				}
			} else {
				// several paragraphs: the first one is the job, the rest form the duty text
				List<HierarchicalConfiguration> zws = p.get(0).configurationsAt("w:r");
				StringBuilder zwStringBuilder = new StringBuilder();
				for (HierarchicalConfiguration pr : zws) {
					List<Object> objects = pr.getList("w:t");
					objects.forEach(o -> zwStringBuilder.append(o));
				}
				job = zwStringBuilder.toString();
				StringBuilder otherStringBuilder = new StringBuilder();
				for (int j = 1; j < p.size(); j++) {
					List<HierarchicalConfiguration> others = p.get(j).configurationsAt("w:r");

					for (HierarchicalConfiguration pr : others) {
						List<Object> objects = pr.getList("w:t");
						objects.forEach(o -> otherStringBuilder.append(o));
					}
				}
				other = otherStringBuilder.toString();
			}


		}
		if (StringUtils.isNotBlank(other)) {
			// one complete record: emit it
			// add it once and start a fresh object; comName is kept for following rows
			jsonObject1.put("company_name", DataUtils.parseString(comName));
			jsonObject1.put("job", DataUtils.parseString(job));
			jsonObject1.put("duty", DataUtils.parseString(other));
			jsonArray.add(jsonObject1);
			jsonObject1 = new JSONObject();
			other = "";
			job = "";
		}
	}

	// NOTE(review): other is cleared inside the loop whenever it is non-blank, so this
	// final flush appears to be unreachable - confirm before relying on it
	if (StringUtils.isNotBlank(other)) {
		// if duty text is still pending, add the record once more
		jsonObject1.put("company_name", DataUtils.parseString(comName));
		jsonObject1.put("job", DataUtils.parseString(job));
		jsonObject1.put("duty", DataUtils.parseString(other));
		jsonArray.add(jsonObject1);
	}
	map.put("work_experience",jsonArray);
}

/**
 * Parses the education section into a JSON array stored under
 * {@code education_experience}.
 *
 * <p>For each row the {@code w:r} runs of the second cell are inspected; only rows with
 * 5 or 6 runs are accepted, and runs 0, 2 and 4 are read as school, degree and major.
 * Other rows are reported together with the file path and skipped.
 *
 * @param hierarchicalConfiguration content row of the education section
 * @param filePath                  path of the source file, used in the skip message
 * @param map                       target map receiving {@code education_experience}
 */
private static void parserEdu(HierarchicalConfiguration hierarchicalConfiguration, String filePath, Map<String, Object> map) {
    List<HierarchicalConfiguration> rows = getHcsByKey(hierarchicalConfiguration, "w:tc.w:tbl.w:tr");
    JSONArray educations = new JSONArray();
    for (HierarchicalConfiguration row : rows) {
        HierarchicalConfiguration secondCell = getHcsByKey(row, "w:tc").get(1);
        List<HierarchicalConfiguration> runs = getHcsByKey(secondCell, "w:p.w:r");
        int runCount = runs.size();
        if (runCount != 5 && runCount != 6) {
            System.out.println("缺失信息:需要特殊处理 :" + filePath);
            continue;
        }
        Map<String, Object> fields = new HashMap<>();
        fields.put("school", runs.get(0).getString("w:t"));
        fields.put("education_id", runs.get(2).getString("w:t"));
        fields.put("major", runs.get(4).getString("w:t"));
        JSONObject entry = new JSONObject();
        DataUtils.parseCvParams(fields, entry);
        educations.add(entry);
    }
    map.put("education_experience", educations);
}

/**
 * Stores the text of the first cell of the nested table found in the second cell of the
 * row under {@code description}.
 *
 * @param hierarchicalConfiguration content row of the evaluation section
 * @param map                       target map receiving {@code description}
 */
private static void parserEvaluation(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    HierarchicalConfiguration secondCell = getHcsByKey(hierarchicalConfiguration, "w:tc").get(1);
    List<HierarchicalConfiguration> innerCells = getHcsByKey(secondCell, "w:tbl.w:tr.w:tc");
    String evaluation = getTextByPRT(innerCells.get(0));
    map.put("description", evaluation);
}

/**
 * Extracts the basic information from the nested table in the second cell of the first
 * resume row and writes it, together with a generated id, into {@code map}.
 *
 * <p>The first nested row supplies the default name. The remaining rows are read as
 * (label, value) cell pairs and matched against Chinese and English label fragments.
 * Phone and e-mail labels are recognised but their values are not exported, as before.
 * A trailing label cell without a value cell is ignored instead of throwing.
 *
 * @param hierarchicalConfiguration first row of the resume table
 * @param map                       target map
 */
private static void parserBaseData(HierarchicalConfiguration hierarchicalConfiguration, Map<String, Object> map) {
    List<HierarchicalConfiguration> trs = hierarchicalConfiguration.configurationsAt("w:tc").get(1).configurationsAt("w:tbl.w:tr");
    String name = getTextByPRT(trs.get(0).configurationAt("w:tc"));
    String sex = "";
    String marriage = "";
    String birth = "";
    String place = "";
    for (int i = 1; i < trs.size(); i++) {
        List<HierarchicalConfiguration> tcs = getHcsByKey(trs.get(i), "w:tc");
        // "j + 1 < size": only complete label/value pairs are considered
        for (int j = 0; j + 1 < tcs.size(); j = j + 2) {
            String title = getTextByPRT(tcs.get(j));
            if (title.contains("手机") || title.contains("hone")
                    || title.contains("邮箱") || title.contains("mail")) {
                // contact details are matched first so they cannot fall through to the
                // looser checks below; their values are not stored in the map
                continue;
            }
            String value = getTextByPRT(tcs.get(j + 1));
            if (title.contains("性别") || title.contains("Gender")) {
                sex = value;
            } else if (title.contains("婚姻") || title.contains("arital")) {
                marriage = value;
            } else if (title.contains("出生") || title.contains("DOB")) {
                birth = value;
            } else if (title.contains("所在地") || title.contains("ocation")) {
                place = value;
            } else if (title.contains("中文名") || title.contains("ame")) {
                name = value;
            }
        }
    }
    map.put("id", IdUtils.generateShortUuid());
    map.put("name", name);
    map.put("gender_id", sex);
    map.put("is_married", marriage);
    map.put("birth", birth);
    map.put("address", place);
}

/**
 * Collects the text of every {@code w:t} value found under the {@code w:p.w:r} nodes of
 * the given node, appending a single space after each value.
 *
 * @param hierarchicalConfiguration node to read from
 * @return the concatenated text; empty when there are no runs
 */
public static String getTextByPRT(HierarchicalConfiguration hierarchicalConfiguration) {
    StringBuilder text = new StringBuilder();
    for (HierarchicalConfiguration run : hierarchicalConfiguration.configurationsAt("w:p.w:r")) {
        for (Object fragment : run.getList("w:t")) {
            text.append(fragment).append(" ");
        }
    }
    return text.toString();
}

/**
 * Shorthand for {@code hierarchicalConfiguration.configurationsAt(key)}.
 *
 * @param hierarchicalConfiguration node to query
 * @param key                       configuration key of the sub-nodes to select
 * @return the list returned by {@code configurationsAt(key)}
 */
public static List<HierarchicalConfiguration> getHcsByKey(HierarchicalConfiguration hierarchicalConfiguration, String key) {
	return hierarchicalConfiguration.configurationsAt(key);
}

/**
 * Extracts the text that sits between a start marker and an end marker.
 *
 * <p>When {@code end} is non-null, {@code start} and {@code end} are both treated as
 * regular expressions and the pattern {@code start + ".*" + end} is searched. Callers
 * may prefix {@code end} with {@code ?} to make the {@code .*} lazy. If several matches
 * exist the last one wins. Every occurrence of the end pattern (without the leading
 * {@code ?}) and of the start pattern is removed from the matched text.
 *
 * <p>When {@code end} is null, {@code start} is treated as a literal and everything
 * after its first occurrence is returned.
 *
 * @param str1  text to search
 * @param start start marker (regex, or literal when {@code end} is null)
 * @param end   end marker regex, optionally prefixed with {@code ?}; may be null
 * @return the extracted text, or the empty string when nothing matches
 */
public static String findString(String str1, String start, String end) {
    String result = "";
    if (end != null) {
        String regex = start + ".*" + end;
        // the leading '?' only modifies ".*" in the search regex; drop it for the clean-up
        String endPattern = end.replaceAll("^\\?", "");
        Matcher matcher = Pattern.compile(regex).matcher(str1);
        while (matcher.find()) {
            result = matcher.group().replaceAll(endPattern, "").replaceAll(start, "");
        }
    } else {
        // take everything after the start marker; the previous "+ 1" offset kept most of
        // the marker and returned the whole input when the marker was missing
        int idx = str1.indexOf(start);
        if (idx >= 0) {
            result = str1.substring(idx + start.length());
        }
    }
    return result;
}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值