部门给我找了点事做:帮忙筛选简历,估计是觉得我加班少了。为了不浪费时间,我写了个简单的简历内容打分排序工具,以后直接按得分排序后转发。
代码如下:
package com.lu;
import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Static helpers shared by the index creator and the scorer: tokenizing text,
 * ordering a score map, decoding base64 HTML parts and opening an index directory.
 */
public class LuceneUtils {

    /**
     * Recognizes text that contains a {@code text/html} part transferred as base64.
     * Group 2 captures the declared charset; group 3 captures everything between
     * {@code Content-Transfer-Encoding:base64} and the next {@code ----boundary_} marker.
     * Compiled once because the expression never changes.
     */
    private static final Pattern BASE64_HTML_PART = Pattern.compile(
            "(^[\\s|\\S]*?)Content-Type:text/html;charset=\"([\\s|\\S]*?)\"[\\s|\\S]*?Content-Transfer-Encoding:base64([\\S|\\s]*?)----boundary_([\\S|\\s]*?$)");

    /**
     * Splits a string into the terms produced by the given analyzer.
     *
     * <p>Example:
     * {@code getWords(text, new SmartChineseAnalyzer()).forEach(System.out::println);}
     *
     * @param str      the text to tokenize; {@code null} yields an empty list
     * @param analyzer the analyzer used to produce the token stream
     * @return the terms in stream order; if an {@link IOException} occurs the stack trace
     *         is printed and the terms collected so far are returned
     */
    public static List<String> getWords(String str, Analyzer analyzer) {
        List<String> result = new ArrayList<String>();
        if (str == null) {
            return result;
        }
        // try-with-resources closes the stream on every path, including failures.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(str))) {
            CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(attr.toString());
            }
            // Part of the TokenStream consumer workflow: signal end-of-stream before close.
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Returns a copy of the map whose iteration order is ascending by value.
     * Entries with equal values keep the relative order in which the input map
     * iterates them.
     *
     * @param scoreMap the map to sort; may be {@code null} or empty
     * @return a new insertion-ordered map sorted by ascending value; an empty map
     *         (never {@code null}) when the input is {@code null} or empty, so callers
     *         can iterate the result without a null check
     */
    public static Map<String, Integer> sortMapByValue(Map<String, Integer> scoreMap) {
        Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
        if (scoreMap == null || scoreMap.isEmpty()) {
            return sortedMap;
        }
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(scoreMap.entrySet());
        entries.sort(Map.Entry.comparingByValue());
        for (Map.Entry<String, Integer> entry : entries) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }
        return sortedMap;
    }

    /**
     * Extracts the readable content of a document.
     *
     * <p>If the text matches the base64 {@code text/html} layout described on
     * {@link #BASE64_HTML_PART}, the base64 payload is decoded with the declared
     * charset; otherwise the text is returned unchanged.
     *
     * @param content the raw file content
     * @return the decoded payload or the original text; empty only when
     *         {@code content} is {@code null}
     */
    public static Optional<String> checkGetContent(String content) {
        if (content == null) {
            return Optional.empty();
        }
        Matcher matcher = BASE64_HTML_PART.matcher(content);
        if (matcher.matches()) {
            String declaredCharset = matcher.group(2);
            String base64Payload = matcher.group(3);
            return Optional.of(decodeStr(base64Payload, declaredCharset));
        }
        return Optional.of(content);
    }

    /**
     * Base64-decodes a string and interprets the bytes in the given charset.
     *
     * @param encodeStr the base64 text
     * @param charset   the name of the charset of the decoded bytes
     * @return the decoded text; when the charset name is not supported the bytes are
     *         interpreted as UTF-8 so the result does not depend on the machine's
     *         default charset
     */
    public static String decodeStr(String encodeStr, String charset) {
        // Base64 text is plain ASCII; being explicit avoids relying on the platform default.
        byte[] decoded = new Base64().decode(encodeStr.getBytes(StandardCharsets.US_ASCII));
        try {
            return new String(decoded, charset);
        } catch (UnsupportedEncodingException e) {
            return new String(decoded, StandardCharsets.UTF_8);
        }
    }

    /**
     * Opens a file-system backed Lucene directory.
     *
     * @param indexPath path of the index directory
     * @return the opened directory, or empty when opening fails with an
     *         {@link IOException} (the stack trace is printed)
     */
    public static Optional<Directory> openFSDirectory(String indexPath) {
        Path path = Paths.get(indexPath);
        try {
            return Optional.of(FSDirectory.open(path));
        } catch (IOException e) {
            e.printStackTrace();
            return Optional.empty();
        }
    }
}
package com.lu;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
public class ContentScoror {
String indexPath = "lucene\\Index\\";
Map<String, Integer> scoreMap = new HashMap<>();
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
public void eval(IndexSearcher searcher, Query query, Integer weight) throws IOException {
TopDocs topDocs = searcher.search(query, 1000);
ScoreDoc[] hits = topDocs.scoreDocs;
for (int i = 0; i < hits.length; i++) {
ScoreDoc hit = hits[i];
Document hitDoc = searcher.doc(hit.doc);
System.out.println("(" + hit.doc + "-" + hit.score + ")" + " name:" + hitDoc.get("name"));
String filename = hitDoc.get("name");
Integer score = scoreMap.get(filename);
// 结果按照得分来排序。主要由 关键字的个数和权值来决定
if (null == score) {
score = 0;
scoreMap.put(filename, 0);
}
scoreMap.put(filename, score + weight);
}
}
public void checkIndexAndScore(Directory directory, Analyzer analyzer) {
try {
IndexReader ir = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(ir);
QueryParser parse = new QueryParser("content", analyzer);
Query query = parse.parse("统招本科");
eval(searcher, query, 1);
query = parse.parse("计算机数学信息管理");
eval(searcher, query, 1);
query = parse.parse("Java Web App");
eval(searcher, query, 1);
query = parse.parse("struts");
eval(searcher, query, 1);
query = parse.parse("mybatis");
eval(searcher, query, 1);
query = parse.parse("ibatis");
eval(searcher, query, 1);
query = parse.parse("hibernate");
eval(searcher, query, 1);
query = parse.parse("spring");
eval(searcher, query, 1);
query = parse.parse("调优");
eval(searcher, query, 2);
query = parse.parse("webservice");
eval(searcher, query, 1);
query = parse.parse("axis");
eval(searcher, query, 2);
query = parse.parse("xfire");
eval(searcher, query, 1);
query = parse.parse("cxf");
eval(searcher, query, 1);
query = parse.parse("jax-ws jws");
eval(searcher, query, 1);
query = parse.parse("xml json");
eval(searcher, query, 1);
query = parse.parse("oracle mysql sqlserver db2");
eval(searcher, query, 1);
query = parse.parse("redis memcached");
eval(searcher, query, 1);
query = parse.parse("组长管理设计架构分析");
eval(searcher, query, 1);
Query pq = new PhraseQuery("content", "培训", "机构");
eval(searcher, pq, *);
ir.close();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
}
public void doScore() {
Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
if (dir.isPresent()) {
checkIndexAndScore(dir.get(), analyzer);
}
}
public void showResult() {
LuceneUtils.sortMapByValue(scoreMap).forEach((k, v) -> System.out.println(k + "---->" + v));
}
public static void main(String[] args) {
ContentScoror fie = new ContentScoror();
fie.doScore();
fie.showResult();
}
}
package com.lu;
import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
/**
 * Builds the Lucene index: every regular file in {@link #contentFilePath} becomes one
 * document with a stored "name" field (the file name) and a stored, analyzed
 * "content" field.
 */
public class FileIndexCreator {
    // NOTE: the backslash separators are only path separators on Windows; the value is
    // kept unchanged so it stays in sync with whatever reads the index.
    String indexPath = "lucene\\Index\\";
    String contentFilePath = "content";
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Reads one file and adds it to the index.
     *
     * @param iw the writer to add the document to
     * @param f  the file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    public void addDoc(IndexWriter iw, File f) throws IOException {
        // NOTE(review): the file is read with the platform default charset — confirm
        // this matches how the source files are encoded.
        String raw = FileUtils.readFileToString(f);
        Document doc = new Document();
        doc.add(new StringField("name", f.getName(), Field.Store.YES));
        // Fall back to the raw text if no decoded content is available.
        String text = LuceneUtils.checkGetContent(raw).orElse(raw);
        doc.add(new TextField("content", text, Field.Store.YES));
        iw.addDocument(doc);
    }

    /**
     * Indexes every regular file directly inside {@link #contentFilePath}.
     * A file that fails to index is reported and skipped; the others are still processed.
     *
     * @param iw the writer to add documents to
     */
    public void content(IndexWriter iw) {
        File[] listFiles = new File(contentFilePath).listFiles();
        // listFiles() returns null when the path is not a directory or cannot be read.
        if (listFiles == null) {
            System.err.println("Content directory not found or unreadable: " + contentFilePath);
            return;
        }
        for (File f : listFiles) {
            if (!f.isFile()) {
                continue; // sub-directories cannot be read as text
            }
            try {
                addDoc(iw, f);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Opens the index directory, indexes all content files and commits.
     * The writer and directory are closed on every path so no write lock is left behind.
     * Does nothing when the directory cannot be opened.
     */
    public void createIndex() {
        // The index could also be kept in memory (e.g. a RAMDirectory) instead of on disk.
        Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
        if (!dir.isPresent()) {
            return;
        }
        // NOTE(review): with the default IndexWriterConfig open mode, re-running this
        // presumably appends the same files again to an existing index — confirm.
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        try (Directory directory = dir.get();
             IndexWriter iw = new IndexWriter(directory, iwc)) {
            content(iw);
            iw.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        FileIndexCreator fie = new FileIndexCreator();
        fie.createIndex();
    }
}
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>l.l.h</groupId>
    <artifactId>domjj</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <properties>
        <!-- The sources use lambdas, method references and java.util.Optional,
             so they must be compiled at Java 8 level or later. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!--
        <dependency>
            <groupId>pull-parser</groupId>
            <artifactId>pull-parser</artifactId>
            <version>2</version>
        </dependency>
        -->
        <dependency>
            <groupId>xml-resolver</groupId>
            <artifactId>xml-resolver</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>pull-parser</groupId>
            <artifactId>pull-parser</artifactId>
            <version>2.1.10</version>
        </dependency>
        <dependency>
            <groupId>org.dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>2.0.0-RC1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- Highlighting -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- Chinese analyzer: SmartChineseAnalyzer -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- File utilities (FileUtils) -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <!-- Base64 codec -->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.9</version>
        </dependency>
    </dependencies>
</project>