最近项目中要用到Lucene,就用了最新的6.2.1版本。尽管之前用过4.4版本,但在使用6.2.1时还是遇到了问题:做中文分词时,paoding不能用了,原因是Lucene在接口方面做了变动,导致paoding无法直接使用。
下面是一个用例,对paoding做了修改,使之与lucene6.2.1能配合调用。
Java测试代码:
import java.io.StringReader;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/**
 * Smoke test for the patched Paoding analyzer running against Lucene 6.2.1.
 *
 * <p>It first prints the tokens the analyzer produces for a sample sentence,
 * then builds a two-document in-memory index and runs a single query on it.
 */
public class Test {

    /**
     * Runs the tokenization demo, prints a separator, then runs the index/search demo.
     *
     * @param args unused
     * @throws Exception if analysis, indexing, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        String text = "生成analyzer实例 将项目中的dic复制到工程的classpath下,默认配置";
        testSplitChinese(text);
        System.out.println("==============");
        testDemo(text);
    }

    /**
     * Tokenizes {@code text} with {@link PaodingAnalyzer} and prints every term
     * to standard output, separated by tabs.
     *
     * <p>The Paoding dictionary directory must be reachable by the analyzer
     * (the original note says to copy the project's {@code dic} folder into the
     * project root, or to change {@code paoding.dic.home} to relocate it).
     *
     * @param text the text to split into terms
     * @throws Exception if the token stream cannot be created or consumed
     */
    public static void testSplitChinese(String text) throws Exception {
        // try-with-resources releases the stream and the analyzer even when
        // tokenization throws part-way through.
        try (Analyzer analyzer = new PaodingAnalyzer();
             TokenStream tokenizer = analyzer.tokenStream("text", new StringReader(text))) {
            // Attribute-based access: this is the API that replaced the
            // Token-returning calls of the old Lucene 2.x line.
            CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                System.out.print(termAtt.toString() + "\t");
            }
            // Required end-of-stream step of the TokenStream consumer workflow.
            tokenizer.end();
        }
    }

    /**
     * Builds a document with two stored, analyzed text fields.
     *
     * @param title   value of the {@code title} field
     * @param content value of the {@code content} field
     * @return the new document
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new TextField("title", title, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        return doc;
    }

    /**
     * Minimal Lucene round trip: indexes two documents into a
     * {@link RAMDirectory}, searches the {@code title} field, then prints the
     * hit count and the {@code content} field of each of the top 10 hits.
     *
     * @param text currently unused; kept so existing callers keep compiling
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void testDemo(String text) throws Exception {
        try (Analyzer analyzer = new PaodingAnalyzer();
             Directory idx = new RAMDirectory()) {

            // The writer must be closed before a reader is opened on the directory.
            try (IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(analyzer))) {
                writer.addDocument(createDocument("维基百科:关于中文维基百科99999999999999999999999999999999999999", "维基百科:关于中文维基百科9999999999999999999999999999999"));
                writer.addDocument(createDocument("维基百科:关于中文维基百科99999999999999999999999999999999999999", "维基百科:关于中文维基百科88888888888"));
                writer.commit();
            }

            try (DirectoryReader reader = DirectoryReader.open(idx)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // NOTE(review): the single quotes are kept from the original query;
                // confirm they are intended, since phrase queries are normally
                // written with double quotes.
                // The query is executed once and the result reused for both the
                // hit count and the hit listing.
                TopDocs topdoc = searcher.search(new QueryParser("title", analyzer).parse("title:'维基'"), 10);
                System.out.println("命中个数:" + topdoc.totalHits);

                ScoreDoc[] hits = topdoc.scoreDocs;
                if (hits != null) {
                    for (ScoreDoc hit : hits) {
                        Document hitDoc = searcher.doc(hit.doc);
                        System.out.println(hitDoc.get("content"));
                    }
                }
            }
        }
    }
}
pom文件为:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cn.lucene.search</groupId>
    <artifactId>lucene-search</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>lucene-search</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single place to change the version shared by all Lucene modules below. -->
        <lucene.version>6.2.1</lucene.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.1</version>
        </dependency>

        <!-- Lucene modules; all share ${lucene.version}. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <!-- Patched Paoding build, resolved from a local jar instead of a repository:
             lib/paoding-analysis.jar must exist under the project base directory. -->
        <dependency>
            <groupId>net.paoding</groupId>
            <artifactId>paoding-analysis</artifactId>
            <version>4.6.0</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/lib/paoding-analysis.jar</systemPath>
        </dependency>
    </dependencies>
</project>
改良之后的paoding下载地址 http://download.csdn.net/detail/riapgypm/9648301