/*
 * IK Analyzer demo: full-text indexing, Chinese word segmentation and search with Lucene.
 */
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.IKSegmentation;
import org.wltea.analyzer.Lexeme;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
import cn.jh.db.jdbc.DBO;
import cn.jh.db.jdbc.DBOManager;
/**
 * Demonstrates IK Analyzer together with Lucene.
 *
 * <p>{@link #test()} builds an on-disk index under {@link #INDEX_DIR} from a seed document and
 * news titles loaded from a database; {@link #test2()} segments a Chinese phrase with IK and
 * searches that index.
 *
 * @author linly
 */
public class IKAnalyzerDemo {
    /** Location of the on-disk index, relative to the working directory. */
    static final File INDEX_DIR = new File("index");

    /** Name of the single field every document is indexed under. */
    private static final String FIELD_NAME = "text";

    /** Number of title pages read from the database. */
    private static final int PAGE_COUNT = 1;

    /** Rows requested per page; also used as the offset step so that pages never overlap. */
    private static final int PAGE_SIZE = 5000;

    /** Maximum number of hits fetched and printed by {@link #test2()}. */
    private static final int MAX_HITS = 20;

    /**
     * Rebuilds the on-disk index.
     *
     * <p>Each page of titles is first indexed into its own in-memory directory and then merged
     * into the file index. The first merge creates the file index from scratch (replacing any
     * existing one); later merges append. The first page also carries the seed document "1234".
     *
     * @throws Exception if the database query or any index operation fails
     */
    static void test() throws Exception {
        IKAnalyzer analyzer = new IKAnalyzer();
        DBO newsDbo = DBOManager.createDBO("CMS34");

        for (int page = 0; page < PAGE_COUNT; page++) {
            boolean firstPage = (page == 0);

            // A fresh RAM directory per page, so a page is never merged into the file index twice.
            RAMDirectory batchDir = new RAMDirectory();
            IndexWriter batchWriter =
                    new IndexWriter(batchDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                if (firstPage) {
                    addText(batchWriter, "1234");
                }

                // NOTE(review): there is no ORDER BY, so paging is only stable if the database
                // returns rows in a consistent order - confirm before raising PAGE_COUNT.
                String sql = "select Title from News_tab limit " + (page * PAGE_SIZE) + ", "
                        + PAGE_SIZE + " ";
                // Print exactly the statement that is executed.
                System.out.println(sql);

                List<String> titles = newsDbo.getObjList(sql, String.class);
                for (String title : titles) {
                    // Lucene's Field constructor rejects null values, so skip rows without a title.
                    if (title != null) {
                        addText(batchWriter, title);
                    }
                }
            } finally {
                // close() commits pending changes and releases the write lock.
                batchWriter.close();
            }

            mergeIntoFileIndex(batchDir, analyzer, firstPage);
        }
    }

    /**
     * Segments a fixed Chinese phrase with IK, searches the on-disk index for the resulting
     * terms and prints the total hit count followed by up to {@link #MAX_HITS} matching documents.
     *
     * @throws Exception if the index cannot be opened, segmentation fails, or the search fails
     */
    static void test2() throws Exception {
        Directory indexDir = FSDirectory.open(INDEX_DIR);
        try {
            IndexSearcher searcher = new IndexSearcher(indexDir);
            try {
                // Score hits with IK's similarity implementation.
                searcher.setSimilarity(new IKSimilarity());

                String keyword = segmentKeyword("铲除网上黄毒需法律亮剑");

                // Build the query with IK's query parser.
                Query query = IKQueryParser.parse(FIELD_NAME, keyword);

                // Fetch the MAX_HITS best-scoring documents.
                TopDocs topDocs = searcher.search(query, MAX_HITS);
                System.out.println("命中:" + topDocs.totalHits);

                // scoreDocs holds at most MAX_HITS entries, so it can be iterated directly.
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document targetDoc = searcher.doc(hit.doc);
                    String rendered = targetDoc.toString();
                    System.out.println("内容:" + rendered + "\t\t" + rendered);
                }
            } finally {
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /**
     * Runs the demo: rebuilds the index, then searches it.
     *
     * @param args ignored
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] args) throws Exception {
        test();
        test2();
    }

    /** Adds one stored, analyzed document containing {@code text} to {@code writer}. */
    private static void addText(IndexWriter writer, String text) throws IOException {
        Document doc = new Document();
        doc.add(new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }

    /**
     * Merges {@code source} into the index at {@link #INDEX_DIR}.
     *
     * @param source   directory whose segments are added to the file index
     * @param analyzer analyzer the file index writer is opened with
     * @param create   {@code true} to create the file index anew, {@code false} to append to it
     */
    private static void mergeIntoFileIndex(Directory source, IKAnalyzer analyzer, boolean create)
            throws IOException {
        Directory fileDir = FSDirectory.open(INDEX_DIR);
        try {
            IndexWriter fileWriter =
                    new IndexWriter(fileDir, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);
            try {
                fileWriter.addIndexesNoOptimize(new Directory[] {source});
            } finally {
                // close() commits the merge and releases the write lock.
                fileWriter.close();
            }
        } finally {
            fileDir.close();
        }
    }

    /**
     * Runs IK segmentation over {@code phrase}, prints every lexeme of type 0 and returns those
     * lexemes joined into one string, each followed by a single space.
     */
    private static String segmentKeyword(String phrase) throws IOException {
        // NOTE(review): the boolean flag presumably selects IK's max-word-length mode - confirm.
        IKSegmentation segmenter = new IKSegmentation(new StringReader(phrase), true);
        StringBuilder terms = new StringBuilder();
        Lexeme lexeme;
        while ((lexeme = segmenter.next()) != null) {
            // NOTE(review): type 0 looks like IK's ordinary CJK word type - confirm against Lexeme.
            if (lexeme.getLexemeType() == 0) {
                String text = lexeme.getLexemeText();
                System.out.println(text);
                terms.append(text).append(' ');
            }
        }
        return terms.toString();
    }
}
// IKAnalyzerDemo: full-text search and word segmentation example.
// (Source page note: latest recommended article published 2023-02-01 11:47:50.)