最近在项目中要判断词语是否是热搜词,所以在网上搜索了一遍,感觉Lucene在分词和索引领域的技术还是相对比较成熟的,所以研究了一下和大家分享。
|- 这个类运行时需要一个词库文件(XXX.txt);建立索引时会在硬盘上创建一个索引文件夹。
|- 【建立索引】、【搜索词语】
|- 索引只需要建立一次。当第二次运行时就直接调用。
|- 具体用法请参照main方法
package com.yitaosoft.title.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.yitaosoft.title.property.AppConstants;

/**
 * Static helpers that index the content of one text file with Lucene
 * (analyzed by {@link IKAnalyzer}) and count how many indexed documents match
 * a keyword query.
 *
 * <p>Both helpers report failures by printing the stack trace and then
 * returning normally; they never propagate a checked exception to the caller.
 */
public class LuceneSearcherUtil {

    /** Name of the single indexed field; shared by indexing and searching. */
    private static final String CONTENT_FIELD = "content";

    /**
     * Adds the content of {@code filePath} as one document to the index stored
     * in the directory {@code indexPath}.
     *
     * <p>The method name keeps its original spelling ("bulid") so existing
     * callers continue to compile.
     *
     * <p>NOTE(review): the writer uses the default {@link IndexWriterConfig}
     * open mode, so calling this repeatedly on the same index presumably
     * appends another copy of the document - confirm this is intended.
     *
     * @param indexPath directory on disk that holds the index
     * @param filePath  text file whose content is tokenized and indexed
     */
    public static void bulidIndex(String indexPath, String filePath) {
        Directory directory = null;
        IndexWriter writer = null;
        FileReader contentReader = null;
        try {
            // 1. Open the on-disk index directory.
            directory = FSDirectory.open(new File(indexPath));

            // 2. Create the IndexWriter.
            IndexWriterConfig config =
                    new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
            writer = new IndexWriter(directory, config);

            // 3. Build a document whose only field streams the file content.
            // NOTE(review): FileReader decodes with the platform default
            // charset; confirm the word file is stored in that encoding.
            contentReader = new FileReader(new File(filePath));
            Document doc = new Document();
            doc.add(new Field(CONTENT_FIELD, contentReader));

            // 4. The reader is consumed here, so it must stay open until
            //    addDocument returns.
            writer.addDocument(doc);
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are
            // IOException subclasses, so this single handler covers them.
            e.printStackTrace();
        } finally {
            // Close the writer first so pending changes are written before
            // the directory underneath it is released.
            closeAndReport(writer);
            closeAndReport(contentReader);
            closeAndReport(directory);
        }
    }

    /**
     * Counts the indexed documents that match {@code keyWords}.
     *
     * <p>The keywords are parsed by {@link QueryParser} against the
     * {@code "content"} field with AND as the default operator.
     *
     * @param indexPath directory on disk that holds the index
     * @param keyWords  query text; {@code null} or blank yields {@code 0}
     * @return the total hit count, or {@code 0} when nothing matches or when
     *         the index cannot be read or the query cannot be parsed
     */
    public static int search(String indexPath, String keyWords) {
        // A null or blank query cannot match anything; skip opening the index.
        if (keyWords == null || keyWords.trim().length() == 0) {
            return 0;
        }

        int totalHit = 0;
        Directory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            // 1. Open the on-disk index directory.
            directory = FSDirectory.open(new File(indexPath));
            // 2. Create the IndexReader.
            reader = IndexReader.open(directory);
            // 3. Create the IndexSearcher on top of the reader.
            searcher = new IndexSearcher(reader);

            // 4. Build the query.
            QueryParser parser =
                    new QueryParser(Version.LUCENE_35, CONTENT_FIELD, new IKAnalyzer());
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = parser.parse(keyWords);

            // 5. Only the total hit count is needed, so request a single
            //    top document; totalHits still counts every match.
            TopDocs topDocs = searcher.search(query, 1);
            totalHit = topDocs.totalHits;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // The searcher was built from an existing reader, so the reader
            // is closed separately.
            closeAndReport(searcher);
            closeAndReport(reader);
            closeAndReport(directory);
        }
        return totalHit;
    }

    /** Closes {@code resource} if it is non-null, printing any close failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: searches the existing index for one keyword and prints
     * whether it was found. Uncomment the {@code bulidIndex} call to create
     * the index first.
     */
    public static void main(String[] args) {
        // bulidIndex(AppConstants.INDEX_PATH, AppConstants.WORD_DB);
        int hit = LuceneSearcherUtil.search(AppConstants.INDEX_PATH, "秋装");
        if (hit > 0) {
            System.out.println("热搜词");
        } else {
            System.out.println("非热搜词");
        }
    }
}