# Lucene Highlighting, Tokenization, and Search

First, run the save test to build the index:

```java
@Test
public void testSave() {
    Article article = new Article();
    article.setId(1);                      // mock data
    article.setTitle("双12");
    article.setContent("双12就要疯狂购");
    // Create the index entry; the DAO hands the entity to Lucene for storage
    articleDAO.save(article);
}
```

## Highlighting

```java
import entity.Article;
import util.AritcleDocumentUtils;
import util.Configuration;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Scorer;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class HighLighterTest {

    @Test
    public void search() throws IOException, ParseException, InvalidTokenOffsetsException {
        String word = "疯狂";
        IndexReader indexReader = DirectoryReader.open(Configuration.getDirectory());
        // 1.6 Searcher over the index
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // 1.7 Parse the keyword against several fields; takes the field names and the analyzer
        QueryParser queryParser = new MultiFieldQueryParser(new String[]{"title", "content"}, Configuration.getAnalyzer());
        // 1.8 Query is the abstract query type
        Query query = queryParser.parse(word);
        // 1.9 Search the index directory; TopDocs references the Document objects stored above
        TopDocs topDocs = indexSearcher.search(query, 100);
        // 1.10 totalHits is the total number of matches
        int count = (int) topDocs.totalHits;
        System.out.println(count + " total hits");
        // 1.11 Each ScoreDoc holds the internal id of one matching document
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;

        // Step 1: create and configure the highlighter
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query, "content");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // Limit each fragment (the summary) to 20 characters
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);

        // Process the results
        List<Article> list = new ArrayList<Article>();
        for (int i = 0; i < scoreDocs.length; i++) {
            int docid = scoreDocs[i].doc;
            // 1.12 Fetch the real Document by its internal id
            Document doc = indexSearcher.doc(docid);
            // Step 2: highlight; returns null when the field contains no keyword
            String text = highlighter.getBestFragment(Configuration.getAnalyzer(), "content", doc.get("content"));
            if (text != null) {
                ((Field) doc.getField("content")).setStringValue(text);
            }
            // Convert the Document back into an Article
            Article article = AritcleDocumentUtils.documentToArticle(doc);
            list.add(article);
        }
        for (Article arti : list) {
            System.out.println(arti.getTitle());
            System.out.println(arti.getContent());
        }
    }
}
```
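All of these tests lean on two helpers the post never lists: `util.Configuration`, which hands out the shared index `Directory` and `Analyzer`, and the `articleDAO` behind `testSave`. A minimal sketch of `Configuration`, assuming an `FSDirectory` under `./indexDir` and the IK analyzer in smart mode (both the path and the analyzer choice are assumptions, inferred from the tokenization section below):

```java
package util;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.IOException;
import java.nio.file.Paths;

public class Configuration {
    // Hypothetical index location; the original post does not show it
    private static final String INDEX_PATH = "./indexDir";

    public static Directory getDirectory() throws IOException {
        return FSDirectory.open(Paths.get(INDEX_PATH));
    }

    public static Analyzer getAnalyzer() {
        // IK in smart mode, matching the analyzer used in AnalyzerTest below
        return new IKAnalyzer(true);
    }
}
```

And a save in the spirit of `articleDAO.save`, writing one `Document` per `Article`; the field types (an untokenized `StringField` for `id`, stored `TextField`s for `title` and `content`) are assumptions consistent with how the search code reads them back:

```java
import entity.Article;
import util.Configuration;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import java.io.IOException;

public class ArticleDAO {
    public void save(Article article) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Configuration.getAnalyzer());
        try (IndexWriter writer = new IndexWriter(Configuration.getDirectory(), config)) {
            Document doc = new Document();
            // id is stored but not tokenized; title/content are analyzed and stored
            doc.add(new StringField("id", String.valueOf(article.getId()), Field.Store.YES));
            doc.add(new TextField("title", article.getTitle(), Field.Store.YES));
            doc.add(new TextField("content", article.getContent(), Field.Store.YES));
            writer.addDocument(doc);
        }
    }
}
```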
## Tokenization

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.StringReader;

/**
 * Created by Happy on 2017-12-01.
 */
public class AnalyzerTest {

    @Test // exercise the analyzers
    public void test() throws Exception {
        // String word = "a good person,Happy Every Day";
        // String word = "我为何不哭,因为我仅存的,就只有坚强了";
        String word = "中华人民共和国,小星星,我们是";
        // Analyzer analyzer = new StandardAnalyzer();      // unigram tokenizer
        // Analyzer analyzer = new CJKAnalyzer();           // bigram tokenizer
        // Analyzer analyzer = new SmartChineseAnalyzer();  // smart Chinese tokenizer
        Analyzer analyzer = new IKAnalyzer(true);           // IK tokenizer, smart mode
        testAnalyzer(analyzer, word);
    }

    // Tokenize the given text with the given analyzer and print each term
    public void testAnalyzer(Analyzer analyzer, String text) throws Exception {
        System.out.println("Analyzer: " + analyzer.getClass());
        StringReader reader = new StringReader(text);
        TokenStream tokenStream = analyzer.tokenStream("content", reader);
        CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(cta);
        }
        tokenStream.end();
        tokenStream.close();
    }
}
```
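The loop above prints only the term text, but the same attribute API also exposes character offsets, which is what the highlighter in the first section relies on to place its `<font>` tags. A short sketch (`TokenOffsetDemo` is a hypothetical helper, not part of the original code):

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class TokenOffsetDemo {
    // Prints each term with the character offsets it covers in the original text
    public static void printTokens(Analyzer analyzer, String text) throws Exception {
        try (TokenStream ts = analyzer.tokenStream("content", text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
            }
            ts.end();
        }
    }
}
```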
## Search

```java
import entity.Article;
import util.AritcleDocumentUtils;
import util.Configuration;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created by Happy on 2017-12-04.
 */
public class QueryTest {

    @Test // term query on a single keyword
    public void testPrimaryKey() throws ParseException, InvalidTokenOffsetsException, IOException {
        Query query = new TermQuery(new Term("content", "疯狂"));
        search(query);
    }

    @Test // wildcard query, like the *_* patterns in Struts2
    public void testWildCard() throws ParseException, InvalidTokenOffsetsException, IOException {
        // ? matches exactly one character; * matches zero or more
        Query query = new WildcardQuery(new Term("content", "疯*"));
        search(query);
    }

    @Test // match all documents
    public void testAll() throws ParseException, InvalidTokenOffsetsException, IOException {
        Query query = new MatchAllDocsQuery();
        search(query);
    }

    @Test // fuzzy query
    public void testLike() throws ParseException, InvalidTokenOffsetsException, IOException {
        // The second argument is the maximum edit distance (at most 2): how many
        // single-character edits may separate the indexed term from the query term
        Query query = new FuzzyQuery(new Term("title", "pring"), 2);
        search(query);
    }

    public void search(Query query) throws IOException, ParseException, InvalidTokenOffsetsException {
        IndexReader indexReader = DirectoryReader.open(Configuration.getDirectory());
        // 1.6 Searcher over the index
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // A parsed query would also work here, as in the highlighting test:
        // QueryParser queryParser = new MultiFieldQueryParser(new String[]{"title", "content"}, Configuration.getAnalyzer());
        // Query query = queryParser.parse("content:疯狂");
        // 1.9 Search the index directory; TopDocs references the stored Document objects
        TopDocs topDocs = indexSearcher.search(query, 100);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        // Process the results
        List<Article> list = new ArrayList<Article>();
        for (int i = 0; i < scoreDocs.length; i++) {
            ScoreDoc scoreDoc = scoreDocs[i];
            System.out.println("score: " + scoreDoc.score);
            int docid = scoreDocs[i].doc;
            // 1.12 Fetch the real Document by its internal id
            Document doc = indexSearcher.doc(docid);
            // Convert the Document back into an Article
            Article article = AritcleDocumentUtils.documentToArticle(doc);
            list.add(article);
        }
        for (Article arti : list) {
            System.out.println("id====" + arti.getId());
            System.out.println(arti.getTitle());
            System.out.println(arti.getContent());
        }
    }
}
```
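Finally, both the highlighting and the query tests turn hits back into entities through `AritcleDocumentUtils.documentToArticle`, which the post also leaves out. A minimal sketch, assuming the stored `id`/`title`/`content` fields from the save sketch above:

```java
import entity.Article;
import org.apache.lucene.document.Document;

public class AritcleDocumentUtils {
    // Document -> Article: read the stored field values back into the entity.
    // Integer.parseInt assumes id was stored as a string, matching the
    // StringField in the hypothetical save sketch earlier in this post.
    public static Article documentToArticle(Document doc) {
        Article article = new Article();
        article.setId(Integer.parseInt(doc.get("id")));
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        return article;
    }
}
```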