注:在MyEclipse中可以通过Ctrl+Shift+R和通配符查询相关的资源。
1、全文检索的概念
<1>从大量的信息中快速、准确地查找出要的信息。
<2>搜索的内容是文本信息(不是多媒体)。
<3>根据文本的关键词进行搜索,而不是根据语义进行搜索。
<4>全面、快速、准确是衡量全文检索系统的关键指标。
<5>搜索时英文不区分大小写。
<6>结果列表由相关度排序。
<7>全文搜索有站内搜索和垂直搜索
2、全文搜索与数据搜索的区别
数据库搜索的缺点:
<1>搜索效果比较差。
<2>在搜索的结果中,有大量的数据被搜索出来,有很多数据是没有用的。
<3>在数据量很大的情况下,查询速度很难做到快速。
3、互联网搜索结构图
由上图可以看出,互联网搜索就是利用爬虫搜索信息并且在索引库中建立索引,这样索引库中的信息就可能与网络上的网页信息不一致,从而导致有时候利用引擎搜索的网页会找不到相关的网站。
4、lucene的大致结构框图
由上图可以看出,利用lucene的api进行增删改操作其实就是对索引库的操作。查询操作就是根据索引库的索引查询出内容然后返回。在存入索引库的时候,其实是存入对象的某些字段及其值,并且转换为Document,然后存入索引库。
5、lucene建立索引图
6、使用lucene
lucene的核心jar包有:
(核心包)lucene-core-3.0.1.jar
(分词器)lucene-analyzers-3.1.0.jar
(高亮器)lucene-highlighter-3.1.0.jar
lucene-memory-3.1.0.jar
<1>导入核心jar包
<2>创建一个需要用lucene进行操作的类
<3>利用lucene的API操作此类的对象。
cn.itheima.lucene.bean.Article.java
public class Article {
private Long id ;
private String title ;
private String content ;
public Long getId() {
return id ;
}
public void setId(Long id) {
this.id = id;
}
public String getTitle() {
return title ;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content ;
}
public void setContent(String content) {
this.content = content;
}
}
cn.itheima.lucene.test.LuceneTest.java
/**
 * 1、创建article对象,并把该对象放入到索引库中
 * 2、根据关键词检索article对象
 */
public class LuceneTest {
/*
* 1、创建article对象
* 2、创建indexWriter对象
* 3、把article对象写入到索引库中
*/
@Test
public void testCreateIndex() throws Exception {
Article article = new Article();
article.setId(1L);
article.setTitle( “lucene可以用来做搜索引擎” );
article.setContent( “baidu、google搜索引擎公司” );Directory directory = FSDirectory. open(new File("./indexDir" )); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); //MaxFieldLength限制字段内容的大小 IndexWriter indexWriter = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED); Document document = new Document(); //name代表存放在索引库中的名称 Field field1 = new Field("id",article.getId().toString(),Store.YES,Index.NOT_ANALYZED); Field field2 = new Field("title" ,article.getTitle(),Store.YES,Index. ANALYZED); Field field3 = new Field("content" ,article.getContent(),Store.YES,Index. ANALYZED); document.add(field1); document.add(field2); document.add(field3); indexWriter.addDocument(document); indexWriter.close();
}
}
执行结果:
cn.itheima.lucene.test.LuceneTest.java
public class LuceneTest {

    /**
     * Searches the "title" field for the keyword "lucene",
     * converts each hit back into an Article, and prints it.
     */
    @Test
    public void testSearch() throws Exception {
        Directory directory = FSDirectory.open(new File("./indexDir"));
        IndexSearcher searcher = new IndexSearcher(directory);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
        // the search keyword
        Query query = parser.parse("lucene");
        // TopDocs holds references into the index for the best matches
        TopDocs topDocs = searcher.search(query, 1);
        // total number of documents matching the keyword
        int totalHits = topDocs.totalHits;
        List<Article> results = new ArrayList<Article>();
        for (ScoreDoc hit : topDocs.scoreDocs) {
            // hit.doc is the internal document id; hit.score the relevance score
            int docId = hit.doc;
            float score = hit.score;
            Document doc = searcher.doc(docId);
            Article article = new Article();
            article.setId(Long.parseLong(doc.get("id")));
            article.setTitle(doc.get("title"));
            article.setContent(doc.get("content"));
            results.add(article);
        }
        for (Article article : results) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
7、改写上面的程序
cn.itheima.lucene.utils.DocumentUtils.java
/**
 * Converts between the domain object {@code Article} and the Lucene
 * {@code Document} representation stored in the index.
 */
public class DocumentUtils {

    /** Maps an Article onto a Document: id is stored un-analyzed, title/content analyzed. */
    public static Document article2Document(Article article) {
        Document document = new Document();
        document.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
        document.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        return document;
    }

    /** Rebuilds an Article from the stored fields of a Document. */
    public static Article document2Article(Document document) {
        Article article = new Article();
        article.setId(Long.parseLong(document.get("id")));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }
}
cn.itheima.lucene.utils.LuceneUtils.java
/**
 * Shared Lucene resources: one {@code Directory} and one {@code Analyzer}
 * reused everywhere so the whole application works against the same index.
 */
public class LuceneUtils {

    public static Directory directory;
    public static Analyzer analyzer;

    static {
        try {
            directory = FSDirectory.open(new File("./indexDir"));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
        } catch (Exception e) {
            // Fail fast instead of swallowing the exception: the original
            // printStackTrace() left both fields null, so every later index
            // operation died with a confusing NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }
}
cn.itheima.lucene.test.LuceneTest.java
public class LuceneTest {

    /** Indexes one Article using the shared LuceneUtils resources and DocumentUtils mapper. */
    @Test
    public void testCreateIndex() throws Exception {
        Article article = new Article();
        article.setId(1L);
        article.setTitle("lucene可以用来做搜索引擎");
        article.setContent("baidu、google搜索引擎公司");
        IndexWriter writer = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        writer.addDocument(DocumentUtils.article2Document(article));
        writer.close();
    }

    /** Searches both title and content for "lucene" and prints every matching Article. */
    @Test
    public void testSearch() throws Exception {
        IndexSearcher searcher = new IndexSearcher(LuceneUtils.directory);
        QueryParser parser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = parser.parse("lucene");
        TopDocs topDocs = searcher.search(query, 3);
        List<Article> results = new ArrayList<Article>();
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            results.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : results) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
8、删除数据
Term就是对关键词的对象封装,包含两个属性:field、text。
LuceneTest.java
public class LuceneTest {

    /**
     * Deleting does not remove the original .cfs segment file; Lucene just
     * records the deletions in an additional .del file next to it.
     */
    @Test
    public void testDelete() throws Exception {
        IndexWriter writer = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        // A Term wraps a keyword as (field, text); every document matching it is deleted
        writer.deleteDocuments(new Term("title", "lucene"));
        writer.close();
    }
}
9、更新操作
public class LuceneTest {

    /**
     * Update = delete-then-add: the Term selects the documents to remove,
     * the Document supplies the replacement content.
     */
    @Test
    public void testUpdate() throws Exception {
        IndexWriter writer = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        Term selector = new Term("title", "lucene");
        Article replacement = new Article();
        replacement.setId(1L);
        replacement.setTitle("lucene可以用来做搜索引擎");
        replacement.setContent("baidu搜索引擎公司");
        writer.updateDocument(selector, DocumentUtils.article2Document(replacement));
        writer.close();
    }
}
执行结果:
总结:更新操作实际上是先删除后增加。
10、保持数据库与索引库的同步
在一个系统中,如果索引功能存在,那么数据库和索引库应该是同时存在的。这个时候需要保证索引库的数据和数据库中的数据保持一致性。可以在数据库进行增删改操作的时候对索引库也进行相应的操作。这样就可以保证数据库与索引库的一致性。
11、同一个索引库不能同时存在两个IndexWriter
cn.itheima.lucene.test.IndexWriterTest.java
public class IndexWriterTest {

    /**
     * Demonstrates that one index directory allows only a single open
     * IndexWriter: the second constructor call fails because the first
     * writer still holds the index write lock.
     *
     * Fix: the original never closed either writer, so the write.lock file
     * stayed behind and blocked every subsequent test run; the finally
     * block now always releases the lock.
     */
    @Test
    public void test() throws Exception {
        IndexWriter indexWriter1 = null;
        IndexWriter indexWriter2 = null;
        try {
            indexWriter1 = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
            // expected to fail: the write lock is already held by indexWriter1
            indexWriter2 = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        } finally {
            if (indexWriter2 != null) {
                indexWriter2.close();
            }
            if (indexWriter1 != null) {
                indexWriter1.close();
            }
        }
    }
}
执行结果:
总结:<1>由上图可见,只要有一个IndexWriter操作索引库,那么它就会对索引库加锁,只要这个indexWriter没有关闭从而解锁,那么其他的IndexWriter和IndexSearch就无法访问索引库。如果使用了IndexSearch去查询索引库,也获取不了任何东西。
<2>当indexWriter关闭的时候,释放IO流的资源并且释放锁。
<3>索引库的最多的操作是检索,后台维护的操作还是比较少的,所以不需要很多的IndexWriter。
12、索引库的优化
如果执行了多次上面的testCreateIndex,就会出现如下图所示的情况。
一般情况下,只要达到一定数目的cfs文件,那么lucene就会自动合并cfs文件从而达到优化的目的。不过有时候,需要手动进行优化。
cn.itheima.lucene.test.IndexWriterTest.java
public class LuceneTest {

    /**
     * Manually merges the accumulated .cfs segment files into one.
     * Lucene merges automatically once enough segments exist; this forces it.
     */
    @Test
    public void testOptimize() throws Exception {
        IndexWriter writer = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        writer.optimize();
        writer.close();
    }
}
执行结果:
13、内存索引库的特点:
1、查询效率比较快
2、数据不是持久化数据
文件索引库的特点:
1、查询效率比较慢
2、数据是持久化的
所以可以将内存索引库和文件索引库结合起来。
cn.itheima.lucene.test.DirectoryTest.java
/**
 * 将数据存放入内存索引库,然后查询显示出来
 */
public class DirectoryTest {

    /**
     * Writes one Article into an in-memory index (RAMDirectory),
     * then searches it and prints the hits.
     */
    @Test
    public void testRamDirectory() throws Exception {
        // create the in-memory index
        Directory ramDirectory = new RAMDirectory();
        Article article = new Article();
        article.setId(1L);
        article.setTitle("lucene可以用来做搜索引擎");
        article.setContent("baidu、google搜索引擎公司");
        IndexWriter writer = new IndexWriter(ramDirectory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        writer.addDocument(DocumentUtils.article2Document(article));
        writer.close();
        this.showData(ramDirectory);
    }

    /**
     * Runs a multi-field "lucene" query against the given directory
     * and prints every matching Article.
     */
    private void showData(Directory directory) throws Exception {
        IndexSearcher searcher = new IndexSearcher(directory);
        QueryParser parser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = parser.parse("lucene");
        TopDocs topDocs = searcher.search(query, 3);
        List<Article> results = new ArrayList<Article>();
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            results.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : results) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
cn.itheima.lucene.test.DirectoryTest.java
public class DirectoryTest {

    /**
     * Combines a file index with an in-memory index:
     * 1. open one IndexWriter per directory,
     * 2. copy the file index into the RAMDirectory,
     * 3. let the application talk to the fast in-memory index,
     * 4. sync the in-memory index back to the file index.
     */
    @Test
    public void testFileAndRam() throws Exception {
        Directory fileDirectory = FSDirectory.open(new File("./indexDir"));
        // RAMDirectory(fileDirectory) loads the on-disk index into memory
        Directory ramDirectory = new RAMDirectory(fileDirectory);
        IndexWriter ramIndexWriter = new IndexWriter(ramDirectory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        // the 'true' flag wipes the file index before rewriting;
        // without it the merged documents would be appended
        IndexWriter fileIndexWriter = new IndexWriter(fileDirectory, LuceneUtils.analyzer, true, MaxFieldLength.LIMITED);

        Article article = new Article();
        article.setId(2L);
        article.setTitle("lucene可以用来做搜索引擎2");
        article.setContent("baidu、google搜索引擎公司2");
        ramIndexWriter.addDocument(DocumentUtils.article2Document(article));
        // the RAM writer MUST be closed before the merge below, otherwise the
        // newly added document is not yet flushed and never reaches the file index
        ramIndexWriter.close();

        fileIndexWriter.addIndexesNoOptimize(ramDirectory);
        fileIndexWriter.close();
        this.showData(fileDirectory);
    }
}
执行结果:
注:lucene提供了一些方法可以做很多个索引库出来(在一个项目中),可以针对某一个索引库进行检索,还可以针对合并的索引库进行检索。fileIndexWriter.addIndexesNoOptimize(ramDirectory)。
14、分词器
针对不同的语言有不同的分词器。
cn.itheima.lucene.test.AnalyzerTest.java
public class AnalyzerTest {

    /** Runs English text through the StandardAnalyzer and prints each token. */
    @Test
    public void testAnalyzer_En() throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        String text = "Creates a searcher";
        this.testAnalyzer(analyzer, text);
    }

    // Prints every term the given analyzer extracts from the text.
    private void testAnalyzer(Analyzer analyzer, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        tokenStream.addAttribute(TermAttribute.class);
        while (tokenStream.incrementToken()) {
            TermAttribute termAttribute = tokenStream.getAttribute(TermAttribute.class);
            System.out.println(termAttribute.term());
        }
    }
}
执行结果:
总结:英文分词器分词的三个步骤:
<1>切分关键词
<2>去掉停用词
<3>把大写变为小写
单词法分词
cn.itheima.lucene.test.AnalyzerTest.java
public class AnalyzerTest {

    /** Single-character ("unigram") Chinese tokenization via ChineseAnalyzer. */
    @Test
    public void testAnalyzer_CH1() throws Exception {
        Analyzer analyzer = new ChineseAnalyzer();
        String text = "黑马训练营";
        this.testAnalyzer(analyzer, text);
    }
}
执行结果:
二词法分词
cn.itheima.lucene.test.AnalyzerTest.java
public class AnalyzerTest {

    /** Two-character ("bigram") Chinese tokenization via CJKAnalyzer. */
    @Test
    public void testAnalyzer_CH1() throws Exception {
        Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
        String text = "黑马训练营";
        this.testAnalyzer(analyzer, text);
    }
}
执行结果:
IKAnalyzer的使用
<1>拷贝IKAnalyzer3.2.0Stable.jar。
<2>拷贝ext_stopword.dic(必须设置成UTF-8编码,否则无法使用)、IKAnalyzer.cfg.xml。
<3>使用IKAnalyzer进行分词解析。
cn.itheima.lucene.test.AnalyzerTest.java
public class AnalyzerTest {

    /** Dictionary-based Chinese tokenization via IKAnalyzer. */
    @Test
    public void testAnalyzer_CH3() throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        String text = "黑马训练营";
        this.testAnalyzer(analyzer, text);
    }
}
执行结果:
注:之所以能够达到这样的效果,是因为IKAnalyzer的jar包中包含了很多的dic文件,里面设置了大量的中文词汇。
IKAnalyzer还提供了扩展配置,使用户可以自己添加dic文件(包括扩展字典和扩展停止词字典,并且必须是UTF-8编码类型的,项目也必须是UTF-8编码的)。
IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
	<entry key="ext_dict">/mydict.dic</entry>
	<entry key="ext_stopwords">/ext_stopword.dic</entry>
</properties>
mydict.dic
方立勋
执行结果:
ext_stopword.dic
方立勋
执行结果:
15、高亮
高亮的作用是:1、使关键词高亮 2、控制摘要的大小
cn.itheima.lucene.test.HighlighterTest.java
public class HighlighterTest {

    /**
     * Highlights the query keyword in the title field and trims the text
     * to a short fragment ("abstract") before printing the results.
     */
    @Test
    public void testHighlighter() throws Exception {
        IndexSearcher searcher = new IndexSearcher(LuceneUtils.directory);
        QueryParser parser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = parser.parse("lucene");

        // the scorer decides which terms in the text deserve highlighting
        Scorer scorer = new QueryScorer(query);
        // prefix/suffix wrapped around highlighted terms — HTML output only
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // limit the returned fragment (abstract) to 20 characters
        highlighter.setTextFragmenter(new SimpleFragmenter(20));

        TopDocs topDocs = searcher.search(query, 3);
        List<Article> results = new ArrayList<Article>();
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            // re-analyze the stored title and return the best highlighted fragment
            String fragment = highlighter.getBestFragment(LuceneUtils.analyzer, "title", doc.get("title"));
            doc.getField("title").setValue(fragment);
            results.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : results) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
cn.itheima.lucene.test.HighlighterTest.java
public class HighlighterTest {

    /**
     * Highlights the query keyword in both title and content and trims each
     * to a short fragment before printing the results.
     *
     * Fix: getBestFragment returns null when the keyword does not occur in a
     * field. The original had the null check for the content fragment
     * commented out, so setValue(null) blew up for documents whose content
     * lacked the keyword; both fragments are now guarded.
     */
    @Test
    public void testHighlighter() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory);
        QueryParser queryParser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = queryParser.parse("lucene");

        // scorer picks the terms to highlight
        Scorer scorer = new QueryScorer(query);
        // prefix/suffix wrapped around highlighted terms — HTML output only
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // limit the abstract to 20 characters
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);

        TopDocs topDocs = indexSearcher.search(query, 3);
        List<Article> articles = new ArrayList<Article>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            String fragment1 = highlighter.getBestFragment(LuceneUtils.analyzer, "title", doc.get("title"));
            String fragment2 = highlighter.getBestFragment(LuceneUtils.analyzer, "content", doc.get("content"));
            // only overwrite a field when a highlighted fragment was produced;
            // a null fragment means the keyword did not appear in that field
            if (fragment1 != null) {
                doc.getField("title").setValue(fragment1);
            }
            if (fragment2 != null) {
                doc.getField("content").setValue(fragment2);
            }
            articles.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : articles) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
16、分页
cn.itheima.lucene.test.DispageTest.java
public class DispageTest {

    @Test
    public void testDispage() throws Exception {
        testSearch(0, 25);
    }

    /**
     * Prints one page of search results.
     *
     * Fix: the original looped to firstResult + maxResult unconditionally,
     * which throws ArrayIndexOutOfBoundsException whenever fewer hits were
     * returned; the loop is now clamped to the hits actually fetched.
     *
     * @param firstResult index of the first hit on the page
     * @param maxResult   maximum number of hits on the page
     */
    private void testSearch(int firstResult, int maxResult) throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory);
        QueryParser queryParser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = queryParser.parse("lucene");
        TopDocs topDocs = indexSearcher.search(query, 25);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        List<Article> articles = new ArrayList<Article>();
        // clamp to the hits actually returned to avoid running off the array
        int end = Math.min(scoreDocs.length, firstResult + maxResult);
        for (int i = firstResult; i < end; i++) {
            Document doc = indexSearcher.doc(scoreDocs[i].doc);
            articles.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : articles) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
执行结果:
cn.itheima.lucene.test.DispageTest.java
public class DispageTest {
@Test
public void testDispage () throws Exception{
testSearch(20, 30);
}
private void testSearch(int firstResult, int maxResult) throws Exception {
IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory );
QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30 ,new String[]{"title" ,"content" },LuceneUtils.analyzer);
Query query = queryParser.parse( "lucene");
TopDocs topDocs = indexSearcher.search(query,25);
ScoreDoc[] scoreDocs = topDocs. scoreDocs;
List<Article> articles = new ArrayList<Article>();
//为了防止越界,所有有必要取最小值
int length = Math.min(topDocs. totalHits, firstResult + maxResult);
for(int i = firstResult; i < length; i++){
int index = scoreDocs[i].doc ;
Document doc = indexSearcher.doc(index);
Article article = DocumentUtils. document2Article(doc);
articles.add(article);
}
for(Article article : articles){
System. out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
}
}
}
执行结果:
17、搜索方式
cn.itheima.lucene.test.LuceneTest.java
public class LuceneTest {
@Test
public void testCreateIndexBatch() throws Exception {
IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory ,LuceneUtils.analyzer,MaxFieldLength. LIMITED);
for(int i = 1; i <= 25; i++){
Article article = new Article();
article.setId(Long. parseLong(i + “”));
article.setTitle( “lucene可以用来做搜索引擎” );
article.setContent( “baidu搜索引擎公司” );
indexWriter.addDocument(DocumentUtils. article2Document(article));
}
indexWriter.close();
}
}
cn.itheima.lucene.utils.DocumentUtils.java
/**
 * Converts between Article and Lucene Document, encoding the numeric id with
 * NumericUtils so that range queries work. A plain String id would compare
 * lexicographically and range queries would return wrong results.
 */
public class DocumentUtils {

    /** Maps an Article onto a Document; the id is prefix-coded for numeric range queries. */
    public static Document article2Document(Article article) {
        Document document = new Document();
        // longToPrefixCoded already returns a String — the original appended a
        // redundant .toString() call, removed here.
        Field field1 = new Field("id", NumericUtils.longToPrefixCoded(article.getId()), Store.YES, Index.NOT_ANALYZED);
        Field field2 = new Field("title", article.getTitle(), Store.YES, Index.ANALYZED);
        Field field3 = new Field("content", article.getContent(), Store.YES, Index.ANALYZED);
        document.add(field1);
        document.add(field2);
        document.add(field3);
        return document;
    }

    /** Rebuilds an Article from a Document, decoding the prefix-coded id. */
    public static Article document2Article(Document document) {
        Article article = new Article();
        article.setId(NumericUtils.prefixCodedToLong(document.get("id")));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }
}
cn.itheima.lucene.test.QueryTest.java
public class QueryTest {
    /**
     * Query styles demonstrated below:
     *   keyword query (TermQuery)
     *   match-all query
     *   numeric range query
     *   wildcard query        -- important
     *   phrase query
     *   boolean query         -- important
     */
    @Test
    public void testTermQuery() throws Exception {
        // A keyword query wraps one (field, text) pair. No analyzer runs here,
        // so the text must already be lower-case or it will not match the index.
        Term term = new Term("title", "lucene");
        Query query = new TermQuery(term);
        testSearch(query);
    }

    @Test
    public void testQueryAllDocs() throws Exception {
        // matches every document in the index
        Query query = new MatchAllDocsQuery();
        this.testSearch(query);
    }

    @Test
    public void testQueryRange() throws Exception {
        // numeric range query; ids must have been indexed with the
        // NumericUtils prefix coding, a plain String id would not match
        Query query = NumericRangeQuery.newLongRange("id", 5L, 15L, true, true);
        this.testSearch(query);
    }

    @Test
    public void testQueryWildCard() throws Exception {
        // wildcard query: * matches any number of characters, ? exactly one
        Term term = new Term("title", "l*?");
        Query query = new WildcardQuery(term);
        this.testSearch(query);
    }

    @Test
    public void testQueryPharse() throws Exception {
        // phrase query: every term must target the SAME field
        Term term = new Term("title", "lucene");
        Term term2 = new Term("title", "搜索");
        PhraseQuery query = new PhraseQuery();
        // with two or more terms the token positions must be supplied,
        // otherwise nothing matches
        query.add(term, 0);
        query.add(term2, 7);
        this.testSearch(query);
    }

    /**
     * Boolean query:
     *   Occur.MUST      clause is required (and)
     *   Occur.MUST_NOT  clause must not match
     *   Occur.SHOULD    clause is optional (or)
     */
    @Test
    public void testBooleanQuery() throws Exception {
        Term term = new Term("title", "l*");
        Query query = new WildcardQuery(term);
        Term term2 = new Term("content", "baidu");
        Query query2 = new WildcardQuery(term2);
        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(query, Occur.MUST);
        // Fix: the original added `query` twice, so the content clause
        // (query2) was never part of the boolean query.
        booleanQuery.add(query2, Occur.MUST);
        this.testSearch(booleanQuery);
    }

    // Runs the query and prints up to 25 matching Articles.
    private void testSearch(Query query) throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.directory);
        TopDocs topDocs = indexSearcher.search(query, 25);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        List<Article> articles = new ArrayList<Article>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            articles.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : articles) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}
18、score
cn.itheima.lucene.test.LuceneTest.java
public class LuceneTest {

    /** Indexes one Article with a boosted score (use case: paid ranking). */
    @Test
    public void testCreateIndex() throws Exception {
        Article article = new Article();
        article.setId(26L);
        article.setTitle("lucene可以用来做搜索引擎");
        article.setContent("baidu、google搜索引擎公司");
        IndexWriter writer = new IndexWriter(LuceneUtils.directory, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
        Document document = DocumentUtils.article2Document(article);
        // boosting the document raises its relevance score in search results
        document.setBoost(100);
        writer.addDocument(document);
        writer.close();
    }
}
cn.itheima.lucene.test.ScoreTest.java
/**
 * 1、相同的关键词,相同的结构:得分一样
 * 2、相同的结构,不同的关键词:得分不一样(lucene和搜索的得分是不一样的,一般情况下,中文比英文的得分高)
 * 3、不同的结构,相同的关键词:关键词出现的次数越多,得分越高
 * 4、竞价
 */
public class ScoreTest {

    /** Searches title/content for "lucene", printing each hit's relevance score and Article. */
    @Test
    public void testSearch() throws Exception {
        IndexSearcher searcher = new IndexSearcher(LuceneUtils.directory);
        QueryParser parser = new MultiFieldQueryParser(
                Version.LUCENE_30, new String[]{"title", "content"}, LuceneUtils.analyzer);
        Query query = parser.parse("lucene");
        TopDocs topDocs = searcher.search(query, 25);
        List<Article> results = new ArrayList<Article>();
        for (ScoreDoc hit : topDocs.scoreDocs) {
            // print the relevance score of each hit
            System.out.println(hit.score);
            Document doc = searcher.doc(hit.doc);
            results.add(DocumentUtils.document2Article(doc));
        }
        for (Article article : results) {
            System.out.println(article.getId() + "\n" + article.getTitle() + "\n" + article.getContent());
        }
    }
}