Lucene 检索引擎 + IK 分词：索引增删改查全套代码

mjf853912229

于 2019-09-29 16:43:33 发布

阅读量151

点赞数

本文链接：https://blog.csdn.net/mjf853912229/article/details/101699248

版权

package com.utils;

import java.io.File;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.zhsh.manageplatform.beans.ReptlieContent;

/**
*
* Description: {实现索引的创建、更新、删除、查询}
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
public class IndexesAndQuery {
   /**
   * @param PATH:索引文件存放地址
   *
   * */
   private static String PATH = "E://www/Indexs/";


   /**
   *
   * Description: {索引的创建}
   * @param ：list,存放索引实体;PATH:索引存放地址
   * @return ：
   * @throws :Exception
   * @see :com.utils
   * @author:米
   * @date :2019-9-18
   * Note: Nothing much.
   */
   @SuppressWarnings("deprecation")
   public static void indexCreat(List<ReptlieContent> list){
       IndexWriter writer = null;
       try {
           //封装庖丁解牛中文分词器
           Analyzer analyzer = new IKAnalyzer();
           //判断目录是否为空，空则创建索引
           //建立信息索引，document类似数据库的行，field类似数据库的列
           File file = new File(PATH);
/**
* 先删除该目录下的所有文件，然后在更新的
* */
           if (file.isDirectory()) {
               File[] files = file.listFiles();
               for (File f : files) {
                      f.delete();
                  }
               }
/**如果目录不存在，则会自动创建
* FSDirectory：表示文件系统目录，即会存储在计算机本地磁盘，继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有：org.apache.lucene.store.RAMDirectory：存储在内存中
*open 方法传入的 Path 对象

*/
Directory directory = FSDirectory.open(file);

/** 创建索引写配置对象，传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建索引写对象，用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
for (ReptlieContent re:list){
           String id = re.getContentuuid();
           String title =re.getContenttitle()==null?"":re.getContenttitle();
           String author =re.getContentauthor()==null?"":re.getContentauthor();
           String source=re.getContentsource()==null?"":re.getContentsource();
           String fluuid=re.getFluuid()==null?"":re.getFluuid();
           String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
           System.err.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime+">>>"+fluuid);
           Document doc=new Document();
           doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
           doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
           writer.addDocument(doc);
}
writer.commit();
           writer.close();
System.out.println("添加索引成功。。。。");
       } catch (CorruptIndexException e) {
           e.printStackTrace();
       } catch (LockObtainFailedException e) {
           e.printStackTrace();
       } catch (Exception e) {
           e.printStackTrace();
           System.out.println("添加索引失败。。。");
       }

   }



   /**
   *
   * Description: {索引查询}
   * @param ：queryWord ：检索的内容，从文章标题进行查询;PATH ：Lucene 索引文件所在目录
   * @return ： List<ReptlieContent>
   * @throws :Exception
   * @see :com.utils
   * @author:米
   * @date :2019-9-18
   * Note: Nothing much.
   */
public static List<ReptlieContent> indexSearch(String queryWord) throws Exception {
   List<ReptlieContent> list=new ArrayList<ReptlieContent>();
   File file = new File(PATH);
/** 创建分词器
* 1）创建索引与查询索引所用的分词器必须一致
* 2)现在使用中文分词器 IKAnalyzer
*/
/*Analyzer analyzer = new StandardAnalyzer();*/
Analyzer analyzer = new IKAnalyzer();


/**创建查询对象(QueryParser)：QueryParser(String f, Analyzer a)
* 第一个参数：默认搜索域，与创建索引时的域名称必须相同
* 第二个参数：分词器
* 默认搜索域作用：
* 如果搜索语法parse(String query)中指定了域名，则从指定域中搜索
* 如果搜索语法parse(String query)中只指定了查询关键字，则从默认搜索域中进行搜索
*/
QueryParser queryParser = new QueryParser(Version.LUCENE_47, "title", analyzer);

/** parse 表示解析查询语法，查询语法为："域名:搜索的关键字"
* parse("fileName:web")：则从fileName域中进行检索 web 字符串
* 如果为 parse("web")：则从默认搜索域 fileContext 中进行检索
* 1)查询不区分大小写
* 2)因为使用的是 StandardAnalyzer(标准分词器)，所以对英文效果很好，如果此时检索中文，基本是行不通的
*/
Query query = queryParser.parse(queryWord);
System.out.println("--------------------------");
System.out.println(query.toString());

/** 与创建索引和 Lucene 文档时一样，指定索引和文档的目录
* 即指定查询的索引库
* Lucene 7.4.0 中 FSDirectory.open 方法参数为 Path
* Lucene 4.10。3 中 FSDirectory.open 方法参数为 File
*/
/*Path path = Paths.get(indexDir.toURI());*/
Directory dir = FSDirectory.open(file);

/*** 创建索引库读对象
* DirectoryReader 继承于org.apache.lucene.index.IndexReader
* */
DirectoryReader directoryReader = DirectoryReader.open(dir);

/** 根据索引对象创建索引搜索对象
**/
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

/**search(Query query, int n) 搜索
* 第一个参数：查询语句对象
* 第二个参数：指定查询最多返回多少条数据，此处则表示返回个数最多100条
*/
TopDocs topdocs = indexSearcher.search(query, 100);

System.out.println("查询结果总数：" + topdocs.totalHits);

/**从搜索结果对象中获取结果集
* 如果没有查询到值，则 ScoreDoc[] 数组大小为 0
* */
ScoreDoc[] scoreDocs = topdocs.scoreDocs;

ScoreDoc loopScoreDoc = null;
for (int i = 0; i < scoreDocs.length; i++) {

loopScoreDoc = scoreDocs[i];

/**获取文档 id 值
* 这是 Lucene 存储时自动为每个文档分配的值，相当于 Mysql 的主键 id
* */
int docID = loopScoreDoc.doc;

/**通过文档ID从硬盘中读取出对应的文档*/
Document document = directoryReader.document(docID);

/**get方法获取对应域名的值 * 如域名 key 值不存在，返回 null*/
ReptlieContent re=new ReptlieContent();
re.setContentuuid(document.get("uuid"));
re.setContenttitle(document.get("title"));
re.setContentauthor(document.get("author"));
re.setContentsource(document.get("author"));
re.setReleasetime(document.get("pushtime"));
re.setFluuid(document.get("fluuid"));
list.add(re);
}
return list;
}

/**
*
* Description: {批量更新}
* @param ：list:存放实体
* @return ：
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
@SuppressWarnings("deprecation")
   public static void indexUpds(List<ReptlieContent> list){
       //封装庖丁解牛中文分词器
       IndexWriter writer = null;
       try {
           //封装庖丁解牛中文分词器
           Analyzer analyzer = new IKAnalyzer();
           //判断目录是否为空，空则创建索引
           //建立信息索引，document类似数据库的行，field类似数据库的列
           File file = new File(PATH);


/**如果目录不存在，则会自动创建
* FSDirectory：表示文件系统目录，即会存储在计算机本地磁盘，继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有：org.apache.lucene.store.RAMDirectory：存储在内存中
* Lucene 7.4.0 版本 open 方法传入的 Path 对象
* Lucene 4.10.3 版本 open 方法传入的是 File 对象
*/
Directory directory = FSDirectory.open(file);

/** 创建索引写配置对象，传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建索引写对象，用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
for (ReptlieContent re:list){
           String id = re.getContentuuid();
           String title =re.getContenttitle()==null?"":re.getContenttitle();
           String author =re.getContentauthor()==null?"":re.getContentauthor();
           String source=re.getContentsource()==null?"":re.getContentsource();
           String fluuid=re.getFluuid()==null?"":re.getFluuid();
           String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
           System.out.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime);
           Document doc=new Document();
           doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
           doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
           writer.updateDocument(new Term("uuid", id),doc);
}
writer.commit();
           writer.close();
System.out.println("修改索引成功。。。。");
       } catch (Exception e) {
           System.out.println("修改索引失败。");
           e.printStackTrace();
       }

   }
/**
*
* Description: {单条更新}
* @param ：ReptlieContent:要创建的实体
* @return ：
* @throws :
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
   @SuppressWarnings("deprecation")
   public static void indexUpd(ReptlieContent re){
       //封装庖丁解牛中文分词器
       IndexWriter writer = null;
       try {
           //封装庖丁解牛中文分词器
           Analyzer analyzer = new IKAnalyzer();
           //判断目录是否为空，空则创建索引
           //建立信息索引，document类似数据库的行，field类似数据库的列
           File file = new File(PATH);


/**如果目录不存在，则会自动创建
* FSDirectory：表示文件系统目录，即会存储在计算机本地磁盘，继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有：org.apache.lucene.store.RAMDirectory：存储在内存中
* Lucene 7.4.0 版本 open 方法传入的 Path 对象
* Lucene 4.10.3 版本 open 方法传入的是 File 对象
*/
Directory directory = FSDirectory.open(file);

/** 创建索引写配置对象，传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* Lucene 4.10.3 版本 IndexWriterConfig 构造器需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建索引写对象，用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
       String id = re.getContentuuid();
       String title =re.getContenttitle()==null?"":re.getContenttitle();
       String author =re.getContentauthor()==null?"":re.getContentauthor();
       String source=re.getContentsource()==null?"":re.getContentsource();
       String fluuid=re.getFluuid()==null?"":re.getFluuid();
       String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
       System.out.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime);
       Document doc=new Document();
       doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
       doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
       doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
       doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
       doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
       doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
       writer.updateDocument(new Term("uuid", id),doc);
writer.commit();
           writer.close();
System.out.println("修改索引成功。。。。");
       } catch (Exception e) {
           System.out.println("修改索引失败。");
           e.printStackTrace();
       }

   }

   /**
   *
   * Description: {索引的删除}
   * @param ：id:要删除索引的唯一标识
   * @return ：
   * @throws :
   * @see :com.utils
   * @author:米
   * @date :2019-9-18
   * Note: Nothing much.
   */
   public static void indexDel(String id){
       //封装庖丁解牛中文分词器
       IndexWriter writer = null;
       try {
           Analyzer analyzer = new IKAnalyzer();
           File file = new File(PATH);
Directory directory = FSDirectory.open(file);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
writer=new IndexWriter(directory,config);
writer.deleteDocuments(new Term("uuid",id));
writer.close();
           System.out.println("删除索引成功");

       } catch (Exception e) {
           e.printStackTrace();
       }
   }

   /**
   *
   * Description: {格式化发布时间}
   * @param ：str:时间字符串，pattern：日期格式；locale：区域
   * @return ：
   * @throws :
   * @see :com.utils
   * @author:米
   * @date :2019-9-18
   * Note: Nothing much.
   */
   public static Date parse(String str, String pattern, Locale locale) {
       if (str == null || pattern == null) {
           return null;
       }
       try {
           return new SimpleDateFormat(pattern, locale).parse(str);
       } catch (ParseException e) {
           e.printStackTrace();
       }
       return null;
   }

}

pom.xml 中需要添加的依赖配置：

<dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
       <version>4.7.2</version>
       </dependency>

       <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-queryparser</artifactId>
       <version>4.7.2</version>
       </dependency>
       <dependency>
       <groupId>com.janeluo</groupId>
       <artifactId>ikanalyzer</artifactId>
       <version>2012_u6</version>
       </dependency>

       <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-analyzers-common</artifactId>
       <version>4.7.2</version>
       </dependency>

mjf853912229

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene检索引擎 IK分词增删改查全套代码

package com.utils;import java.io.File;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Date;import java.util.List;import java.util....
复制链接

扫一扫