package com.utils;
import java.io.File;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.zhsh.manageplatform.beans.ReptlieContent;
/**
*
* Description: {实现索引的创建、更新、删除、查询}
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
public class IndexesAndQuery {
/**
* @param PATH:索引文件存放地址
*
* */
private static String PATH = "E://www/Indexs/";
/**
*
* Description: {索引的创建}
* @param :list,存放索引实体;PATH:索引存放地址
* @return :
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
@SuppressWarnings("deprecation")
public static void indexCreat(List<ReptlieContent> list){
IndexWriter writer = null;
try {
//封装庖丁解牛中文分词器
Analyzer analyzer = new IKAnalyzer();
//判断目录是否为空,空则创建索引
//建立信息索引,document类似数据库的行,field类似数据库的列
File file = new File(PATH);
/**
* 先删除该目录下的所有文件,然后在更新的
* */
if (file.isDirectory()) {
File[] files = file.listFiles();
for (File f : files) {
f.delete();
}
}
/**如果目录不存在,则会自动创建
* FSDirectory:表示文件系统目录,即会存储在计算机本地磁盘,继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有:org.apache.lucene.store.RAMDirectory:存储在内存中
*open 方法传入的 Path 对象
*/
Directory directory = FSDirectory.open(file);
/** 创建 索引写配置对象,传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建 索引写对象,用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
for (ReptlieContent re:list){
String id = re.getContentuuid();
String title =re.getContenttitle()==null?"":re.getContenttitle();
String author =re.getContentauthor()==null?"":re.getContentauthor();
String source=re.getContentsource()==null?"":re.getContentsource();
String fluuid=re.getFluuid()==null?"":re.getFluuid();
String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
System.err.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime+">>>"+fluuid);
Document doc=new Document();
doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
}
writer.commit();
writer.close();
System.out.println("添加索引成功。。。。");
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
System.out.println("添加索引失败。。。");
}
}
/**
*
* Description: {索引查询}
* @param :queryWord :检索的内容,从文章标题进行查询;PATH :Lucene 索引文件所在目录
* @return : List<ReptlieContent>
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
public static List<ReptlieContent> indexSearch(String queryWord) throws Exception {
List<ReptlieContent> list=new ArrayList<ReptlieContent>();
File file = new File(PATH);
/** 创建分词器
* 1)创建索引 与 查询索引 所用的分词器必须一致
* 2)现在使用 中文分词器 IKAnalyzer
*/
/*Analyzer analyzer = new StandardAnalyzer();*/
Analyzer analyzer = new IKAnalyzer();
/**创建查询对象(QueryParser):QueryParser(String f, Analyzer a)
* 第一个参数:默认搜索域,与创建索引时的域名称必须相同
* 第二个参数:分词器
* 默认搜索域作用:
* 如果搜索语法parse(String query)中指定了域名,则从指定域中搜索
* 如果搜索语法parse(String query)中只指定了查询关键字,则从默认搜索域中进行搜索
*/
QueryParser queryParser = new QueryParser(Version.LUCENE_47, "title", analyzer);
/** parse 表示解析查询语法,查询语法为:"域名:搜索的关键字"
* parse("fileName:web"):则从fileName域中进行检索 web 字符串
* 如果为 parse("web"):则从默认搜索域 fileContext 中进行检索
* 1)查询不区分大小写
* 2)因为使用的是 StandardAnalyzer(标准分词器),所以对英文效果很好,如果此时检索中文,基本是行不通的
*/
Query query = queryParser.parse(queryWord);
System.out.println("--------------------------");
System.out.println(query.toString());
/** 与创建 索引 和 Lucene 文档 时一样,指定 索引和文档 的目录
* 即指定查询的索引库
* Lucene 7.4.0 中 FSDirectory.open 方法参数为 Path
* Lucene 4.10。3 中 FSDirectory.open 方法参数为 File
*/
/*Path path = Paths.get(indexDir.toURI());*/
Directory dir = FSDirectory.open(file);
/*** 创建 索引库读 对象
* DirectoryReader 继承于org.apache.lucene.index.IndexReader
* */
DirectoryReader directoryReader = DirectoryReader.open(dir);
/** 根据 索引对象创建 索引搜索对象
**/
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
/**search(Query query, int n) 搜索
* 第一个参数:查询语句对象
* 第二个参数:指定查询最多返回多少条数据,此处则表示返回个数最多100条
*/
TopDocs topdocs = indexSearcher.search(query, 100);
System.out.println("查询结果总数:" + topdocs.totalHits);
/**从搜索结果对象中获取结果集
* 如果没有查询到值,则 ScoreDoc[] 数组大小为 0
* */
ScoreDoc[] scoreDocs = topdocs.scoreDocs;
ScoreDoc loopScoreDoc = null;
for (int i = 0; i < scoreDocs.length; i++) {
loopScoreDoc = scoreDocs[i];
/**获取 文档 id 值
* 这是 Lucene 存储时自动为每个文档分配的值,相当于 Mysql 的主键 id
* */
int docID = loopScoreDoc.doc;
/**通过文档ID从硬盘中读取出对应的文档*/
Document document = directoryReader.document(docID);
/**get方法 获取对应域名的值 * 如域名 key 值不存在,返回 null*/
ReptlieContent re=new ReptlieContent();
re.setContentuuid(document.get("uuid"));
re.setContenttitle(document.get("title"));
re.setContentauthor(document.get("author"));
re.setContentsource(document.get("author"));
re.setReleasetime(document.get("pushtime"));
re.setFluuid(document.get("fluuid"));
list.add(re);
}
return list;
}
/**
*
* Description: {批量更新}
* @param :list:存放实体
* @return :
* @throws :Exception
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
@SuppressWarnings("deprecation")
public static void indexUpds(List<ReptlieContent> list){
//封装庖丁解牛中文分词器
IndexWriter writer = null;
try {
//封装庖丁解牛中文分词器
Analyzer analyzer = new IKAnalyzer();
//判断目录是否为空,空则创建索引
//建立信息索引,document类似数据库的行,field类似数据库的列
File file = new File(PATH);
/**如果目录不存在,则会自动创建
* FSDirectory:表示文件系统目录,即会存储在计算机本地磁盘,继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有:org.apache.lucene.store.RAMDirectory:存储在内存中
* Lucene 7.4.0 版本 open 方法传入的 Path 对象
* Lucene 4.10.3 版本 open 方法传入的是 File 对象
*/
Directory directory = FSDirectory.open(file);
/** 创建 索引写配置对象,传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建 索引写对象,用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
for (ReptlieContent re:list){
String id = re.getContentuuid();
String title =re.getContenttitle()==null?"":re.getContenttitle();
String author =re.getContentauthor()==null?"":re.getContentauthor();
String source=re.getContentsource()==null?"":re.getContentsource();
String fluuid=re.getFluuid()==null?"":re.getFluuid();
String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
System.out.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime);
Document doc=new Document();
doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.updateDocument(new Term("uuid", id),doc);
}
writer.commit();
writer.close();
System.out.println("修改索引成功。。。。");
} catch (Exception e) {
System.out.println("修改索引失败。");
e.printStackTrace();
}
}
/**
*
* Description: {单条更新}
* @param :ReptlieContent:要创建的实体
* @return :
* @throws :
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
@SuppressWarnings("deprecation")
public static void indexUpd(ReptlieContent re){
//封装庖丁解牛中文分词器
IndexWriter writer = null;
try {
//封装庖丁解牛中文分词器
Analyzer analyzer = new IKAnalyzer();
//判断目录是否为空,空则创建索引
//建立信息索引,document类似数据库的行,field类似数据库的列
File file = new File(PATH);
/**如果目录不存在,则会自动创建
* FSDirectory:表示文件系统目录,即会存储在计算机本地磁盘,继承于
* org.apache.lucene.store.BaseDirectory
* 同理还有:org.apache.lucene.store.RAMDirectory:存储在内存中
* Lucene 7.4.0 版本 open 方法传入的 Path 对象
* Lucene 4.10.3 版本 open 方法传入的是 File 对象
*/
Directory directory = FSDirectory.open(file);
/** 创建 索引写配置对象,传入分词器
* Lucene 7.4.0 版本 IndexWriterConfig 构造器不需要指定 Version.LUCENE_4_10_3
* Lucene 4.10.3 版本 IndexWriterConfig 构造器需要指定 Version.LUCENE_4_10_3
* */
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
/**创建 索引写对象,用于正式写入索引和文档数据*/
writer=new IndexWriter(directory,config);
String id = re.getContentuuid();
String title =re.getContenttitle()==null?"":re.getContenttitle();
String author =re.getContentauthor()==null?"":re.getContentauthor();
String source=re.getContentsource()==null?"":re.getContentsource();
String fluuid=re.getFluuid()==null?"":re.getFluuid();
String pushtime =re.getReleasetime()==null?"":re.getReleasetime();
System.out.println(id+">>>"+title+">>>"+author+">>>"+source+">>>"+pushtime);
Document doc=new Document();
doc.add(new Field("uuid", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("author", author, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("source", source, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("pushtime", pushtime, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("fluuid", fluuid, Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.updateDocument(new Term("uuid", id),doc);
writer.commit();
writer.close();
System.out.println("修改索引成功。。。。");
} catch (Exception e) {
System.out.println("修改索引失败。");
e.printStackTrace();
}
}
/**
*
* Description: {索引的删除}
* @param :id:要删除索引的唯一标识
* @return :
* @throws :
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
public static void indexDel(String id){
//封装庖丁解牛中文分词器
IndexWriter writer = null;
try {
Analyzer analyzer = new IKAnalyzer();
File file = new File(PATH);
Directory directory = FSDirectory.open(file);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
writer=new IndexWriter(directory,config);
writer.deleteDocuments(new Term("uuid",id));
writer.close();
System.out.println("删除索引成功");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
*
* Description: {格式化发布时间}
* @param :str:时间字符串,pattern:日期格式;locale:区域
* @return :
* @throws :
* @see :com.utils
* @author:米
* @date :2019-9-18
* Note: Nothing much.
*/
public static Date parse(String str, String pattern, Locale locale) {
if (str == null || pattern == null) {
return null;
}
try {
return new SimpleDateFormat(pattern, locale).parse(str);
} catch (ParseException e) {
e.printStackTrace();
}
return null;
}
}
Maven dependencies to add to pom.xml (Lucene 4.7.2 and the IK Analyzer):
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.7.2</version>
</dependency>
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.7.2</version>
</dependency>