一、环境概况
1.1基础环境
名称 | 软件 | 版本 |
---|---|---|
开发工具 | IDEA | 2017 |
SDK | JDK | 1.8 |
索引 | lucene | 6.0.0 |
1.2创建4个文件,用于创建索引
二、创建工程
2.1 创建一个maven工程,pom文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Lucene 6.0.0 sorting example module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- This module inherits from a parent POM that is not shown here; the parent must be resolvable for the build to work. -->
<parent>
<artifactId>spring-clould</artifactId>
<groupId>com.cc.springcloud</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>lucene6.0.0-manager</artifactId>
<properties>
<!-- Source and report encoding are fixed to UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
<!-- Single place to change the version shared by all Lucene artifacts below. -->
<lucene.version>6.0.0</lucene.version>
</properties>
<dependencies>
<!-- Lucene core library. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- Common analyzers module. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- Query parser module; the Search class imports the classic QueryParser from it. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- JUnit 4 for the MainTest class. NOTE(review): no scope is declared here, so Maven's default scope applies; confirm whether test scope was intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
</dependencies>
</project>
二、创建工程
2.1 创建索引工具类FileIndexUtils
package util;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Date;
/**
 * Builds the Lucene index used by the sorting examples.
 *
 * <p>Every regular file found directly inside {@code FILES} becomes one document in the
 * index stored at {@code INDEXDIR}. Each document carries searchable/stored fields plus
 * doc-values fields that make sorting by file name, size and modification date possible.
 */
public class FileIndexUtils {
    /** Index directory shared with the search side; stays {@code null} if it could not be opened. */
    private static Directory directory = null;
    /** Location of the index on disk. */
    private static final String INDEXDIR = "D:\\indexFile\\test-search-advance\\";
    /** Folder whose files are indexed. */
    private static final String FILES = "D:\\indexFile\\test-search-advance-files";

    static {
        try {
            directory = FSDirectory.open(Paths.get(INDEXDIR));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory opened at class initialisation, or {@code null} if opening failed
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Recreates the index from scratch ({@code OpenMode.CREATE}) with one document per
     * regular file in {@code FILES}.
     *
     * <p>I/O failures are printed and end the run; the writer is always closed. If the
     * source folder cannot be listed, a warning is printed and an empty index is written
     * instead of failing with a {@link NullPointerException}.
     */
    public static void createIndex() {
        // StandardAnalyzer tokenises the text fields while indexing.
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // listFiles() returns null when the path is missing, is not a directory, or cannot be read.
        File[] files = new File(FILES).listFiles();
        if (files == null) {
            System.err.println("Cannot list files in " + FILES + "; the index will be empty");
            files = new File[0];
        }

        // try-with-resources closes the writer on every path, including failures.
        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            for (File f : files) {
                // Sub-directories cannot be opened by FileReader and would abort the whole run.
                if (!f.isFile()) {
                    continue;
                }
                writer.addDocument(toDocument(f));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds the Lucene document for one file. */
    private static Document toDocument(File f) throws IOException {
        Document doc = new Document();
        // TextField: tokenised, indexed and stored, but carries no doc values (cannot be sorted on).
        doc.add(new TextField("content", readContent(f), Field.Store.YES));
        doc.add(new TextField("filename", f.getName(), Field.Store.YES));
        // Sorting on a text value needs doc values, hence the extra SortedDocValuesField.
        doc.add(new SortedDocValuesField("fname", new BytesRef(f.getName())));
        // StringField: indexed as a single token (not analysed) and stored.
        doc.add(new StringField("path", f.getAbsolutePath(), Field.Store.YES));
        // NumericDocValuesField: doc values for long values, used for sorting; not stored.
        doc.add(new NumericDocValuesField("date", f.lastModified()));
        // StoredField: keeps the value retrievable from search hits.
        doc.add(new StoredField("date_store", f.lastModified()));
        doc.add(new NumericDocValuesField("size", f.length()));
        doc.add(new StoredField("size_store", f.length()));
        return doc;
    }

    /** Reads a whole file as text, using the platform default charset of {@link FileReader}. */
    private static String readContent(File f) throws IOException {
        StringBuilder sb = new StringBuilder();
        // The reader is closed even when read() throws.
        try (FileReader fileReader = new FileReader(f)) {
            char[] buffer = new char[4096];
            int read;
            while ((read = fileReader.read(buffer)) != -1) {
                // Append only the chars actually read; the tail of the buffer is stale.
                sb.append(buffer, 0, read);
            }
        }
        return sb.toString();
    }
}
2.2 创建查询类Search
package lucene;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import util.FileIndexUtils;
import java.io.IOException;
import java.text.SimpleDateFormat;
/**
 * Runs queries against the index built by {@code FileIndexUtils} and prints the hits.
 *
 * <p>A single {@link IndexReader} is kept in a static field and refreshed when the
 * index has changed on disk.
 */
public class Search {
    /** Shared reader; stays {@code null} if the index could not be opened. */
    private static IndexReader reader = null;

    static {
        try {
            reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the most recent view of the index.
     *
     * <p>Opens the reader if it is not open yet; otherwise asks Lucene whether the index
     * changed and, if so, swaps in the new reader and closes the old one.
     *
     * @return a new searcher wrapping the shared reader
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = DirectoryReader.open(FileIndexUtils.getDirectory());
            } else {
                // openIfChanged returns null when the current reader is still up to date.
                IndexReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Parses {@code queryStr} against the {@code content} field, fetches at most five
     * hits and prints doc id, score, file name, path, size and modification date of each.
     *
     * @param queryStr query in classic query-parser syntax
     * @param sort     sort order to apply, or {@code null} for the default relevance order
     */
    public void search(String queryStr, Sort sort) {
        IndexSearcher searcher = getSearcher();
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        try {
            Query query = parser.parse(queryStr);
            System.out.println("Query:" + query);

            TopDocs tds = (sort != null)
                    ? searcher.search(query, 5, sort)
                    : searcher.search(query, 5);

            // HH = 24-hour clock. The previous "hh" printed 12-hour values without an
            // AM/PM marker, so morning and afternoon timestamps looked identical.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                // NOTE(review): for sorted searches the score may print as NaN because this
                // search overload presumably does not compute scores; confirm against the
                // IndexSearcher documentation.
                System.out.println(sd.doc + ":(score:" + sd.score + ")[filename:" + d.get("filename")
                        + "][path:" + d.get("path")
                        + "][size:" + d.get("size_store")
                        + "][date:" + sdf.format(Long.valueOf(d.get("date_store")))
                        + "]");
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}
2.3 创建测试类MainTest
import lucene.Search;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.junit.Before;
import org.junit.Test;
import util.FileIndexUtils;
/**
 * JUnit driver for the sorting examples: rebuilds the index before each test and then
 * runs one sorted search. The commented-out calls are alternative sort orders to try.
 */
public class MainTest {
    private Search search;

    /** Rebuilds the index first, then creates the search helper that reads it. */
    @Before
    public void init() {
        FileIndexUtils.createIndex();
        search = new Search();
    }

    /** Searches for "java", ordered by file size (descending) with doc id as tie-breaker. */
    @Test
    public void test() {
        // Default: order by relevance score.
        // search.search("java", null);
        // Sort.INDEXORDER: order by document id.
        // search.search("java", Sort.INDEXORDER);
        // Explicitly order by relevance score.
        // search.search("java", Sort.RELEVANCE);
        // Order by file size.
        // search.search("java", new Sort(new SortField("size", SortField.Type.LONG, true)));
        // Order by date, newest first.
        // search.search("java", new Sort(new SortField("date", SortField.Type.LONG, true)));
        // Order by file name.
        // search.search("java", new Sort(new SortField("fname", SortField.Type.STRING, true)));
        // Sort has a varargs constructor, Sort(SortField... fields), so several fields can be combined.
        // Size first, then file name when sizes are equal:
        // search.search("java", new Sort(new SortField("size", SortField.Type.LONG, true),
        //         new SortField("fname", SortField.Type.STRING, true)));

        // Size first (reverse order); ties are broken by SortField.FIELD_DOC.
        SortField bySizeDescending = new SortField("size", SortField.Type.LONG, true);
        Sort sizeThenDoc = new Sort(bySizeDescending, SortField.FIELD_DOC);
        search.search("java", sizeThenDoc);
    }
}
2.4 运行测试用例创建索引
2.5 到创建的索引目录下查看索引文件
2.6 用Luke工具查看索引文件
2.7结束