一、搭建基本环境
在eclipse中创建普通的maven项目,并在pom.xml中引入相应的依赖。
这里只给出pom.xml中的依赖
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
</dependencies>
二、文档操作
1、添加文档
2、删除文档
3、修改文档
package com.feiyang.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
public class IndexingTest {
//模拟三个数据信息
private String ids[]={"1","2","3"};
private String citys[]={"qingdao","nanjing","shanghai"};
private String descs[]={
"Qingdao is a beautiful city.",
"Nanjing is a city of culture.",
"Shanghai is a bustling city."
};
//创建存储索引的目录
private Directory dir;
/**
* 将索引信息设置进文档中,然后写人对应的目录下
* @throws Exception
*/
@Before
public void setUp() throws Exception {
dir = FSDirectory.open(Paths.get("D:\\lucene2"));
IndexWriter writer = getWriter();
for(int i=0;i<ids.length;i++){
Document doc = new Document();
doc.add(new StringField("id",ids[i],Field.Store.YES));
doc.add(new StringField("city",citys[i],Field.Store.YES));
doc.add(new TextField("desc",descs[i],Field.Store.NO));
writer.addDocument(doc);
}
writer.close();
}
/**
* 测试写了几个文档
* @throws Exception
*/
@Test
public void testIndexWriter() throws Exception {
IndexWriter writer = getWriter();
System.out.println("写了"+writer.numDocs()+"个文档");
writer.close();
}
/**
* 测试读取文档
* @throws Exception
*/
@Test
public void testIndexReader() throws Exception{
IndexReader reader = DirectoryReader.open(dir);
System.out.println("最大文档数:"+reader.maxDoc());
System.out.println("实际文档数:"+reader.numDocs());
reader.close();
}
/**
* 测试删除,在合并前
* @throws Exception
*/
@Test
public void testDeleteBeforeMerge()throws Exception{
IndexWriter writer = getWriter();
System.out.println("删除前:"+writer.numDocs());
writer.deleteDocuments(new Term("id","1"));
writer.commit();
System.out.println("最大文档数:"+writer.maxDoc());
System.out.println("实际文档数:"+writer.numDocs());
writer.close();
}
/**
* 测试删除,在合并后
* @throws Exception
*/
@Test
public void testDeleteAfterMerge()throws Exception{
IndexWriter writer = getWriter();
System.out.println("删除前:"+writer.numDocs());
writer.deleteDocuments(new Term("id","1"));
writer.forceMergeDeletes();//强制删除
writer.commit();
System.out.println("最大文档数:"+writer.maxDoc());
System.out.println("实际文档数:"+writer.numDocs());
writer.close();
}
/**
* 测试更新
*/
@Test
public void testUpdate()throws Exception{
IndexWriter writer = getWriter();
Document doc = new Document();
doc.add(new StringField("id","1",Field.Store.YES));
doc.add(new StringField("city","beijing",Field.Store.YES));
doc.add(new TextField("desc","beijing is a city",Field.Store.NO));
writer.updateDocument(new Term("id","1"),doc);
writer.close();
}
/**
* 获取索引输出流
* @return
* @throws Exception
*/
private IndexWriter getWriter() throws Exception{
Analyzer analyzer = new StandardAnalyzer();//标准分词器
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
IndexWriter writer = new IndexWriter(dir, conf);
return writer;
}
}
4、文本域加权:lucene对查询出来有默认的排序规则,比如按单词所占的比例,位置等因素进行排序,而我们可以通过对某一文本域进行加权来提升他的排名。
package com.feiyang.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
public class IndexingTest2 {
//准备数据源
private String ids[]={"1","2","3","4"};
private String authors[]={"Jack","Marry","John","Json"};
private String positions[]={"accounting","technician","salesperson","boss"};
private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
private String contents[]={
"If possible, use the same JRE major version at both index and search time.",
"When upgrading to a different JRE major version, consider re-indexing. ",
"Different JRE major versions may implement different versions of Unicode,",
"For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
};
//索引目录
private Directory dir;
/**
* 获取IndexWriter实例
* @return
* @throws Exception
*/
private IndexWriter getWriter()throws Exception{
Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
IndexWriter writer=new IndexWriter(dir, iwc);
return writer;
}
/**
* 生成索引
* @throws Exception
*/
@Test
public void index()throws Exception{
dir=FSDirectory.open(Paths.get("D:\\lucene3"));
IndexWriter writer=getWriter();
for(int i=0;i<ids.length;i++){
Document doc=new Document();
doc.add(new StringField("id", ids[i], Field.Store.YES));
doc.add(new StringField("author",authors[i],Field.Store.YES));
doc.add(new StringField("position",positions[i],Field.Store.YES));
// 加权操作:对职位是boss的进行加权操作,默认是1,这里我们变成1.5f
TextField field=new TextField("title", titles[i], Field.Store.YES);
if("boss".equals(positions[i])){
field.setBoost(1.5f);
}
doc.add(field);
doc.add(new TextField("content", contents[i], Field.Store.NO));
writer.addDocument(doc); // 添加文档
}
writer.close();
}
/**
* 查询
* @throws Exception
*/
@Test
public void search()throws Exception{
dir=FSDirectory.open(Paths.get("D:\\lucene3"));
IndexReader reader=DirectoryReader.open(dir);
IndexSearcher is=new IndexSearcher(reader);
String searchField="title";
String q="java";
Term t=new Term(searchField,q);
Query query=new TermQuery(t);
TopDocs hits=is.search(query, 10);
System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");
for(ScoreDoc scoreDoc:hits.scoreDocs){
Document doc=is.doc(scoreDoc.doc);
System.out.println(doc.get("author"));
}
reader.close();
}
}