// 浅层学习Lucene — a first look at Lucene indexing and search
package com.itheima.lucene;
import java.awt.TextField;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Hands-on introduction to Lucene 4.x: building an index, running the main
 * query types, and deleting/updating documents. Each {@code @Test} method is
 * an independent demo operating on the on-disk index at {@link #INDEX_PATH}.
 *
 * <p>Uses {@link IKAnalyzer} instead of {@link StandardAnalyzer} because the
 * sample data is Chinese text (StandardAnalyzer splits CJK one character per
 * token; IK produces word-level tokens).
 */
public class Lucene {

    /** Index storage location. FSDirectory = file system; RAMDirectory = in-memory. */
    private static final String INDEX_PATH = "D://index";

    /**
     * Indexes a single document.
     *
     * <p>Field-type cheat sheet:
     * <ul>
     *   <li>{@code LongField}/{@code IntField}/{@code DoubleField}... — numeric fields</li>
     *   <li>{@code StringField} — indexed but NOT analyzed; the value becomes one Term</li>
     *   <li>{@code TextField} — indexed AND analyzed by the writer's analyzer; many Terms</li>
     * </ul>
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    @Test
    public void lucene() throws IOException {
        // Data to index is wrapped in a Document.
        Document doc = new Document();
        doc.add(new LongField("id", 1L, Store.YES));
        doc.add(new StringField("title", "Goolge", Store.YES));
        doc.add(new org.apache.lucene.document.TextField("content", "副总裁离开谷歌加盟Facebook", Store.YES));
        // StandardAnalyzer analyzer = new StandardAnalyzer();
        IKAnalyzer analyzer = new IKAnalyzer();
        // Writer config: first argument is the Lucene version (Version.LATEST),
        // second argument is the analyzer used for TextFields.
        IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        // try-with-resources: the writer (and its index lock) is released even on failure.
        try (IndexWriter indexWriter = new IndexWriter(directory, conf)) {
            indexWriter.addDocument(doc);
            indexWriter.commit();
        }
    }

    /**
     * Parses a query string with the IK analyzer and prints every matching document.
     *
     * @throws Exception on parse or I/O failure
     */
    @Test
    public void search() throws Exception {
        // Build the query: the search text is analyzed against the "content" field.
        QueryParser queryParser = new QueryParser("content", new IKAnalyzer());
        Query query = queryParser.parse("副总裁");
        // Open a reader on the index directory.
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_PATH)));
        try {
            IndexSearcher indexsearch = new IndexSearcher(reader);
            // Fetch all hits (top Integer.MAX_VALUE).
            TopDocs topDocs = indexsearch.search(query, Integer.MAX_VALUE);
            for (ScoreDoc score : topDocs.scoreDocs) {
                // ScoreDoc carries the internal doc id; resolve it to the stored Document.
                Document doucument = indexsearch.doc(score.doc);
                System.out.println("文档得分=" + score.score);
                System.out.println("搜索结果集=" + doucument.get("id"));
                System.out.println("搜索到的结果集title = " + doucument.get("title"));
                System.out.println("搜索到的结果集content = " + doucument.get("content"));
            }
        } finally {
            // Fix: the original leaked the reader; always release it.
            reader.close();
        }
    }

    /**
     * Bulk-indexes 30 documents with ids 0..29.
     *
     * <p>Fix: the original created a fresh analyzer/config/Directory/IndexWriter and
     * committed+closed on EVERY loop iteration. The writer is now created once,
     * all documents are added, and a single commit is made.
     *
     * @throws Exception if the index cannot be written
     */
    @Test
    public void add() throws Exception {
        // StandardAnalyzer analyzer = new StandardAnalyzer();
        IKAnalyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        try (IndexWriter indexWriter = new IndexWriter(directory, conf)) {
            for (long i = 0; i < 30; i++) {
                Document doc = new Document();
                doc.add(new LongField("id", i, Store.YES));
                doc.add(new StringField("title", "Goolge", Store.YES));
                doc.add(new org.apache.lucene.document.TextField("content", "副总裁离开谷歌加盟Facebook", Store.YES));
                indexWriter.addDocument(doc);
            }
            indexWriter.commit();
        }
    }

    /**
     * Shared search helper: runs the given query against the index and prints
     * score, id, title and content of every hit.
     *
     * @param query the Lucene query to execute
     * @throws IOException if the index cannot be read
     */
    public void baseSearch(Query query) throws IOException {
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File(INDEX_PATH)));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            // topDocs: the top-n ranked results.
            TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);
            for (ScoreDoc sd : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(sd.doc);
                System.out.println("文档的得分:" + sd.score);
                System.out.println("搜索到的结果集id = " + document.get("id"));
                System.out.println("搜索到的结果集title = " + document.get("title"));
                System.out.println("搜索到的结果集content = " + document.get("content"));
            }
        } finally {
            reader.close();
        }
    }

    /**
     * TermQuery: single-term search. The input is treated as one complete term;
     * it is NOT analyzed/split.
     */
    @Test
    public void termQuery() {
        Query query = new TermQuery(new Term("content", "传"));
        try {
            baseSearch(query);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes documents from the index (here: everything via deleteAll;
     * the commented lines show term-based conditional deletion).
     *
     * @throws IOException if the index cannot be written
     */
    @Test
    public void del() throws IOException {
        // StandardAnalyzer analyzer = new StandardAnalyzer();
        IKAnalyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
        try (IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(INDEX_PATH)), conf)) {
            // Conditional delete:
            // Term term = new Term("content", "副总裁");
            // indexWriter.deleteDocuments(term);
            // Delete everything:
            indexWriter.deleteAll();
            indexWriter.commit();
        }
    }

    /**
     * Update = delete-then-add: every document matching the term is removed and
     * replaced by the single new document.
     *
     * @throws IOException if the index cannot be written
     */
    @Test
    public void updataIndex() throws IOException {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
        try (IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(INDEX_PATH)), conf)) {
            Term term = new Term("content", "副总裁");
            Document doc = new Document();
            doc.add(new LongField("id", 1L, Store.YES));
            doc.add(new StringField("title", "Goolge", Store.YES));
            doc.add(new org.apache.lucene.document.TextField("content", "播客,程序员!", Store.YES));
            indexWriter.updateDocument(term, doc);
            indexWriter.commit();
        }
    }

    /**
     * FuzzyQuery: fuzzy search based on edit distance — at most 2 edits
     * (insert/delete/substitute/transpose) between the query term and a match.
     */
    @Test
    public void fuzzyQueryTest() {
        Query query = new FuzzyQuery(new Term("content", "马程序"));
        try {
            baseSearch(query);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * WildcardQuery: wildcard search. '?' matches exactly one character,
     * '*' matches zero or more characters.
     */
    @Test
    public void wildcardQueryTest() {
        Query query = new WildcardQuery(new Term("content", "?智播*"));
        try {
            baseSearch(query);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * NumericRangeQuery: numeric range search over the LongField "id".
     */
    @Test
    public void NumericRangeQueryTest() {
        // newLongRange(field, min, max, minInclusive, maxInclusive)
        Query query = NumericRangeQuery.newLongRange("id", 1L, 3L, true, true);
        try {
            baseSearch(query);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * BooleanQuery: combines sub-queries.
     * MUST + MUST = intersection of result sets; SHOULD + SHOULD = union.
     */
    @Test
    public void booleanQueryTest() {
        BooleanQuery query = new BooleanQuery();
        NumericRangeQuery<Long> rangeQuery = NumericRangeQuery.newLongRange("id", 1L, 2L, true, false);
        FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("content", "啊方法老大和"));
        // Union of the range hits and the fuzzy hits.
        query.add(rangeQuery, Occur.SHOULD);
        query.add(fuzzyQuery, Occur.SHOULD);
        try {
            baseSearch(query);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}