Lucene是一个全文检索引擎工具包,下面简单地介绍一下Lucene常用的API。
1:对一个目录下面的所有文件创建索引:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene index over every file under a local directory, indexing both
 * the file name and the file content.
 *
 * <p>Steps:
 * <ol>
 *   <li>create the analyzer;</li>
 *   <li>create the {@code IndexWriter}, the core component of the indexing process;</li>
 *   <li>create one {@code Document} per file and add its fields;</li>
 *   <li>write the document to the index;</li>
 *   <li>release the resources.</li>
 * </ol>
 *
 * @author 韩利鹏
 */
public class CreateLocal {
    // Local directory whose files are to be indexed.
    private static String file_dir = "D:/decstop/luceneTxt";
    // Directory in which the index is stored.
    private static String index_dir = "D:/decstop/index";

    /**
     * Indexes every regular file found under {@code filedir}, descending into
     * subdirectories, and writes the result to the index directory.
     *
     * <p>A single {@code IndexWriter} is opened for the whole traversal. Opening
     * a new writer per subdirectory (as a naive recursive call would) fails,
     * because the outer writer still holds the index write lock.
     *
     * @param filedir path of the directory to index
     * @throws IllegalArgumentException if a directory cannot be listed
     * @throws Exception if reading a file or writing the index fails
     */
    public static void index(String filedir) throws Exception {
        // IKAnalyzer is a Chinese analyzer. Lucene's own StandardAnalyzer
        // segments Chinese text poorly and is not recommended for it.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

        // try-with-resources closes the writer (then the directory) even when
        // indexing fails half-way, so the write lock is always released.
        try (Directory directory = FSDirectory.open(new File(index_dir));
             IndexWriter writer = new IndexWriter(directory, cfg)) {
            indexDirectory(writer, new File(filedir));
        }
    }

    /** Recursively adds one document per regular file under {@code dir} to {@code writer}. */
    private static void indexDirectory(IndexWriter writer, File dir) throws Exception {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IllegalArgumentException("Not a readable directory: " + dir);
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                indexDirectory(writer, entry);
            } else {
                writer.addDocument(toDocument(entry));
            }
        }
    }

    /** Builds the document for one file: a "fileName" field and a "context" field with its text. */
    private static Document toDocument(File f) throws Exception {
        StringBuilder text = new StringBuilder();
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that it matches the encoding of the files being indexed.
        try (BufferedReader reader = new BufferedReader(new FileReader(f))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put a separator back so the
                // last word of one line is not glued to the first word of the next.
                text.append(line).append('\n');
            }
        }

        Document doc = new Document();
        // Store.YES keeps the original value in the index so that it can be
        // read back from search results.
        doc.add(new TextField("fileName", f.getName(), Store.YES));
        doc.add(new TextField("context", text.toString(), Store.YES));
        return doc;
    }

    public static void main(String[] args) throws Exception {
        index(file_dir);
    }
}
2:搜索
package lucene;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches the local index.
 *
 * <p>Steps:
 * <ol>
 *   <li>create the {@code IndexSearcher} over the index directory;</li>
 *   <li>create the {@code Query} (the search condition);</li>
 *   <li>execute the query;</li>
 *   <li>iterate over the hits;</li>
 *   <li>release the resources.</li>
 * </ol>
 *
 * @author 韩利鹏
 */
public class SearchIndexOnLocal {

    /**
     * Runs the query {@code text1 OR text2} against the "fileName" field and
     * prints the total hit count followed by the stored "fileName" and
     * "context" values of up to ten matching documents.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed or executed
     */
    public static void doSearch() throws Exception {
        File indexFile = new File("D:/decstop/index");

        // try-with-resources guarantees that the reader and the directory are
        // closed even when parsing or searching throws.
        try (Directory directory = FSDirectory.open(indexFile);
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher search = new IndexSearcher(reader);

            // First argument: default field to search. Second argument: the analyzer,
            // which should be the same one that was used when the documents were indexed.
            // NOTE(review): if the index was built with a different analyzer than
            // StandardAnalyzer, the query terms may not line up with the indexed terms.
            QueryParser parser = new QueryParser("fileName", new StandardAnalyzer());
            // The argument is an expression in Lucene's query syntax.
            Query query = parser.parse("text1 OR text2");

            // Second argument: maximum number of hits to return.
            TopDocs topDocs = search.search(query, 10);

            // Total number of documents matching the query (may exceed the hits returned).
            int count = topDocs.totalHits;
            System.out.println("查询出来的记录:" + count);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit by its document id.
                Document doc = search.doc(scoreDoc.doc);
                System.out.println("文件名字:" + doc.get("fileName"));
                System.out.println("文件的内容:" + doc.get("context"));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        doSearch();
    }
}
3:删除和修改索引
package lucene;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Deletes and updates documents in the local index.
 */
public class UpdateIndexOnLocal {
    // Directory in which the index is stored.
    private static final String INDEX_DIR = "D:/decstop/index";

    /**
     * Deletes every document whose "fileName" field contains the term {@code text1}.
     *
     * <p>When deleting, it is best to match on a value that is unique to the
     * document. {@code IndexWriter.deleteAll()} would wipe the whole index and
     * should not be used casually.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void deleteIndex() throws Exception {
        // try-with-resources closes the writer (releasing the write lock) even on failure.
        try (IndexWriter writer = openWriter()) {
            // Arguments of Term: field name, value to match.
            writer.deleteDocuments(new Term("fileName", "text1"));
        }
    }

    /**
     * Replaces the documents whose "fileName" field contains the term
     * {@code text1} with a new document holding the text {@code abcdefghigk}.
     *
     * <p>The text is stored under the "context" field so that it appears under
     * the same field name the rest of the index uses for file content.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void updateIndex() throws Exception {
        try (IndexWriter writer = openWriter()) {
            Document doc = new Document();
            doc.add(new TextField("context", "abcdefghigk", Store.YES));
            // updateDocument deletes the documents matching the term and then adds doc.
            // NOTE(review): doc carries no "fileName" field, so the replacement
            // document cannot be found by file name afterwards - confirm this is intended.
            writer.updateDocument(new Term("fileName", "text1"), doc);
        }
    }

    /** Opens an IndexWriter on the index directory; the caller is responsible for closing it. */
    private static IndexWriter openWriter() throws Exception {
        // NOTE(review): the analyzer is applied to documents added through this writer;
        // confirm StandardAnalyzer matches the analyzer the index was built with.
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        return new IndexWriter(directory, cfg);
    }

    public static void main(String[] args) throws Exception {
        //deleteIndex();
        updateIndex();
    }
}
Lucene的搜索语法: