lucene初学,
创建索引,查询等
需要 lucene-4.6.0 的 jar 包,我直接导入了全部的 jar 包!
Util 自己写的
package com.test.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Small helper class around Lucene 4.6 for building an index from text files
 * (or a JDBC result set), searching it and highlighting matches.
 *
 * <p>All helpers report failures by printing the stack trace and returning
 * {@code null} / {@code false}, which is the convention the callers in this
 * file rely on.
 */
public class LuceneUtils {

    /**
     * Opens an {@link IndexWriter} on the given index directory using a
     * {@link StandardAnalyzer}.
     *
     * @param indexpath path of the directory in which the index files are stored
     * @param version   Lucene compatibility version, e.g. {@code Version.LUCENE_46}
     * @param openMode  how the index is opened (create, append, create-or-append)
     * @return the writer, or {@code null} if it could not be opened (the
     *         {@link IOException} is printed); the caller must close it
     */
    public static IndexWriter createIndexWriter(String indexpath,
            Version version, OpenMode openMode) {
        IndexWriter writer = null;
        Directory dir = null;
        try {
            // Directory that holds the index files.
            dir = FSDirectory.open(new File(indexpath));
            // The analyzer splits field text into the terms that get indexed.
            Analyzer analyzer = new StandardAnalyzer(version);
            IndexWriterConfig iwc = new IndexWriterConfig(version, analyzer);
            iwc.setOpenMode(openMode);
            writer = new IndexWriter(dir, iwc);
        } catch (IOException e) {
            e.printStackTrace();
            // The writer was never created, so nobody else will close the directory.
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        return writer;
    }

    /**
     * Indexes every regular file ending in {@code .txt} directly inside
     * {@code dirpath} (sub-directories are not visited). Each file becomes one
     * document with an unstored {@code content} field and a stored {@code path}
     * field.
     *
     * @param dirpath   directory containing the text files to index
     * @param indexpath directory in which the index files are stored
     * @param openMode  how the index is opened, e.g. {@code OpenMode.CREATE_OR_APPEND}
     * @return {@code true} if all files were indexed and the writer was closed
     *         successfully; {@code false} if {@code dirpath} cannot be listed,
     *         the writer cannot be opened, or an I/O error occurs
     */
    public static boolean createDocumentIndex(String dirpath, String indexpath,
            OpenMode openMode) {
        File dirfile = new File(dirpath);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] txtfiles = dirfile.listFiles();
        if (txtfiles == null) {
            System.err.println("Cannot list directory to index: " + dirpath);
            return false;
        }
        IndexWriter writer = createIndexWriter(indexpath, Version.LUCENE_46,
                openMode);
        if (writer == null) {
            // createIndexWriter already printed the cause.
            return false;
        }
        boolean bool = false;
        try {
            long startTime = System.currentTimeMillis();
            for (File txtfile : txtfiles) {
                if (txtfile.isFile() && txtfile.getName().endsWith(".txt")) {
                    indexTextFile(writer, txtfile);
                }
            }
            // Number of documents currently held by the writer.
            System.out.println("numDocs" + writer.numDocs());
            // Replacement for the removed optimize(): merges the index down to
            // one segment. This is an expensive operation.
            writer.forceMerge(1);
            // Closing the writer commits the index files to disk.
            writer.close();
            long endTime = System.currentTimeMillis();
            System.out.println("共花了" + (endTime - startTime) + "毫秒将文档增加到索引中"
                    + indexpath);
            bool = true;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (!bool) {
                // Discard the partial work and release the writer so that a
                // later run is not blocked by a writer left open.
                rollbackQuietly(writer);
            }
        }
        return bool;
    }

    /**
     * Adds one text file to the index as a single document.
     *
     * <p>NOTE(review): {@link FileReader} decodes with the platform default
     * charset; confirm that matches the encoding of the indexed files.
     */
    private static void indexTextFile(IndexWriter writer, File txtfile)
            throws IOException {
        System.out.println("文件" + txtfile.getCanonicalPath()
                + "正在索引中。。。");
        Reader read = new FileReader(txtfile);
        try {
            Document doc = new Document();
            // File content: tokenized from the reader, not stored.
            doc.add(new TextField("content", read));
            // Absolute path: stored so search results can point at the file.
            doc.add(new TextField("path", txtfile.getAbsolutePath(),
                    Store.YES));
            writer.addDocument(doc);
        } finally {
            // Always release the file handle, also when addDocument fails.
            try {
                read.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }

    /** Rolls back and closes the writer, printing (not propagating) any I/O error. */
    private static void rollbackQuietly(IndexWriter writer) {
        try {
            writer.rollback();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code queryStr} against the {@code content} field and runs the
     * search, returning at most 10000 hits.
     *
     * @param queryStr text to search for, in classic query-parser syntax
     * @param searcher searcher to run the query on
     * @return the search result, or {@code null} if {@code searcher} or
     *         {@code queryStr} is {@code null}, the query cannot be parsed, or
     *         an I/O error occurs
     */
    public static TopDocs queryString(String queryStr, IndexSearcher searcher) {
        if (searcher == null) {
            System.out.println("索引目录不存在");
            return null;
        }
        if (queryStr == null) {
            // The parser cannot handle a null query string.
            return null;
        }
        TopDocs tDocs = null;
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
            QueryParser qp = new QueryParser(Version.LUCENE_46, "content",
                    analyzer);
            Query query = qp.parse(queryStr);
            tDocs = searcher.search(query, null, 10000);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return tDocs;
    }

    /**
     * Creates an {@link IndexSearcher} over the index stored at
     * {@code indexpath}.
     *
     * @param indexpath directory containing the index
     * @return the searcher, or {@code null} if the directory does not exist;
     *         the caller should close {@code searcher.getIndexReader()} when done
     * @throws IOException if the index cannot be opened
     */
    public static IndexSearcher getSearcher(String indexpath)
            throws IOException {
        File indexDir = new File(indexpath);
        if (!indexDir.exists()) {
            System.out.println("索引目录不存在");
            return null;
        }
        FSDirectory directory = FSDirectory.open(indexDir);
        boolean opened = false;
        try {
            IndexReader irIndexReader = DirectoryReader.open(directory);
            opened = true;
            return new IndexSearcher(irIndexReader);
        } finally {
            if (!opened) {
                // No reader was created, so the directory would otherwise leak.
                directory.close();
            }
        }
    }

    /**
     * Returns the best fragment of a stored field with the query terms wrapped
     * in {@code <font color="red">...</font>}.
     *
     * @param query query whose terms are highlighted
     * @param doc   document holding the field
     * @param field name of the stored field to highlight
     * @return the highlighted fragment; the unmodified field value if nothing
     *         matched; or {@code null} if the field has no stored value or
     *         highlighting fails
     */
    public static String toHighlighter(Query query, Document doc, String field) {
        String text = doc.get(field);
        if (text == null) {
            // Field is missing or was not stored: nothing to highlight.
            return null;
        }
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
                    "<font color=\"red\">", "</font>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter,
                    new QueryScorer(query));
            // Tokenize under the real field name instead of a hard-coded one.
            TokenStream tokenStream = analyzer.tokenStream(field,
                    new StringReader(text));
            String highlighterStr = highlighter.getBestFragment(tokenStream,
                    text);
            return highlighterStr == null ? text : highlighterStr;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Demo: indexes the {@code name} column of a SQL query result into a new
     * Lucene index.
     *
     * <p>{@code indexpath} and {@code sql} are empty placeholders and the
     * connection settings are hard-coded; fill them in before use.
     * NOTE(review): credentials should not live in source code.
     */
    public void sqlTest() {
        String indexpath = "";
        String sql = "";
        String url = "jdbc:mysql://localhost:3306/myuser";
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        IndexWriter indexWriter = null;
        boolean committed = false;
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(url, "root", "root");
            stmt = conn.prepareStatement(sql);
            rs = stmt.executeQuery();
            indexWriter = createIndexWriter(indexpath, Version.LUCENE_46,
                    OpenMode.CREATE);
            if (indexWriter == null) {
                // createIndexWriter already printed the cause.
                return;
            }
            while (rs.next()) {
                String name = rs.getString("name");
                System.out.println(name);
                if (name == null) {
                    // A SQL NULL cannot be used as a field value; skip the row.
                    continue;
                }
                Document doc = new Document();
                doc.add(new TextField("name", name, Store.YES));
                indexWriter.addDocument(doc);
            }
            // Without close() the added documents are never committed to disk.
            indexWriter.close();
            committed = true;
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null && !committed) {
                rollbackQuietly(indexWriter);
            }
            closeJdbc(rs, stmt, conn);
        }
    }

    /** Closes the JDBC resources in reverse order of creation; nulls are skipped. */
    private static void closeJdbc(ResultSet rs, PreparedStatement stmt,
            Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
测试类
package com.test.lucene;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
/**
 * Manual smoke test: indexes the {@code .txt} files of a fixed directory,
 * searches the index for a fixed term and prints the hits.
 */
public class LuceneTest {

    /**
     * Builds the index and, if that succeeded, runs the query and prints every
     * returned document followed by the fields of the last one.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String dirpath = "E:\\aaDocumentTest";
        String indexpath = "E:\\aa";
        String queryStr = "怎样";
        boolean result = LuceneUtils.createDocumentIndex(dirpath, indexpath,
                OpenMode.CREATE);
        if (!result) {
            return;
        }
        IndexSearcher searcher = null;
        try {
            searcher = LuceneUtils.getSearcher(indexpath);
            // queryString returns null for a missing searcher or a failed query.
            TopDocs tDocs = LuceneUtils.queryString(queryStr, searcher);
            if (tDocs == null) {
                return;
            }
            System.out.println("一共索引出:" + tDocs.totalHits + "个文件!");
            List<IndexableField> fList = null;
            // Iterate over the hits actually returned: totalHits counts every
            // match, while scoreDocs is capped by the limit passed to search().
            for (int j = 0; j < tDocs.scoreDocs.length; j++) {
                Document doc = searcher.doc(tDocs.scoreDocs[j].doc);
                System.out.println("File:" + doc.toString());
                System.out.println("File:" + doc.getField("path"));
                fList = doc.getFields();
            }
            // fList stays null when there were no hits.
            if (fList != null) {
                for (IndexableField indexableField : fList) {
                    System.out.println(indexableField.name());
                    System.out.println(indexableField.stringValue());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                // Release the index files held open by the underlying reader.
                try {
                    searcher.getIndexReader().close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}