Lucene 4.10 + MySQL 5.5 创建数据库表索引(Lucene 学习序列1)
Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。
Code:
package com.qiuzhping.lucene;
/*
* System Abbrev :
* system Name :
* Component No :
* Component Name:
* File name :QueryDataFromDb.java
* Author :Peter.Qiu
* Date :2015年7月28日
* Description : Indexes the rows of the MySQL "student" table with Lucene and runs a term query against the index.
*/
/* Updation record 1:
* Updation date : 2015年7月28日
* Updator : Peter.Qiu
* Trace No: <Trace No>
* Updation No: <Updation No>
* Updation Content: <List all contents of updation and all methods updated.>
*/
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Random;
import java.util.UUID;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Demo that indexes the rows of the MySQL {@code student} table with Lucene
 * and then runs an exact-match term query against the resulting index.
 *
 * <p>Typical usage is {@link #index()} followed by
 * {@link #searchByTerm(String, String, int)}; see {@link #main(String[])}.
 *
 * @author Peter.Qiu
 * @version [Version NO, 2015年7月28日]
 */
public class QueryDataFromDb {
    /** File-system location of the Lucene index. */
    private static final String INDEX_PATH = "C:/lucene/index02";
    /** Number of batches used both for bulk inserts and for paged reads. */
    private static final int BATCH_COUNT = 10;
    /** Number of rows per batch. */
    private static final int BATCH_SIZE = 100000;

    /** Lazily opened index directory; see {@link #openDirectory()}. */
    private Directory directory = null;
    /** When {@code false}, {@link #insertData()} inserts nothing. */
    private static boolean insertFlag = true;

    /**
     * Opens a new JDBC connection to the local {@code hpsdb} MySQL database.
     * The caller is responsible for closing the returned connection.
     *
     * @return a freshly opened connection
     * @throws SQLException if the connection cannot be established
     * @throws java.lang.ClassNotFoundException if the MySQL driver class is not on the classpath
     */
    public static Connection getConnection() throws SQLException,
            java.lang.ClassNotFoundException {
        String url = "jdbc:mysql:///hpsdb";
        Class.forName("com.mysql.jdbc.Driver");
        String userName = "root";
        String password = "123456";
        return DriverManager.getConnection(url, userName, password);
    }

    /**
     * Fills the {@code student} table with generated rows (random UUID name,
     * random math score in [0, 100)), using one multi-row insert per batch.
     * Does nothing when {@code insertFlag} is {@code false}.
     *
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws SQLException if connecting or inserting fails
     */
    public static void insertData() throws ClassNotFoundException, SQLException {
        if (!insertFlag) {
            return;
        }
        // try-with-resources closes the statement and connection even when an insert fails.
        try (Connection conn = getConnection();
                Statement st = conn.createStatement()) {
            Random random = new Random();
            for (int j = 0; j < BATCH_COUNT; j++) {
                // NOTE(review): one statement carries BATCH_SIZE rows; presumably the
                // server's max_allowed_packet was raised to accept it - confirm.
                StringBuilder sql = new StringBuilder("insert student (name,math) values");
                for (int i = 0; i < BATCH_SIZE; i++) {
                    if (i > 0) {
                        sql.append(',');
                    }
                    String uuid = UUID.randomUUID().toString().replace("-", "");
                    sql.append("('").append(uuid).append("',")
                            .append(random.nextInt(100)).append(')');
                }
                st.execute(sql.toString());
            }
        }
    }

    /**
     * Reads the {@code student} table page by page and adds one Lucene document
     * per row (fields {@code id}, {@code name}, {@code math}, all stored and
     * indexed as single un-analyzed terms), then prints the number of rows indexed.
     *
     * @throws SQLException if reading from the database fails
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws IOException if the index cannot be opened, written or closed
     */
    public void index() throws SQLException, ClassNotFoundException, IOException {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
        // NOTE(review): CREATE_OR_APPEND means every run adds all rows again to an
        // existing index; switch to OpenMode.CREATE if a rebuild is intended.
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        conf.setMaxBufferedDocs(100);

        // Data generation is switched off here, so insertData() is a no-op;
        // remove the assignment to populate the table before indexing.
        insertFlag = false;
        insertData();

        long count = 0; // number of rows actually indexed
        // Resources are closed in reverse order, on success and on failure alike.
        try (IndexWriter writer = new IndexWriter(openDirectory(), conf);
                Connection conn = getConnection();
                Statement st = conn.createStatement()) {
            for (int i = 0; i < BATCH_COUNT; i++) {
                // Without ORDER BY the pages of a LIMIT query are not guaranteed to be
                // disjoint; ordering by id makes the paging deterministic.
                String query = "select * from student order by id limit "
                        + i * BATCH_SIZE + "," + BATCH_SIZE;
                try (ResultSet result = st.executeQuery(query)) {
                    while (result.next()) {
                        Document document = new Document();
                        document.add(new StringField("id", result.getString("id"),
                                Field.Store.YES));
                        document.add(new StringField("name", result.getString("name"),
                                Field.Store.YES));
                        document.add(new StringField("math", result.getString("math"),
                                Field.Store.YES));
                        writer.addDocument(document);
                        count++;
                    }
                }
            }
        }
        System.out.println("Total record : " + count);
    }

    /**
     * Creates a searcher over a newly opened reader on the index directory.
     * The caller owns the underlying reader and should close it via
     * {@code searcher.getIndexReader().close()} when done.
     *
     * @return a searcher on the current state of the index
     * @throws IOException if the index cannot be opened
     */
    public IndexSearcher getSearcher() throws IOException {
        IndexReader reader = DirectoryReader.open(openDirectory());
        return new IndexSearcher(reader);
    }

    /**
     * Runs an exact-match term query and prints the hit count followed by the
     * stored {@code id}, {@code name} and {@code math} of the top hits.
     *
     * @param field name of the indexed field to match
     * @param name exact term value to look for in {@code field}
     * @param num maximum number of hits to print
     * @throws IOException if the index cannot be read
     */
    public void searchByTerm(String field, String name, int num) throws IOException {
        IndexSearcher searcher = getSearcher();
        try {
            // WildcardQuery: pattern matching
            // TermQuery: exact matching
            Query query = new TermQuery(new Term(field, name));
            TopDocs tds = searcher.search(query, num);
            System.out.println("count:" + tds.totalHits);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("id:" + doc.get("id"));
                System.out.println("name:" + doc.get("name"));
                System.out.println("math:" + doc.get("math"));
            }
        } finally {
            // The reader was opened by getSearcher() for this search only.
            searcher.getIndexReader().close();
        }
    }

    /**
     * Builds the index, then times a lookup of two documents whose
     * {@code math} field equals "90".
     *
     * @param args unused
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws SQLException if database access fails
     * @throws IOException if index access fails
     */
    public static void main(String[] args) throws ClassNotFoundException,
            SQLException, IOException {
        QueryDataFromDb indexUtil = new QueryDataFromDb();
        indexUtil.index();
        int i = 0;
        long start = System.currentTimeMillis();
        // Look up the first 2 records whose math score is 90.
        System.out.println("查找前90分前2名的信息");
        indexUtil.searchByTerm("math", "90", 2);
        System.out.println(i + " Spend time:" + (System.currentTimeMillis() - start) + " ms");
    }

    /** Opens the index directory on first use and reuses it afterwards. */
    private Directory openDirectory() throws IOException {
        if (directory == null) {
            directory = FSDirectory.open(new File(INDEX_PATH));
        }
        return directory;
    }
}
测试的结果是:
Total record : 1000001
查找前90分前2名的信息
count:36212
id:298904
name:636ab6012e4b429ea54d176f28f5fa1c
math:90
id:299156
name:085af1feb39b42f0be1c6e9f3f814526
math:90
0 Spend time:805 ms
涉及到Lucene 核心包链接:http://mirrors.hust.edu.cn/apache/lucene/java/4.10.4/
代码片段涉及到:
lucene-analyzers-common-4.10.4.jar
lucene-core-4.10.4.jar
lucene-queryparser-4.10.4.jar
mysql-connector-java-5.1.35.jar