参考网页:http://footman265.iteye.com/blog/849744
搞了一天半,终于利用Lucene工具做出了我想要的功能Demo,其中包括:为数据库建立增量索引、根据id从索引文件中删除索引、单字段查询、多字段查询、多条件查询,以及查询结果关键字高亮显示。今天晚些时候把这些功能进行了整理。看样子一时半会儿还下不了班,就把Demo的结果一一列举下来吧。
理论参考:http://lianj-lee.iteye.com/category/69005?show_full=true
Lucene3.0对数据库建立索引:http://269181927.iteye.com/blog/789779
1. 所需要的文件(见附件)
依赖包:
lucene-core-2.4.0.jar lucene工具包
lucene-highlighter-2.4.0.jar 高亮显示工具包
IKAnalyzer2.0.2OBF.jar 分词工具(支持字典分词)
mysql-connector-java-5.0.3-bin 连接MySQL的驱动
数据表:
pd_ugc.sql(所在数据库为lucenetest)
类文件:
在附件index.rar和test.rar,解压后放入java工程中的src下即可
2. 为数据库建立增量索引
参考网页:http://www.blogjava.net/laoding/articles/279230.html
- package index;
- //--------------------- Change Logs----------------------
- // <p>@author zhiqiang.zhang Initial Created at 2010-12-23<p>
- //-------------------------------------------------------
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.PrintWriter;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.ResultSet;
- import java.sql.Statement;
- import java.util.Date;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- //增量索引
- /*
- * 实现思路:首次查询数据库表所有记录,对每条记录建立索引,并将最后一条记录的id存储到storeId.txt文件中
- * 当新插入一条记录时,再建立索引时不必再对所有数据重新建一遍索引,
- * 可根据存放在storeId.txt文件中的id查出新插入的数据,只对新增的数据新建索引,并把新增的索引追加到原来的索引文件中
- * */
- public class IncrementIndex {
- public static void main(String[] args) {
- try {
- IncrementIndex index = new IncrementIndex();
- String path = "E:\\workspace2\\Test\\lucene_test\\poiIdext";//索引文件的存放路径
- String storeIdPath = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";//存储ID的路径
- String storeId = "";
- Date date1 = new Date();
- storeId = index.getStoreId(storeIdPath);
- ResultSet rs = index.getResult(storeId);
- System.out.println("开始建立索引。。。。");
- index.indexBuilding(path, storeIdPath, rs);
- Date date2 = new Date();
- System.out.println("耗时:"+(date2.getTime()-date1.getTime())+"ms");
- storeId = index.getStoreId(storeIdPath);
- System.out.println(storeId);//打印出这次存储起来的ID
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- public static void buildIndex(String indexFile, String storeIdFile) {
- try {
- String path = indexFile;//索引文件的存放路径
- String storeIdPath = storeIdFile;//存储ID的路径
- String storeId = "";
- storeId = getStoreId(storeIdPath);
- ResultSet rs = getResult(storeId);
- indexBuilding(path, storeIdPath, rs);
- storeId = getStoreId(storeIdPath);
- System.out.println(storeId);//打印出这次存储起来的ID
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- public static ResultSet getResult(String storeId) throws Exception {
- Class.forName("com.mysql.jdbc.Driver").newInstance();
- String url = "jdbc:mysql://localhost:3306/lucenetest";
- String userName = "root";
- String password = "****";
- Connection conn = DriverManager.getConnection(url, userName, password);
- Statement stmt = conn.createStatement();
- String sql = "select * from pd_ugc";
- ResultSet rs = stmt.executeQuery(sql + " where id > '" + storeId + "'order by id");
- return rs;
- }
- public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
- try {
- Analyzer luceneAnalyzer = new StandardAnalyzer();
- // 取得存储起来的ID,以判定是增量索引还是重新索引
- boolean isEmpty = true;
- try {
- File file = new File(storeIdPath);
- if (!file.exists()) {
- file.createNewFile();
- }
- FileReader fr = new FileReader(storeIdPath);
- BufferedReader br = new BufferedReader(fr);
- if (br.readLine() != null) {
- isEmpty = false;
- }
- br.close();
- fr.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- //isEmpty=false表示增量索引
- IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty);
- String storeId = "";
- boolean indexFlag = false;
- String id;
- String name;
- String address;
- String citycode;
- while (rs.next()) {
- id = rs.getInt("id") + "";
- name = rs.getString("name");
- address = rs.getString("address");
- citycode = rs.getString("citycode");
- writer.addDocument(Document(id, name, address, citycode));
- storeId = id;//将拿到的id给storeId,这种拿法不合理,这里为了方便
- indexFlag = true;
- }
- writer.optimize();
- writer.close();
- if (indexFlag) {
- // 将最后一个的ID存到磁盘文件中
- writeStoreId(storeIdPath, storeId);
- }
- return true;
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println("出错了" + e.getClass() + "\n 错误信息为: " + e.getMessage());
- return false;
- }
- }
- public static Document Document(String id, String name, String address, String citycode) {
- Document doc = new Document();
- doc.add(new Field("id", id, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
- doc.add(new Field("address", address, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("citycode", citycode, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
- return doc;
- }
- // 取得存储在磁盘中的ID
- public static String getStoreId(String path) {
- String storeId = "";
- try {
- File file = new File(path);
- if (!file.exists()) {
- file.createNewFile();
- }
- FileReader fr = new FileReader(path);
- BufferedReader br = new BufferedReader(fr);
- storeId = br.readLine();
- if (storeId == null || storeId == "") storeId = "0";
- br.close();
- fr.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- return storeId;
- }
- // 将ID写入到磁盘文件中
- public static boolean writeStoreId(String path, String storeId) {
- boolean b = false;
- try {
- File file = new File(path);
- if (!file.exists()) {
- file.createNewFile();
- }
- FileWriter fw = new FileWriter(path);
- PrintWriter out = new PrintWriter(fw);
- out.write(storeId);
- out.close();
- fw.close();
- b = true;
- } catch (IOException e) {
- e.printStackTrace();
- }
- return b;
- }
- }
package index;
//--------------------- Change Logs----------------------
// <p>@author zhiqiang.zhang Initial Created at 2010-12-23<p>
//-------------------------------------------------------
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
//增量索引
/*
* 实现思路:首次查询数据库表所有记录,对每条记录建立索引,并将最后一条记录的id存储到storeId.txt文件中
* 当新插入一条记录时,再建立索引时不必再对所有数据重新建一遍索引,
* 可根据存放在storeId.txt文件中的id查出新插入的数据,只对新增的数据新建索引,并把新增的索引追加到原来的索引文件中
* */
/**
 * Builds an incremental Lucene index over the {@code pd_ugc} database table.
 *
 * <p>The id of the last indexed row is persisted in a small text file. Each run
 * reads that id, selects only rows with a greater id, appends them to the index
 * and then stores the new last id. When the id file is empty the index is
 * (re)created from scratch.
 */
public class IncrementIndex {

    /** Selects the rows newer than the stored id, in ascending id order. */
    private static final String SELECT_NEW_ROWS_SQL =
            "select * from pd_ugc where id > ? order by id";

    /**
     * Demo entry point: indexes new rows using hard-coded paths and prints the
     * elapsed time and the id stored after the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String path = "E:\\workspace2\\Test\\lucene_test\\poiIdext"; // index directory
        String storeIdPath = "E:\\workspace2\\Test\\lucene_test\\storeId.txt"; // last-id file
        ResultSet rs = null;
        try {
            long start = System.currentTimeMillis();
            String storeId = getStoreId(storeIdPath);
            rs = getResult(storeId);
            System.out.println("开始建立索引。。。。");
            indexBuilding(path, storeIdPath, rs);
            System.out.println("耗时:" + (System.currentTimeMillis() - start) + "ms");
            // Print the id that was stored by this run.
            System.out.println(getStoreId(storeIdPath));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(rs);
        }
    }

    /**
     * Indexes all rows added since the last run and prints the id stored afterwards.
     * Failures are printed to stderr and not propagated.
     *
     * @param indexFile   directory that holds the Lucene index
     * @param storeIdFile file that holds the id of the last indexed row
     */
    public static void buildIndex(String indexFile, String storeIdFile) {
        ResultSet rs = null;
        try {
            rs = getResult(getStoreId(storeIdFile));
            indexBuilding(indexFile, storeIdFile, rs);
            // Print the id that was stored by this run.
            System.out.println(getStoreId(storeIdFile));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // This method opened the connection, so it is responsible for releasing it.
            closeQuietly(rs);
        }
    }

    /**
     * Queries the rows of {@code pd_ugc} whose id is greater than {@code storeId}.
     *
     * <p>The returned result set keeps its statement and connection open; the
     * caller must close them when done (reachable via
     * {@code rs.getStatement().getConnection()}).
     *
     * @param storeId id of the last indexed row; {@code null} is treated as "0"
     * @return the matching rows ordered by id
     * @throws Exception if the driver cannot be loaded or the query fails
     */
    public static ResultSet getResult(String storeId) throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        String url = "jdbc:mysql://localhost:3306/lucenetest";
        String userName = "root";
        String password = "****";
        Connection conn = DriverManager.getConnection(url, userName, password);
        try {
            // Bind the id as a parameter instead of concatenating it into the SQL text.
            PreparedStatement stmt = conn.prepareStatement(SELECT_NEW_ROWS_SQL);
            stmt.setString(1, storeId == null ? "0" : storeId);
            return stmt.executeQuery();
        } catch (SQLException e) {
            // Nothing will be handed to the caller, so release the connection here.
            try {
                conn.close();
            } catch (SQLException closeFailure) {
                closeFailure.printStackTrace();
            }
            throw e;
        }
    }

    /**
     * Adds one document per row of {@code rs} to the index and persists the id
     * of the last row.
     *
     * <p>The index is created from scratch when the id file is empty and appended
     * to otherwise. The result set is consumed but not closed.
     *
     * @param path        directory that holds the Lucene index
     * @param storeIdPath file that holds the id of the last indexed row
     * @param rs          rows to index; must expose id, name, address and citycode
     * @return {@code true} if indexing succeeded and, when rows were indexed, the
     *         last id was written; {@code false} on any failure
     */
    public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
        IndexWriter writer = null;
        try {
            Analyzer luceneAnalyzer = new StandardAnalyzer();
            // An empty id file means nothing was indexed yet: create a fresh index.
            // A failure to read the file aborts the run instead of wiping the index.
            boolean create = isStoreIdFileEmpty(storeIdPath);
            writer = new IndexWriter(path, luceneAnalyzer, create);

            String lastId = null;
            while (rs.next()) {
                String id = String.valueOf(rs.getInt("id"));
                String name = rs.getString("name");
                String address = rs.getString("address");
                String citycode = rs.getString("citycode");
                writer.addDocument(Document(id, name, address, citycode));
                // Rows arrive ordered by id, so the last one seen is the highest.
                lastId = id;
            }
            writer.optimize();
            writer.close();
            writer = null;

            if (lastId == null) {
                return true; // nothing new to index
            }
            // Persist the last id only after the index has been closed successfully.
            return writeStoreId(storeIdPath, lastId);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("出错了" + e.getClass() + "\n 错误信息为: " + e.getMessage());
            return false;
        } finally {
            if (writer != null) {
                // Release the index write lock even when indexing failed.
                try {
                    writer.close();
                } catch (Exception closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the Lucene document for one row. All four fields are stored and
     * tokenized. {@code null} values are indexed as empty strings because
     * {@code Field} rejects null values.
     *
     * @param id       row id
     * @param name     row name (searched field)
     * @param address  row address
     * @param citycode row city code (searched field)
     * @return the populated document
     */
    public static Document Document(String id, String name, String address, String citycode) {
        Document doc = new Document();
        doc.add(new Field("id", nullToEmpty(id), Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("name", nullToEmpty(name), Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("address", nullToEmpty(address), Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("citycode", nullToEmpty(citycode), Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Reads the id of the last indexed row from disk, creating the file if it
     * does not exist.
     *
     * @param path file that holds the id
     * @return the trimmed first line of the file, or "0" when the file is empty,
     *         blank or cannot be read
     */
    public static String getStoreId(String path) {
        String storeId = "0";
        BufferedReader br = null;
        try {
            File file = new File(path);
            if (!file.exists()) {
                file.createNewFile();
            }
            br = new BufferedReader(new FileReader(file));
            String line = br.readLine();
            // Compare by content; a reference comparison with "" never matches a line read from a file.
            if (line != null && line.trim().length() > 0) {
                storeId = line.trim();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
        return storeId;
    }

    /**
     * Overwrites the id file with {@code storeId}.
     *
     * @param path    file that holds the id
     * @param storeId id of the last indexed row
     * @return {@code true} if the id was written and the file closed without error
     */
    public static boolean writeStoreId(String path, String storeId) {
        FileWriter fw = null;
        try {
            // Write through FileWriter directly: PrintWriter would swallow I/O errors.
            fw = new FileWriter(path);
            fw.write(storeId);
            fw.close();
            fw = null;
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            if (fw != null) {
                try {
                    fw.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }

    /** Returns whether the id file has no first line, creating the file if it is missing. */
    private static boolean isStoreIdFileEmpty(String storeIdPath) throws IOException {
        File file = new File(storeIdPath);
        if (!file.exists()) {
            file.createNewFile();
        }
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            return br.readLine() == null;
        } finally {
            br.close();
        }
    }

    /** Maps {@code null} to the empty string. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Closes the result set together with its statement and connection, printing any failure. */
    private static void closeQuietly(ResultSet rs) {
        if (rs == null) {
            return;
        }
        Statement stmt = null;
        Connection conn = null;
        try {
            // Resolve the owners first; they cannot be looked up once the result set is closed.
            stmt = rs.getStatement();
            if (stmt != null) {
                conn = stmt.getConnection();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            rs.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
3. 索引操作
- package index;
- import java.io.IOException;
- import java.io.Reader;
- import java.io.StringReader;
- import java.util.ArrayList;
- import java.util.Date;
- import java.util.List;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.StopFilter;
- import org.apache.lucene.analysis.Token;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.queryParser.MultiFieldQueryParser;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.BooleanClause;
- import org.apache.lucene.search.BooleanQuery;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocCollector;
- import org.apache.lucene.search.highlight.Highlighter;
- import org.apache.lucene.search.highlight.QueryScorer;
- import org.apache.lucene.search.highlight.SimpleFragmenter;
- import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
- import org.mira.lucene.analysis.IK_CAnalyzer;
- public class IndexUtils {
- //0. 创建增量索引
- public static void buildIndex(String indexFile, String storeIdFile) {
- IncrementIndex.buildIndex(indexFile, storeIdFile);
- }
- //1. 单字段查询
- @SuppressWarnings("deprecation")
- public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field,
- String key) {
- try {
- Date date1 = new Date();
- QueryParser queryParser = new QueryParser(field, new StandardAnalyzer());
- Query query = queryParser.parse(key);
- Hits hits = indexSearcher.search(query);
- Date date2 = new Date();
- System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms");
- List<IndexResult> list = new ArrayList<IndexResult>();
- for (int i = 0; i < hits.length(); i++) {
- list.add(getIndexResult(hits.doc(i)));
- }
- return list;
- } catch (ParseException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
- //2. 多条件查询。这里实现的是and操作
- //注:要查询的字段必须是index的
- //即doc.add(new Field("pid", rs.getString("pid"), Field.Store.YES,Field.Index.TOKENIZED));
- @SuppressWarnings("deprecation")
- public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields,
- String[] keys) {
- try {
- BooleanQuery m_BooleanQuery = new BooleanQuery();
- if (keys != null && keys.length > 0) {
- for (int i = 0; i < keys.length; i++) {
- QueryParser queryParser = new QueryParser(fields[i], new StandardAnalyzer());
- Query query = queryParser.parse(keys[i]);
- m_BooleanQuery.add(query, BooleanClause.Occur.MUST);//and操作
- }
- Hits hits = indexSearcher.search(m_BooleanQuery);
- List<IndexResult> list = new ArrayList<IndexResult>();
- for (int i = 0; i < hits.length(); i++) {
- list.add(getIndexResult(hits.doc(i)));
- }
- return list;
- }
- } catch (ParseException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
- //3.高亮显示 实现了单条件查询
- //可改造为多条件查询
- public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) {
- try {
- QueryParser queryParser = new QueryParser("name", new StandardAnalyzer());
- Query query = queryParser.parse(key);
- TopDocCollector collector = new TopDocCollector(800);
- indexSearcher.search(query, collector);
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- Highlighter highlighter = null;
- SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>",
- "</font>");
- highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
- highlighter.setTextFragmenter(new SimpleFragmenter(200));
- List<IndexResult> list = new ArrayList<IndexResult>();
- Document doc;
- for (int i = 0; i < hits.length; i++) {
- //System.out.println(hits[i].score);
- doc = indexSearcher.doc(hits[i].doc);
- TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
- new StringReader(doc.get("name")));
- IndexResult ir = getIndexResult(doc);
- ir.setName(highlighter.getBestFragment(tokenStream, doc.get("name")));
- list.add(ir);
- }
- return list;
- } catch (ParseException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
- //4. 多字段查询
- @SuppressWarnings("deprecation")
- public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher,
- String[] fields, String key) {
- try {
- MultiFieldQueryParser mfq = new MultiFieldQueryParser(fields, new StandardAnalyzer());
- Query query = mfq.parse(key);
- Hits hits = indexSearcher.search(query);
- List<IndexResult> list = new ArrayList<IndexResult>();
- for (int i = 0; i < hits.length(); i++) {
- list.add(getIndexResult(hits.doc(i)));
- }
- return list;
- } catch (ParseException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return null;
- }
- //5. 删除索引
- public static void deleteIndex(String indexFile, String id) throws CorruptIndexException,
- IOException {
- IndexReader indexReader = IndexReader.open(indexFile);
- indexReader.deleteDocuments(new Term("id", id));
- indexReader.close();
- }
- //6. 一元分词
- @SuppressWarnings("deprecation")
- public static String Standard_Analyzer(String str) {
- Analyzer analyzer = new StandardAnalyzer();
- Reader r = new StringReader(str);
- StopFilter sf = (StopFilter) analyzer.tokenStream("", r);
- System.out.println("=====StandardAnalyzer====");
- System.out.println("分析方法:默认没有词只有字(一元分词)");
- Token t;
- String results = "";
- try {
- while ((t = sf.next()) != null) {
- System.out.println(t.termText());
- results = results + " " + t.termText();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return results;
- }
- //7. 字典分词
- @SuppressWarnings("deprecation")
- public static String ik_CAnalyzer(String str) {
- Analyzer analyzer = new IK_CAnalyzer();
- Reader r = new StringReader(str);
- TokenStream ts = (TokenStream) analyzer.tokenStream("", r);
- System.out.println("=====IK_CAnalyzer====");
- System.out.println("分析方法:字典分词,正反双向搜索");
- Token t;
- String results = "";
- try {
- while ((t = ts.next()) != null) {
- System.out.println(t.termText());
- results = results + " " + t.termText();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return results;
- }
- //在结果中搜索
- public static void queryFromResults() {
- }
- //组装对象
- public static IndexResult getIndexResult(Document doc) {
- IndexResult ir = new IndexResult();
- ir.setId(doc.get("id"));
- ir.setName(doc.get("name"));
- ir.setAddress(doc.get("address"));
- ir.setCitycode(doc.get("citycode"));
- return ir;
- }
- }
package index;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.mira.lucene.analysis.IK_CAnalyzer;
/**
 * Static helpers for building, querying, highlighting and deleting from the
 * Lucene index of {@code pd_ugc} rows.
 *
 * <p>All query methods return an empty list (never {@code null}) when nothing
 * matches, when the input is empty, or when parsing/searching fails; failures
 * are printed to stderr.
 */
public class IndexUtils {

    /**
     * 0. Builds the incremental index by delegating to {@code IncrementIndex.buildIndex}.
     *
     * @param indexFile   directory that holds the Lucene index
     * @param storeIdFile file that holds the id of the last indexed row
     */
    public static void buildIndex(String indexFile, String storeIdFile) {
        IncrementIndex.buildIndex(indexFile, storeIdFile);
    }

    /**
     * 1. Single-field query. Prints the elapsed search time.
     *
     * @param indexSearcher searcher opened on the index
     * @param field         field to search
     * @param key           query text in QueryParser syntax
     * @return the matching results, empty on failure
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field,
            String key) {
        try {
            long start = System.currentTimeMillis();
            Query query = new QueryParser(field, new StandardAnalyzer()).parse(key);
            Hits hits = indexSearcher.search(query);
            System.out.println("耗时:" + (System.currentTimeMillis() - start) + "ms");
            return toResults(hits);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new ArrayList<IndexResult>();
    }

    /**
     * 2. Multi-condition query: {@code keys[i]} is searched in {@code fields[i]}
     * and all conditions must match (AND). The queried fields must have been
     * indexed, e.g. added with {@code Field.Index.TOKENIZED}.
     *
     * @param indexSearcher searcher opened on the index
     * @param fields        field names, one per key
     * @param keys          query texts in QueryParser syntax
     * @return the matching results; empty when {@code keys} is null/empty or on failure
     * @throws IllegalArgumentException if {@code fields} has fewer entries than {@code keys}
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields,
            String[] keys) {
        if (keys == null || keys.length == 0) {
            return new ArrayList<IndexResult>();
        }
        if (fields == null || fields.length < keys.length) {
            throw new IllegalArgumentException("fields must supply one field name per key: keys="
                    + keys.length + ", fields=" + (fields == null ? 0 : fields.length));
        }
        try {
            BooleanQuery allConditions = new BooleanQuery();
            for (int i = 0; i < keys.length; i++) {
                Query condition = new QueryParser(fields[i], new StandardAnalyzer()).parse(keys[i]);
                allConditions.add(condition, BooleanClause.Occur.MUST); // AND
            }
            return toResults(indexSearcher.search(allConditions));
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new ArrayList<IndexResult>();
    }

    /**
     * 3. Single-condition query on the "name" field with keyword highlighting
     * (could be extended to multiple conditions). At most 800 hits are returned.
     * Matched terms in the name are wrapped in a red {@code <font>} tag; when no
     * highlighted fragment is produced the stored name is kept as is.
     *
     * @param indexSearcher searcher opened on the index
     * @param key           query text in QueryParser syntax
     * @return the matching results with highlighted names, empty on failure
     */
    public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) {
        try {
            // One analyzer serves both query parsing and re-tokenizing each hit.
            Analyzer analyzer = new StandardAnalyzer();
            Query query = new QueryParser("name", analyzer).parse(key);
            TopDocCollector collector = new TopDocCollector(800);
            indexSearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>",
                    "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(200));

            List<IndexResult> list = new ArrayList<IndexResult>();
            for (int i = 0; i < hits.length; i++) {
                Document doc = indexSearcher.doc(hits[i].doc);
                IndexResult ir = getIndexResult(doc);
                String name = doc.get("name");
                if (name != null) {
                    TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name));
                    String fragment = highlighter.getBestFragment(tokenStream, name);
                    // getBestFragment yields null when no term was highlighted; keep the plain name then.
                    if (fragment != null) {
                        ir.setName(fragment);
                    }
                }
                list.add(ir);
            }
            return list;
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new ArrayList<IndexResult>();
    }

    /**
     * 4. Multi-field query: the same key is searched in every given field.
     *
     * @param indexSearcher searcher opened on the index
     * @param fields        fields to search
     * @param key           query text in QueryParser syntax
     * @return the matching results, empty on failure
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher,
            String[] fields, String key) {
        try {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
            return toResults(indexSearcher.search(parser.parse(key)));
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new ArrayList<IndexResult>();
    }

    /**
     * 5. Deletes every document whose "id" term equals {@code id}.
     *
     * @param indexFile directory that holds the Lucene index
     * @param id        id of the documents to delete
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read or written
     */
    public static void deleteIndex(String indexFile, String id) throws CorruptIndexException,
            IOException {
        IndexReader indexReader = IndexReader.open(indexFile);
        try {
            indexReader.deleteDocuments(new Term("id", id));
        } finally {
            // Always close so the deletion is flushed and the reader is released.
            indexReader.close();
        }
    }

    /**
     * 6. Tokenizes {@code str} with {@code StandardAnalyzer} and prints each token.
     *
     * @param str text to tokenize
     * @return the tokens, each preceded by a single space
     */
    @SuppressWarnings("deprecation")
    public static String Standard_Analyzer(String str) {
        Analyzer analyzer = new StandardAnalyzer();
        System.out.println("=====StandardAnalyzer====");
        System.out.println("分析方法:默认没有词只有字(一元分词)");
        return tokenize(analyzer, str);
    }

    /**
     * 7. Tokenizes {@code str} with the dictionary-based {@code IK_CAnalyzer}
     * and prints each token.
     *
     * @param str text to tokenize
     * @return the tokens, each preceded by a single space
     */
    @SuppressWarnings("deprecation")
    public static String ik_CAnalyzer(String str) {
        Analyzer analyzer = new IK_CAnalyzer();
        System.out.println("=====IK_CAnalyzer====");
        System.out.println("分析方法:字典分词,正反双向搜索");
        return tokenize(analyzer, str);
    }

    /** Search within previous results: placeholder, not implemented. */
    public static void queryFromResults() {
    }

    /**
     * Copies the stored id, name, address and citycode fields of a document
     * into a new result object.
     *
     * @param doc document retrieved from the index
     * @return the populated result
     */
    public static IndexResult getIndexResult(Document doc) {
        IndexResult ir = new IndexResult();
        ir.setId(doc.get("id"));
        ir.setName(doc.get("name"));
        ir.setAddress(doc.get("address"));
        ir.setCitycode(doc.get("citycode"));
        return ir;
    }

    /** Converts every hit into a result object, in hit order. */
    @SuppressWarnings("deprecation")
    private static List<IndexResult> toResults(Hits hits) throws IOException {
        List<IndexResult> list = new ArrayList<IndexResult>();
        for (int i = 0; i < hits.length(); i++) {
            list.add(getIndexResult(hits.doc(i)));
        }
        return list;
    }

    /** Prints each token the analyzer produces for {@code str} and joins them, each preceded by a space. */
    @SuppressWarnings("deprecation")
    private static String tokenize(Analyzer analyzer, String str) {
        // Use the TokenStream base type; the concrete stream class is an analyzer implementation detail.
        TokenStream stream = analyzer.tokenStream("", new StringReader(str));
        StringBuilder joined = new StringBuilder();
        try {
            Token token;
            while ((token = stream.next()) != null) {
                String text = token.termText();
                System.out.println(text);
                joined.append(" ").append(text);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                stream.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        return joined.toString();
    }
}
查询索引结果对象:IndexResult
- package index;
- public class IndexResult {
- private String id;
- private String name;
- private String address;
- private String citycode;
- public String getId() {
- return id;
- }
- public void setId(String id) {
- this.id = id;
- }
- public String getName() {
- return name;
- }
- public void setName(String name) {
- this.name = name;
- }
- public String getAddress() {
- return address;
- }
- public void setAddress(String address) {
- this.address = address;
- }
- public String getCitycode() {
- return citycode;
- }
- public void setCitycode(String citycode) {
- this.citycode = citycode;
- }
- }
package index;
/**
 * Plain value holder for one search hit: the stored id, name, address and
 * city code of an indexed row. All properties start out as {@code null}.
 */
public class IndexResult {

    /** Row id as stored in the index. */
    private String id;

    /** Row name; may carry highlight markup. */
    private String name;

    /** Row address. */
    private String address;

    /** Row city code. */
    private String citycode;

    // ---- readers ----

    /** @return the id, or {@code null} if unset */
    public String getId() {
        return this.id;
    }

    /** @return the name, or {@code null} if unset */
    public String getName() {
        return this.name;
    }

    /** @return the address, or {@code null} if unset */
    public String getAddress() {
        return this.address;
    }

    /** @return the city code, or {@code null} if unset */
    public String getCitycode() {
        return this.citycode;
    }

    // ---- writers ----

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @param address the address to store */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @param citycode the city code to store */
    public void setCitycode(String citycode) {
        this.citycode = citycode;
    }
}
4. 测试类
- package test;
- /**
- * $Id$
- * Copyright 2009-2010 Oak Pacific Interactive. All rights reserved.
- */
- import index.IndexResult;
- import index.IndexUtils;
- import java.util.Date;
- import java.util.List;
- import org.apache.lucene.search.IndexSearcher;
- public class Test {
- //存放索引文件
- private static String indexFile = "E:\\workspace2\\Test\\lucene_test\\poiIdext";
- //存放id
- private static String storeIdFile = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";
- public static void main(String[] args) throws Exception {
- //0. 创建增量索引
- IndexUtils.buildIndex(indexFile, storeIdFile);
- IndexSearcher indexSearcher = new IndexSearcher(indexFile);
- String key = IndexUtils.ik_CAnalyzer("静安中心");
- //1.单字段查询
- Date date1 = new Date();
- List<IndexResult> list = IndexUtils.queryByOneKey(indexSearcher, "name", key);
- Date date2 = new Date();
- System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
- + "条=======================================单字段查询");
- //printResults(list);
- //2.多条件查询
- String[] fields = { "name", "citycode" };
- String[] keys = { IndexUtils.ik_CAnalyzer("静安中心"), "0000" };
- date1 = new Date();
- list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys);
- date2 = new Date();
- System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
- + "条\n===============================多条件查询");
- printResults(list);
- //3.高亮显示 单字段查询
- System.out.println("\n\n");
- date1 = new Date();
- list = IndexUtils.highlight(indexSearcher, key);
- date2 = new Date();
- System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
- + "条\n======================================高亮显示");
- // printResults(list);
- //4. 多字段查询
- date1 = new Date();
- list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key);
- date2 = new Date();
- System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
- + "条\n=====================================多字段查询");
- // printResults(list);
- //5. 删除索引中的字段 根据id进行删除
- IndexUtils.deleteIndex(indexFile, "123");
- }
- //打印结果
- public static void printResults(List<IndexResult> list) {
- if (list != null && list.size() > 0) {
- for (int i = 0; i < list.size(); i++) {
- System.out.println(list.get(i).getId() + "," + list.get(i).getName() + ","
- + list.get(i).getAddress() + "," + list.get(i).getCitycode()+"--->"+i);
- }
- }
- }
- }
package test;
/**
* $Id$
* Copyright 2009-2010 Oak Pacific Interactive. All rights reserved.
*/
import index.IndexResult;
import index.IndexUtils;
import java.util.Date;
import java.util.List;
import org.apache.lucene.search.IndexSearcher;
/**
 * Demo driver: builds the incremental index, then runs the single-field,
 * multi-condition, highlighted and multi-field queries, printing the elapsed
 * time and hit count of each, and finally deletes the document with id "123".
 */
public class Test {
    // Directory that holds the index files.
    private static String indexFile = "E:\\workspace2\\Test\\lucene_test\\poiIdext";
    // File that holds the id of the last indexed row.
    private static String storeIdFile = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";

    /**
     * Runs the demo against the hard-coded index location.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, searched or modified
     */
    public static void main(String[] args) throws Exception {
        // 0. Build the incremental index.
        IndexUtils.buildIndex(indexFile, storeIdFile);
        IndexSearcher indexSearcher = new IndexSearcher(indexFile);
        try {
            String key = IndexUtils.ik_CAnalyzer("静安中心");

            // 1. Single-field query.
            long start = System.currentTimeMillis();
            List<IndexResult> list = IndexUtils.queryByOneKey(indexSearcher, "name", key);
            report(start, list, "条=======================================单字段查询");
            //printResults(list);

            // 2. Multi-condition query.
            String[] fields = { "name", "citycode" };
            String[] keys = { IndexUtils.ik_CAnalyzer("静安中心"), "0000" };
            start = System.currentTimeMillis();
            list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys);
            report(start, list, "条\n===============================多条件查询");
            printResults(list);

            // 3. Highlighted single-field query.
            System.out.println("\n\n");
            start = System.currentTimeMillis();
            list = IndexUtils.highlight(indexSearcher, key);
            report(start, list, "条\n======================================高亮显示");
            // printResults(list);

            // 4. Multi-field query.
            start = System.currentTimeMillis();
            list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key);
            report(start, list, "条\n=====================================多字段查询");
            // printResults(list);
        } finally {
            // Release the index files held by the searcher even if a query fails.
            indexSearcher.close();
        }
        // 5. Delete a document from the index by id.
        IndexUtils.deleteIndex(indexFile, "123");
    }

    /**
     * Prints each result as "id,name,address,citycode--->position".
     *
     * @param list results to print; {@code null} or empty prints nothing
     */
    public static void printResults(List<IndexResult> list) {
        if (list == null) {
            return;
        }
        int position = 0;
        for (IndexResult result : list) {
            System.out.println(result.getId() + "," + result.getName() + ","
                    + result.getAddress() + "," + result.getCitycode() + "--->" + position);
            position++;
        }
    }

    /** Prints the time elapsed since {@code start} and the hit count (0 for a null list) followed by {@code label}. */
    private static void report(long start, List<IndexResult> list, String label) {
        long elapsed = System.currentTimeMillis() - start;
        // The query helpers may hand back null on failure; count that as zero hits.
        int count = (list == null) ? 0 : list.size();
        System.out.println("耗时:" + elapsed + "ms\n" + count + label);
    }
}
5. 其它
全文索引:
目前的情况是:搜索hello时,可以搜到"hello world"、"hi hello, how are you",但"worldhello"搜不出来(其中的hello不是一个独立的词)。
默认情况下,QueryParser不支持以通配符打头的查询(如,*ook)。不过在Lucene 2.1版本以后,可以通过调用QueryParser.setAllowLeadingWildcard( true )方法打开这一功能。注意,这是一个开销很大的操作:它需要扫描索引中全部记号的列表,来寻找匹配这个模式的词。(译注:高效支持这种后缀查询的办法是,建立反序的记号表,Lucene没有实现这一模式。)http://www.codechina.org/faq/show/42/
支持空格分词搜索:"厕所 26 沈阳" 这是三个词
不支持:“厕所沈阳”这是一个词
Lucene能实现“在搜索结果中搜索”的功能么,也就是说第二个搜索仅在第一个搜索结果中进行?
http://www.codechina.org/faq/show/63/
可以。主要有两种做法:
- 使用QueryFilter把第一个查询当作一个过滤器处理。(你可以在Lucene的邮件列表里面搜索 QueryFilter, Doug Cutting(Lucene的最初作者)反对这种做法。)
- 用BooleanQuery把前后两个查询结合起来,前一个查询使用 required选项。
我们推荐使用BooleanQuery的方法。
============
// Create the standard text analyzer; it can handle Chinese text
// (NOTE(review): apparently by splitting it into single characters — confirm).
Analyzer luceneAnalyzer = new StandardAnalyzer();
indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
// This creates a new index-writing tool.
// 1st argument: the directory in which the index is built.
// 2nd argument: the text analyzer; the standard one is used here, but a custom one can be supplied.
// 3rd argument: if true, the index directory (e.g. c:\\index) is cleared before the index is built.
poi_data_ugc搜索中,索引放在内存里还是磁盘上????
针对于lucene使用和优化
http://hi.baidu.com/lewutian/blog/item/48a86d03de58b984d43f7c1b.html
Lucene入门实例(1):索引文本文件
http://www.java3z.com/cwbwebhome/article/article5/51021.html