Lucene3实例总结

参考网页:http://footman265.iteye.com/blog/849744

搞了一天半,终于用Lucene做出了一个Demo,实现了我想要的功能,包括:为数据库建立增量索引、根据id从索引文件中删除索引、单字段查询、多字段查询、多条件查询,以及查询结果关键字高亮显示。今天晚些时候对这些功能进行了整理。看样子一时半会还下不了班,就把Demo的结果一一列举下来吧……

理论参考:http://lianj-lee.iteye.com/category/69005?show_full=true

Lucene3.0对数据库建立索引:http://269181927.iteye.com/blog/789779

1. 所需要的文件(见附件)

依赖包:

lucene-core-2.4.0.jar lucene工具包

lucene-highlighter-2.4.0.jar 高亮显示工具包

IKAnalyzer2.0.2OBF.jar 分词工具(支持字典分词)

mysql-connector-java-5.0.3-bin.jar 连接MySQL的JDBC驱动

数据表:

pd_ugc.sql(所在数据库为lucenetest)

类文件:

在附件index.rar和test.rar,解压后放入java工程中的src下即可

2. 为数据库建立增量索引

参考网页:http://www.blogjava.net/laoding/articles/279230.html

Java代码 复制代码 收藏代码
  1. package index;
  2. //--------------------- Change Logs----------------------
  3. // <p>@author zhiqiang.zhang Initial Created at 2010-12-23<p>
  4. //-------------------------------------------------------
  5. import java.io.BufferedReader;
  6. import java.io.File;
  7. import java.io.FileReader;
  8. import java.io.FileWriter;
  9. import java.io.IOException;
  10. import java.io.PrintWriter;
  11. import java.sql.Connection;
  12. import java.sql.DriverManager;
  13. import java.sql.ResultSet;
  14. import java.sql.Statement;
  15. import java.util.Date;
  16. import org.apache.lucene.analysis.Analyzer;
  17. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  18. import org.apache.lucene.document.Document;
  19. import org.apache.lucene.document.Field;
  20. import org.apache.lucene.index.IndexWriter;
  21. //增量索引
  22. /*
  23. * 实现思路:首次查询数据库表所有记录,对每条记录建立索引,并将最后一条记录的id存储到storeId.txt文件中
  24. * 当新插入一条记录时,再建立索引时不必再对所有数据重新建一遍索引,
  25. * 可根据存放在storeId.txt文件中的id查出新插入的数据,只对新增的数据新建索引,并把新增的索引追加到原来的索引文件中
  26. * */
  27. public class IncrementIndex {
  28. public static void main(String[] args) {
  29. try {
  30. IncrementIndex index = new IncrementIndex();
  31. String path = "E:\\workspace2\\Test\\lucene_test\\poiIdext";//索引文件的存放路径
  32. String storeIdPath = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";//存储ID的路径
  33. String storeId = "";
  34. Date date1 = new Date();
  35. storeId = index.getStoreId(storeIdPath);
  36. ResultSet rs = index.getResult(storeId);
  37. System.out.println("开始建立索引。。。。");
  38. index.indexBuilding(path, storeIdPath, rs);
  39. Date date2 = new Date();
  40. System.out.println("耗时:"+(date2.getTime()-date1.getTime())+"ms");
  41. storeId = index.getStoreId(storeIdPath);
  42. System.out.println(storeId);//打印出这次存储起来的ID
  43. } catch (Exception e) {
  44. e.printStackTrace();
  45. }
  46. }
  47. public static void buildIndex(String indexFile, String storeIdFile) {
  48. try {
  49. String path = indexFile;//索引文件的存放路径
  50. String storeIdPath = storeIdFile;//存储ID的路径
  51. String storeId = "";
  52. storeId = getStoreId(storeIdPath);
  53. ResultSet rs = getResult(storeId);
  54. indexBuilding(path, storeIdPath, rs);
  55. storeId = getStoreId(storeIdPath);
  56. System.out.println(storeId);//打印出这次存储起来的ID
  57. } catch (Exception e) {
  58. e.printStackTrace();
  59. }
  60. }
  61. public static ResultSet getResult(String storeId) throws Exception {
  62. Class.forName("com.mysql.jdbc.Driver").newInstance();
  63. String url = "jdbc:mysql://localhost:3306/lucenetest";
  64. String userName = "root";
  65. String password = "****";
  66. Connection conn = DriverManager.getConnection(url, userName, password);
  67. Statement stmt = conn.createStatement();
  68. String sql = "select * from pd_ugc";
  69. ResultSet rs = stmt.executeQuery(sql + " where id > '" + storeId + "'order by id");
  70. return rs;
  71. }
  72. public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
  73. try {
  74. Analyzer luceneAnalyzer = new StandardAnalyzer();
  75. // 取得存储起来的ID,以判定是增量索引还是重新索引
  76. boolean isEmpty = true;
  77. try {
  78. File file = new File(storeIdPath);
  79. if (!file.exists()) {
  80. file.createNewFile();
  81. }
  82. FileReader fr = new FileReader(storeIdPath);
  83. BufferedReader br = new BufferedReader(fr);
  84. if (br.readLine() != null) {
  85. isEmpty = false;
  86. }
  87. br.close();
  88. fr.close();
  89. } catch (IOException e) {
  90. e.printStackTrace();
  91. }
  92. //isEmpty=false表示增量索引
  93. IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty);
  94. String storeId = "";
  95. boolean indexFlag = false;
  96. String id;
  97. String name;
  98. String address;
  99. String citycode;
  100. while (rs.next()) {
  101. id = rs.getInt("id") + "";
  102. name = rs.getString("name");
  103. address = rs.getString("address");
  104. citycode = rs.getString("citycode");
  105. writer.addDocument(Document(id, name, address, citycode));
  106. storeId = id;//将拿到的id给storeId,这种拿法不合理,这里为了方便
  107. indexFlag = true;
  108. }
  109. writer.optimize();
  110. writer.close();
  111. if (indexFlag) {
  112. // 将最后一个的ID存到磁盘文件中
  113. writeStoreId(storeIdPath, storeId);
  114. }
  115. return true;
  116. } catch (Exception e) {
  117. e.printStackTrace();
  118. System.out.println("出错了" + e.getClass() + "\n 错误信息为: " + e.getMessage());
  119. return false;
  120. }
  121. }
  122. public static Document Document(String id, String name, String address, String citycode) {
  123. Document doc = new Document();
  124. doc.add(new Field("id", id, Field.Store.YES, Field.Index.TOKENIZED));
  125. doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
  126. doc.add(new Field("address", address, Field.Store.YES, Field.Index.TOKENIZED));
  127. doc.add(new Field("citycode", citycode, Field.Store.YES, Field.Index.TOKENIZED));//查询字段
  128. return doc;
  129. }
  130. // 取得存储在磁盘中的ID
  131. public static String getStoreId(String path) {
  132. String storeId = "";
  133. try {
  134. File file = new File(path);
  135. if (!file.exists()) {
  136. file.createNewFile();
  137. }
  138. FileReader fr = new FileReader(path);
  139. BufferedReader br = new BufferedReader(fr);
  140. storeId = br.readLine();
  141. if (storeId == null || storeId == "") storeId = "0";
  142. br.close();
  143. fr.close();
  144. } catch (Exception e) {
  145. e.printStackTrace();
  146. }
  147. return storeId;
  148. }
  149. // 将ID写入到磁盘文件中
  150. public static boolean writeStoreId(String path, String storeId) {
  151. boolean b = false;
  152. try {
  153. File file = new File(path);
  154. if (!file.exists()) {
  155. file.createNewFile();
  156. }
  157. FileWriter fw = new FileWriter(path);
  158. PrintWriter out = new PrintWriter(fw);
  159. out.write(storeId);
  160. out.close();
  161. fw.close();
  162. b = true;
  163. } catch (IOException e) {
  164. e.printStackTrace();
  165. }
  166. return b;
  167. }
  168. }
package index;
//--------------------- Change Logs----------------------
// <p>@author zhiqiang.zhang Initial Created at 2010-12-23<p>
//-------------------------------------------------------
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

//增量索引
/*
 * 实现思路:首次查询数据库表所有记录,对每条记录建立索引,并将最后一条记录的id存储到storeId.txt文件中
 *         当新插入一条记录时,再建立索引时不必再对所有数据重新建一遍索引,
 *         可根据存放在storeId.txt文件中的id查出新插入的数据,只对新增的数据新建索引,并把新增的索引追加到原来的索引文件中
 * */
/**
 * Builds a Lucene index incrementally from the {@code pd_ugc} database table.
 *
 * <p>The id of the last indexed row is kept in a small text file (the "store id"
 * file). Each run selects only the rows whose id is greater than that value, adds
 * them to the index and then updates the file. When the file holds no id yet, the
 * index is created from scratch; otherwise new documents are appended to it.
 */
public class IncrementIndex {

    /**
     * Demo entry point: indexes the new rows using hard-coded paths and prints the
     * elapsed time and the id that was stored.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String path = "E:\\workspace2\\Test\\lucene_test\\poiIdext"; // index directory
        String storeIdPath = "E:\\workspace2\\Test\\lucene_test\\storeId.txt"; // last-id file
        ResultSet rs = null;
        try {
            long start = System.currentTimeMillis();
            String storeId = getStoreId(storeIdPath);
            rs = getResult(storeId);
            System.out.println("开始建立索引。。。。");
            indexBuilding(path, storeIdPath, rs);
            System.out.println("耗时:" + (System.currentTimeMillis() - start) + "ms");
            // Print the id that was stored by this run.
            System.out.println(getStoreId(storeIdPath));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // This method opened the JDBC resources (via getResult), so it releases them.
            closeQuietly(rs);
        }
    }

    /**
     * Indexes all rows that were added since the last run.
     *
     * <p>Errors are printed to standard error and not propagated.
     *
     * @param indexFile   directory that holds the Lucene index
     * @param storeIdFile file that holds the id of the last indexed row
     */
    public static void buildIndex(String indexFile, String storeIdFile) {
        ResultSet rs = null;
        try {
            String storeId = getStoreId(storeIdFile);
            rs = getResult(storeId);
            indexBuilding(indexFile, storeIdFile, rs);
            // Print the id that was stored by this run.
            System.out.println(getStoreId(storeIdFile));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(rs);
        }
    }

    /**
     * Queries the rows of {@code pd_ugc} whose id is greater than {@code storeId},
     * ordered by id.
     *
     * <p>The returned result set keeps its statement and connection open; the
     * caller is responsible for closing all three when done.
     *
     * @param storeId id of the last indexed row; {@code null} is treated as "0"
     * @return the open result set
     * @throws Exception if the driver cannot be loaded or the query fails
     */
    public static ResultSet getResult(String storeId) throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        String url = "jdbc:mysql://localhost:3306/lucenetest";
        String userName = "root";
        String password = "****";
        Connection conn = DriverManager.getConnection(url, userName, password);
        try {
            // Bind the id as a parameter instead of concatenating it into the SQL text.
            // It is bound as a string so the comparison behaves like the former
            // quoted literal.
            PreparedStatement stmt =
                    conn.prepareStatement("select * from pd_ugc where id > ? order by id");
            stmt.setString(1, storeId == null ? "0" : storeId);
            return stmt.executeQuery();
        } catch (Exception e) {
            // The result set never reached the caller, so nobody else can close the connection.
            try {
                conn.close();
            } catch (SQLException closeError) {
                closeError.printStackTrace();
            }
            throw e;
        }
    }

    /**
     * Adds one Lucene document per row of {@code rs} to the index at {@code path}
     * and records the id of the last indexed row in {@code storeIdPath}.
     *
     * <p>The index is created from scratch when the store-id file has no content
     * and appended to otherwise. The index writer is always closed, so a failure
     * does not leave a stale write lock behind. If indexing fails part-way and the
     * writer can still be closed, the id of the last row that was added is
     * recorded so the next run does not index those rows again.
     *
     * @param path        directory that holds the Lucene index
     * @param storeIdPath file that holds the id of the last indexed row
     * @param rs          rows to index; must provide the columns {@code id},
     *                    {@code name}, {@code address} and {@code citycode}.
     *                    It is not closed by this method.
     * @return {@code true} if all rows were indexed and the writer closed cleanly,
     *         {@code false} otherwise
     */
    public static boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
        String lastIndexedId = null;
        boolean success = false;
        IndexWriter writer = null;
        try {
            Analyzer luceneAnalyzer = new StandardAnalyzer();
            // No stored id yet means a fresh index; otherwise append (incremental index).
            boolean create = !hasStoredId(storeIdPath);
            writer = new IndexWriter(path, luceneAnalyzer, create);
            while (rs.next()) {
                String id = rs.getInt("id") + "";
                writer.addDocument(Document(id, rs.getString("name"), rs.getString("address"),
                        rs.getString("citycode")));
                // Relies on the rows arriving ordered by id, so the last one is the highest.
                lastIndexedId = id;
            }
            writer.optimize();
            success = true;
        } catch (Exception e) {
            reportFailure(e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    reportFailure(e);
                    success = false;
                    // The added documents may not have been written; do not advance the id.
                    lastIndexedId = null;
                }
            }
        }
        if (lastIndexedId != null) {
            // Persist the id of the last indexed row.
            writeStoreId(storeIdPath, lastIndexedId);
        }
        return success;
    }

    /**
     * Builds the Lucene document for one database row. All four fields are stored
     * and tokenized.
     *
     * @param id       row id
     * @param name     value of the name column; {@code null} is indexed as ""
     * @param address  value of the address column; {@code null} is indexed as ""
     * @param citycode value of the citycode column; {@code null} is indexed as ""
     * @return the new document
     */
    public static Document Document(String id, String name, String address, String citycode) {
        Document doc = new Document();
        // Field rejects null values, so a NULL column would otherwise abort the whole run.
        doc.add(new Field("id", nullToEmpty(id), Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("name", nullToEmpty(name), Field.Store.YES, Field.Index.TOKENIZED)); // searched field
        doc.add(new Field("address", nullToEmpty(address), Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("citycode", nullToEmpty(citycode), Field.Store.YES, Field.Index.TOKENIZED)); // searched field
        return doc;
    }

    /**
     * Reads the id of the last indexed row from {@code path}, creating the file if
     * it does not exist.
     *
     * @param path file that holds the id
     * @return the trimmed first line of the file; "0" if the file is empty or its
     *         first line is blank; "" if the file could not be read
     */
    public static String getStoreId(String path) {
        String storeId = "";
        BufferedReader br = null;
        try {
            File file = new File(path);
            if (!file.exists()) {
                file.createNewFile();
            }
            br = new BufferedReader(new FileReader(path));
            String line = br.readLine();
            // Compare by content: "==" on strings only tests reference identity.
            storeId = (line == null || line.trim().length() == 0) ? "0" : line.trim();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(br);
        }
        return storeId;
    }

    /**
     * Writes {@code storeId} to {@code path}, replacing any previous content. The
     * file is created if it does not exist.
     *
     * @param path    file that holds the id
     * @param storeId id to store
     * @return {@code true} if the id was written, {@code false} on an I/O error
     */
    public static boolean writeStoreId(String path, String storeId) {
        boolean written = false;
        FileWriter fw = null;
        try {
            // Write through FileWriter directly: PrintWriter swallows IOExceptions,
            // which would report success even when nothing was written.
            fw = new FileWriter(path);
            fw.write(storeId);
            fw.flush();
            written = true;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fw != null) {
                try {
                    fw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                    written = false;
                }
            }
        }
        return written;
    }

    /** Returns whether the store-id file has at least one line; creates the file if missing. */
    private static boolean hasStoredId(String storeIdPath) {
        BufferedReader br = null;
        try {
            File file = new File(storeIdPath);
            if (!file.exists()) {
                file.createNewFile();
            }
            br = new BufferedReader(new FileReader(storeIdPath));
            return br.readLine() != null;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(br);
        }
    }

    /** Prints the stack trace and the error summary for a failed indexing step. */
    private static void reportFailure(Exception e) {
        e.printStackTrace();
        System.out.println("出错了" + e.getClass() + "\n   错误信息为:   " + e.getMessage());
    }

    /** Maps {@code null} to the empty string. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Closes a reader, printing (not propagating) any error. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes a result set together with its statement and connection, printing any error. */
    private static void closeQuietly(ResultSet rs) {
        if (rs == null) {
            return;
        }
        Statement stmt = null;
        Connection conn = null;
        try {
            // Look these up before closing the result set.
            stmt = rs.getStatement();
            if (stmt != null) {
                conn = stmt.getConnection();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            rs.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

3. 索引操作

Java代码 复制代码 收藏代码
  1. package index;
  2. import java.io.IOException;
  3. import java.io.Reader;
  4. import java.io.StringReader;
  5. import java.util.ArrayList;
  6. import java.util.Date;
  7. import java.util.List;
  8. import org.apache.lucene.analysis.Analyzer;
  9. import org.apache.lucene.analysis.StopFilter;
  10. import org.apache.lucene.analysis.Token;
  11. import org.apache.lucene.analysis.TokenStream;
  12. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  13. import org.apache.lucene.document.Document;
  14. import org.apache.lucene.index.CorruptIndexException;
  15. import org.apache.lucene.index.IndexReader;
  16. import org.apache.lucene.index.Term;
  17. import org.apache.lucene.queryParser.MultiFieldQueryParser;
  18. import org.apache.lucene.queryParser.ParseException;
  19. import org.apache.lucene.queryParser.QueryParser;
  20. import org.apache.lucene.search.BooleanClause;
  21. import org.apache.lucene.search.BooleanQuery;
  22. import org.apache.lucene.search.Hits;
  23. import org.apache.lucene.search.IndexSearcher;
  24. import org.apache.lucene.search.Query;
  25. import org.apache.lucene.search.ScoreDoc;
  26. import org.apache.lucene.search.TopDocCollector;
  27. import org.apache.lucene.search.highlight.Highlighter;
  28. import org.apache.lucene.search.highlight.QueryScorer;
  29. import org.apache.lucene.search.highlight.SimpleFragmenter;
  30. import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
  31. import org.mira.lucene.analysis.IK_CAnalyzer;
  32. public class IndexUtils {
  33. //0. 创建增量索引
  34. public static void buildIndex(String indexFile, String storeIdFile) {
  35. IncrementIndex.buildIndex(indexFile, storeIdFile);
  36. }
  37. //1. 单字段查询
  38. @SuppressWarnings("deprecation")
  39. public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field,
  40. String key) {
  41. try {
  42. Date date1 = new Date();
  43. QueryParser queryParser = new QueryParser(field, new StandardAnalyzer());
  44. Query query = queryParser.parse(key);
  45. Hits hits = indexSearcher.search(query);
  46. Date date2 = new Date();
  47. System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms");
  48. List<IndexResult> list = new ArrayList<IndexResult>();
  49. for (int i = 0; i < hits.length(); i++) {
  50. list.add(getIndexResult(hits.doc(i)));
  51. }
  52. return list;
  53. } catch (ParseException e) {
  54. e.printStackTrace();
  55. } catch (IOException e) {
  56. e.printStackTrace();
  57. }
  58. return null;
  59. }
  60. //2. 多条件查询。这里实现的是and操作
  61. //注:要查询的字段必须是index的
  62. //即doc.add(new Field("pid", rs.getString("pid"), Field.Store.YES,Field.Index.TOKENIZED));
  63. @SuppressWarnings("deprecation")
  64. public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields,
  65. String[] keys) {
  66. try {
  67. BooleanQuery m_BooleanQuery = new BooleanQuery();
  68. if (keys != null && keys.length > 0) {
  69. for (int i = 0; i < keys.length; i++) {
  70. QueryParser queryParser = new QueryParser(fields[i], new StandardAnalyzer());
  71. Query query = queryParser.parse(keys[i]);
  72. m_BooleanQuery.add(query, BooleanClause.Occur.MUST);//and操作
  73. }
  74. Hits hits = indexSearcher.search(m_BooleanQuery);
  75. List<IndexResult> list = new ArrayList<IndexResult>();
  76. for (int i = 0; i < hits.length(); i++) {
  77. list.add(getIndexResult(hits.doc(i)));
  78. }
  79. return list;
  80. }
  81. } catch (ParseException e) {
  82. e.printStackTrace();
  83. } catch (IOException e) {
  84. e.printStackTrace();
  85. }
  86. return null;
  87. }
  88. //3.高亮显示 实现了单条件查询
  89. //可改造为多条件查询
  90. public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) {
  91. try {
  92. QueryParser queryParser = new QueryParser("name", new StandardAnalyzer());
  93. Query query = queryParser.parse(key);
  94. TopDocCollector collector = new TopDocCollector(800);
  95. indexSearcher.search(query, collector);
  96. ScoreDoc[] hits = collector.topDocs().scoreDocs;
  97. Highlighter highlighter = null;
  98. SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>",
  99. "</font>");
  100. highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
  101. highlighter.setTextFragmenter(new SimpleFragmenter(200));
  102. List<IndexResult> list = new ArrayList<IndexResult>();
  103. Document doc;
  104. for (int i = 0; i < hits.length; i++) {
  105. //System.out.println(hits[i].score);
  106. doc = indexSearcher.doc(hits[i].doc);
  107. TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
  108. new StringReader(doc.get("name")));
  109. IndexResult ir = getIndexResult(doc);
  110. ir.setName(highlighter.getBestFragment(tokenStream, doc.get("name")));
  111. list.add(ir);
  112. }
  113. return list;
  114. } catch (ParseException e) {
  115. e.printStackTrace();
  116. } catch (IOException e) {
  117. e.printStackTrace();
  118. }
  119. return null;
  120. }
  121. //4. 多字段查询
  122. @SuppressWarnings("deprecation")
  123. public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher,
  124. String[] fields, String key) {
  125. try {
  126. MultiFieldQueryParser mfq = new MultiFieldQueryParser(fields, new StandardAnalyzer());
  127. Query query = mfq.parse(key);
  128. Hits hits = indexSearcher.search(query);
  129. List<IndexResult> list = new ArrayList<IndexResult>();
  130. for (int i = 0; i < hits.length(); i++) {
  131. list.add(getIndexResult(hits.doc(i)));
  132. }
  133. return list;
  134. } catch (ParseException e) {
  135. e.printStackTrace();
  136. } catch (IOException e) {
  137. e.printStackTrace();
  138. }
  139. return null;
  140. }
  141. //5. 删除索引
  142. public static void deleteIndex(String indexFile, String id) throws CorruptIndexException,
  143. IOException {
  144. IndexReader indexReader = IndexReader.open(indexFile);
  145. indexReader.deleteDocuments(new Term("id", id));
  146. indexReader.close();
  147. }
  148. //6. 一元分词
  149. @SuppressWarnings("deprecation")
  150. public static String Standard_Analyzer(String str) {
  151. Analyzer analyzer = new StandardAnalyzer();
  152. Reader r = new StringReader(str);
  153. StopFilter sf = (StopFilter) analyzer.tokenStream("", r);
  154. System.out.println("=====StandardAnalyzer====");
  155. System.out.println("分析方法:默认没有词只有字(一元分词)");
  156. Token t;
  157. String results = "";
  158. try {
  159. while ((t = sf.next()) != null) {
  160. System.out.println(t.termText());
  161. results = results + " " + t.termText();
  162. }
  163. } catch (IOException e) {
  164. e.printStackTrace();
  165. }
  166. return results;
  167. }
  168. //7. 字典分词
  169. @SuppressWarnings("deprecation")
  170. public static String ik_CAnalyzer(String str) {
  171. Analyzer analyzer = new IK_CAnalyzer();
  172. Reader r = new StringReader(str);
  173. TokenStream ts = (TokenStream) analyzer.tokenStream("", r);
  174. System.out.println("=====IK_CAnalyzer====");
  175. System.out.println("分析方法:字典分词,正反双向搜索");
  176. Token t;
  177. String results = "";
  178. try {
  179. while ((t = ts.next()) != null) {
  180. System.out.println(t.termText());
  181. results = results + " " + t.termText();
  182. }
  183. } catch (IOException e) {
  184. e.printStackTrace();
  185. }
  186. return results;
  187. }
  188. //在结果中搜索
  189. public static void queryFromResults() {
  190. }
  191. //组装对象
  192. public static IndexResult getIndexResult(Document doc) {
  193. IndexResult ir = new IndexResult();
  194. ir.setId(doc.get("id"));
  195. ir.setName(doc.get("name"));
  196. ir.setAddress(doc.get("address"));
  197. ir.setCitycode(doc.get("citycode"));
  198. return ir;
  199. }
  200. }
package index;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.mira.lucene.analysis.IK_CAnalyzer;

/**
 * Static helpers for building, searching and maintaining the demo index:
 * incremental indexing, single-field / multi-condition / multi-field queries,
 * keyword highlighting, deletion by id and two tokenizing helpers.
 *
 * <p>The query methods print parse and I/O errors to standard error and return
 * {@code null} in that case.
 */
public class IndexUtils {

    /**
     * 0. Builds the incremental index by delegating to {@link IncrementIndex#buildIndex}.
     *
     * @param indexFile   directory that holds the Lucene index
     * @param storeIdFile file that holds the id of the last indexed row
     */
    public static void buildIndex(String indexFile, String storeIdFile) {
        IncrementIndex.buildIndex(indexFile, storeIdFile);
    }

    /**
     * 1. Single-field query. Also prints the time spent parsing and searching.
     *
     * @param indexSearcher searcher on the index
     * @param field         field to search
     * @param key           query text, in query-parser syntax
     * @return the matching entries, or {@code null} if parsing or searching failed
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByOneKey(IndexSearcher indexSearcher, String field,
            String key) {
        try {
            long start = System.currentTimeMillis();
            QueryParser queryParser = new QueryParser(field, new StandardAnalyzer());
            Query query = queryParser.parse(key);
            Hits hits = indexSearcher.search(query);
            System.out.println("耗时:" + (System.currentTimeMillis() - start) + "ms");
            return toResults(hits);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * 2. Multi-condition query: {@code keys[i]} is searched in {@code fields[i]}
     * and all conditions must match (AND).
     *
     * <p>Note: the queried fields must have been indexed, e.g.
     * {@code new Field("pid", value, Field.Store.YES, Field.Index.TOKENIZED)}.
     *
     * @param indexSearcher searcher on the index
     * @param fields        field names; needs at least as many entries as {@code keys}
     * @param keys          query texts, one per field
     * @return the matching entries, or {@code null} if {@code keys} is null or
     *         empty, or if parsing or searching failed
     * @throws IllegalArgumentException if {@code fields} is null or shorter than {@code keys}
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByMultiKeys(IndexSearcher indexSearcher, String[] fields,
            String[] keys) {
        if (keys == null || keys.length == 0) {
            return null;
        }
        // Fail with a clear message instead of an index error in the middle of the loop.
        if (fields == null || fields.length < keys.length) {
            throw new IllegalArgumentException("fields must provide one entry per key: got "
                    + (fields == null ? "null" : String.valueOf(fields.length)) + " field(s) for "
                    + keys.length + " key(s)");
        }
        try {
            BooleanQuery booleanQuery = new BooleanQuery();
            for (int i = 0; i < keys.length; i++) {
                QueryParser queryParser = new QueryParser(fields[i], new StandardAnalyzer());
                Query query = queryParser.parse(keys[i]);
                booleanQuery.add(query, BooleanClause.Occur.MUST); // AND
            }
            return toResults(indexSearcher.search(booleanQuery));
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * 3. Single-condition query on the {@code name} field with keyword
     * highlighting: in each result the name is replaced by its best-matching
     * fragment, with matched terms wrapped in {@code <font color='red'>} tags.
     * At most 800 hits are returned. When no fragment can be produced for a hit,
     * its plain stored name is kept.
     *
     * <p>Could be extended to multi-condition queries.
     *
     * @param indexSearcher searcher on the index
     * @param key           query text, in query-parser syntax
     * @return the matching entries, or {@code null} if parsing or searching failed
     */
    public static List<IndexResult> highlight(IndexSearcher indexSearcher, String key) {
        try {
            // One analyzer serves both query parsing and re-tokenizing every hit.
            Analyzer analyzer = new StandardAnalyzer();
            Query query = new QueryParser("name", analyzer).parse(key);
            TopDocCollector collector = new TopDocCollector(800);
            indexSearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>",
                    "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(200));

            List<IndexResult> list = new ArrayList<IndexResult>();
            for (int i = 0; i < hits.length; i++) {
                Document doc = indexSearcher.doc(hits[i].doc);
                IndexResult ir = getIndexResult(doc);
                String name = doc.get("name");
                if (name != null) {
                    TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name));
                    String fragment = highlighter.getBestFragment(tokenStream, name);
                    // getBestFragment yields null when nothing scores; keep the plain name then.
                    if (fragment != null) {
                        ir.setName(fragment);
                    }
                }
                list.add(ir);
            }
            return list;
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * 4. Multi-field query: the same query text is searched in every given field.
     *
     * @param indexSearcher searcher on the index
     * @param fields        fields to search
     * @param key           query text, in query-parser syntax
     * @return the matching entries, or {@code null} if parsing or searching failed
     */
    @SuppressWarnings("deprecation")
    public static List<IndexResult> queryByMultiFileds(IndexSearcher indexSearcher,
            String[] fields, String key) {
        try {
            MultiFieldQueryParser mfq = new MultiFieldQueryParser(fields, new StandardAnalyzer());
            Query query = mfq.parse(key);
            return toResults(indexSearcher.search(query));
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * 5. Deletes from the index all documents whose {@code id} field contains the
     * given term.
     *
     * @param indexFile directory that holds the Lucene index
     * @param id        id of the document(s) to delete
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           on a low-level I/O error
     */
    public static void deleteIndex(String indexFile, String id) throws CorruptIndexException,
            IOException {
        IndexReader indexReader = IndexReader.open(indexFile);
        try {
            indexReader.deleteDocuments(new Term("id", id));
        } finally {
            // Always close the reader, even when the delete fails.
            indexReader.close();
        }
    }

    /**
     * 6. Tokenizes {@code str} with {@link StandardAnalyzer} and prints every token.
     *
     * @param str text to tokenize
     * @return the tokens, each preceded by a single space
     */
    @SuppressWarnings("deprecation")
    public static String Standard_Analyzer(String str) {
        // Use the TokenStream type as declared by Analyzer; downcasting to StopFilter
        // depends on how the analyzer happens to build its filter chain.
        TokenStream ts = new StandardAnalyzer().tokenStream("", new StringReader(str));
        System.out.println("=====StandardAnalyzer====");
        System.out.println("分析方法:默认没有词只有字(一元分词)");
        return printAndJoinTokens(ts);
    }

    /**
     * 7. Tokenizes {@code str} with the dictionary-based {@link IK_CAnalyzer} and
     * prints every token.
     *
     * @param str text to tokenize
     * @return the tokens, each preceded by a single space
     */
    @SuppressWarnings("deprecation")
    public static String ik_CAnalyzer(String str) {
        TokenStream ts = new IK_CAnalyzer().tokenStream("", new StringReader(str));
        System.out.println("=====IK_CAnalyzer====");
        System.out.println("分析方法:字典分词,正反双向搜索");
        return printAndJoinTokens(ts);
    }

    /** Searching within previous results; not implemented yet. */
    public static void queryFromResults() {

    }

    /**
     * Copies the stored fields {@code id}, {@code name}, {@code address} and
     * {@code citycode} of a document into a result object.
     *
     * @param doc document to read
     * @return the populated result
     */
    public static IndexResult getIndexResult(Document doc) {
        IndexResult ir = new IndexResult();
        ir.setId(doc.get("id"));
        ir.setName(doc.get("name"));
        ir.setAddress(doc.get("address"));
        ir.setCitycode(doc.get("citycode"));
        return ir;
    }

    /** Converts every hit into an {@link IndexResult}, in hit order. */
    @SuppressWarnings("deprecation")
    private static List<IndexResult> toResults(Hits hits) throws IOException {
        List<IndexResult> list = new ArrayList<IndexResult>();
        for (int i = 0; i < hits.length(); i++) {
            list.add(getIndexResult(hits.doc(i)));
        }
        return list;
    }

    /**
     * Prints each token of the stream on its own line and returns them joined,
     * each preceded by a space. On an I/O error the stack trace is printed and
     * the tokens read so far are returned.
     */
    @SuppressWarnings("deprecation")
    private static String printAndJoinTokens(TokenStream ts) {
        StringBuilder results = new StringBuilder();
        try {
            Token t;
            while ((t = ts.next()) != null) {
                System.out.println(t.termText());
                results.append(" ").append(t.termText());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return results.toString();
    }
}

查询索引结果对象:IndexResult

Java代码 复制代码 收藏代码
  1. package index;
  2. public class IndexResult {
  3. private String id;
  4. private String name;
  5. private String address;
  6. private String citycode;
  7. public String getId() {
  8. return id;
  9. }
  10. public void setId(String id) {
  11. this.id = id;
  12. }
  13. public String getName() {
  14. return name;
  15. }
  16. public void setName(String name) {
  17. this.name = name;
  18. }
  19. public String getAddress() {
  20. return address;
  21. }
  22. public void setAddress(String address) {
  23. this.address = address;
  24. }
  25. public String getCitycode() {
  26. return citycode;
  27. }
  28. public void setCitycode(String citycode) {
  29. this.citycode = citycode;
  30. }
  31. }
package index;

/**
 * One entry of a search result: the stored fields of an indexed record.
 * Every property is a plain string and is {@code null} until it is set.
 */
public class IndexResult {

    private String id;
    private String name;
    private String address;
    private String citycode;

    /** @return the record id */
    public String getId() {
        return this.id;
    }

    /** @param id the record id */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the record name */
    public String getName() {
        return this.name;
    }

    /** @param name the record name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the address */
    public String getAddress() {
        return this.address;
    }

    /** @param address the address */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the city code */
    public String getCitycode() {
        return this.citycode;
    }

    /** @param citycode the city code */
    public void setCitycode(String citycode) {
        this.citycode = citycode;
    }
}

4. 测试类

Java代码 复制代码 收藏代码
  1. package test;
  2. /**
  3. * $Id$
  4. * Copyright 2009-2010 Oak Pacific Interactive. All rights reserved.
  5. */
  6. import index.IndexResult;
  7. import index.IndexUtils;
  8. import java.util.Date;
  9. import java.util.List;
  10. import org.apache.lucene.search.IndexSearcher;
  11. public class Test {
  12. //存放索引文件
  13. private static String indexFile = "E:\\workspace2\\Test\\lucene_test\\poiIdext";
  14. //存放id
  15. private static String storeIdFile = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";
  16. public static void main(String[] args) throws Exception {
  17. //0. 创建增量索引
  18. IndexUtils.buildIndex(indexFile, storeIdFile);
  19. IndexSearcher indexSearcher = new IndexSearcher(indexFile);
  20. String key = IndexUtils.ik_CAnalyzer("静安中心");
  21. //1.单字段查询
  22. Date date1 = new Date();
  23. List<IndexResult> list = IndexUtils.queryByOneKey(indexSearcher, "name", key);
  24. Date date2 = new Date();
  25. System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
  26. + "条=======================================单字段查询");
  27. //printResults(list);
  28. //2.多条件查询
  29. String[] fields = { "name", "citycode" };
  30. String[] keys = { IndexUtils.ik_CAnalyzer("静安中心"), "0000" };
  31. date1 = new Date();
  32. list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys);
  33. date2 = new Date();
  34. System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
  35. + "条\n===============================多条件查询");
  36. printResults(list);
  37. //3.高亮显示 单字段查询
  38. System.out.println("\n\n");
  39. date1 = new Date();
  40. list = IndexUtils.highlight(indexSearcher, key);
  41. date2 = new Date();
  42. System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
  43. + "条\n======================================高亮显示");
  44. // printResults(list);
  45. //4. 多字段查询
  46. date1 = new Date();
  47. list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key);
  48. date2 = new Date();
  49. System.out.println("耗时:" + (date2.getTime() - date1.getTime()) + "ms\n" + list.size()
  50. + "条\n=====================================多字段查询");
  51. // printResults(list);
  52. //5. 删除索引中的字段 根据id进行删除
  53. IndexUtils.deleteIndex(indexFile, "123");
  54. }
  55. //打印结果
  56. public static void printResults(List<IndexResult> list) {
  57. if (list != null && list.size() > 0) {
  58. for (int i = 0; i < list.size(); i++) {
  59. System.out.println(list.get(i).getId() + "," + list.get(i).getName() + ","
  60. + list.get(i).getAddress() + "," + list.get(i).getCitycode()+"--->"+i);
  61. }
  62. }
  63. }
  64. }
package test;

/**
 * $Id$
 * Copyright 2009-2010 Oak Pacific Interactive. All rights reserved.
 */

import index.IndexResult;
import index.IndexUtils;

import java.util.Date;
import java.util.List;

import org.apache.lucene.search.IndexSearcher;

/**
 * Demo driver for the index utilities: builds the incremental index, then runs
 * a single-field query, a multi-condition query, a highlighted query and a
 * multi-field query against it, printing the elapsed time and hit count of
 * each, and finally deletes one document from the index by id.
 */
public class Test {

    // Location of the index files.
    private static final String indexFile = "E:\\workspace2\\Test\\lucene_test\\poiIdext";

    // Location of the file in which the id is stored (passed to the index build).
    private static final String storeIdFile = "E:\\workspace2\\Test\\lucene_test\\storeId.txt";

    /**
     * Runs the whole demo sequence.
     *
     * @param args ignored
     * @throws Exception if building, searching or deleting from the index fails
     */
    public static void main(String[] args) throws Exception {
        // 0. Build the incremental index.
        IndexUtils.buildIndex(indexFile, storeIdFile);

        IndexSearcher indexSearcher = new IndexSearcher(indexFile);
        try {
            String key = IndexUtils.ik_CAnalyzer("静安中心");

            // 1. Single-field query.
            long start = System.currentTimeMillis();
            List<IndexResult> list = IndexUtils.queryByOneKey(indexSearcher, "name", key);
            report(System.currentTimeMillis() - start, list,
                    "条=======================================单字段查询");
            //printResults(list);

            // 2. Multi-condition query (one key per field).
            String[] fields = { "name", "citycode" };
            String[] keys = { IndexUtils.ik_CAnalyzer("静安中心"), "0000" };
            start = System.currentTimeMillis();
            list = IndexUtils.queryByMultiKeys(indexSearcher, fields, keys);
            report(System.currentTimeMillis() - start, list,
                    "条\n===============================多条件查询");
            printResults(list);

            // 3. Single-field query with highlighting.
            System.out.println("\n\n");
            start = System.currentTimeMillis();
            list = IndexUtils.highlight(indexSearcher, key);
            report(System.currentTimeMillis() - start, list,
                    "条\n======================================高亮显示");
            // printResults(list);

            // 4. Multi-field query (one key searched across several fields).
            start = System.currentTimeMillis();
            list = IndexUtils.queryByMultiFileds(indexSearcher, fields, key);
            report(System.currentTimeMillis() - start, list,
                    "条\n=====================================多字段查询");
            // printResults(list);
        } finally {
            // Always release the searcher's index files, even if a query failed.
            indexSearcher.close();
        }

        // 5. Delete a document from the index by id (all searches are finished by now).
        IndexUtils.deleteIndex(indexFile, "123");
    }

    /**
     * Prints the elapsed time and the number of hits of one query.
     *
     * @param elapsedMillis time the query took, in milliseconds
     * @param list          query results; its size is printed
     * @param suffix        text appended after the hit count (unit, separator and query label)
     */
    private static void report(long elapsedMillis, List<IndexResult> list, String suffix) {
        System.out.println("耗时:" + elapsedMillis + "ms\n" + list.size() + suffix);
    }

    /**
     * Prints one line per result: id, name, address and city code, followed by
     * the zero-based position of the result in the list.
     *
     * @param list results to print; {@code null} or empty prints nothing
     */
    public static void printResults(List<IndexResult> list) {
        if (list == null) {
            return;
        }
        for (int i = 0; i < list.size(); i++) {
            IndexResult result = list.get(i);
            System.out.println(result.getId() + "," + result.getName() + ","
                    + result.getAddress() + "," + result.getCitycode() + "--->" + i);
        }
    }
}

5. 其它

全文索引:

目前的情况是:搜索hello时,"hello world"、"hi hello, how are you"可以显示出来,但"worldhello"显示不出来

默认情况下,QueryParser不支持通配符打头的查询(如,*ook)。不过在Lucene 2.1版本以后,可以通过调用QueryParser.setAllowLeadingWildcard(true)方法打开这一功能。注意,这是一个开销很大的操作:它需要扫描索引中全部记号的列表,来寻找匹配这个模式的词。(译注:高效支持这种后缀查询的办法是,建立反序的记号表,Lucene没有实现这一模式。)参考:http://www.codechina.org/faq/show/42/

支持空格分词搜索:"厕所 26 沈阳" 这是三个词

不支持:“厕所沈阳”这是一个词

Lucene能实现“在搜索结果中搜索”的功能么,也就是说第二个搜索仅在第一个搜索结果中进行?

http://www.codechina.org/faq/show/63/

可以。主要有两种做法:

  • 使用QueryFilter把第一个查询当作一个过滤器处理。(你可以在Lucene的邮件列表里面搜索 QueryFilter,Doug Cutting(Lucene的最初作者)反对这种做法。)
  • BooleanQuery把前后两个查询结合起来,前一个查询使用 required选项。

我们推荐使用BooleanQuery的方法。

============

// 创建标准文本分析器,标准分析器是可以支持中文的

Analyzer luceneAnalyzer = new StandardAnalyzer();

indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);

// 可以说是创建一个新的写入工具

// 第一个参数是要索引建立在哪个目录里

// 第二个参数是新建一个文本分析器,这里用的是标准的大家也可以自己写一个

// 第三个参数如果是true,在建立索引之前先将c: \\index目录清空

poi_data_ugc搜索中,索引放在内存里还是磁盘上????


针对于lucene使用和优化

http://hi.baidu.com/lewutian/blog/item/48a86d03de58b984d43f7c1b.html

Lucene入门实例(1):索引文本文件

http://www.java3z.com/cwbwebhome/article/article5/51021.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值