package com.highcom.hcgip.lucenesearch;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import com.highcom.hcgip.cm.deal.admin.*;
import com.highcom.hcgip.cm.dbmap.admin.*;
import com.highcom.hcgip.cm.model.*;
import com.highcom.hcgip.cm.util.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.constant.*;
/**
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
public class Searcher
    extends HttpServlet {
  private static final String CONTENT_TYPE = "text/html; charset=GBK";

  /** No servlet-level state to initialize. */
  public void init() throws ServletException {
  }

  /**
   * Handles a search request.
   *
   * <p>Runs the Lucene search either over every category's index
   * ({@code classid} is "0" or missing) or over a single category's index,
   * pages the hit list, publishes the page data as request attributes
   * ({@code result}, {@code totalRecord}, {@code pageCounter},
   * {@code currentPageIndex}, {@code searchfor}, {@code time},
   * {@code searchKeys}, {@code classid}) and forwards to the result page.
   *
   * @param request  carries {@code classid}, {@code searchKeys}, {@code pageIndex}
   * @param response forwarded to {@link ConstantList#PAGE_BASIC_SEARCHRESULT}
   */
  public void doPost(HttpServletRequest request, HttpServletResponse response) throws
      ServletException, IOException {
    // NOTE: do not call response.getWriter() here — the response is forwarded
    // below and obtaining the writer first can break the forward.
    String id = request.getParameter("classid");
    String q = Uncode.exChinese(request.getParameter("searchKeys")); // decode GBK query text
    CategoryDBInterface category = new CategoryDBMap();
    Date start = new Date();
    if (id == null || id.equals("0")) {
      // A missing classid previously fell into the single-category branch and
      // crashed on Integer.parseInt(null); treat it as "search everything".
      Vector v = category.getCategory();
      String[] indexDir = new String[v.size()];
      for (int i = 0; i < v.size(); i++) {
        indexDir[i] = ((CategoryBean) v.get(i)).getIndexpath();
      }
      Vector hits = Search.getSearch(indexDir, q);
      fillPagedResult(request, hits, 20); // 20 rows per page for the all-category view
      request.setAttribute("searchfor", "全部分类");
    }
    else {
      Vector v = category.getCategory(Integer.parseInt(id));
      CategoryBean bean = (CategoryBean) v.get(0);
      Vector hits = Search.getSearch(bean.getIndexpath(), q);
      fillPagedResult(request, hits, 10); // 10 rows per page for a single category
      request.setAttribute("searchfor", bean.getCategory());
    }
    long elapsed = new Date().getTime() - start.getTime();
    request.setAttribute("time", elapsed + "");
    request.setAttribute("searchKeys", q);
    request.setAttribute("classid", id);
    dispatch(request, response, ConstantList.PAGE_BASIC_SEARCHRESULT);
  }

  /**
   * Pages the raw hit list and exposes the paging state plus the current
   * page's rows as request attributes (shared by both search branches).
   *
   * @param request  target for the paging attributes
   * @param hits     full, unpaged result list
   * @param pageSize number of rows per page
   */
  private void fillPagedResult(HttpServletRequest request, Vector hits, int pageSize) {
    int currentPageIndex = 0;
    String pageIndex = request.getParameter("pageIndex");
    if (pageIndex != null) {
      try {
        currentPageIndex = Integer.parseInt(pageIndex);
      }
      catch (NumberFormatException ex) {
        currentPageIndex = 0; // tolerate a malformed pageIndex instead of erroring out
      }
    }
    SplitPage ph = new SplitPage();
    ph.setPage(hits, pageSize, currentPageIndex);
    request.setAttribute("totalRecord", new Integer(ph.getTotalLines()));
    request.setAttribute("pageCounter", new Integer(ph.getTotalPages()));
    request.setAttribute("currentPageIndex", new Integer(currentPageIndex));
    request.setAttribute("result", ph.getPage(currentPageIndex)); // rows of the current page
  }

  /**
   * Forwards the request/response pair to the given JSP page.
   *
   * @param page context-relative path of the target page
   */
  protected void dispatch(HttpServletRequest request,
                          HttpServletResponse response,
                          String page) throws javax.servlet.ServletException,
      IOException {
    RequestDispatcher dispatcher =
        getServletContext().getRequestDispatcher(page);
    dispatcher.forward(request, response);
  }

  /** No resources to release. */
  public void destroy() {
  }
}
package com.highcom.hcgip.lucenesearch;
import java.io.IOException;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.analysis.cjk.*;
import com.highcom.hcgip.basic.common.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import java.io.*;
public class Search {
  /**
   * Searches a single index directory for {@code q} on the "contents" and
   * "proposal" fields.
   *
   * @param indexDir directory holding the Lucene index
   * @param q        user query string (already decoded)
   * @return a Vector of {@link LuceneBean}; empty when the index does not
   *         exist, the query cannot be parsed, or an I/O error occurs
   */
  public static Vector getSearch(String indexDir, String q) {
    Vector results = new Vector();
    // A Lucene 1.x index always contains a "segments" file; its absence
    // means no index has been built for this directory yet.
    File segments = new File(indexDir + File.separator + "segments");
    if (!segments.exists()) {
      return results;
    }
    Searcher searcher = null;
    try {
      searcher = new IndexSearcher(indexDir); // searcher over the index directory
      // Must be the same analyzer the index was built with (see IndexRunner).
      Analyzer analyzer = new CJKAnalyzer();
      collectHits(searcher.search(buildQuery(q, analyzer)), results);
    }
    catch (ParseException ex) {
      Log.debug(ex.toString());
    }
    catch (IOException ex) {
      Log.debug(ex.toString());
    }
    finally {
      closeQuietly(searcher); // always release index file handles
    }
    return results;
  }

  /**
   * Searches several index directories and concatenates the hits.
   *
   * @param indexDir index directories, one per category; directories without
   *                 a built index are skipped
   * @param q        user query string (already decoded)
   * @return a Vector of {@link LuceneBean} accumulated across all directories
   */
  public static Vector getSearch(String[] indexDir, String q) {
    Vector results = new Vector();
    Analyzer analyzer = new CJKAnalyzer();
    try {
      // Parse the query once and reuse it for every directory (the original
      // re-parsed it on each iteration).
      BooleanQuery comboQuery = buildQuery(q, analyzer);
      for (int j = 0; j < indexDir.length; j++) {
        File segments = new File(indexDir[j] + File.separator + "segments");
        if (!segments.exists()) {
          continue; // this category has no index yet
        }
        Searcher searcher = null;
        try {
          searcher = new IndexSearcher(indexDir[j]);
          collectHits(searcher.search(comboQuery), results);
        }
        finally {
          closeQuietly(searcher); // close even when this directory's search fails
        }
      }
    }
    catch (ParseException ex) {
      Log.debug(ex.toString());
    }
    catch (IOException ex) {
      Log.debug(ex.toString());
    }
    return results;
  }

  /** Builds an OR query matching {@code q} in "contents" or "proposal". */
  private static BooleanQuery buildQuery(String q, Analyzer analyzer) throws ParseException {
    BooleanQuery comboQuery = new BooleanQuery();
    // add(query, required=false, prohibited=false) => optional clause (OR).
    comboQuery.add(QueryParser.parse(q, "contents", analyzer), false, false);
    comboQuery.add(QueryParser.parse(q, "proposal", analyzer), false, false);
    return comboQuery;
  }

  /** Copies each hit's stored fields into a LuceneBean appended to {@code out}. */
  private static void collectHits(Hits hits, Vector out) throws IOException {
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      LuceneBean bean = new LuceneBean();
      bean.setPath(doc.get("path"));
      bean.setTitle(doc.get("title"));
      bean.setDate(doc.get("date"));
      bean.setType(doc.get("type"));
      bean.setRealpath(doc.get("realpath"));
      out.add(bean);
    }
  }

  /** Closes the searcher, logging (not propagating) any close failure. */
  private static void closeQuietly(Searcher searcher) {
    if (searcher != null) {
      try {
        searcher.close();
      }
      catch (IOException ex) {
        Log.debug(ex.toString());
      }
    }
  }

  /** Ad-hoc command-line smoke test against a local index. */
  public static void main(String[] args) {
    Vector it = Search.getSearch("d://bb", "请求");
    System.out.println(it.size());
    for (int i = 0; i < it.size(); i++) {
      LuceneBean bean = (LuceneBean) it.get(i);
      System.out.print("<a href='" + bean.getTitle() + "'>" +
                       bean.getTitle() + "</a>");
      System.out.print(bean.getDate());
    }
  }
}
package com.highcom.hcgip.lucenesearch;
/**
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
/**
 * Value object for one search hit: the stored Lucene fields of a single
 * indexed document. All properties are plain strings and default to null.
 */
public class LuceneBean {
  private String path;     // display path shown to the user
  private String title;    // document title
  private String date;     // indexing date (preformatted string)
  private String type;     // document type tag
  private String realpath; // physical path of the source file

  /** Creates an empty bean; populate it via the setters. */
  public LuceneBean() {
  }

  public String getPath() {
    return this.path;
  }

  public void setPath(String path) {
    this.path = path;
  }

  public String getTitle() {
    return this.title;
  }

  public void setTitle(String title) {
    this.title = title;
  }

  public String getDate() {
    return this.date;
  }

  public void setDate(String date) {
    this.date = date;
  }

  public String getType() {
    return this.type;
  }

  public void setType(String type) {
    this.type = type;
  }

  public String getRealpath() {
    return this.realpath;
  }

  public void setRealpath(String realpath) {
    this.realpath = realpath;
  }
}
package com.highcom.hcgip.lucenesearch;
import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
import java.io.Reader;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.cjk.*;
import java.io.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.util.*;
import org.htmlparser.beans.*;
/**
* 建立索引文件
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
public class IndexRunner {
  /**
   * Builds (or appends to) the Lucene index for a file or directory tree.
   *
   * @param filePath source file, or directory to index recursively
   * @param indexDir directory where the index files are stored
   * @param showPath path prefix stored for display in search results
   * @param type     document type tag stored with every document
   * @param it       true to create/overwrite the index, false to append
   */
  public static synchronized void createIndex(String filePath, String indexDir,
                                              String showPath, String type, boolean it) {
    IndexWriter writer = null;
    try {
      // Same analyzer as Search uses at query time.
      writer = new IndexWriter(indexDir, new CJKAnalyzer(), it);
      indexDocs(writer, new File(filePath), showPath, type);
      writer.optimize(); // merge segments for faster searching
    }
    catch (IOException ex) {
      Log.debug(" public static void createIndex===" + ex.toString());
    }
    finally {
      // Close in finally so the index write lock is released even on failure
      // (the original leaked the lock when optimize()/indexing threw).
      if (writer != null) {
        try {
          writer.close();
        }
        catch (IOException ex) {
          Log.debug(" public static void createIndex===" + ex.toString());
        }
      }
    }
  }

  /**
   * Recursively indexes every .html/.htm/.txt file under {@code file};
   * other file types are ignored.
   */
  private static void indexDocs(IndexWriter writer, File file, String showPath,
                                String type) {
    if (file.isDirectory()) {
      String[] files = file.list();
      if (files == null) {
        return; // unreadable directory — File.list() returns null, not an empty array
      }
      for (int i = 0; i < files.length; i++) {
        indexDocs(writer, new File(file, files[i]), showPath, type);
      }
    }
    else if (file.getPath().endsWith(".html") || file.getPath().endsWith(".htm") ||
             file.getPath().endsWith(".txt")) {
      try {
        Document doc = buildDocument(file, showPath, type);
        // buildDocument returns null when extraction failed; skip that file
        // instead of passing null (NPE) or a half-built doc to the writer.
        if (doc != null) {
          writer.addDocument(doc);
        }
      }
      catch (IOException ex) {
        Log.debug(" private static void indexDocs==" + ex.toString());
      }
    }
  }

  /**
   * Converts one source file into a Lucene Document.
   *
   * @return the populated document, or null when the file's content could
   *         not be extracted (the failure is logged)
   */
  private static Document buildDocument(File f, String showPath, String type) {
    try {
      Document doc = new Document();
      doc.add(Field.UnIndexed("realpath", f.getPath()));           // stored only, not searchable
      doc.add(Field.UnIndexed("path", showPath + f.getName()));    // display path, stored only
      doc.add(Field.Keyword("date", DateFormat.dateFormat2(new Date())));
      doc.add(Field.Keyword("type", type));
      // File names look like "<title>_<rest>"; the part before '_' is the title.
      doc.add(Field.Text("title", f.getName().split("_")[0]));
      doc.add(Field.Text("proposal", PropositionParser.getProposition(f.getPath())));
      // Strip HTML markup via htmlparser and index the plain text as "contents".
      StringBean sb = new StringBean();
      sb.setLinks(false);
      sb.setCollapse(true);
      sb.setURL(f.getPath());
      sb.setReplaceNonBreakingSpaces(true);
      doc.add(Field.Text("contents", sb.getStrings()));
      return doc;
    }
    catch (Exception ex) {
      Log.debug("private static Document Document==" + ex.toString());
      return null;
    }
  }
}