package com.highcom.hcgip.lucenesearch;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import com.highcom.hcgip.cm.deal.admin.*;
import com.highcom.hcgip.cm.dbmap.admin.*;
import com.highcom.hcgip.cm.model.*;
import com.highcom.hcgip.cm.util.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.constant.*;
/**
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
public class Searcher
    extends HttpServlet {
  private static final String CONTENT_TYPE = "text/html; charset=GBK";

  /** No servlet-level state to initialize. */
  public void init() throws ServletException {
  }

  /**
   * Handles a search request.
   *
   * <p>Runs the Lucene search either over every category's index
   * ({@code classid} is "0" or missing) or over a single category's index,
   * pages the hit list, publishes the page data as request attributes
   * ({@code result}, {@code totalRecord}, {@code pageCounter},
   * {@code currentPageIndex}, {@code searchfor}, {@code time},
   * {@code searchKeys}, {@code classid}) and forwards to the result page.
   *
   * @param request  carries {@code classid}, {@code searchKeys}, {@code pageIndex}
   * @param response forwarded to {@link ConstantList#PAGE_BASIC_SEARCHRESULT}
   */
  public void doPost(HttpServletRequest request, HttpServletResponse response) throws
      ServletException, IOException {
    // NOTE: do not call response.getWriter() here — the response is forwarded
    // below and obtaining the writer first can break the forward.
    String id = request.getParameter("classid");
    String q = Uncode.exChinese(request.getParameter("searchKeys")); // decode GBK query text
    CategoryDBInterface category = new CategoryDBMap();
    Date start = new Date();
    if (id == null || id.equals("0")) {
      // A missing classid previously fell into the single-category branch and
      // crashed on Integer.parseInt(null); treat it as "search everything".
      Vector v = category.getCategory();
      String[] indexDir = new String[v.size()];
      for (int i = 0; i < v.size(); i++) {
        indexDir[i] = ((CategoryBean) v.get(i)).getIndexpath();
      }
      Vector hits = Search.getSearch(indexDir, q);
      fillPagedResult(request, hits, 20); // 20 rows per page for the all-category view
      request.setAttribute("searchfor", "全部分类");
    }
    else {
      Vector v = category.getCategory(Integer.parseInt(id));
      CategoryBean bean = (CategoryBean) v.get(0);
      Vector hits = Search.getSearch(bean.getIndexpath(), q);
      fillPagedResult(request, hits, 10); // 10 rows per page for a single category
      request.setAttribute("searchfor", bean.getCategory());
    }
    long elapsed = new Date().getTime() - start.getTime();
    request.setAttribute("time", elapsed + "");
    request.setAttribute("searchKeys", q);
    request.setAttribute("classid", id);
    dispatch(request, response, ConstantList.PAGE_BASIC_SEARCHRESULT);
  }

  /**
   * Pages the raw hit list and exposes the paging state plus the current
   * page's rows as request attributes (shared by both search branches).
   *
   * @param request  target for the paging attributes
   * @param hits     full, unpaged result list
   * @param pageSize number of rows per page
   */
  private void fillPagedResult(HttpServletRequest request, Vector hits, int pageSize) {
    int currentPageIndex = 0;
    String pageIndex = request.getParameter("pageIndex");
    if (pageIndex != null) {
      try {
        currentPageIndex = Integer.parseInt(pageIndex);
      }
      catch (NumberFormatException ex) {
        currentPageIndex = 0; // tolerate a malformed pageIndex instead of erroring out
      }
    }
    SplitPage ph = new SplitPage();
    ph.setPage(hits, pageSize, currentPageIndex);
    request.setAttribute("totalRecord", new Integer(ph.getTotalLines()));
    request.setAttribute("pageCounter", new Integer(ph.getTotalPages()));
    request.setAttribute("currentPageIndex", new Integer(currentPageIndex));
    request.setAttribute("result", ph.getPage(currentPageIndex)); // rows of the current page
  }

  /**
   * Forwards the request/response pair to the given JSP page.
   *
   * @param page context-relative path of the target page
   */
  protected void dispatch(HttpServletRequest request,
                          HttpServletResponse response,
                          String page) throws javax.servlet.ServletException,
      IOException {
    RequestDispatcher dispatcher =
        getServletContext().getRequestDispatcher(page);
    dispatcher.forward(request, response);
  }

  /** No resources to release. */
  public void destroy() {
  }
}
package com.highcom.hcgip.lucenesearch;
import java.io.IOException;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.analysis.cjk.*;
import com.highcom.hcgip.basic.common.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import java.io.*;
public class Search {
  /**
   * Searches a single index directory for {@code q} on the "contents" and
   * "proposal" fields.
   *
   * @param indexDir directory holding the Lucene index
   * @param q        user query string (already decoded)
   * @return a Vector of {@link LuceneBean}; empty when the index does not
   *         exist, the query cannot be parsed, or an I/O error occurs
   */
  public static Vector getSearch(String indexDir, String q) {
    Vector results = new Vector();
    // A Lucene 1.x index always contains a "segments" file; its absence
    // means no index has been built for this directory yet.
    File segments = new File(indexDir + File.separator + "segments");
    if (!segments.exists()) {
      return results;
    }
    Searcher searcher = null;
    try {
      searcher = new IndexSearcher(indexDir); // searcher over the index directory
      // Must be the same analyzer the index was built with (see IndexRunner).
      Analyzer analyzer = new CJKAnalyzer();
      collectHits(searcher.search(buildQuery(q, analyzer)), results);
    }
    catch (ParseException ex) {
      Log.debug(ex.toString());
    }
    catch (IOException ex) {
      Log.debug(ex.toString());
    }
    finally {
      closeQuietly(searcher); // always release index file handles
    }
    return results;
  }

  /**
   * Searches several index directories and concatenates the hits.
   *
   * @param indexDir index directories, one per category; directories without
   *                 a built index are skipped
   * @param q        user query string (already decoded)
   * @return a Vector of {@link LuceneBean} accumulated across all directories
   */
  public static Vector getSearch(String[] indexDir, String q) {
    Vector results = new Vector();
    Analyzer analyzer = new CJKAnalyzer();
    try {
      // Parse the query once and reuse it for every directory (the original
      // re-parsed it on each iteration).
      BooleanQuery comboQuery = buildQuery(q, analyzer);
      for (int j = 0; j < indexDir.length; j++) {
        File segments = new File(indexDir[j] + File.separator + "segments");
        if (!segments.exists()) {
          continue; // this category has no index yet
        }
        Searcher searcher = null;
        try {
          searcher = new IndexSearcher(indexDir[j]);
          collectHits(searcher.search(comboQuery), results);
        }
        finally {
          closeQuietly(searcher); // close even when this directory's search fails
        }
      }
    }
    catch (ParseException ex) {
      Log.debug(ex.toString());
    }
    catch (IOException ex) {
      Log.debug(ex.toString());
    }
    return results;
  }

  /** Builds an OR query matching {@code q} in "contents" or "proposal". */
  private static BooleanQuery buildQuery(String q, Analyzer analyzer) throws ParseException {
    BooleanQuery comboQuery = new BooleanQuery();
    // add(query, required=false, prohibited=false) => optional clause (OR).
    comboQuery.add(QueryParser.parse(q, "contents", analyzer), false, false);
    comboQuery.add(QueryParser.parse(q, "proposal", analyzer), false, false);
    return comboQuery;
  }

  /** Copies each hit's stored fields into a LuceneBean appended to {@code out}. */
  private static void collectHits(Hits hits, Vector out) throws IOException {
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      LuceneBean bean = new LuceneBean();
      bean.setPath(doc.get("path"));
      bean.setTitle(doc.get("title"));
      bean.setDate(doc.get("date"));
      bean.setType(doc.get("type"));
      bean.setRealpath(doc.get("realpath"));
      out.add(bean);
    }
  }

  /** Closes the searcher, logging (not propagating) any close failure. */
  private static void closeQuietly(Searcher searcher) {
    if (searcher != null) {
      try {
        searcher.close();
      }
      catch (IOException ex) {
        Log.debug(ex.toString());
      }
    }
  }

  /** Ad-hoc command-line smoke test against a local index. */
  public static void main(String[] args) {
    Vector it = Search.getSearch("d://bb", "请求");
    System.out.println(it.size());
    for (int i = 0; i < it.size(); i++) {
      LuceneBean bean = (LuceneBean) it.get(i);
      System.out.print("<a href='" + bean.getTitle() + "'>" +
                       bean.getTitle() + "</a>");
      System.out.print(bean.getDate());
    }
  }
}
package com.highcom.hcgip.lucenesearch;
/**
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
/**
 * Value object for one search hit: the stored Lucene fields of a single
 * indexed document. All properties are plain strings and default to null.
 */
public class LuceneBean {
  private String path;     // display path shown to the user
  private String title;    // document title
  private String date;     // indexing date (preformatted string)
  private String type;     // document type tag
  private String realpath; // physical path of the source file

  /** Creates an empty bean; populate it via the setters. */
  public LuceneBean() {
  }

  public String getPath() {
    return this.path;
  }

  public void setPath(String path) {
    this.path = path;
  }

  public String getTitle() {
    return this.title;
  }

  public void setTitle(String title) {
    this.title = title;
  }

  public String getDate() {
    return this.date;
  }

  public void setDate(String date) {
    this.date = date;
  }

  public String getType() {
    return this.type;
  }

  public void setType(String type) {
    this.type = type;
  }

  public String getRealpath() {
    return this.realpath;
  }

  public void setRealpath(String realpath) {
    this.realpath = realpath;
  }
}
package com.highcom.hcgip.lucenesearch;
import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
import java.io.Reader;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.cjk.*;
import java.io.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.util.*;
import org.htmlparser.beans.*;
/**
* 建立索引文件
* <p>Title: </p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: Highcom</p>
* @author 李新博
* @version 1.0
*/
public class IndexRunner {
  /**
   * Builds (or appends to) the Lucene index for a file or directory tree.
   *
   * @param filePath source file, or directory to index recursively
   * @param indexDir directory where the index files are stored
   * @param showPath path prefix stored for display in search results
   * @param type     document type tag stored with every document
   * @param it       true to create/overwrite the index, false to append
   */
  public static synchronized void createIndex(String filePath, String indexDir,
                                              String showPath, String type, boolean it) {
    IndexWriter writer = null;
    try {
      // Same analyzer as Search uses at query time.
      writer = new IndexWriter(indexDir, new CJKAnalyzer(), it);
      indexDocs(writer, new File(filePath), showPath, type);
      writer.optimize(); // merge segments for faster searching
    }
    catch (IOException ex) {
      Log.debug(" public static void createIndex===" + ex.toString());
    }
    finally {
      // Close in finally so the index write lock is released even on failure
      // (the original leaked the lock when optimize()/indexing threw).
      if (writer != null) {
        try {
          writer.close();
        }
        catch (IOException ex) {
          Log.debug(" public static void createIndex===" + ex.toString());
        }
      }
    }
  }

  /**
   * Recursively indexes every .html/.htm/.txt file under {@code file};
   * other file types are ignored.
   */
  private static void indexDocs(IndexWriter writer, File file, String showPath,
                                String type) {
    if (file.isDirectory()) {
      String[] files = file.list();
      if (files == null) {
        return; // unreadable directory — File.list() returns null, not an empty array
      }
      for (int i = 0; i < files.length; i++) {
        indexDocs(writer, new File(file, files[i]), showPath, type);
      }
    }
    else if (file.getPath().endsWith(".html") || file.getPath().endsWith(".htm") ||
             file.getPath().endsWith(".txt")) {
      try {
        Document doc = buildDocument(file, showPath, type);
        // buildDocument returns null when extraction failed; skip that file
        // instead of passing null (NPE) or a half-built doc to the writer.
        if (doc != null) {
          writer.addDocument(doc);
        }
      }
      catch (IOException ex) {
        Log.debug(" private static void indexDocs==" + ex.toString());
      }
    }
  }

  /**
   * Converts one source file into a Lucene Document.
   *
   * @return the populated document, or null when the file's content could
   *         not be extracted (the failure is logged)
   */
  private static Document buildDocument(File f, String showPath, String type) {
    try {
      Document doc = new Document();
      doc.add(Field.UnIndexed("realpath", f.getPath()));           // stored only, not searchable
      doc.add(Field.UnIndexed("path", showPath + f.getName()));    // display path, stored only
      doc.add(Field.Keyword("date", DateFormat.dateFormat2(new Date())));
      doc.add(Field.Keyword("type", type));
      // File names look like "<title>_<rest>"; the part before '_' is the title.
      doc.add(Field.Text("title", f.getName().split("_")[0]));
      doc.add(Field.Text("proposal", PropositionParser.getProposition(f.getPath())));
      // Strip HTML markup via htmlparser and index the plain text as "contents".
      StringBean sb = new StringBean();
      sb.setLinks(false);
      sb.setCollapse(true);
      sb.setURL(f.getPath());
      sb.setReplaceNonBreakingSpaces(true);
      doc.add(Field.Text("contents", sb.getStrings()));
      return doc;
    }
    catch (Exception ex) {
      Log.debug("private static Document Document==" + ex.toString());
      return null;
    }
  }
}