lucene基本的搜索功能

对网上流传的 Lucene 使用方法做了一些修改。网上的版本只能对某个目录下的所有 HTML 文件进行索引和搜索,不支持多级子目录下的文件;这里做了一点修改,建立索引时会递归遍历子目录。大部分代码仍然来自网上。
Constants.java
package testlucene;

/**
 * Shared filesystem locations used by the indexing and searching classes.
 */
public class Constants {
    /** Directory that holds the files to be indexed. */
    public static final String INDEX_FILE_PATH = "c:\\dataDir";

    /** Directory in which the index itself is stored. */
    public static final String INDEX_STORE_PATH = "c:\\indexDir";
}


LuceneIndex.java
package testlucene;
import java.io.*;
import java.util.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.mira.lucene.analysis.IK_CAnalyzer;

/**
 * Builds a Lucene index over every regular file found under
 * {@link Constants#INDEX_FILE_PATH}, descending into sub-directories, and
 * stores it at {@link Constants#INDEX_STORE_PATH}.
 */
public class LuceneIndex {
    /** Index writer; remains {@code null} when the constructor failed to open the index. */
    private IndexWriter writer = null;

    /**
     * Opens an index writer on {@link Constants#INDEX_STORE_PATH} with the IK analyzer.
     * A failure is printed and leaves the writer unset; {@link #writeToIndex()} then
     * reports the problem explicitly.
     */
    public LuceneIndex() {
        try {
            // The last argument (true) means the index may be rewritten (overwritten).
            writer = new IndexWriter(Constants.INDEX_STORE_PATH, new IK_CAnalyzer(), true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the Lucene document for one file: a "contents" field fed from the
     * file's text and a stored "path" field holding its canonical path.
     *
     * @param f the file to describe
     * @return the populated document, or an empty document when {@code f} is not a regular file
     * @throws Exception if the path cannot be resolved or the file cannot be opened
     */
    @SuppressWarnings("deprecation")
    private Document getDocument(File f) throws Exception {
        Document doc = new Document();
        if (f.isFile()) {
            // Resolve the path before opening the stream so a failure here cannot leak it.
            String canonicalPath = f.getCanonicalPath();
            // Decode as UTF-8, matching how LuceneSearch re-reads the same files.
            // The reader is handed to the field and consumed later by addDocument,
            // so it must not be closed here.
            // NOTE(review): confirm the Lucene version in use closes field readers after indexing.
            Reader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(f), "utf-8"));
            doc.add(new Field("contents", reader));
            doc.add(new Field("path", canonicalPath, Field.Store.YES, Field.Index.TOKENIZED));
        }
        return doc;
    }

    /**
     * Adds one document per file under {@link Constants#INDEX_FILE_PATH} to the index.
     * Does nothing when that path is not a directory.
     *
     * @throws IllegalStateException if the index writer could not be opened by the constructor
     * @throws Exception if reading a file or writing to the index fails
     */
    public void writeToIndex() throws Exception {
        if (writer == null) {
            throw new IllegalStateException(
                    "index writer was not opened for " + Constants.INDEX_STORE_PATH);
        }
        File folder = new File(Constants.INDEX_FILE_PATH);
        if (!folder.isDirectory()) {
            return;
        }
        File[] files = getFileList(folder);
        for (File file : files) {
            Document doc = getDocument(file);
            System.out.println("正在为文件 (" + file + ") 建立索引...");
            writer.addDocument(doc);
        }
    }

    /**
     * Closes the index writer; a no-op when the writer was never opened.
     *
     * @throws Exception if closing the writer fails
     */
    public void close() throws Exception {
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * Builds the index and prints the elapsed time in milliseconds.
     *
     * @param args unused
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        LuceneIndex indexer = new LuceneIndex();
        try {
            long start = System.currentTimeMillis();
            indexer.writeToIndex();
            long end = System.currentTimeMillis();
            System.out.println("建立索引用时 " + (end - start) + "毫秒");
        } finally {
            // Release the writer even when indexing fails part-way.
            indexer.close();
        }
    }

    /**
     * Lists all regular files reachable from {@code file}.
     *
     * @param file a file or a directory
     * @return {@code file} itself when it is a regular file; otherwise every regular file
     *         below it, in directory-listing order; never {@code null}
     */
    private File[] getFileList(File file) {
        List<File> found = new ArrayList<File>();
        collectFiles(file, found);
        return found.toArray(new File[found.size()]);
    }

    /** Appends {@code file}, or every regular file below it, to {@code found}. */
    private void collectFiles(File file, List<File> found) {
        if (file.isFile()) {
            found.add(file);
            return;
        }
        // listFiles() returns null for non-directories and on I/O errors.
        File[] children = file.listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            collectFiles(child, found);
        }
    }

}


package testlucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.util.*;
import org.apache.lucene.document.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import org.mira.lucene.analysis.IK_CAnalyzer;

/**
 * Searches the index stored at {@link Constants#INDEX_STORE_PATH} and extracts
 * text sections from the matching files into a text file under {@code c:/temp/}.
 */
public class LuceneSearch {
    /** Index searcher; remains {@code null} when the constructor failed to open the index. */
    private IndexSearcher searcher = null;
    /** The most recently parsed query. */
    private Query query = null;
    /** Output text file chosen by the last successful {@link #getInnerContent3} call. */
    private File shopInfoTxt = null;
    /** Open handle on {@link #shopInfoTxt}; {@code null} until an extraction succeeded. */
    private RandomAccessFile bw;

    /**
     * Opens a searcher on {@link Constants#INDEX_STORE_PATH}. A failure is printed
     * and leaves the searcher unset, in which case {@link #Search} returns {@code null}.
     */
    public LuceneSearch() {
        try {
            searcher = new IndexSearcher(Constants.INDEX_STORE_PATH);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs {@code keyword} against the "contents" field and prints the elapsed time.
     *
     * @param keyword the query text
     * @return the hits, or {@code null} when parsing or searching failed
     */
    @SuppressWarnings("deprecation")
    public final Hits Search(String keyword) {
        System.out.println("正在检索关键字 " + keyword);
        try {
            query = new QueryParser("contents", new IK_CAnalyzer()).parse(keyword);
            long start = System.currentTimeMillis();
            Hits hits = searcher.search(query);
            long end = System.currentTimeMillis();
            System.out.println("检索完成,用时" + (end - start) + "毫秒");
            return hits;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Finds the first hit whose file contains {@code test} and reports where.
     *
     * @param h    search hits; {@code null} is treated like an empty result
     * @param test the text to look for in each hit's file
     * @return {@code "<path>|<1-based line number>"} of the first matching line,
     *         or an empty string when nothing matches
     */
    @SuppressWarnings("deprecation")
    public String printResult(Hits h, String test) {
        if (h == null || h.length() == 0) {
            System.out.println("对不起,没有找到您要的结果。");
            return "";
        }
        for (int i = 0; i < h.length(); i++) {
            try {
                Document doc = h.doc(i);
                String path = doc.get("path");
                System.out.print("这是第" + (i + 1) + "个检索到的结果,文件名为 :");
                System.out.println(path);

                int lineNum = findFirstLineContaining(path, test);
                if (lineNum > 0) {
                    return path + "|" + lineNum;
                }
            } catch (Exception e) {
                // A hit that cannot be read is reported and skipped.
                e.printStackTrace();
            }
        }
        return "";
    }

    /**
     * Returns the 1-based number of the first line of {@code path} containing
     * {@code text}, or -1 when no line does. The file is decoded as UTF-8, the
     * same way {@link #getInnerContent3} reads it, and is always closed.
     */
    private static int findFirstLineContaining(String path, String text) throws Exception {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "utf-8"));
        try {
            String line;
            int lineNum = 0;
            while ((line = br.readLine()) != null) {
                lineNum++;
                if (line.indexOf(text) != -1) {
                    return lineNum;
                }
            }
            return -1;
        } finally {
            br.close();
        }
    }

    /**
     * Extracts the category, address, phone, introduction, tag and recommendation
     * sections of a shop page and writes them to the output text file.
     *
     * @param args unused
     * @throws IllegalStateException if no output file could be opened because the
     *         section markers were not found
     * @throws Exception if searching, reading or writing fails
     */
    public static void main(String[] args) throws Exception {
        LuceneSearch temp = new LuceneSearch();
        try {
            // Category information.
            String content = temp.getInnerContent3("nav_w", "main_w");
            // NOTE(review): as written the second replaceAll is a no-op; these patterns may
            // originally have been HTML entities that were decoded when the code was
            // published -- confirm against the original source.
            content = content.replaceAll(" ", "");
            content = content.replaceAll(">", ">");
            System.out.println("==========分类信息:=============\n" + content + "\n================");
            String typeinfo = content;

            // Detailed shop information.
            String shopInfo = temp.getInnerContent3("shopInfo", "shopRemark");
            if (temp.getBw() == null) {
                throw new IllegalStateException(
                        "no output file was opened: the section markers were not found in one indexed file");
            }
            temp.getBw().write(typeinfo.getBytes());
            System.out.println("得到商家信息:\n" + shopInfo);

            // Address.
            String address = sliceSection(shopInfo, shopInfo.indexOf("地址"), 3, shopInfo.indexOf("电话"));
            if (address != null) {
                System.out.println("==========地址信息:=============\n" + address + "\n===============");
                temp.writeLine(address);
            }

            // Phone.
            String phone = sliceSection(shopInfo, shopInfo.indexOf("电话"), 3, shopInfo.indexOf("报错"));
            if (phone != null) {
                System.out.println("=================电话信息:================\n" + phone + "\n==================");
                temp.writeLine(phone);
            }

            // Shop introduction.
            String intro = sliceSection(shopInfo, shopInfo.indexOf("商户简介"), 5, shopInfo.indexOf("分类标签"));
            if (intro != null) {
                System.out.println("=================商家介绍:===============\n" + intro + "\n===================");
                temp.writeLine(intro);
            }

            // Category tags.
            int typeEnd = shopInfo.indexOf("网友推荐");
            String tags = sliceSection(shopInfo, shopInfo.indexOf("分类标签"), 4, typeEnd);
            if (tags != null) {
                System.out.println("=================分类标签:===============\n" + tags + "\n===================");
                temp.writeLine(tags);
            }

            // User recommendations: from the marker up to and including the last ")".
            String suggestions = sliceSection(shopInfo, typeEnd, 4, shopInfo.lastIndexOf(")") + 1);
            if (suggestions != null) {
                System.out.println("=================网友推荐:===============\n" + suggestions + "\n===================");
                temp.writeLine(suggestions);
            }
        } finally {
            // Release the output file and the searcher even when extraction fails.
            temp.close();
        }
    }

    /**
     * Cuts {@code text} from {@code markerStart + skip} up to {@code end} and removes spaces.
     *
     * @return the section, or {@code null} when either index is not positive (the
     *         original "marker found" test) or the range is empty-inverted or out of bounds
     */
    private static String sliceSection(String text, int markerStart, int skip, int end) {
        if (markerStart <= 0 || end <= 0) {
            return null;
        }
        int from = markerStart + skip;
        if (from > end || end > text.length()) {
            return null;
        }
        return text.substring(from, end).replaceAll(" ", "");
    }

    /** Writes {@code text} followed by a newline to the open output file. */
    private void writeLine(String text) throws Exception {
        bw.write(text.getBytes());
        bw.write("\n".getBytes());
    }

    /**
     * Reads the text lying between two marker strings of one indexed file.
     * Both markers are searched in the index; when their first matches are in the
     * same file, the lines from the start marker's line (inclusive) to the end
     * marker's line (exclusive) are joined, stripped of {@code <...>} tags, and cut
     * after the first ":". As a side effect the output file under {@code c:/temp/}
     * (the source file's name with .html/.htm replaced by .txt) is opened and made
     * available through {@link #getBw()}; a handle opened by an earlier call is closed.
     *
     * @param first the marker for the start position
     * @param sec   the marker for the end position
     * @return the extracted text, or an empty string when a marker was not found
     *         or the two markers matched different files
     * @throws Exception if a file cannot be read or the output file cannot be opened
     */
    @SuppressWarnings("deprecation")
    public String getInnerContent3(String first, String sec) throws Exception, FileNotFoundException {
        // Start position.
        String start = printResult(Search(first), first);
        // End position.
        String end = printResult(Search(sec), sec);

        // printResult returns "" when nothing matched; the line number follows the last '|'.
        int startSep = start.lastIndexOf('|');
        int endSep = end.lastIndexOf('|');
        if (startSep < 0 || endSep < 0) {
            return "";
        }
        String fileName = start.substring(0, startSep);
        int startLine = Integer.parseInt(start.substring(startSep + 1));
        String fileName2 = end.substring(0, endSep);
        int endLine = Integer.parseInt(end.substring(endSep + 1));

        String scrrenString = "";
        if (fileName2.equalsIgnoreCase(fileName)) {
            // Replace ".html" before ".htm" so "x.html" becomes "x.txt", not "x.txtl".
            String tempFileName = new File(fileName).getName()
                    .replace(".html", ".txt")
                    .replace(".htm", ".txt");
            if (bw != null) {
                bw.close(); // release the handle opened by an earlier call
            }
            shopInfoTxt = new File("c:/temp/", tempFileName);
            File outputDir = shopInfoTxt.getParentFile();
            if (outputDir != null && !outputDir.exists()) {
                outputDir.mkdirs();
            }
            if (!shopInfoTxt.exists()) {
                shopInfoTxt.createNewFile();
            }
            bw = new RandomAccessFile(shopInfoTxt, "rw");

            StringBuilder collected = new StringBuilder();
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(fileName), "utf-8"));
            try {
                String line;
                int lineNum = 0;
                System.out.println("sentences from "+startLine+" to "+endLine);
                while ((line = br.readLine()) != null) {
                    lineNum++;
                    if (lineNum >= endLine) {
                        break; // nothing at or past the end line is collected
                    }
                    if (lineNum >= startLine) {
                        collected.append(line);
                    }
                }
            } finally {
                br.close();
            }
            scrrenString = getShortFormat(collected.toString());
            scrrenString = scrrenString.substring(scrrenString.indexOf(":") + 1);
        }
        return scrrenString;
    }

    /** Returns the output text file chosen by the last successful extraction, or {@code null}. */
    public File getShopInfoTxt() {
        return shopInfoTxt;
    }

    /** Returns the open handle on the output text file, or {@code null} when none was opened. */
    public RandomAccessFile getBw() {
        return bw;
    }

    /**
     * Closes the output file and the index searcher; either may be absent.
     *
     * @throws Exception if closing fails
     */
    public void close() throws Exception {
        try {
            if (bw != null) {
                bw.close();
            }
        } finally {
            if (searcher != null) {
                searcher.close();
            }
        }
    }

    /**
     * Removes every {@code <...>} span from {@code content}, trimming the text on
     * both sides of each removed span. Iterative, so heavily tagged input cannot
     * exhaust the stack; an unmatched "<" ends the processing.
     */
    private static String getShortFormat(String content) {
        String finalString = content.trim();
        int open = finalString.indexOf('<');
        while (open > -1) {
            // Look for the closing bracket after the opening one; a stray ">" earlier
            // in the text must not be paired with this "<".
            int close = finalString.indexOf('>', open);
            if (close < 0) {
                break;
            }
            finalString = finalString.substring(0, open).trim()
                    + finalString.substring(close + 1).trim();
            open = finalString.indexOf('<');
        }
        return finalString;
    }


}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值