改进一下昨天写的东西:
建立索引:
package jim.Lucene35;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import tool.FileList;
import tool.FileText;
/**
 * Command-line entry point for the indexing demo.
 */
public class Lucene35 {

    /**
     * Starts the indexing job. All of the work happens inside the
     * {@code creatIndex} constructor, so the instance itself is discarded.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        new creatIndex();
    }
}
/**
 * Builds a Lucene 3.5 index in the {@code index} directory from every file
 * that {@code FileList.getFiles("testFiles")} returns.
 *
 * <p>For each file three fields are written: {@code title} (file name, stored,
 * not analyzed), {@code content} (text from {@code FileText.getText}, analyzed
 * with {@code IKAnalyzer}, not stored) and {@code path} (stored, not analyzed).
 *
 * <p>The whole job runs from the constructor. Any step that fails is reported
 * on standard output and stops the job instead of continuing with a
 * {@code null} directory, writer or file list.
 */
class creatIndex {
    Directory directory = null;     // storage backend of the index (on disk)
    IndexWriter writer = null;      // writes documents into the index
    Document document = null;       // document most recently built
    Field field = null;             // field most recently built
    IndexWriterConfig iwc = null;   // Lucene version + analyzer configuration
    String indexPath = "index";     // directory the index is written to
    String title = "title";         // name of the file most recently processed
    String content = "content";     // text of the file most recently processed
    String[] files = null;          // paths of all files to index

    /**
     * Opens the index, indexes all files and closes the index again.
     * The writer and directory are released even if indexing throws.
     */
    public creatIndex() {
        if (!openWriter()) {
            closeDirectory();
            return;
        }
        boolean listed = false;
        boolean closed = false;
        try {
            listed = indexFiles();
        } finally {
            // Always release the writer (and its write lock) and the directory.
            closed = closeWriter();
            closeDirectory();
        }
        // Only claim success when the file list was read and the writer closed cleanly.
        if (listed && closed) {
            System.out.println("Index is Created!");
        }
    }

    /** Opens the on-disk directory and the index writer; returns false on failure. */
    private boolean openWriter() {
        try {
            directory = FSDirectory.open(new File(indexPath));
        } catch (IOException e) {
            System.out.println("创建Directory时发生错误!");
            e.printStackTrace();
            return false;
        }
        iwc = new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
        try {
            writer = new IndexWriter(directory, iwc);
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are IOExceptions too.
            System.out.println("创建IndexWriter时发生错误!");
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Lists the source folder and indexes each file; returns false if listing failed. */
    private boolean indexFiles() {
        try {
            files = FileList.getFiles("testFiles");
        } catch (IOException e) {
            System.out.println("无法打开存放要搜索的文件的文件夹");
            e.printStackTrace();
            return false;
        }
        if (files == null) {
            // NOTE(review): FileList is not visible here; guard in case it returns null.
            System.out.println("无法打开存放要搜索的文件的文件夹");
            return false;
        }
        for (String path : files) {
            indexFile(new File(path));
        }
        return true;
    }

    /** Builds the title/content/path document for one file and adds it to the index. */
    private void indexFile(File file) {
        title = file.getName();
        content = FileText.getText(file);
        if (content == null) {
            // NOTE(review): FileText is not visible here; Field rejects null values,
            // so skip the file rather than abort the whole run.
            System.out.println("File: " + title + " skipped, no text could be read");
            return;
        }
        document = new Document();
        addField("title", title, Field.Store.YES, Index.NOT_ANALYZED);
        addField("content", content, Field.Store.NO, Index.ANALYZED);
        addField("path", file.getPath(), Field.Store.YES, Index.NOT_ANALYZED);
        try {
            writer.addDocument(document);
            // Report success only after the document was really added.
            System.out.println("File: "+title+" Indexed");
        } catch (IOException e) {
            System.out.println("将Document写入IndexWriter时错误!");
            e.printStackTrace();
        }
    }

    /** Creates one field and appends it to the current document. */
    private void addField(String name, String value, Field.Store store, Index index) {
        field = new Field(name, value, store, index);
        document.add(field);
    }

    /** Closes the writer if it was opened; returns true when nothing went wrong. */
    private boolean closeWriter() {
        if (writer == null) {
            return true;
        }
        try {
            writer.close();
            return true;
        } catch (IOException e) {
            System.out.println("IndexWriter关闭时错误!");
            e.printStackTrace();
            return false;
        }
    }

    /** Closes the directory if it was opened. */
    private void closeDirectory() {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
建立搜索:
package jim.Lucene35;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Runs a fixed query ({@code "java"}) against the {@code content} field of the
 * Lucene 3.5 index stored in the {@code index} directory and prints the
 * {@code title} and {@code path} of up to five hits.
 *
 * <p>The whole job runs from the constructor. Any step that fails is reported
 * on standard output and stops the search instead of continuing with a
 * {@code null} reader, query or result.
 */
class Search {
    Directory directory = null;     // storage backend of the index (on disk)
    String indexPath = "index";     // directory the index is read from
    IndexReader reader = null;      // reads the index
    IndexSearcher searcher = null;  // executes queries against the reader
    QueryParser parser = null;      // parses the query text with the chosen analyzer
    Query query = null;             // parsed query
    TopDocs tds = null;             // raw search result
    Document document = null;       // hit most recently loaded
    ScoreDoc[] sds = null;          // hits taken from tds

    /**
     * Opens the index, runs the query, prints the hits and closes the index.
     * The reader and directory are released even if the search throws.
     */
    public Search() {
        if (!openSearcher()) {
            closeIndex();
            return;
        }
        try {
            runQuery();
        } finally {
            closeIndex();
        }
        System.out.println("Finished");
    }

    /** Opens directory, reader and searcher; returns false on failure. */
    private boolean openSearcher() {
        try {
            directory = FSDirectory.open(new File(indexPath));
        } catch (IOException e) {
            System.out.println("创建Directory时发生错误!");
            e.printStackTrace();
            return false;
        }
        try {
            reader = IndexReader.open(directory);
        } catch (IOException e) {
            // CorruptIndexException is an IOException too.
            System.out.println("创建IndexReader时发生错误!");
            e.printStackTrace();
            return false;
        }
        searcher = new IndexSearcher(reader);
        return true;
    }

    /** Parses the query, searches the top five hits and prints each of them. */
    private void runQuery() {
        parser = new QueryParser(Version.LUCENE_35, "content", new IKAnalyzer());
        try {
            query = parser.parse("java");
        } catch (ParseException e) {
            System.out.println("query = parser.parse(\"keyword\")时发生错误");
            e.printStackTrace();
            return;
        }
        try {
            tds = searcher.search(query, 5);
        } catch (IOException e) {
            System.out.println("std = searcher.search(query,5);时发生错误");
            e.printStackTrace();
            return;
        }
        sds = tds.scoreDocs;
        System.out.println("一共搜索到: "+sds.length+" 条");
        if (sds.length == 0) {
            System.out.println("The word you enter can't be found!");
            return;
        }
        for (ScoreDoc sd : sds) {
            try {
                // Load the document of THIS hit (the old code always loaded sds[0]).
                document = searcher.doc(sd.doc);
            } catch (IOException e) {
                System.out.println("document = searcher.doc(sd.doc);时发生错误");
                e.printStackTrace();
                continue; // do not print a stale or missing document
            }
            System.out.println(document.get("title")+"["+document.get("path")+"]");
        }
    }

    /** Closes the reader and the directory if they were opened. */
    private void closeIndex() {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                System.out.println("关闭reader时发生错误!");
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Command-line entry point for the search demo.
 */
public class Searcher {

    /**
     * Starts the search. All of the work happens inside the {@code Search}
     * constructor, so the instance itself is discarded.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        new Search();
    }
}
今天的收获:
学会使用分词器了.
今天的不足:
对数据解析不太了解.