每日一练搜索改进版_平台搜索改进-CSDN博客

本文链接：https://blog.csdn.net/jimzhai/article/details/7506404

改进一下昨天写的东西:

建立索引:

package jim.Lucene35;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import tool.FileList;
import tool.FileText;

/**
 * Command-line entry point for the indexing step.
 */
public class Lucene35 {

	/**
	 * Starts indexing by instantiating {@code creatIndex}; all of the work
	 * is performed inside that constructor.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		new creatIndex();
	}

}
/**
 * Builds a Lucene index on disk from the files listed by
 * {@code FileList.getFiles("testFiles")}.
 *
 * <p>All work happens in the constructor. For every file three fields are
 * written: {@code title} (file name, stored, not analyzed), {@code content}
 * (file text, analyzed, not stored) and {@code path} (file path, stored, not
 * analyzed).
 *
 * <p>Any failure while opening the directory, creating the writer or listing
 * the files aborts the run instead of continuing with {@code null} references,
 * and the writer and directory are always closed once they have been opened.
 *
 * <p>NOTE(review): the {@code IndexWriterConfig} is left at its default open
 * mode. If that mode appends to an existing index, running this class twice
 * will index every file twice - confirm against the Lucene documentation.
 */
class creatIndex{
	Directory directory = null;// decides how the index directory is stored
	IndexWriter writer = null;// the index writer
	Document document = null;// the document currently being built
	Field field = null;// the field currently being built
	IndexWriterConfig iwc = null;// selects the Lucene version and the analyzer
	String indexPath = "index";// location of the index directory
	String title = "title";// overwritten with the name of each indexed file
	String content = "content";// overwritten with the text of each indexed file
	String [] files = null;// paths of all files found in the source folder

	/**
	 * Opens the index, indexes every file and closes the index again.
	 * Errors are reported on standard output; "Index is Created!" is printed
	 * only when every file was added and the writer closed cleanly.
	 */
	public creatIndex(){
		try {
			directory = FSDirectory.open(new File(indexPath));// on-disk storage
		} catch (IOException e) {
			System.out.println("创建Directory时发生错误!");
			e.printStackTrace();
			return;// nothing has been opened yet, nothing to clean up
		}
		iwc = new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
		try {
			writer = new IndexWriter(directory,iwc);
		} catch (IOException e) {
			// also covers CorruptIndexException and LockObtainFailedException
			System.out.println("创建IndexWriter时发生错误!");
			e.printStackTrace();
			closeDirectory();
			return;
		}
		boolean indexed = false;
		boolean closed = false;
		try {
			indexed = indexFiles();
		} finally {
			// always release the writer (and its write lock) and the directory
			closed = closeWriter();
			closeDirectory();
		}
		if (indexed && closed) {
			System.out.println("Index is Created!");
		}
	}

	/**
	 * Lists the source files and adds one document per file to the writer.
	 * A file that cannot be added is reported and skipped.
	 *
	 * @return {@code true} if the file list was read and every file was added
	 */
	private boolean indexFiles(){
		try {
			files = FileList.getFiles("testFiles");// folder holding the files to index
		} catch (IOException e) {
			System.out.println("无法打开存放要搜索的文件的文件夹");
			e.printStackTrace();
			return false;
		}
		if (files == null) {
			// defensive: FileList is not visible here, so a null result is not ruled out
			System.out.println("无法打开存放要搜索的文件的文件夹");
			return false;
		}
		int failures = 0;
		for (String filePath : files) {
			File file = new File(filePath);
			document = new Document();
			title = file.getName();
			field = new Field("title",title,Field.Store.YES,Index.NOT_ANALYZED);
			document.add(field);
			content = FileText.getText(file);
			if (content == null) {
				// defensive: a null value would make the Field constructor throw
				content = "";
			}
			field = new Field("content",content,Field.Store.NO,Index.ANALYZED);
			document.add(field);
			field = new Field("path",file.getPath(),Field.Store.YES,Index.NOT_ANALYZED);
			document.add(field);
			try {
				writer.addDocument(document);
				System.out.println("File: "+title+"  Indexed");
			} catch (IOException e) {
				System.out.println("将Document写入IndexWriter时错误！");
				e.printStackTrace();
				failures++;
			}
		}
		return failures == 0;
	}

	/**
	 * Closes the index writer.
	 *
	 * @return {@code true} if the writer closed without error
	 */
	private boolean closeWriter(){
		try {
			writer.close();
			return true;
		} catch (IOException e) {
			System.out.println("IndexWriter关闭时错误！");
			e.printStackTrace();
			return false;
		}
	}

	/** Closes the index directory, reporting but not propagating any error. */
	private void closeDirectory(){
		try {
			directory.close();
		} catch (IOException e) {
			System.out.println("关闭Directory时发生错误!");
			e.printStackTrace();
		}
	}

}

建立搜索:

package jim.Lucene35;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Runs a keyword query against the {@code content} field of the on-disk
 * index and prints the title and path of up to five matching documents.
 *
 * <p>All work happens in the constructor. A failure at any step is reported
 * on standard output and aborts the search; the reader and directory are
 * closed on every path once they have been opened.
 */
class Search {

	Directory directory = null;// how the index is stored
	String indexPath = "index";// location of the index directory
	IndexReader reader = null;// reads the index
	IndexSearcher searcher = null;// searches through the reader
	QueryParser parser = null;// fixes the Lucene version, default field and analyzer
	Query query = null;// the parsed query
	TopDocs tds = null;// raw search result
	Document document = null;// the hit currently being printed
	ScoreDoc[] sds = null;// the hits taken from TopDocs

	/** Searches for the default keyword {@code "java"}. */
	public Search(){
		this("java");
	}

	/**
	 * Searches the index for the given keyword and prints the hits.
	 *
	 * @param keyword query text in QueryParser syntax; must not be {@code null}
	 * @throws IllegalArgumentException if {@code keyword} is {@code null}
	 */
	public Search(String keyword){
		if (keyword == null) {
			throw new IllegalArgumentException("keyword must not be null");
		}
		try {
			directory = FSDirectory.open(new File(indexPath));// on-disk storage
		} catch (IOException e) {
			System.out.println("创建Directory时发生错误!");
			e.printStackTrace();
			return;
		}
		try {
			reader = IndexReader.open(directory);
		} catch (IOException e) {
			// also covers CorruptIndexException
			System.out.println("创建IndexReader时发生错误!");
			e.printStackTrace();
			closeResources();
			return;
		}
		try {
			runQuery(keyword);
		} finally {
			closeResources();
		}
		System.out.println("Finished");
	}

	/**
	 * Parses the keyword, executes the search and prints each hit as
	 * {@code title[path]}.
	 */
	private void runQuery(String keyword){
		searcher = new IndexSearcher(reader);
		parser = new QueryParser(Version.LUCENE_35,"content",new IKAnalyzer());
		try {
			query = parser.parse(keyword);
		} catch (ParseException e) {
			System.out.println("query = parser.parse(\"keyword\")时发生错误");
			e.printStackTrace();
			return;
		}
		try {
			tds = searcher.search(query,5);
		} catch (IOException e) {
			System.out.println("tds = searcher.search(query,5);时发生错误");
			e.printStackTrace();
			return;
		}
		sds = tds.scoreDocs;
		System.out.println("一共搜索到: "+sds.length+" 条");
		if (sds.length == 0) {
			System.out.println("The word you enter can't be found!");
			return;
		}
		for (ScoreDoc sd : sds) {
			try {
				// load the document of THIS hit (the old code always loaded sds[0])
				document = searcher.doc(sd.doc);
			} catch (IOException e) {
				System.out.println("document = searcher.doc(sd.doc);时发生错误");
				e.printStackTrace();
				continue;// do not print a stale or null document
			}
			System.out.println(document.get("title")+"["+document.get("path")+"]");
		}
	}

	/** Closes the reader (if it was opened) and the directory, reporting any error. */
	private void closeResources(){
		if (reader != null) {
			try {
				reader.close();
			} catch (IOException e) {
				System.out.println("关闭reader时发生错误!");
				e.printStackTrace();
			}
		}
		try {
			directory.close();
		} catch (IOException e) {
			System.out.println("关闭Directory时发生错误!");
			e.printStackTrace();
		}
	}

}
/**
 * Command-line entry point for the search step.
 */
public class Searcher {

	/**
	 * Runs the search by instantiating {@code Search}; the query is executed
	 * and its results are printed from inside that constructor.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		new Search();
	}

}

今天的收获:

学会使用分词器了.

今天的不足:

对数据解析不太了解.