Lucene3.5 实例

最新推荐文章于 2018-02-08 17:16:14 发布

Xwin1989

最新推荐文章于 2018-02-08 17:16:14 发布

阅读量3.1k

点赞数 1

分类专栏： Java Lucene 文章标签： lucene string myeclipse exception file path

本文链接：https://blog.csdn.net/Xwin1989/article/details/7212292

版权

Java 同时被 2 个专栏收录

11 篇文章 0 订阅

订阅专栏

Lucene

1 篇文章 0 订阅

订阅专栏

方法主体

package cn.vincent;

import java.io.File;
import java.io.IOException;
import java.util.logging.SimpleFormatter;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * Small Lucene 3.5 demo: adds a text file to an on-disk index and runs a
 * multi-field query against it, printing each hit with the matching part of
 * its content highlighted.
 */
public class LuceneSearch {

	/** Directory on disk that holds the Lucene index. */
	File filePath = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneIndex");
	
	/** Sample data file; not referenced by the methods of this class. */
	File file = new File(
			"E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt");
	/** Sample data file that {@link #createIndex()} adds to the index. */
	File file2 = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\小笑话_总统的房间 Room .txt");
	/** Analyzer shared by indexing, query parsing and highlighting. */
	private Analyzer analyzer = new IKAnalyzer();

	/**
	 * Adds {@link #file2} to the index stored under {@link #filePath}.
	 *
	 * @throws Exception if the index cannot be opened or written
	 */
	@Test
	public void createIndex() throws Exception {
		buildIndex(file2);
	}

	/**
	 * Converts the given file to a Lucene document and appends it to the
	 * index, creating the index first if it does not exist yet.
	 *
	 * @param myfile file to index
	 * @throws Exception if the index cannot be opened or written
	 */
	private void buildIndex(File myfile) throws Exception {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
		iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
		// Open the writer before entering the try block: if opening fails there
		// is nothing to close, and the original failure is reported instead of
		// a NullPointerException thrown from the finally clause.
		IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filePath), iwc);
		try {
			Document doc = FileToDocument.fileToDocument(myfile);
			indexWriter.addDocument(doc);
		} finally {
			indexWriter.close(); // Must be closed, otherwise Lucene cannot save the index.
		}
	}

	/**
	 * Parses the query string "room" against the "name" and "content" fields
	 * and prints the matching documents.
	 *
	 * @throws IllegalStateException if parsing or searching fails, so that the
	 *         test is reported as failed instead of silently passing
	 */
	@Test
	public void luceneQuery() {
		String queryStr = "room";
		String[] fields = { "name", "content" };
		try {
			// Build a parser that searches every listed field.
			QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
			Query query = queryParser.parse(queryStr);
			search(query);
		} catch (Exception e) {
			// Propagate with the cause attached; only printing the stack trace
			// would let a broken query pass as a green test.
			throw new IllegalStateException("Lucene query failed: " + queryStr, e);
		}
	}

	/**
	 * Runs the query against the index, prints the total hit count and then
	 * prints up to 10 hits as {@link MyFile} objects with highlighted content.
	 *
	 * @param query parsed query to execute
	 * @throws Exception if the index cannot be read or highlighting fails
	 */
	private void search(Query query) throws Exception {
		IndexReader r = IndexReader.open(FSDirectory.open(filePath));
		try {
			IndexSearcher indexSearcher = new IndexSearcher(r);
			try {
				indexSearcher.setSimilarity(new IKSimilarity());

				TopDocs docs = indexSearcher.search(query, 10);
				int totalHits = docs.totalHits;
				System.out.println("total : " + totalHits);

				// Highlighting: wrap matched terms in a red <font> tag and cut
				// the text into fragments of 50 characters.
				Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");
				Scorer scorer = new QueryScorer(query);
				Highlighter highlighter = new Highlighter(formatter, scorer);
				Fragmenter fragmenter = new SimpleFragmenter(50);
				highlighter.setTextFragmenter(fragmenter);

				// Load the data of the current result page.
				for (ScoreDoc doc : docs.scoreDocs) {
					Document document = indexSearcher.doc(doc.doc);
					MyFile myFile = new MyFile();
					myFile.setName(document.get("name"));
					myFile.setSize(NumberTools.stringToLong(document.get("size")));
					myFile.setPath(document.get("path"));
					myFile.setContent(highlightContent(highlighter, document.get("content")));

					System.out.println(myFile);
				}
			} finally {
				// Resources must be closed after use, also when the search fails.
				indexSearcher.close();
			}
		} finally {
			r.close();
		}
	}

	/**
	 * Returns the best highlighted fragment of the stored content, falling
	 * back to at most the first 50 characters when the highlighter finds no
	 * keyword in it.
	 *
	 * @param highlighter configured highlighter
	 * @param content stored value of the "content" field, may be null
	 * @return highlighted fragment, a plain prefix, or null if content is null
	 * @throws Exception if highlighting fails
	 */
	private String highlightContent(Highlighter highlighter, String content) throws Exception {
		if (content == null) {
			// The document has no stored "content" value; nothing to show.
			return null;
		}
		// getBestFragment yields null when the keyword does not occur in the value.
		String hc = highlighter.getBestFragment(analyzer, "content", content);
		if (hc == null) {
			int endIndex = Math.min(50, content.length());
			hc = content.substring(0, endIndex); // at most the first 50 characters
		}
		return hc;
	}
}

实体类

package cn.vincent;

/**
 * Plain data holder describing an indexed file: its name, text content,
 * size and path.
 */
public class MyFile {
	private String name;
	private String content;
	private Long size;
	private String path;

	/** @return the file name */
	public String getName() {
		return name;
	}

	/** @param name the file name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the text content (or a highlighted excerpt of it) */
	public String getContent() {
		return content;
	}

	/** @param content the text content */
	public void setContent(String content) {
		this.content = content;
	}

	/** @return the size value, may be null if never set */
	public Long getSize() {
		return size;
	}

	/** @param size the size value */
	public void setSize(Long size) {
		this.size = size;
	}

	/** @return the file path */
	public String getPath() {
		return path;
	}

	/** @param path the file path */
	public void setPath(String path) {
		this.path = path;
	}

	/**
	 * Returns the four properties as labelled lines ("name:", "content:",
	 * "size:", "path:") joined by the platform line separator.
	 *
	 * <p>The text is returned rather than printed, so that string
	 * concatenation, logging or a debugger can call this method without
	 * producing console output, and so that the result actually carries the
	 * object's state.
	 *
	 * @return multi-line description of this object, without a trailing
	 *         line separator
	 */
	@Override
	public String toString() {
		String nl = System.getProperty("line.separator");
		return "name:" + this.name + nl
				+ "content:" + this.content + nl
				+ "size:" + this.size + nl
				+ "path:" + this.path;
	}
}

操作类：

package cn.vincent;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumberTools;

/**
 * Converts a file on disk into a Lucene {@link Document} with the fields
 * "name", "content", "size" and "path".
 */
@SuppressWarnings("deprecation")
public class FileToDocument {
	/**
	 * Builds a Lucene document from the given file.
	 *
	 * <p>"name" and "content" are stored and analyzed; "size" (encoded with
	 * {@link NumberTools#longToString(long)}) and "path" are stored and
	 * indexed without analysis.
	 *
	 * @param path file to convert
	 * @return document describing the file; "content" is empty or partial if
	 *         the file could not be read completely
	 */
	public static  Document fileToDocument(File path){
		MyFile file = new MyFile();
		file.setName(path.getName());
		file.setContent(readFileContent(path));
		file.setSize(path.length());
		file.setPath(path.getPath());

		Document doc = new Document();
		doc.add(new Field("name",file.getName(),Store.YES,Index.ANALYZED));
		doc.add(new Field("content",file.getContent(),Store.YES,Index.ANALYZED));
		doc.add(new Field("size",NumberTools.longToString(file.getSize()),Store.YES,Index.NOT_ANALYZED));
		doc.add(new Field("path",file.getPath(),Store.YES,Index.NOT_ANALYZED));
		return doc;
	}
	
	/**
	 * Reads the whole file as text, joining its lines with '\n'.
	 *
	 * <p>I/O errors are handled best-effort: the stack trace is printed and
	 * whatever was read up to that point is returned.
	 *
	 * @param path file to read
	 * @return the text read, possibly empty
	 */
	private static String readFileContent(File path){
		StringBuilder content = new StringBuilder();
		BufferedReader bfReader = null;
		try {
			// NOTE(review): decodes with the platform default charset; confirm
			// that this matches the encoding of the data files.
			bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
			String line = bfReader.readLine();
			while (line != null) {
				content.append(line);
				line = bfReader.readLine();
				if (line != null) {
					// readLine strips the line terminator; put a separator back
					// so the last word of one line does not fuse with the first
					// word of the next before analysis.
					content.append('\n');
				}
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the file handle, also after a read failure.
			if (bfReader != null) {
				try {
					bfReader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
		return content.toString();
	}
}

写的时候发现，封装 Long --> String 转换的类 NumberTools 已经被标识为过时（deprecated）。NumberTools.longToString / stringToLong 方法用 36 进制来缩短 Long 的字符串长度，

找半天似乎也没有找到替代的？有谁知道不~

Xwin1989

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene3.5 实例

方法主体package cn.vincent;import java.io.File;import java.io.IOException;import java.util.logging.SimpleFormatter;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.cn
复制链接

扫一扫

专栏目录