lucene4.6索引器及搜索器源码

最新推荐文章于 2021-02-16 11:21:11 发布

xiuxiupana

最新推荐文章于 2021-02-16 11:21:11 发布

阅读量268

点赞数

分类专栏： java lucene 文章标签： java lucene

本文链接：https://blog.csdn.net/JV_Kevin/article/details/88987811

版权

java 同时被 2 个专栏收录

25 篇文章 0 订阅

订阅专栏

lucene

1 篇文章 0 订阅

订阅专栏

简介：

索引器：

搜索器：

简介：

Lucene是一套用于全文检索和搜寻的开源程式库，由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口，能够做全文索引和搜寻。在Java开发环境里Lucene是一个成熟的免费开源工具。就其本身而言，Lucene是当前以及最近几年最受欢迎的免费Java资讯检索程式库。

人们经常提到资讯检索程式库，虽然与搜索引擎有关，但不应该将资讯检索程式库与搜索引擎相混淆。

全文检索服务主要由两大部分构成：索引器及搜索器。索引器主要负责对文件名称及文件内容进行分词，并创建索引表。搜索器负责检索索引表，获取相关内容信息，并显示。

索引器：

package com.lm.IndexTxt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index over the files found under a data directory.
 *
 * <p>Every matching file becomes one document that holds a single stored, analyzed field
 * whose value is the file's canonical path. File contents are not indexed.
 *
 * @author LM
 */
public class Indexer {

	/** Name of the index field that receives the file path. */
	private final String fieldName;
	/** File-name suffix a file must end with (case-sensitive) to be indexed. */
	private final String endStr;

	/**
	 * @param fieldName name of the field written to every document
	 * @param endStr    file-name suffix used to select files, e.g. {@code ".txt"}
	 */
	public Indexer(String fieldName, String endStr) {
		this.fieldName = fieldName;
		this.endStr = endStr;
	}

	/**
	 * Rebuilds the index from scratch: deletes all existing documents, then indexes every
	 * matching file below {@code dataDir}.
	 *
	 * <p>If indexing fails, the writer is rolled back so the uncommitted delete and partial
	 * additions are discarded, and the write lock is released.
	 *
	 * @param indexDir directory in which the index is stored
	 * @param dataDir  root directory that is scanned recursively for files
	 * @return the writer's document count after indexing
	 * @throws IOException if {@code dataDir} does not exist or is not a directory, or if
	 *                     reading files or writing the index fails
	 * @author lm
	 */
	public int index(File indexDir, File dataDir) throws IOException {
		// The data path must be an existing directory.
		if (!dataDir.exists() || !dataDir.isDirectory()) {
			throw new IOException(dataDir + " does not exist or is not a directory");
		}

		Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46, true);
		IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);

		Directory directory = FSDirectory.open(indexDir);
		try {
			// NOTE(review): forcibly clearing the lock is only safe when no other process or
			// thread is writing to this index; kept for compatibility with earlier runs that
			// may have left a stale lock behind.
			if (IndexWriter.isLocked(directory)) {
				IndexWriter.unlock(directory);
			}

			IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
			boolean success = false;
			try {
				writer.deleteAll(); // start from an empty index
				indexDirectory(writer, dataDir);
				int numIndexed = writer.numDocs();
				writer.close(); // commits the changes
				success = true;
				return numIndexed;
			} finally {
				if (!success) {
					// Discard uncommitted changes and release the write lock.
					writer.rollback();
				}
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Recursively walks {@code dir} and indexes every file whose name ends with the
	 * configured suffix.
	 *
	 * @param writer writer that receives the documents
	 * @param dir    directory to scan
	 * @throws IOException if indexing a file fails
	 * @author lm
	 */
	public void indexDirectory(IndexWriter writer, File dir) throws IOException {
		File[] files = dir.listFiles();
		if (files == null) {
			// listFiles() returns null when dir is not a directory or cannot be read.
			System.err.println("Skipping unreadable directory: " + dir);
			return;
		}
		for (File file : files) {
			if (file.isDirectory()) {
				indexDirectory(writer, file);
			} else if (file.getName().endsWith(this.endStr)) {
				indexFile(writer, file);
			}
		}
	}

	/**
	 * Adds one document for {@code f} containing its canonical path in the configured field.
	 * Hidden, missing or unreadable files are skipped silently.
	 *
	 * @param writer writer that receives the document
	 * @param f      file to index
	 * @throws IOException if the canonical path cannot be resolved or the document cannot
	 *                     be added
	 * @author lm
	 */
	@SuppressWarnings("deprecation")
	public void indexFile(IndexWriter writer, File f) throws IOException {
		if (f.isHidden() || !f.exists() || !f.canRead()) {
			return;
		}
		String path = f.getCanonicalPath();
		System.out.println("Indexing: " + path);

		Document doc = new Document();
		// Only the path is indexed; use getTXT(f, charset) to add a content field if needed.
		doc.add(new Field(this.fieldName, path, Field.Store.YES, Field.Index.ANALYZED));
		writer.addDocument(doc);
	}

	/**
	 * Reads a text file into a single string. Lines are concatenated without any
	 * separator, so line breaks are not preserved.
	 *
	 * @param file    file to read
	 * @param charset name of the charset used to decode the file
	 * @return the file's lines joined together
	 * @throws IOException if the file cannot be opened or read, or the charset is unsupported
	 * @author lm
	 */
	public String getTXT(File file, String charset) throws IOException {
		FileInputStream fileInputStream = new FileInputStream(file);
		try {
			BufferedReader reader =
					new BufferedReader(new InputStreamReader(fileInputStream, charset));
			StringBuilder result = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				result.append(line);
			}
			return result.toString();
		} finally {
			// Closing the underlying stream also covers a failed reader construction.
			fileInputStream.close();
		}
	}

	/** Demo entry point: indexes the {@code .txt} files under a fixed directory. */
	public static void main(String[] args) {

		Indexer indexer = new Indexer("filepath", ".txt");
		try {
			File indexDir = new File("d:\\lucene\\index");
			File dataDir = new File("d:\\lucene\\file");
			int result = indexer.index(indexDir, dataDir);
			System.out.println("indexing " + result + " files.");
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

}

运行结果：

Indexing: D:\lucene\file\2013年新员工报到通知.txt

Indexing: D:\lucene\file\create a index.txt

Indexing: D:\lucene\file\数据分析挖掘.txt

Indexing: D:\lucene\file\数据汇总统计.txt

Indexing: D:\lucene\file\数据预测与优化.txt

Indexing: D:\lucene\file\运行 Indexer.txt

indexing 6 files.

搜索器：

package com.lm.IndexTxt;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches a Lucene index and prints the stored value of the searched field for each hit.
 *
 * @author LM
 */
public class Searcher {

	/** Number of hits returned by {@link #search(String)}. */
	private static final int DEFAULT_MAX_HITS = 6;

	/** Directory that contains the index. */
	private File indexDir;
	/** Name of the field that is both queried and printed. */
	private String fieldName;

	/**
	 * @param IndexDir  path of the directory that contains the index
	 * @param fieldName name of the field to query; must match the name used at index time
	 */
	public Searcher(String IndexDir, String fieldName) {
		this.indexDir = new File(IndexDir);
		this.fieldName = fieldName;
	}

	/**
	 * Runs a query and prints at most {@value #DEFAULT_MAX_HITS} matching documents.
	 *
	 * @param keywords query text in Lucene query-parser syntax
	 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
	 * @author lm
	 */
	public void search(String keywords) throws Exception {
		search(keywords, DEFAULT_MAX_HITS);
	}

	/**
	 * Runs a query and prints the top matching documents. The hit summary is written to
	 * standard error and one line per hit to standard output.
	 *
	 * @param keywords query text in Lucene query-parser syntax
	 * @param maxHits  maximum number of hits to print; must be positive
	 * @throws IllegalArgumentException if {@code maxHits} is not positive
	 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
	 */
	@SuppressWarnings("deprecation")
	public void search(String keywords, int maxHits) throws Exception {
		if (maxHits <= 0) {
			throw new IllegalArgumentException("maxHits must be > 0 but was " + maxHits);
		}

		// 1. Open the index.
		Directory fsDir = FSDirectory.open(indexDir);
		try {
			IndexReader reader = IndexReader.open(fsDir);
			try {
				IndexSearcher is = new IndexSearcher(reader);

				// 2. Parse the query against the configured field.
				Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46, true);
				QueryParser queryParser = new QueryParser(Version.LUCENE_46, fieldName, analyzer);
				Query query = queryParser.parse(keywords);

				// 3. Search the index.
				TopDocs docs = is.search(query, maxHits);

				// 4. Print the matching documents.
				ScoreDoc[] scoreDoc = docs.scoreDocs;
				System.err.println("Found " + docs.totalHits + " document(s) that matched query '" + keywords + "'.");
				for (int i = 0; i < scoreDoc.length; i++) {
					Document midDoc = reader.document(scoreDoc[i].doc);
					System.out.println(midDoc.get(fieldName));
				}
			} finally {
				// Release the reader even when parsing or searching fails.
				reader.close();
			}
		} finally {
			fsDir.close();
		}
	}

	/** Demo entry point: searches a fixed index for a fixed keyword. */
	public static void main(String[] args) {
		Searcher searcher = new Searcher("d:\\lucene\\index", "filepath");
		try {
			searcher.search("预测");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}

结果：

Found 1 document(s) that matched query '预测'.
D:\lucene\file\数据预测与优化.txt

xiuxiupana

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene4.6索引器及搜索器源码

目录简介：索引器：搜索器：简介：Lucene是一套用于全文检索和搜寻的开源程式库，由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口，能够做全文索引和搜寻。在Java开发环境里Lucene是一个成熟的免费开源工具。就其本身而言，Lucene是当前以及最近几年最受欢迎的免费Java资讯检索程式库。人们经常提到资讯检索程式库，虽然与搜索引擎有关，...
复制链接

扫一扫