lucene 4.6.0 初学，

最新推荐文章于 2018-09-05 21:10:16 发布

uFreeWo

最新推荐文章于 2018-09-05 21:10:16 发布

阅读量1k

点赞数

分类专栏：搜索引擎 java 文章标签： lucene JAVA 搜索索引

本文链接：https://blog.csdn.net/bjl3738/article/details/37649363

版权

java 同时被 2 个专栏收录

10 篇文章 0 订阅

订阅专栏

搜索引擎

1 篇文章 0 订阅

订阅专栏

Lucene 初学笔记：

创建索引，查询等

需要 lucene-4.6.0 的 jar 包，我直接导入了全部的 jar 包！

lucene-4.6.0.jar包

工具类 LuceneUtils（自己写的）：

package com.test.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


public class LuceneUtils {
	
	/**
	 * 获取IndexWriter来生成索引
	 * 
	 * @param indexpath 索引文件存放的目录文件夹路劲
	 * @param version Version.XXX版本
	 * @param openMode	OpenMode.XXX 索引创建方式，新建，追加等等
	 * @return IndexWriter
	 */
	public static IndexWriter createIndexWriter(String indexpath,
			Version version, OpenMode openMode) {
		IndexWriter writer = null;
		try {
			// 索引文件存放的目录文件夹
			File indexfile = new File(indexpath);
			// lucene是将一句句话，一段话Field，分成一个个词Term进行索引搜索的。
			Analyzer analyzer = new StandardAnalyzer(version);
			// 向E:\\aa保存建立的索引Index内容
			Directory dir = FSDirectory.open(indexfile);
			IndexWriterConfig iwc = new IndexWriterConfig(version, analyzer);
			// 即创建新索引文件，OpenMode.CREATE_OR_APPEND 表示创建或追加到已有索引文件
			iwc.setOpenMode(openMode);
			// 用到IndexWriter类，这里需要传入的参数为：（索引目录文件夹，配置）
			writer = new IndexWriter(dir, iwc);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return writer;
	}

	/**
	 * 创建索引
	 * 
	 * @param dirpath
	 *            被索引的目录文件夹
	 * @param indexpath
	 *            索引文件存放的目录文件夹
	 * @param openMode
	 *            索引创建方式OpenMode.CREATE_OR_APPEND 表示创建或追加到已有索引文件
	 * @return boolean 创建成功或失败
	 */
	public static boolean createDocumentIndex(String dirpath, String indexpath,
			OpenMode openMode) {
		boolean bool = false;
		// 被索引的目录文件夹
		File dirfile = new File(dirpath);
		try {
			IndexWriter writer = createIndexWriter(indexpath,
					Version.LUCENE_46, openMode);

			File[] txtfiles = dirfile.listFiles();
			long startTime = new Date().getTime();

			for (int i = 0; i < txtfiles.length; i++) {
				if (txtfiles[i].isFile()
						&& txtfiles[i].getName().endsWith(".txt")) {
					System.out.println("文件" + txtfiles[i].getCanonicalPath()
							+ "正在索引中。。。");
					// Read将txt内容存进内存
					Reader read = new FileReader(txtfiles[i]);
					// 创建Document的实例
					Document doc = new Document();
					// 将field存进索引的Document
					// Document添加读取的文章内容（缓存在内存中的文章内容read）
					doc.add(new TextField("content", read));
					// Document添加文章对应路径信息等
					doc.add(new TextField("path",
							txtfiles[i].getAbsolutePath(), Store.YES));
					// index加Document，索引创建成功
					writer.addDocument(doc);
				}
			}

			// 查看IndexWriter里面有多少个索引
			System.out.println("numDocs" + writer.numDocs());

			// 索引优化optimize()，合并磁盘上的索引文件，以便减少文件的数量，从而也减少搜索索引的时间
			// 3.5以后已过时 writer.optimize(); 以下替代，使用代价较高
			writer.forceMerge(1);
			// 注意关闭IndexWriter，立即将索引文件写入到目录磁盘中，生成索引文件
			writer.close();
			long endTime = new Date().getTime();
			System.out.println("共花了" + (endTime - startTime) + "毫秒将文档增加到索引中"
					+ indexpath);
			bool = true;
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return bool;
	}

	/**
	 * 查询
	 * 
	 * @param queryStr
	 *            要查询的文字
	 * @param searcher
	 *            查询器
	 * @return 查询结果
	 */
	public static TopDocs queryString(String queryStr, IndexSearcher searcher) {
		TopDocs tDocs = null;
		try {
			if (searcher == null) {
				System.out.println("索引目录不存在");
				return tDocs;
			}
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			QueryParser qp = new QueryParser(Version.LUCENE_46, "content",
					analyzer);
			// 查询命令
			Query query = qp.parse(queryStr);
			// 结果保存在Hits中
			tDocs = searcher.search(query, null, 10000);
		} catch (ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return tDocs;
	}

	/**
	 * 创建查询器
	 * 
	 * @param indexpath
	 *            索引所在路劲
	 * @return
	 * @throws IOException
	 */
	public static IndexSearcher getSearcher(String indexpath)
			throws IOException {
		// 索引目录
		File indexDir = new File(indexpath);
		if (!indexDir.exists()) {
			System.out.println("索引目录不存在");
			return null;
		}
		// 创建directory，Index的映射地址，相当于电话本
		FSDirectory directory = FSDirectory.open(indexDir);
		IndexReader irIndexReader = DirectoryReader.open(directory);
		IndexSearcher searcher = new IndexSearcher(irIndexReader);
		return searcher;
	}

	/**
	 * 高亮设置
	 * 
	 * @param query
	 * @param doc
	 * @param field
	 * @return
	 */
	public static String toHighlighter(Query query, Document doc, String field) {
		try {
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
					"<font color=\"red\">", "</font>");
			Highlighter highlighter = new Highlighter(simpleHtmlFormatter,
					new QueryScorer(query));
			TokenStream tokenStream1 = analyzer.tokenStream("text",
					new StringReader(doc.get(field)));
			String highlighterStr = highlighter.getBestFragment(tokenStream1,
					doc.get(field));
			return highlighterStr == null ? doc.get(field) : highlighterStr;
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidTokenOffsetsException e) {
			e.printStackTrace();
		}
		return null;
	}

	public void sqlTest() {

		String indexpath = "";
		String sql = "";
		String url = "jdbc:mysql://localhost:3306/myuser";
		try {
			Class.forName("com.mysql.jdbc.Driver");
			// 连接数据库
			Connection conn =  DriverManager.getConnection(url,"root" ,"root" );
			PreparedStatement stmt = conn.prepareStatement(sql);
			ResultSet rs = stmt.executeQuery();
			
			IndexWriter indexWriter = createIndexWriter(indexpath, Version.LUCENE_46, OpenMode.CREATE);
//				new IndexWriter(dir, new StandardAnalyzer(
//					Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
			while (rs.next()) {
				System.out.println(rs.getString("name"));
				Document doc = new Document();
				doc.add(new TextField("name", (String)rs.getString("name"), Store.YES));
				indexWriter.addDocument(doc);
			}
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (SQLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}
}

测试类

package com.test.lucene;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

public class LuceneTest {

	/**
	 * Demo: builds an index from the {@code .txt} files in a fixed directory,
	 * searches it for a fixed query string, prints every hit, and finally prints
	 * the fields of the last hit.
	 *
	 * @param args unused
	 */
	public static void main(String[] args )  {
		String dirpath = "E:\\aaDocumentTest";
		String indexpath = "E:\\aa";
		String queryStr = "怎样";
		if (!LuceneUtils.createDocumentIndex(dirpath, indexpath, OpenMode.CREATE)) {
			return;
		}
		IndexSearcher searcher = null;
		try {
			searcher = LuceneUtils.getSearcher(indexpath);
			TopDocs tDocs = LuceneUtils.queryString(queryStr, searcher);
			if (tDocs == null) {
				// queryString returns null for a missing index, an unparsable
				// query, or an I/O error; there is nothing to print.
				return;
			}
			System.out.println("一共索引出:" + tDocs.totalHits + "个文件！");
			List<IndexableField> fList = null;
			// Iterate over the hits actually returned: totalHits counts every
			// match, while scoreDocs holds only as many as the search requested.
			for (int j = 0; j < tDocs.scoreDocs.length; j++) {
				Document doc = searcher.doc(tDocs.scoreDocs[j].doc);
				System.out.println("File:" + doc.toString());

				System.out.println("File:" + doc.getField("path"));

				fList = doc.getFields();
			}

			// fList is still null when the search returned no hits.
			if (fList != null) {
				for (IndexableField indexableField : fList) {
					System.out.println(indexableField.name());
					System.out.println(indexableField.stringValue());
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (searcher != null) {
				// Close the reader that getSearcher opened for this searcher.
				try {
					searcher.getIndexReader().close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

uFreeWo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene 4.6.0 初学，

lucene初学，package com.test.lucene;import java.io.File;import java.io.FileReader;import java.io.IOException;import java.io.Reader;import java.io.StringReader;import java.sql.Connection;i
复制链接

扫一扫