lucene的IKAnalyzer分词器的使用

最新推荐文章于 2024-01-10 14:08:37 发布

张大仙是个妖怪

最新推荐文章于 2024-01-10 14:08:37 发布

阅读量452

点赞数

本文链接：https://blog.csdn.net/NDKHBWH/article/details/49768149

版权

IKAnalyzer.cfg.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
<properties>  
	<comment>IK Analyzer 扩展配置</comment>
	<!-- Users can configure their own extension dictionaries here -->
	<entry key="ext_dict">ext.dic;</entry> 
	
	<!-- Users can configure their own extension stop-word dictionaries here -->
	<entry key="ext_stopwords">stopword.dic;</entry> 
	
</properties>

ext.dic

stopword.dic

导入 dic 文件：文件编码必须是无 BOM 的 UTF-8。

public class Lucene {
	//创建索引程序
	@Test
	public void createIndex() throws IOException{
		//指定存储索引库位置
		String path = "F:\\indexs";
		//和索引库进行关联
		FSDirectory d = FSDirectory.open(new File(path));
		<span style="color:#FF0000;">//创建分词器
		Analyzer analyzer = new IKAnalyzer();</span>
		//indexWriterConfig是对IndexWriter进行一些配置,lucene版本,最大缓存文档数
		IndexWriterConfig conf = new   IndexWriterConfig(Version.LUCENE_4_10_3,analyzer);
		//使用indexWriter进行写入索引
		IndexWriter indexWriter = new  IndexWriter(d, conf);
		//创建document,IndexWriter把文档对象写入到索引库
		Document doc = new Document();
		//模拟数据
		//StringField:这个域不进行分词，Store.YES:表示存储document。
		//Store.NO:表示不存储。
		doc.add(new StringField("id", "1", Store.YES));
		doc.add(new TextField("title", "Lucene基础教程", Store.YES));
		doc.add(new TextField("content","Lucene是一套用于开发搜索引擎的一套javaAPI！", Store.YES));
		//写入索引
		indexWriter.addDocument(doc);
		//提交
		indexWriter.commit();
		indexWriter.close();
	}
}

	// 查询索引库代码
	@Test
	public void queryIndex() throws Exception {
		// 准备索引库位置目录
		String path = "F:\\indexs";
		// 读取索引
		DirectoryReader reader = DirectoryReader.open(FSDirectory
				.open(new File(path)));
		// 使用indexSearcher来搜索索引库
		IndexSearcher indexSearcher = new IndexSearcher(reader);
		// 准备查询条件
		String qName = "基础";
		// 创建查询解析器
		QueryParser qParser = new QueryParser("title",  <span style="color:#FF0000;">new IKAnalyzer()</span>);
		// 解析查询字段，使用基本分词器算法，给你的查询条件在进行分词
		Query query = qParser.parse(qName);
		// 查询前10条记录,返回匹配度最高的10条记录
		// 返回文档概要信息：文档ID，文档的得分：得分越高，匹配度越高
		TopDocs topDocs = indexSearcher.search(query, 10);
		// 获取文档的命中数
		int totalHits = topDocs.totalHits;
		System.out.println("文档的名字数量：" + totalHits);
		// 获取文档得分数组
		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
		//遍历得分数组
		for (ScoreDoc sdoc : scoreDocs) {
			//获取文档id/
			int docID = sdoc.doc;
			//获取文档得分
			float score = sdoc.score;
			//更据文档id查询文档对象
			Document doc = indexSearcher.doc(docID);
			//获取id,title,context的值
			String id = doc.get("id");
			String title = doc.get("title");
			String content = doc.get("content");
			System.out.println("文档ID"+docID+"||文档得分："+score);
			System.out.println("ID"+id+"||title："+title+"||content"+content);
		}
	}
}