前提:这里之所以用第一个版本,是因为该组件的版本问题造成的冲突
1.jar包
IKAnalyzer3.2.3Stable.jar
lucene-core-3.6.0.jar
package com.xx.utils;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
import com.ishehui.entity.Article;
import com.ishehui.entity.Conchs;
/*****
* IKAnalyzer 中文分词 && 相似度匹配
* @author wenmeishuai
*
*/
public class IkTest {

    /*
     * Analyzer shared by indexing and querying. The SAME analyzer must be used
     * for both phases, otherwise queries will not match the indexed terms.
     * true = IK "max word length" (coarse-grained) segmentation mode.
     */
    private static Analyzer analyzer = new IKAnalyzer(true);

    // Directory holding the on-disk index.
    // (production path was /data/work/videos/conchsindex/, dev uses d:\indexDir\)
    private static File indexFile = new File("d:\\indexDir\\");

    // Fields targeted by the multi-field searches below.
    private static String[] fieldName = {"id", "content"};

    /**
     * Tokenizes {@code text} with IKAnalyzer, printing each token and returning
     * them concatenated, each wrapped in 【】.
     *
     * @param text the phrase to segment
     * @return the wrapped tokens joined together, or "" on I/O failure
     */
    public static String splitWord(String text) {
        StringReader reader = new StringReader(text);
        TokenStream ts = null;
        try {
            ts = analyzer.tokenStream("", reader);
            // FIX: honor the TokenStream contract - reset() before consuming.
            ts.reset();
            CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
            System.out.print("IKAnalyzer把关键字拆分的结果是:");
            // FIX: StringBuilder instead of StringBuffer - no synchronization needed.
            StringBuilder b = new StringBuilder();
            while (ts.incrementToken()) {
                System.out.print("【" + term.toString() + "】");
                b.append("【").append(term.toString()).append("】");
            }
            ts.end();
            return b.toString();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // FIX: the token stream was previously never closed.
            if (ts != null) {
                try {
                    ts.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            reader.close();
        }
        return "";
    }

    /**
     * Builds (or appends to) the on-disk index from a list of Conchs entities.
     * Each entity contributes an "id" field (stored, not analyzed) and a
     * "content" field (stored, analyzed).
     *
     * @param cs          entities to index
     * @param isCreateAll true: wipe the existing index first; false: append
     */
    public static void createConchsIndexFile(List<Conchs> cs, boolean isCreateAll) {
        long startTime = System.currentTimeMillis();
        System.out.println("*****************创建索引开始**********************");
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            // Lucene is backward compatible; pick the config matching our 3.6 jar.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            directory = new SimpleFSDirectory(indexFile);
            indexWriter = new IndexWriter(directory, indexWriterConfig);
            if (isCreateAll) {
                // Avoid duplicate documents: drop everything indexed so far.
                indexWriter.deleteAll();
            }
            for (int i = 0; i < cs.size(); i++) {
                Conchs article = cs.get(i);
                Document doc = new Document();
                // NOTE(review): getId()/getContent() are assumed non-null here;
                // a null would throw NPE and abort the whole batch - confirm.
                doc.add(new Field("id", article.getId().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("content", article.getContent().toString(), Field.Store.YES, Field.Index.ANALYZED));
                indexWriter.addDocument(doc);
                System.out.println("索引添加成功:第" + (i + 1) + "次!!");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        System.out.println("创建索引文件成功,总共花费" + (System.currentTimeMillis() - startTime) + "毫秒。");
        System.out.println("*****************创建索引结束**********************");
    }

    /**
     * Multi-field, single-keyword search against the on-disk index.
     *
     * @param keyword query string
     * @return ids of the matching documents, or null on failure
     */
    public static List<String> getReultFromIndex(String keyword) {
        IndexSearcher isearcher = null;
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(FSDirectory.open(indexFile));
            isearcher = new IndexSearcher(indexReader);
            // Use IK's similarity so scoring matches IK's tokenization.
            isearcher.setSimilarity(new IKSimilarity());
            // IKQueryParser builds the multi-field query directly - no need for
            // a MultiFieldQueryParser + analyzer combination here.
            Query query = IKQueryParser.parseMultiField(fieldName, keyword);
            splitWord(keyword); // show the developer how the keyword was split
            // Fetch the 50 best-scoring hits.
            TopDocs topDocs = isearcher.search(query, 50);
            System.out.println("命中:" + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            List<String> returnList = new ArrayList<String>();
            for (int i = 0; i < scoreDocs.length; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("命中内容: id:" + targetDoc.get("id") + " content:" + targetDoc.get("content"));
                returnList.add(targetDoc.get("id"));
            }
            return returnList;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // FIX: the reader was opened but never closed (resource leak).
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /**
     * Indexes the given entities into a transient RAM index, then runs a
     * multi-field, single-keyword search against it.
     *
     * @param keyword query string
     * @param cs      entities to index ("content" analyzed, "id" also analyzed
     *                to mirror the original behavior)
     * @return ids of the matching documents, or null on failure
     */
    public static List<String> getReultFromMemory(String keyword, List<Conchs> cs) {
        Directory directory = null;
        IndexWriter iwriter = null;
        IndexSearcher isearcher = null;
        try {
            // Build the in-memory index.
            directory = new RAMDirectory();
            iwriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            for (Conchs text : cs) {
                Document doc = new Document();
                doc.add(new Field("content", text.getContent(), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("id", text.getId().toString(), Field.Store.YES, Field.Index.ANALYZED));
                System.out.println("id:" + text.getId() + " text:" + text.getContent());
                iwriter.addDocument(doc);
            }
            // Commit before searching; null the reference so finally won't re-close.
            iwriter.close();
            iwriter = null;
            isearcher = new IndexSearcher(directory);
            // Use IK's similarity so scoring matches IK's tokenization.
            isearcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parseMultiField(fieldName, keyword);
            splitWord(keyword); // show the developer how the keyword was split
            // Fetch the 20 best-scoring hits.
            TopDocs topDocs = isearcher.search(query, 20);
            System.out.println("命中:" + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            List<String> returnList = new ArrayList<String>();
            for (int i = 0; i < scoreDocs.length; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("内容:" + targetDoc.toString() + " id:" + targetDoc.get("id"));
                returnList.add(targetDoc.get("id"));
            }
            return returnList;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // FIX: close the writer if an exception fired before the in-try close().
            if (iwriter != null) {
                try {
                    iwriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /**
     * Dumps the on-disk index by reading every document directly (no query),
     * printing a summary and reconstructing Article objects from the stored
     * "id"/"title"/"content" fields.
     *
     * NOTE(review): createConchsIndexFile() never stores a "title" field, so
     * this method appears to target a different (Article-based) index layout -
     * confirm which index it is meant to read.
     */
    public static void openIndexFile() {
        long startTime = System.currentTimeMillis();
        System.out.println("*****************读取索引开始**********************");
        List<Article> articles = new ArrayList<Article>();
        Directory directory = null;
        IndexReader indexReader = null;
        try {
            directory = new SimpleFSDirectory(indexFile);
            // false = open the reader in read-write mode (allows deleteDocument).
            indexReader = IndexReader.open(directory, false);
            System.out.println("在索引文件中总共插入了" + indexReader.maxDoc() + "条记录。");
            int docLength = indexReader.maxDoc();
            // FIX: guard against an empty index - document(-1) would throw.
            if (docLength > 0) {
                Document minDoc = indexReader.document(0);
                Document maxDoc = indexReader.document(docLength - 1);
                System.out.println("第一个插入的document对象的标题是:" + minDoc.get("title"));
                System.out.println("最后一个插入的document对象的标题是:" + maxDoc.get("title"));
            }
            for (int i = 0; i < docLength; i++) {
                Document doc = indexReader.document(i);
                Article article = new Article();
                if (doc.get("id") == null) {
                    System.out.println("id为空");
                } else {
                    article.setId(Integer.parseInt(doc.get("id")));
                    article.setTitle(doc.get("title"));
                    article.setContent(doc.get("content"));
                    articles.add(article);
                }
            }
            System.out.println("显示所有插入的索引记录:");
            for (Article article : articles) {
                System.out.println(article);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        System.out.println("直接读取索引文件成功,总共花费" + (System.currentTimeMillis() - startTime) + "毫秒。");
        System.out.println("*****************读取索引结束**********************");
    }

    /**
     * Ad-hoc test: searches the on-disk index with a per-field keyword array
     * ("" for id, "Analyzer" for title, "分词" for content).
     *
     * @return ids of the matching documents, or null on failure
     */
    public static List<String> getReultFromIndex11() {
        String keyword = "分词";
        String[] fieldName = {"id", "title", "content"};
        String[] field = {"", "Analyzer", keyword};
        IndexSearcher isearcher = null;
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(FSDirectory.open(indexFile));
            isearcher = new IndexSearcher(indexReader);
            // Use IK's similarity so scoring matches IK's tokenization.
            isearcher.setSimilarity(new IKSimilarity());
            // One keyword per field, parsed by IK's query parser.
            Query query = IKQueryParser.parseMultiField(fieldName, field);
            splitWord(keyword); // show the developer how the keyword was split
            // Fetch the 20 best-scoring hits.
            TopDocs topDocs = isearcher.search(query, 20);
            System.out.println("命中:" + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            List<String> returnList = new ArrayList<String>();
            for (int i = 0; i < scoreDocs.length; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("内容:" + targetDoc.toString() + " id:" + targetDoc.get("id"));
                returnList.add(targetDoc.get("id"));
            }
            return returnList;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // FIX: the reader was opened but never closed (resource leak).
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /** Manual smoke test: searches the on-disk index for a sample phrase. */
    public static void main(String[] args) throws IOException {
        getReultFromIndex("我喜欢小猫");
    }
}