package cn;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Small demonstration of create / update / delete / query operations on a
 * Lucene index, using IKAnalyzer for tokenization.
 *
 * <p>Every operation works on the same on-disk index, located in the
 * {@code dir} folder under the JVM working directory ({@code user.dir}).
 * Documents are built from the files found in {@code src/txt} under the
 * working directory and carry three stored fields: {@code id},
 * {@code title} and {@code content}.
 */
public class CURDIndex {

    /** Index location shared by all operations, so queries read what the writers wrote. */
    private static final String INDEX_PATH =
            System.getProperty("user.dir") + File.separator + "dir";

    /** Folder whose files are turned into documents by {@link #add()} and {@link #update()}. */
    private static final String SOURCE_PATH =
            System.getProperty("user.dir") + File.separator + "src" + File.separator + "txt";

    /** Maximum number of hits fetched and printed by each query method. */
    private static final int MAX_HITS = 12;

    /**
     * Entry point. Currently deletes every document in the index; the other
     * operations are left here, commented out, as switches for manual runs.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or modified
     */
    public static void main(String[] args) throws Exception {
        //add();
        //update();
        delete(0);
        //showAll();
        //queryByPrefixQuery("寂寞");
        //queryByRegexpQuery("寂寞");
    }

    /**
     * Adds one document per regular file in the source folder. Files are
     * processed in sorted path order and numbered from 1; that number is
     * stored in the {@code id} field.
     *
     * @throws Exception if the source folder cannot be listed or the index
     *                   cannot be written
     */
    public static void add() throws Exception {
        // List the files before taking the index write lock, so a missing
        // source folder does not leave a writer open.
        File[] files = listSourceFiles();
        Directory directory = openIndexDirectory();
        try {
            IndexWriter iw = openWriter(directory);
            try {
                int id = 1;
                for (File f : files) {
                    iw.addDocument(buildDocument(String.valueOf(id), f.getName(), f));
                    id++;
                }
                // One commit for the whole batch instead of one per document.
                iw.commit();
            } finally {
                iw.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Replaces the document whose {@code id} is {@code "1"} with a fresh
     * document built from the first source file (in sorted path order), with
     * its title prefixed by {@code "2_"}.
     *
     * @throws Exception if the source folder is missing or empty, or the
     *                   index cannot be written
     */
    public static void update() throws Exception {
        File[] files = listSourceFiles();
        if (files.length == 0) {
            throw new IllegalStateException("No source files found in " + SOURCE_PATH);
        }
        File first = files[0];
        Directory directory = openIndexDirectory();
        try {
            IndexWriter iw = openWriter(directory);
            try {
                Document d = buildDocument("1", "2_" + first.getName(), first);
                // Deletes every document matching id=1, then adds the new one.
                iw.updateDocument(new Term("id", "1"), d);
                iw.commit();
            } finally {
                iw.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Deletes documents from the index.
     *
     * @param type {@code 0} deletes every document; any other value deletes
     *             only the documents whose {@code id} is {@code "1"}
     * @throws Exception if the index cannot be opened or written
     */
    public static void delete(int type) throws Exception {
        Directory directory = openIndexDirectory();
        try {
            IndexWriter iw = openWriter(directory);
            try {
                if (type == 0) {
                    iw.deleteAll();
                } else {
                    iw.deleteDocuments(new Term("id", "1"));
                }
                iw.commit();
            } finally {
                iw.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Query 1: prints every live document in the index.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public static void showAll() throws Exception {
        Directory directory = openIndexDirectory();
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                // Document numbers run up to maxDoc() and may include deleted
                // documents; liveDocs is null when the index has no deletions.
                Bits liveDocs = MultiFields.getLiveDocs(reader);
                for (int i = 0; i < reader.maxDoc(); i++) {
                    if (liveDocs != null && !liveDocs.get(i)) {
                        continue;
                    }
                    Document d = reader.document(i);
                    System.out.println("i:" + i);
                    System.out.println("id:" + d.get("id"));
                    System.out.println("title:" + d.get("title"));
                    System.out.println("content:" + d.get("content"));
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Query 2: exact term lookup ({@link TermQuery}) on the {@code content}
     * field. Results are printed without highlighting.
     *
     * @param keyword the term to look up
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByKeyword(String keyword) throws Exception {
        runQuery(new TermQuery(new Term("content", keyword)), false);
    }

    /**
     * Query 3: prefix search ({@link PrefixQuery}) on the {@code content}
     * field, e.g. terms starting with the given keyword.
     *
     * @param keyword the term prefix
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByPrefixQuery(String keyword) throws Exception {
        runQuery(new PrefixQuery(new Term("content", keyword)), true);
    }

    /**
     * Query 4: wildcard search ({@link WildcardQuery}) on the {@code content}
     * field; the keyword is used as the wildcard pattern.
     *
     * @param keyword the wildcard pattern
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByWildcardQuery(String keyword) throws Exception {
        runQuery(new WildcardQuery(new Term("content", keyword)), true);
    }

    /**
     * Query 5: fuzzy search ({@link FuzzyQuery}) on the {@code content}
     * field. Fuzzy matching is based on similarity to the keyword; it does
     * not use a {@code %} wildcard syntax.
     *
     * @param keyword the term to match approximately
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByFuzzyQuery(String keyword) throws Exception {
        runQuery(new FuzzyQuery(new Term("content", keyword)), true);
    }

    /**
     * Query 6: regular-expression search ({@link RegexpQuery}) on the
     * {@code content} field; the keyword is used as the pattern.
     *
     * @param keyword the regular expression
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByRegexpQuery(String keyword) throws Exception {
        runQuery(new RegexpQuery(new Term("content", keyword)), true);
    }

    /**
     * Query 7: {@link BooleanQuery} combining three clauses over the
     * {@code content} field, all built from the same keyword.
     *
     * <p>NOTE(review): the MUST_NOT fuzzy clause presumably also matches the
     * keyword itself, so this combination may return nothing for a plain
     * literal keyword; confirm the intended clause inputs.
     *
     * @param keyword text used for all three clauses
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByBooleanQuery(String keyword) throws Exception {
        BooleanQuery bool = new BooleanQuery();
        bool.add(new RegexpQuery(new Term("content", keyword)), Occur.MUST);     // must match
        bool.add(new FuzzyQuery(new Term("content", keyword)), Occur.MUST_NOT);  // must not match
        bool.add(new WildcardQuery(new Term("content", keyword)), Occur.SHOULD); // optional
        runQuery(bool, true);
    }

    /**
     * Query 8: {@link PhraseQuery} for the two fixed terms, in order, in the
     * {@code content} field.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByPhraseQuery() throws Exception {
        PhraseQuery pq = new PhraseQuery();
        //pq.setSlop(2); // allowed distance between the terms
        pq.add(new Term("content", "我们")); // order matters
        pq.add(new Term("content", "你们"));
        runQuery(pq, true);
    }

    /**
     * Query 9: {@link MultiPhraseQuery} for the two fixed terms in the
     * {@code content} field.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public static void queryByMultiPhraseQuery() throws Exception {
        MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
        // All terms of a MultiPhraseQuery must belong to the same field;
        // mixing fields makes add() throw IllegalArgumentException.
        multiPhraseQuery.add(new Term("content", "我们"));
        multiPhraseQuery.add(new Term("content", "你们"));
        runQuery(multiPhraseQuery, true);
    }

    /**
     * Reads a UTF-8 text file into a string. Lines are joined with
     * {@code '\n'} so that words on adjacent lines are not fused together.
     *
     * @param file the file to read
     * @return the file's text, or an empty string if it could not be read
     *         (the failure is printed to standard error)
     */
    public static String readTxt(File file) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
            StringBuilder text = new StringBuilder();
            String line;
            boolean firstLine = true;
            while ((line = br.readLine()) != null) {
                if (!firstLine) {
                    text.append('\n');
                }
                text.append(line);
                firstLine = false;
            }
            return text.toString();
        } catch (Exception e) {
            // Best effort: an unreadable file yields empty content.
            e.printStackTrace();
            return "";
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Prints the best-matching fragment of a document's {@code content}
     * field with the matched terms wrapped in 【 】. Prints {@code null} as
     * the fragment when the highlighter finds no match, and prints nothing
     * when the document has no stored {@code content}.
     *
     * @param query the query whose matches should be highlighted
     * @param d     a document returned by that query
     * @throws Exception if tokenizing or highlighting fails (for example
     *                   {@link InvalidTokenOffsetsException})
     */
    public static void markKeyWord(Query query, Document d) throws Exception {
        String content = d.get("content");
        if (content == null) {
            return;
        }
        Analyzer analyzer = new IKAnalyzer();
        QueryScorer scorer = new QueryScorer(query);
        Formatter formatter = new SimpleHTMLFormatter(" 【", "】 ");
        Highlighter highlight = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(888); // fragment size in characters
        highlight.setTextFragmenter(fragmenter);
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
        String highlightStr = highlight.getBestFragment(tokenStream, content);
        System.out.println("【显示搜索结果位置】");
        System.out.println(highlightStr);
    }

    /** Opens the shared on-disk index directory. */
    private static Directory openIndexDirectory() throws Exception {
        return FSDirectory.open(new File(INDEX_PATH));
    }

    /** Opens a writer on the directory that creates the index if missing, else appends. */
    private static IndexWriter openWriter(Directory directory) throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, new IKAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(directory, config);
    }

    /** Returns the regular files of the source folder in sorted path order. */
    private static File[] listSourceFiles() {
        File sourceDir = new File(SOURCE_PATH);
        File[] entries = sourceDir.listFiles();
        if (entries == null) {
            throw new IllegalStateException(
                    "Source directory not found or unreadable: " + sourceDir);
        }
        // Compact the regular files to the front, then trim and sort so that
        // numbering in add() and the file picked by update() are stable.
        int count = 0;
        for (File entry : entries) {
            if (entry.isFile()) {
                entries[count++] = entry;
            }
        }
        File[] files = Arrays.copyOf(entries, count);
        Arrays.sort(files);
        return files;
    }

    /** Builds a document with an exact-match id, an analyzed title and the file's analyzed text. */
    private static Document buildDocument(String id, String title, File source) {
        Document d = new Document();
        // The id is a lookup key for update/delete, so it must not be tokenized.
        d.add(new StringField("id", id, Store.YES));
        d.add(new TextField("title", title, Store.YES));
        d.add(new TextField("content", readTxt(source), Store.YES));
        return d;
    }

    /** Runs the query against the shared index and prints the hit count and up to MAX_HITS documents. */
    private static void runQuery(Query query, boolean highlight) throws Exception {
        Directory directory = openIndexDirectory();
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs top = searcher.search(query, MAX_HITS);
                System.out.println("得到" + top.totalHits + "条记录");
                for (ScoreDoc scoreDoc : top.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    System.out.println(doc.get("id"));
                    System.out.println(doc.get("title"));
                    System.out.println(doc.get("content"));
                    if (highlight) {
                        markKeyWord(query, doc);
                    }
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}