一:将文件夹下面的子文件作为数据源
package lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.*;
/**
 * Uses the files directly inside a folder as the data source: each regular file becomes
 * one document in an in-memory Lucene index, which is then queried by file content.
 * Created by xhga on 2018/5/31.
 */
public class LuceneFile {

    /**
     * Demo entry point: indexes the hard-coded demo folder, prints the matching files and
     * releases the index reader that backs the returned searcher.
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // The folder whose direct children are the files to be searched.
        IndexSearcher searcher = getMultiSearcher("D:\\wenjian");
        // getMultiSearcher leaves the reader open for its caller; close it once we are done.
        searcher.getIndexReader().close();
    }

    /**
     * Builds the document stored for one file.
     *
     * @param title   value of the {@code fileName} field (the file's path string)
     * @param content value of the {@code content} field (the file's text)
     * @return a document holding both values as stored text fields
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("fileName", title, TextField.TYPE_STORED));
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        // Further fields can be added the same way, e.g. an "author" field.
        return doc;
    }

    /**
     * Indexes every regular file directly under {@code parentPath} into an in-memory index,
     * runs the built-in sample query against the {@code content} field, prints the hits and
     * returns the searcher.
     *
     * <p>The returned searcher keeps its index reader open; the caller is responsible for
     * closing it via {@code searcher.getIndexReader().close()}.
     *
     * @param parentPath folder whose direct children are indexed; sub-directories are skipped
     * @return a searcher over the freshly built in-memory index
     * @throws FileNotFoundException if {@code parentPath} cannot be listed as a directory
     * @throws IOException           if a file cannot be read or the index cannot be written/read
     * @throws IllegalStateException if the built-in sample query cannot be parsed
     * @throws InterruptedException  declared for backward compatibility; not thrown by this code
     */
    public static IndexSearcher getMultiSearcher(String parentPath) throws IOException, InterruptedException {
        // listFiles() returns null when the path is missing or not a directory.
        File[] files = new File(parentPath).listFiles();
        if (files == null) {
            throw new FileNotFoundException("Not a readable directory: " + parentPath);
        }

        // Keep the index in memory.
        Directory idx = new RAMDirectory();
        Query query;
        try (Analyzer analyzer = new StandardAnalyzer()) {
            try (IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(analyzer))) {
                for (File child : files) {
                    if (!child.isFile()) {
                        // A directory cannot be opened as a stream; only plain files are indexed.
                        continue;
                    }
                    writer.addDocument(createDocument(child.toString(), readContent(child)));
                }
                writer.commit();
            }

            try {
                // Field to query and the text to look for ("content" = file text, "fileName" = path).
                // NOTE(review): the classic query syntax delimits phrases with double quotes;
                // confirm that the single quotes used here produce the intended match.
                query = new QueryParser("content", analyzer).parse("content:'硬实力'");
            } catch (ParseException e) {
                // Fail with the real cause instead of passing a null query to the searcher.
                throw new IllegalStateException("Built-in sample query could not be parsed", e);
            }
        }

        DirectoryReader reader = DirectoryReader.open(idx);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            printHits(searcher, query);
            return searcher;
        } catch (IOException | RuntimeException e) {
            // The searcher never reaches the caller on failure, so release the reader here.
            try {
                reader.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Reads a file line by line; every line is prefixed with the platform line separator.
     * The bytes are decoded with the platform default charset.
     */
    private static String readContent(File source) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(source)))) {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(System.lineSeparator()).append(line);
            }
        }
        return text.toString();
    }

    /** Runs the query for the top 10 hits and prints the hit count and each hit's stored fields. */
    private static void printHits(IndexSearcher searcher, Query query) throws IOException {
        TopDocs topdoc = searcher.search(query, 10);
        // Note: the second value printed is the current wall-clock timestamp, not an elapsed time.
        System.out.println("匹配到的文件数量:"+topdoc.totalHits+"查询时间时间:"+System.currentTimeMillis());
        for (ScoreDoc scoreDoc : topdoc.scoreDocs) {
            Document hitDoc = searcher.doc(scoreDoc.doc);
            System.out.println("文件名:"+hitDoc.get("fileName")+","+hitDoc.get("content"));
        }
    }
}
二:设置指定内容(可以通过查询数据库,作为数据源)
package lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import java.nio.file.Paths;
import java.util.Random;
/**
 * Indexes programmatically supplied content (in a real application this could come from a
 * database query) and searches it.
 */
public class LuceneData {

    /** Single characters that are combined at random to build the sample content. */
    private static final String[] SAMPLE_CHARS = {"爱","王","张","李","周","马","习","花"};

    /** Number of sample documents written to the index. */
    private static final int DOCUMENT_COUNT = 100000;

    /**
     * Builds one sample document.
     *
     * @param title   value of the {@code title} field
     * @param content value of the {@code content} field
     * @return a document with stored {@code content}, {@code title} and a fixed {@code author}
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        doc.add(new Field("title", title, TextField.TYPE_STORED));
        doc.add(new Field("author", "bobliu", TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Minimal Lucene example using the standard analyzer: writes {@code DOCUMENT_COUNT}
     * documents whose content is three randomly chosen sample characters, then queries the
     * {@code content} field and prints the hit count plus the top 10 hits.
     *
     * <p>The analyzer, directory, writer and reader are all closed before the method returns,
     * including when an exception is thrown.
     *
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void testDemo() throws Exception{
        // The index is kept in memory. To persist it on disk instead, open an FSDirectory,
        // e.g. FSDirectory.open(Paths.get("D:\\index")).
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory idx = new RAMDirectory()) {

            try (IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(analyzer))) {
                // One generator for the whole run instead of a new instance per document.
                Random random = new Random();
                for (int i = 0; i < DOCUMENT_COUNT; i++) {
                    String s = SAMPLE_CHARS[random.nextInt(SAMPLE_CHARS.length)]
                            + SAMPLE_CHARS[random.nextInt(SAMPLE_CHARS.length)]
                            + SAMPLE_CHARS[random.nextInt(SAMPLE_CHARS.length)];
                    // The document's title is its sequence number; the content is the random text.
                    writer.addDocument(createDocument(String.valueOf(i), s));
                }
                writer.commit();
            }

            // NOTE(review): the classic query syntax delimits phrases with double quotes;
            // confirm that the single quotes used here produce the intended match.
            Query content = new QueryParser("content", analyzer).parse("content:'王王王'");

            try (DirectoryReader reader = DirectoryReader.open(idx)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs topdoc = searcher.search(content, 10);
                // The second value printed is the current wall-clock timestamp.
                System.out.println("命中个数:"+topdoc.totalHits+"时间:"+System.currentTimeMillis());
                for (ScoreDoc scoreDoc : topdoc.scoreDocs) {
                    Document hitDoc = searcher.doc(scoreDoc.doc);
                    System.out.println(hitDoc.get("content")+","+hitDoc.get("title"));
                }
            }
        }
    }

    /** Demo entry point: runs {@link #testDemo()} and reports any failure on stderr. */
    public static void main(String[] args) {
        try {
            testDemo();
        } catch (Exception e) {
            // Top-level boundary of the demo: report the failure rather than propagate it.
            e.printStackTrace();
        }
    }
}