此处的xlsx是数据量达到20万行的大文件。
上jar包截图:
思路:
通过POI在SAX模式下将xlsx逐行读取并装入内存。
在内存中的数据结构为List<List<Object>>:外层List的每个元素对应一行,内层List存放该行的各个单元格。
拿到装进内存中的list之后,使用lucene的代码如下:
package Tools;
import java.io.File;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import model.Tweet;
import model.User;
/**
 * Simple full-text search utility built on Lucene.
 * 2019.03.24
 */
public class LuceneTool {

    /**
     * Names of the indexed fields. Column {@code k} of every row is written under
     * {@code INDEX_FIELDS[k]}, and searches run against these same fields so that
     * indexing and querying can never drift apart.
     */
    private static final String[] INDEX_FIELDS = {"字段1", "字段2", "字段3"};

    /**
     * Builds a Lucene index on disk from rows that were previously loaded into memory.
     *
     * <p>Row 0 is skipped (presumably the header row of the spreadsheet; confirm against
     * the caller). For every other row, the first {@code INDEX_FIELDS.length} cells are
     * converted with {@code toString()} and added as indexed, stored fields. Null rows
     * and null cells are skipped.
     *
     * @param result    rows to index; each inner list holds the cells of one row
     * @param indexPath directory in which the index files are written
     * @throws Exception if the index cannot be opened or written
     */
    public void CreateIndex(List<List<Object>> result, String indexPath) throws Exception {
        // Storage rule shared by all fields: searchable and retrievable from a hit.
        FieldType type = new FieldType();
        type.setIndexed(true);
        type.setStored(true);

        // try-with-resources closes writer, analyzer and directory (in that order) even
        // when indexing fails, so the index write lock is always released.
        try (Directory d = FSDirectory.open(new File(indexPath));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer =
                     new IndexWriter(d, new IndexWriterConfig(Version.LATEST, analyzer))) {
            for (int i = 1; i < result.size(); i++) {
                List<Object> rowData = result.get(i);
                if (rowData == null) {
                    continue;
                }
                Document document = new Document(); // one document per row
                int columns = Math.min(INDEX_FIELDS.length, rowData.size());
                for (int c = 0; c < columns; c++) {
                    Object cell = rowData.get(c);
                    if (cell == null) {
                        continue; // Field rejects null values; leave the field absent
                    }
                    document.add(new Field(INDEX_FIELDS[c], cell.toString(), type));
                }
                writer.addDocument(document);
            }
            // Persist the added documents before the writer is closed.
            writer.commit();
        }
    }

    /**
     * Searches the index and prints the first indexed field of every hit to standard output.
     *
     * <p>The keyword is parsed with Lucene query syntax and matched against all fields
     * listed in {@code INDEX_FIELDS}.
     *
     * @param keyword   query text
     * @param indexPath directory that contains the index
     * @param maxNum    maximum number of hits to print
     * @throws Exception if the index cannot be read or the keyword cannot be parsed
     */
    public void searchIndex(String keyword, String indexPath, int maxNum) throws Exception {
        // Reader, analyzer and directory are closed automatically, also on failure.
        try (Directory d = FSDirectory.open(new File(indexPath));
             IndexReader r = DirectoryReader.open(d);
             Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher searcher = new IndexSearcher(r);
            // Query the fields that CreateIndex actually writes.
            QueryParser parser = new MultiFieldQueryParser(INDEX_FIELDS, analyzer);
            Query query = parser.parse(keyword);
            // Hits come back ordered by relevance score, capped at maxNum.
            TopDocs search = searcher.search(query, maxNum);
            for (ScoreDoc hit : search.scoreDocs) {
                Document document = searcher.doc(hit.doc); // load stored fields by doc id
                System.out.println(document.get(INDEX_FIELDS[0]));
            }
        }
    }
}