初学者，写了一个java下lucene全文检索简单实例，可直接运行

最新推荐文章于 2024-07-30 20:50:32 发布

秋牙

最新推荐文章于 2024-07-30 20:50:32 发布

阅读量1k

点赞数

本文链接：https://blog.csdn.net/ll327577416/article/details/50587579

版权

java 同时被 2 个专栏收录

1 篇文章 0 订阅

订阅专栏

lucene

1 篇文章 0 订阅

订阅专栏

一：第一个类文件，该类的方法作用为采集document文档：递归遍历指定路径下的所有文件夹，将其中的 .doc/.docx 文件逐个转换为document

package ut.tst;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;

/**
 * Collects source files and converts each of them into a Lucene {@link Document}.
 *
 * <p>A document is made of several fields (name, path, size, content), which play
 * a role similar to columns of a record: each attribute of the file is kept in its
 * own field so it can be stored and/or searched independently.
 *
 * @author Administrator
 */
public class indexUtils {

    /**
     * Builds one {@link Document} for every Word file found under {@code souDir},
     * searching sub-directories recursively.
     *
     * <p>Fields written per document:
     * <ul>
     *   <li>{@code fieldSize} - file size in bytes, stored</li>
     *   <li>{@code fieldPath} - absolute path, stored only</li>
     *   <li>{@code fieldName} - file name, indexed as a single token and stored</li>
     *   <li>{@code fieldContext} - file content, analyzed but NOT stored</li>
     * </ul>
     *
     * @param souDir root directory to scan
     * @return the documents, one per matching file; empty when nothing matches
     */
    public static List<Document> file2document(String souDir) {
        List<Document> documents = new ArrayList<>();
        for (File file : getFileList(new ArrayList<File>(), souDir)) {
            String fileContext = "";
            try {
                // NOTE(review): no charset is passed and .doc/.docx are binary formats;
                // verify that decoding the raw bytes as text yields useful terms.
                fileContext = FileUtils.readFileToString(file);
            } catch (IOException e) {
                // Best effort: the file is still indexed by name/path/size, with empty content.
                System.err.println("Failed to read " + file.getAbsolutePath()
                        + "; indexing it with empty content");
                e.printStackTrace();
            }
            long fileSize = FileUtils.sizeOf(file);

            Document document = new Document();
            document.add(new LongField("fieldSize", fileSize, Store.YES));
            document.add(new StoredField("fieldPath", file.getAbsolutePath()));
            document.add(new StringField("fieldName", file.getName(), Store.YES));
            document.add(new org.apache.lucene.document.TextField("fieldContext", fileContext, Store.NO));
            documents.add(document);
        }
        return documents;
    }

    /**
     * Recursively appends every {@code .doc}/{@code .docx} file below {@code filePath}
     * to {@code list}.
     *
     * @param list     accumulator the matching files are appended to
     * @param filePath directory to scan; it is created when it does not exist yet
     * @return the same {@code list} instance, for call chaining
     */
    private static List<File> getFileList(List<File> list, String filePath) {
        File sourceDir = new File(filePath);
        if (!sourceDir.exists()) {
            sourceDir.mkdir();
        }
        // listFiles() returns null when the path is not a directory or cannot be read
        // (including when the mkdir above failed); treat that as "nothing to index".
        File[] children = sourceDir.listFiles();
        if (children == null) {
            return list;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                getFileList(list, child.getPath());
            } else if (isWordFile(child.getName())) {
                list.add(child);
            }
        }
        return list;
    }

    /**
     * Tells whether {@code fileName} ends with a Word extension, ignoring case.
     * A substring test is not enough: it would also accept names such as
     * {@code a.doc.bak} or {@code my.docs.txt}.
     */
    private static boolean isWordFile(String fileName) {
        return hasExtension(fileName, ".doc") || hasExtension(fileName, ".docx");
    }

    /** Case-insensitive suffix check that also requires a non-empty base name. */
    private static boolean hasExtension(String fileName, String extension) {
        int start = fileName.length() - extension.length();
        return start > 0 && fileName.regionMatches(true, start, extension, 0, extension.length());
    }

}

二：通过采集document，生成索引文件

package ut.tst;
import java.io.File;
import java.io.IOException;
import java.util.List;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds the Lucene index from the documents collected by {@code indexUtils}.
 */
public class testIndex {

    /**
     * Indexes every document produced from {@code souDir} into the index stored at
     * {@code indexDir}.
     *
     * <p>An {@link IOException} raised while opening or writing the index is reported
     * on standard error and not rethrown, so the caller is not told about the failure.
     *
     * <p>NOTE(review): the writer is opened with the default configuration, so running
     * this twice against the same {@code indexDir} presumably appends the same documents
     * again - confirm whether the index should be recreated instead.
     *
     * @param indexDir directory the index files are written to
     * @param souDir   root directory of the source files to index
     */
    public static void createIndex(String indexDir, String souDir) {
        // Collect the documents first so that no index resource is held while reading files.
        List<Document> docList = indexUtils.file2document(souDir);

        // try-with-resources closes writer, directory and analyzer (in that order) even
        // when addDocument fails; otherwise the writer would stay open on error.
        try (Analyzer analyzer = new MMAnalyzer();
             Directory directory = FSDirectory.open(new File(indexDir).toPath());
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (Document document : docList) {
                indexWriter.addDocument(document);
            }
        } catch (IOException e) {
            System.err.println("Failed to build index in " + indexDir);
            e.printStackTrace();
        }
    }
}

三：进行查询，输出查询结果

package ut.tst;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demo entry point: builds the index, then runs a term query against it and prints
 * the matching documents.
 */
public class testSearch {
    // Directory holding the index files.
    private static String indexDir="D://luceneDir";
    // Root directory of the source files to index.
    private static String souDir="D://data//临时第三标段";
    // Name of the analyzed content field; must be identical to the name used at index time.
    private static final String CONTENT_FIELD = "fieldContext";

    /**
     * Searches the content field for the term "西安" and prints up to 100 hits.
     *
     * @throws IOException if the index cannot be opened or read
     */
    private static void testTermQuery() throws IOException {
        // The field name has to match the indexed one exactly; a query on a field that
        // was never written simply returns no hits.
        Query query = new TermQuery(new Term(CONTENT_FIELD, "西安"));

        // Reader and directory are closed automatically, also when the search throws.
        try (Directory directory = FSDirectory.open(new File(indexDir).toPath());
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            TopDocs topDocs = indexSearcher.search(query, 100);
            System.out.println("共搜索到总记录数：" + topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit by its internal document id.
                Document doc = indexSearcher.doc(scoreDoc.doc);
                System.out.println("------------------------------");
                System.out.println("文件名称 =" + doc.get("fieldName"));
                System.out.println("文件大小 =" + doc.get("fieldSize"));
                // The content field is indexed without being stored, so it cannot be
                // read back from a hit; print the stored path to locate the file instead.
                System.out.println("文件路径 =" + doc.get("fieldPath"));
            }
        }
    }

    /**
     * Builds the index from {@code souDir}, then runs the demo query.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        testIndex.createIndex(indexDir, souDir);
        try {
            testTermQuery();
        } catch (IOException e) {
            System.err.println("Search failed on index " + indexDir);
            e.printStackTrace();
        }
    }
}

秋牙

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
初学者，写了一个java下lucene全文检索简单实例，可直接运行

一：第一个类文件，该类的方法作用为采集document文档，将指定路径的所有文件夹下的所有文件加载到document中 package ut.tst;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.List;import org.apache.
复制链接

扫一扫