根据文件内容搜索文件有两种方式:
1、直接在服务器搜索,拼接Linux命令实现
2、把文件从服务器下载到本地,通过读取文件并匹配内容的方式实现;缺点是文件过多时搜索速度很慢,文件过大时搜索结果可能不准确
暂时只实现第二种方式
工具类: 搜索文件内容后返回文件名称
package com.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
/**
 * Searches a directory tree for {@code .docx} files whose paragraph text contains a
 * given string and reports the matching file names.
 *
 * <p>The class keeps no state between calls: every invocation of {@link #search} or
 * {@link #getFileList} builds and returns its own list.
 */
public class FileContentSearch {

    /** Extension (without the leading dot) of the files that are searched. */
    private static final String FILE_EXTENSION = "docx";

    /**
     * {@code true} to match with the exact case of the search string, {@code false} to
     * compare both sides lower-cased.
     */
    private static final boolean CASE_SENSITIVE = true;

    /**
     * Finds every {@code .docx} file below {@code FolderName} that contains
     * {@code SearchStr} inside a single paragraph.
     *
     * <p>A file that cannot be read because of an {@link IOException} is reported on
     * standard error and skipped, so one unreadable file does not abort the search.
     * Other (unchecked) failures raised while parsing a file are not handled here and
     * propagate to the caller.
     *
     * @param FolderName root directory to scan recursively
     * @param SearchStr  text to look for; must not be {@code null}
     * @return the distinct names of the matching files, without their extension, in the
     *         order in which they were first encountered
     * @throws IOException kept in the signature for source compatibility with existing
     *                     callers; per-file read errors are handled internally
     */
    public static List<String> search(String FolderName, String SearchStr) throws IOException {
        List<String> matchedNames = new ArrayList<>();
        for (String path : getFileList(FolderName)) {
            File file = new File(path);
            if (!file.exists()) {
                continue;
            }
            // Opening from a stream keeps the search read-only (a document opened through
            // a read-write package may be saved back to disk when closed), and
            // try-with-resources closes every document, not only the last one.
            try (InputStream in = new FileInputStream(file);
                    XWPFDocument document = new XWPFDocument(in)) {
                if (containsText(document, SearchStr)) {
                    System.out.println("找到了文件" + file.getName());
                    matchedNames.add(stripExtension(file.getName()));
                }
            } catch (IOException e) {
                // Best effort: report the unreadable file and continue with the rest.
                e.printStackTrace();
            }
        }
        // Files with the same name in different sub-directories are reported once.
        return matchedNames.stream().distinct().collect(Collectors.toList());
    }

    /**
     * Recursively collects the absolute paths of all files below {@code strPath} whose
     * name ends with {@code "." + FILE_EXTENSION}.
     *
     * @param strPath directory to scan
     * @return a new list of absolute paths; empty when {@code strPath} is not a
     *         directory or cannot be listed
     */
    public static List<String> getFileList(String strPath) {
        List<String> paths = new ArrayList<>();
        collectFiles(new File(strPath), paths);
        return paths;
    }

    /** Adds the matching files found below {@code dir} to {@code paths}, depth first. */
    private static void collectFiles(File dir, List<String> paths) {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() yields null when dir is not a directory or cannot be read.
            return;
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                collectFiles(entry, paths);
            } else if (entry.getName().endsWith("." + FILE_EXTENSION)) {
                paths.add(entry.getAbsolutePath());
            }
        }
    }

    /** Returns whether any single paragraph of {@code document} contains {@code SearchStr}. */
    private static boolean containsText(XWPFDocument document, String SearchStr) {
        String needle = CASE_SENSITIVE ? SearchStr : SearchStr.toLowerCase(Locale.ROOT);
        StringBuilder paragraphText = new StringBuilder();
        for (XWPFParagraph paragraph : document.getParagraphs()) {
            List<XWPFRun> runs = paragraph.getRuns();
            if (runs.isEmpty()) {
                continue;
            }
            // Word may split one visible phrase across several runs, so the runs of a
            // paragraph are joined before matching.
            paragraphText.setLength(0);
            for (XWPFRun run : runs) {
                paragraphText.append(run.toString());
            }
            String haystack = paragraphText.toString();
            if (!CASE_SENSITIVE) {
                haystack = haystack.toLowerCase(Locale.ROOT);
            }
            if (haystack.contains(needle)) {
                return true;
            }
        }
        return false;
    }

    /** Removes the last extension from {@code fileName}, e.g. {@code "a.b.docx" -> "a.b"}. */
    private static String stripExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? fileName : fileName.substring(0, dot);
    }
}