本文记录自己平时用到的,利用POI接口,对老版本的word和excel以及txt文件进行内容读取操作。
package com.zte.lucene.tools;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
/**
 * Static helpers for an indexing tool: recursive file deletion, listing the
 * supported files of a directory, extracting plain text from .txt / .doc / .xls
 * files, and closing Lucene index resources.
 */
public class Toolkits {

    /** File-name suffixes accepted by {@link #getFileLIst(String)}. */
    private static final String[] SUPPORTED_EXTENSIONS = {".txt", ".xls", ".doc"};

    /**
     * Deletes a file, or a directory together with everything below it.
     *
     * <p>Deletion is best effort: a failure on one entry does not stop the
     * remaining entries from being processed.
     *
     * @param file the file or directory to delete
     * @return {@code true} if {@code file} and all of its descendants are gone
     *         afterwards (including the case where {@code file} did not exist);
     *         {@code false} if {@code file} is {@code null}, a directory could
     *         not be listed, or any deletion failed
     */
    public static boolean deleteDir(File file) {
        if (file == null) {
            return false;
        }
        boolean childrenDeleted = true;
        if (file.isDirectory()) {
            // listFiles() returns null when the directory cannot be read.
            File[] children = file.listFiles();
            if (children == null) {
                childrenDeleted = false;
            } else {
                for (File child : children) {
                    // Non-short-circuit on purpose: keep deleting after a failure.
                    childrenDeleted &= deleteDir(child);
                }
            }
        }
        // A path that was already absent counts as successfully removed.
        boolean removed = file.delete() || !file.exists();
        return removed && childrenDeleted;
    }

    /**
     * Lists the direct children of a directory whose names end in one of the
     * supported extensions (.txt, .xls, .doc). Sub-directories are not descended into.
     *
     * @param path the directory to scan
     * @return the matching entries; an empty list if {@code path} is {@code null},
     *         is not a directory, or cannot be read
     */
    public static List<File> getFileLIst(String path) {
        List<File> fileList = new ArrayList<File>();
        if (path == null) {
            return fileList;
        }
        // listFiles() returns null for a non-directory or on an I/O error.
        File[] files = new File(path).listFiles();
        if (files == null) {
            return fileList;
        }
        for (File file : files) {
            if (isTxtFile(file.getName())) {
                fileList.add(file);
            }
        }
        return fileList;
    }

    /**
     * Tells whether a file name denotes a supported file type (txt, xls or doc).
     *
     * <p>The extension must be the actual suffix of the name, so {@code a.docx},
     * {@code a.xlsx} and {@code a.txt.bak} are rejected. The comparison ignores
     * case, and a name consisting only of the extension (e.g. {@code ".txt"})
     * is rejected.
     *
     * @param fileName the file name to test
     * @return {@code true} if the name ends in a supported extension, otherwise {@code false}
     */
    private static boolean isTxtFile(String fileName) {
        if (fileName == null) {
            return false;
        }
        for (String extension : SUPPORTED_EXTENSIONS) {
            if (hasExtension(fileName, extension)) {
                return true;
            }
        }
        return false;
    }

    /** Case-insensitive suffix test that also requires a non-empty name before the extension. */
    private static boolean hasExtension(String fileName, String extension) {
        int stemLength = fileName.length() - extension.length();
        return stemLength > 0
                && fileName.regionMatches(true, stemLength, extension, 0, extension.length());
    }

    /**
     * Reads a text file line by line.
     *
     * <p>The file is decoded with the default charset, as {@link FileReader} does.
     * Every line is written to the result preceded by {@code "\n"}, so a non-empty
     * result starts with a newline and has no trailing one.
     *
     * @param file the file to read
     * @return the file content; if reading fails the error is printed and whatever
     *         was read up to that point (possibly the empty string) is returned
     */
    public static String txt2String(File file) {
        StringBuilder content = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                content.append('\n').append(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path, including a failure in the middle of reading.
            closeQuietly(reader);
        }
        return content.toString();
    }

    /**
     * Extracts the text of a Word .doc file using POI's {@link HWPFDocument}.
     *
     * @param file the .doc file to read
     * @return the document text, or the empty string if the file could not be
     *         opened or read (the error is printed)
     */
    public static String doc2String(File file) {
        String result = "";
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            HWPFDocument doc = new HWPFDocument(fis);
            Range range = doc.getRange();
            String text = range.text();
            if (text != null) {
                result = text;
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.err.println("reader file error!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The stream must be released even when the document cannot be parsed.
            closeQuietly(fis);
        }
        return result;
    }

    /**
     * Extracts the cell contents of an Excel .xls file using JXL.
     *
     * <p>Sheets are visited in order, rows top to bottom, cells left to right;
     * each cell's content is followed by a single space. No separator is
     * inserted between rows or sheets.
     *
     * @param file the .xls file to read
     * @return the concatenated cell contents, or the empty string if anything
     *         goes wrong while reading (the error is printed)
     */
    public static String xls2String(File file) {
        String result = "";
        FileInputStream fis = null;
        Workbook workbook = null;
        try {
            fis = new FileInputStream(file);
            workbook = Workbook.getWorkbook(fis);
            StringBuilder text = new StringBuilder();
            for (Sheet sheet : workbook.getSheets()) {
                int rowCount = sheet.getRows();
                for (int row = 0; row < rowCount; row++) {
                    for (Cell cell : sheet.getRow(row)) {
                        text.append(cell.getContents()).append(' ');
                    }
                }
            }
            // Assigned only after a complete read, so a failure yields "".
            result = text.toString();
        } catch (Exception e) {
            // Deliberately broad: any failure while reading results in "".
            e.printStackTrace();
        } finally {
            try {
                if (workbook != null) {
                    workbook.close();
                }
            } finally {
                closeQuietly(fis);
            }
        }
        return result;
    }

    /**
     * Closes Lucene index resources. Any argument may be {@code null} and is then skipped.
     *
     * <p>The writer and the reader are closed before the directory they operate
     * on, and every resource is attempted even if an earlier close fails. If
     * several closes fail, the exception from the last failing one is propagated.
     *
     * @param directory   the index directory, closed last
     * @param indexWriter the index writer, closed first
     * @param indexReader the index reader, closed second
     * @throws IOException if closing any of the resources fails
     */
    public static void closeIndexWrite(Directory directory, IndexWriter indexWriter, IndexReader indexReader) throws IOException {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } finally {
            try {
                if (indexReader != null) {
                    indexReader.close();
                }
            } finally {
                if (directory != null) {
                    directory.close();
                }
            }
        }
    }

    /** Closes a resource if it is non-null, printing (not propagating) any I/O error. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}