实现对office文件内容的读取
使用jdk版本为1.7
使用jar包poi-3.9下载链接
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
/**
 * Created by leo01 on 17-1-27.
 *
 * Prints the text content of the office and plain-text files found directly
 * inside a fixed directory. Supported extensions (matched case-insensitively):
 * <ul>
 *   <li>{@code doc} - every paragraph of the Word document, concatenated</li>
 *   <li>{@code xls} - the cells of the first row of the first sheet, comma separated</li>
 *   <li>{@code txt} - every line of the file, concatenated without separators</li>
 * </ul>
 * Handling of {@code docx} and {@code xlsx} is kept below as commented-out code.
 */
public class test {

    /**
     * Scans the target directory and prints the content of every supported file.
     * A failure while reading one file is reported on stderr and does not stop
     * the processing of the remaining files.
     *
     * @param args unused
     * @throws Exception kept for signature compatibility
     */
    public static void main(String[] args) throws Exception
    {
        String targetDirectory = "/home/leo01/Desktop";
        File directory = new File(targetDirectory);

        // File.list() returns null when the path is not a directory or cannot be read.
        String[] names = directory.list();
        if (names == null)
        {
            System.err.println("Cannot list directory: " + targetDirectory);
            return;
        }

        for (int i = 0; i < names.length; i++)
        {
            File candidate = new File(directory, names[i]);
            // Sub-directories and other non-regular entries cannot be opened as streams.
            if (!candidate.isFile())
            {
                continue;
            }

            String extension = extensionOf(names[i]);
            try
            {
                if ("doc".equalsIgnoreCase(extension))
                {
                    printDoc(candidate);
                }
                // NOTE(review): *.docx / *.xlsx handling is disabled because it was observed to
                // fail with java.lang.NoClassDefFoundError; presumably the OOXML dependency jars
                // are missing from the classpath - confirm before re-enabling.
                // else if ("docx".equalsIgnoreCase(extension))
                // {
                //     printDocx(candidate);
                // }
                else if ("xls".equalsIgnoreCase(extension))
                {
                    printXlsFirstRow(candidate);
                }
                // else if ("xlsx".equalsIgnoreCase(extension))
                // {
                //     printXlsxFirstRow(candidate);
                // }
                else if ("txt".equalsIgnoreCase(extension))
                {
                    printTxt(candidate);
                }
            }
            catch (Exception e)
            {
                // One unreadable or corrupt file must not abort the whole scan.
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the text after the last dot of {@code fileName}, or an empty string
     * when the name contains no dot.
     */
    private static String extensionOf(String fileName)
    {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? "" : fileName.substring(dot + 1);
    }

    /** Prints all paragraphs of a binary Word (*.doc) file as one string. */
    private static void printDoc(File docFile) throws Exception
    {
        try (FileInputStream in = new FileInputStream(docFile))
        {
            WordExtractor extractor = new WordExtractor(in);
            // Each array element holds the text of one paragraph.
            String[] paragraphs = extractor.getParagraphText();
            StringBuilder text = new StringBuilder();
            for (int j = 0; j < paragraphs.length; j++)
            {
                text.append(paragraphs[j]);
            }
            System.out.println(text);
        }
    }

    /**
     * Prints the first row of the first sheet of a binary Excel (*.xls) file,
     * cells separated by commas. Prints an empty line when the sheet has no first row.
     */
    private static void printXlsFirstRow(File xlsFile) throws Exception
    {
        try (FileInputStream in = new FileInputStream(xlsFile))
        {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            HSSFSheet firstSheet = workbook.getSheetAt(0);
            HSSFRow firstRow = firstSheet.getRow(0);
            StringBuilder line = new StringBuilder();
            // getRow() yields null for a row that was never created (e.g. an empty sheet).
            if (firstRow != null)
            {
                int cellCount = firstRow.getLastCellNum();
                for (int j = 0; j < cellCount; j++)
                {
                    if (j > 0)
                    {
                        line.append(",");
                    }
                    // A missing cell is appended as the text "null", as before.
                    line.append(firstRow.getCell(j));
                }
            }
            System.out.println(line);
        }
    }

    /**
     * Prints every line of a text file concatenated into one string
     * (line terminators are dropped, no separator is inserted).
     */
    private static void printTxt(File txtFile) throws Exception
    {
        try (BufferedReader reader = new BufferedReader(new FileReader(txtFile)))
        {
            StringBuilder text = new StringBuilder();
            String line;
            // readLine() returning null is the reliable end-of-file signal;
            // ready() only tells whether a read would block.
            while ((line = reader.readLine()) != null)
            {
                text.append(line);
            }
            System.out.println(text);
        }
    }

    // ---- Disabled OOXML readers (see the NOTE in main) -------------------------------------
    //
    // /** Prints the trimmed text of a Word 2007+ (*.docx) file. */
    // private static void printDocx(File docxFile) throws Exception
    // {
    //     try (FileInputStream in = new FileInputStream(docxFile))
    //     {
    //         XWPFDocument document = new XWPFDocument(in);
    //         POIXMLTextExtractor extractor = new XWPFWordExtractor(document);
    //         // trim() removes leading and trailing whitespace
    //         System.out.println(extractor.getText().trim());
    //     }
    // }
    //
    // /** Prints the first row of the first sheet of an Excel 2007+ (*.xlsx) file. */
    // private static void printXlsxFirstRow(File xlsxFile) throws Exception
    // {
    //     try (FileInputStream in = new FileInputStream(xlsxFile))
    //     {
    //         XSSFWorkbook workbook = new XSSFWorkbook(in);
    //         XSSFSheet firstSheet = workbook.getSheetAt(0);
    //         XSSFRow firstRow = firstSheet.getRow(0);
    //         StringBuilder line = new StringBuilder();
    //         if (firstRow != null)
    //         {
    //             int cellCount = firstRow.getLastCellNum();
    //             for (int j = 0; j < cellCount; j++)
    //             {
    //                 if (j > 0)
    //                 {
    //                     line.append(",");
    //                 }
    //                 XSSFCell cell = firstRow.getCell(j);
    //                 line.append(cell);
    //             }
    //         }
    //         System.out.println(line);
    //     }
    // }
}
对docx和xlsx文件读取时总是出现java.lang.NoClassDefFoundError异常,还不知道为什么会出现这个异常(推测是classpath中缺少poi-ooxml、poi-ooxml-schemas、xmlbeans、dom4j等OOXML格式所需的依赖jar包,有待确认)。