### 下载地址
https://archive.apache.org/dist/poi/release/bin/
### 需要的 jar 包(我用的是 3.10-FINAL)
- poi-3.10-FINAL.jar(用于 xls)
- poi-ooxml-3.10-FINAL.jar(用于 xlsx)
- poi-ooxml-schemas-3.10-FINAL.jar
- xmlbeans-2.3.0.jar
- dom4j-1.6.1.jar
- poi-scratchpad-3.10-FINAL-20140208.jar(用于word,ppt)
### 问题
- java.lang.NoSuchMethodError: org.apache.poi.util.IOUtils.copy(Ljava/io/InputStream;Ljava/io/OutputStream;) in Apache POI:这一类问题很常见
- “read 2 bytes, rest 512 bytes” 之类的报错也是这类问题:因为你用的 WordExtractor 不是 POI 原生的。这里推荐使用原生的 POI,不要去用那些乱七八糟、被裁剪过的 jar 包
原因是:如果你的 jar 包是从不同的地方下载的,各个版本之间的引用就有可能不一致。我之前就用了 tm-extractors-0.4.jar,其实它只是 poi-scratchpad 里面的一部分,只不过被人重新打包后发布了出来;结果有一部分代码自动用了 tm-extractors 里面的类,而没有用 POI 里面的,造成了版本不一致。包括 jxl 也是 POI 的一部分。
### 读 doc
/**
 * Extracts the plain text of a legacy Word (.doc) file using POI's WordExtractor.
 *
 * @param name path of the .doc file to read
 * @return the extracted text, or {@code null} if the file could not be opened or parsed
 *         (the failure's stack trace is printed)
 */
public static String readWord(String name)
{
    String text = null;
    FileInputStream in = null;
    try
    {
        in = new FileInputStream(name);
        WordExtractor extractor = new WordExtractor(in);
        text = extractor.getText();
    }
    catch (Exception e)
    {
        // Covers a missing file as well as parse failures; both were reported the same way.
        e.printStackTrace();
    }
    finally
    {
        // Always release the file handle, including when extraction fails.
        if (in != null)
        {
            try
            {
                in.close();
            }
            catch (Exception closeFailure)
            {
                // A failed close must not discard text that was already extracted.
                closeFailure.printStackTrace();
            }
        }
    }
    return text;
}
### 读 docx
/**
 * Extracts the plain text of a Word 2007+ (.docx) file using POI's XWPFWordExtractor.
 *
 * @param path path of the .docx file to read
 * @return the extracted text, or {@code null} if an I/O error occurred
 *         (the failure's stack trace is printed)
 */
public static String readDocx(String path)
{
    // getText() returns a String, so the result is held as a String rather than a StringBuffer.
    String text = null;
    InputStream is = null;
    try
    {
        // Open the file the caller asked for (previously a hard-coded sample path was used).
        is = new FileInputStream(path);
        XWPFDocument doc = new XWPFDocument(is);
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
        text = extractor.getText();
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Always release the file handle, including when extraction fails.
        if (is != null)
        {
            try
            {
                is.close();
            }
            catch (IOException closeFailure)
            {
                // A failed close must not discard text that was already extracted.
                closeFailure.printStackTrace();
            }
        }
    }
    return text;
}
### 读 xls 和 xlsx
/** Logger used by the Excel readers; obtained under the name "client". */
private static final Logger log = Logger.getLogger("client");
/**
 * Reads the first sheet of a legacy Excel (.xls) workbook as tab/newline-delimited text.
 *
 * <p>String, numeric and formula cells are appended, each followed by a tab; a formula
 * cell contributes its formula text, not an evaluated value. Cells of any other type
 * are skipped. Every row is terminated by a newline.
 *
 * @param path path of the .xls file to read
 * @return the extracted text; if reading fails, whatever was collected before the
 *         failure (possibly the empty string) is returned and the error is logged
 */
public static String readXls(String path)
{
    StringBuilder text = new StringBuilder();
    FileInputStream is = null;
    try
    {
        is = new FileInputStream(path);
        HSSFWorkbook excel = new HSSFWorkbook(is);
        // Only the first sheet is read.
        HSSFSheet sheet0 = excel.getSheetAt(0);
        for (Iterator<?> rowIterator = sheet0.iterator(); rowIterator.hasNext();)
        {
            HSSFRow row = (HSSFRow) rowIterator.next();
            for (Iterator<?> cellIterator = row.cellIterator(); cellIterator.hasNext();)
            {
                HSSFCell cell = (HSSFCell) cellIterator.next();
                // Pick the accessor that matches the cell type.
                int cellType = cell.getCellType();
                if (cellType == HSSFCell.CELL_TYPE_STRING)
                {
                    text.append(cell.getStringCellValue()).append('\t');
                }
                else if (cellType == HSSFCell.CELL_TYPE_NUMERIC)
                {
                    text.append(cell.getNumericCellValue()).append('\t');
                }
                else if (cellType == HSSFCell.CELL_TYPE_FORMULA)
                {
                    text.append(cell.getCellFormula()).append('\t');
                }
            }
            text.append('\n');
        }
    }
    catch (Exception e)
    {
        // Pass the throwable so the logger records the stack trace, not just toString().
        log.warn("Failed to read xls file: " + path, e);
    }
    finally
    {
        // Always release the file handle, including when parsing fails.
        if (is != null)
        {
            try
            {
                is.close();
            }
            catch (Exception closeFailure)
            {
                log.warn("Failed to close xls file: " + path, closeFailure);
            }
        }
    }
    return text.toString();
}
/**
 * Reads the first sheet of an Excel 2007+ (.xlsx) workbook as tab/newline-delimited text.
 *
 * <p>String, numeric and formula cells are appended, each followed by a tab; a formula
 * cell contributes its formula text, not an evaluated value. Cells of any other type
 * are skipped. Every row is terminated by a newline.
 *
 * @param path path of the .xlsx file to read
 * @return the extracted text; if reading fails, whatever was collected before the
 *         failure (possibly the empty string) is returned and the error is logged
 */
public static String readXlsx(String path)
{
    StringBuilder text = new StringBuilder();
    OPCPackage pkg = null;
    try
    {
        pkg = OPCPackage.open(path);
        XSSFWorkbook excel = new XSSFWorkbook(pkg);
        // Only the first sheet is read.
        XSSFSheet sheet0 = excel.getSheetAt(0);
        for (Iterator<?> rowIterator = sheet0.iterator(); rowIterator.hasNext();)
        {
            XSSFRow row = (XSSFRow) rowIterator.next();
            for (Iterator<?> cellIterator = row.cellIterator(); cellIterator.hasNext();)
            {
                XSSFCell cell = (XSSFCell) cellIterator.next();
                // Pick the accessor that matches the cell type.
                int cellType = cell.getCellType();
                if (cellType == XSSFCell.CELL_TYPE_STRING)
                {
                    text.append(cell.getStringCellValue()).append('\t');
                }
                else if (cellType == XSSFCell.CELL_TYPE_NUMERIC)
                {
                    text.append(cell.getNumericCellValue()).append('\t');
                }
                else if (cellType == XSSFCell.CELL_TYPE_FORMULA)
                {
                    text.append(cell.getCellFormula()).append('\t');
                }
            }
            text.append('\n');
        }
    }
    catch (Exception e)
    {
        // Pass the throwable so the logger records the stack trace, not just toString().
        log.warn("Failed to read xlsx file: " + path, e);
    }
    finally
    {
        if (pkg != null)
        {
            // revert() releases the package without saving; this method only reads,
            // so nothing should be written back to the file.
            pkg.revert();
        }
    }
    return text.toString();
}