word部分介绍了通过poi读取.DOC和.DOCX文档文字并返回字符串的方法,这篇继续excel以及ppt内容
Word部分链接:http://blog.csdn.net/runningterry/article/details/47086659
HSSF: MS-Excel 97-2003(.xls),基于BIFF8格式的JAVA接口。
HWPF: MS-Word 97-2003(.doc),基于Word 97-2003二进制(OLE2)格式的JAVA接口。只支持.doc文件简单的操作,读写能力有限。(上一篇)
HSLF: MS-PowerPoint 97-2003(.ppt),基于PowerPoint 97-2003二进制(OLE2)格式的JAVA接口。
XSSF:MS-Excel 2007+(.xlsx),基于OOXML格式的JAVA接口。
XWPF:MS-Word 2007+(.docx),基于OOXML格式的JAVA接口。较HWPF功能完善。(上一篇)
XSLF: MS-PowerPoint 2007+(.pptx),基于OOXML格式的JAVA接口。较HSLF功能完善。
一:HSSF读取Excel 97-2003文本实例以及XSSF读取Excel 2007+文本实例
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Extracts the plain text of Excel workbooks through the Apache POI text extractors.
 *
 * <p>Both readers report failures on the console and return an empty string instead of
 * throwing checked exceptions.
 */
public class FileTextRead {

    /**
     * Reads the whole text content of an Excel 97-2003 ({@code .xls}) workbook.
     *
     * @param filePath path of the {@code .xls} file to read
     * @return the extracted text; an empty string if the file cannot be found or an
     *         I/O error occurs before any text was extracted
     */
    public static String getTextFromExcel(String filePath) {
        String text = "";
        // try-with-resources closes the extractor and the file stream on every path,
        // including when POI throws while parsing a malformed workbook.
        try (InputStream is = new FileInputStream(filePath);
             ExcelExtractor extractor =
                     new ExcelExtractor(new HSSFWorkbook(new POIFSFileSystem(is)))) {
            // Emit formula results rather than the formula text.
            extractor.setFormulasNotResults(false);
            // Include each sheet's name in the output.
            extractor.setIncludeSheetNames(true);
            text = extractor.getText();
        } catch (FileNotFoundException e) {
            System.out.println("没有找到指定路径" + filePath);
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("getTextFromExcel IO错误");
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Reads the whole text content of an Excel 2007+ ({@code .xlsx}) workbook.
     *
     * @param filePath path of the {@code .xlsx} file to read
     * @return the extracted text; an empty string if the file cannot be found or an
     *         I/O error occurs before any text was extracted
     */
    public static String getTextFromExcel2007(String filePath) {
        String text = "";
        // Resources are closed in reverse order: extractor first, then the stream.
        try (InputStream is = new FileInputStream(filePath);
             XSSFExcelExtractor extractor = new XSSFExcelExtractor(new XSSFWorkbook(is))) {
            text = extractor.getText();
        } catch (FileNotFoundException e) {
            System.out.println("没有找到指定路径" + filePath);
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("getTextFromExcel2007 IO错误");
            e.printStackTrace();
        }
        return text;
    }

    /** Demo entry point: prints the text of one sample file per format. */
    public static void main(String[] args) {
        System.out.println(FileTextRead.getTextFromExcel("d:\\testExcel97-2003.xls"));
        System.out.println(FileTextRead.getTextFromExcel2007("d:\\testExcel2007.xlsx"));
    }
}
二:HSLF读取PowerPoint 97-2003文本实例以及XSLF读取PowerPoint 2007+文本实例
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
/**
 * Extracts the plain text of PowerPoint presentations through the Apache POI text
 * extractors.
 *
 * <p>Both readers report failures on the console and return an empty string instead of
 * throwing checked exceptions.
 */
public class FileTextRead {

    /** Demo entry point: prints the text of one sample file per format. */
    public static void main(String[] args) {
        System.out.println(FileTextRead.getTextFromPPT("d:\\textPPT97-2003.ppt"));
        // The .pptx sample must go through the OOXML reader; the binary .ppt reader
        // cannot parse it.
        System.out.println(FileTextRead.getTextFromPPT2007("d:\\textPPT2007.pptx"));
    }

    /**
     * Reads the whole text content of a PowerPoint 97-2003 ({@code .ppt}) presentation.
     *
     * @param filePath path of the {@code .ppt} file to read
     * @return the extracted text; an empty string if the file cannot be found or an
     *         I/O error occurs before any text was extracted
     */
    public static String getTextFromPPT(String filePath) {
        String text = "";
        // try-with-resources closes the extractor and the file stream on every path,
        // including when POI throws while parsing a malformed presentation.
        try (InputStream is = new FileInputStream(filePath);
             PowerPointExtractor extractor = new PowerPointExtractor(is)) {
            text = extractor.getText();
        } catch (FileNotFoundException e) {
            System.out.println("没有找到指定路径" + filePath);
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("getTextFromPPT IO错误");
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Reads the whole text content of a PowerPoint 2007+ ({@code .pptx}) presentation.
     *
     * @param filePath path of the {@code .pptx} file to read
     * @return the extracted text; an empty string if the file cannot be found or an
     *         I/O error occurs before any text was extracted
     */
    public static String getTextFromPPT2007(String filePath) {
        String text = "";
        // Resources are closed in reverse order: extractor first, then the stream.
        try (InputStream is = new FileInputStream(filePath);
             XSLFPowerPointExtractor extractor =
                     new XSLFPowerPointExtractor(new XMLSlideShow(is))) {
            text = extractor.getText();
        } catch (FileNotFoundException e) {
            System.out.println("没有找到指定路径" + filePath);
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("getTextFromPPT2007 IO错误");
            e.printStackTrace();
        }
        return text;
    }
}