第一:下载POI。在 http://jakarta.apache.org/poi/(该项目现已迁移至 https://poi.apache.org/)下载 poi-bin-3.5-beta4-20081128.zip,解压后将其中的 jar 包添加到项目工程的类路径(classpath)中。
第二:处理Word(Word.java)
import org.apache.poi.hwpf.extractor.WordExtractor; import java.io.File; import java.io.InputStream;
/**
 * Extracts the plain text of a legacy Word (.doc) document using Apache POI's
 * {@code WordExtractor}.
 */
public class Word {

    /**
     * Demo entry point: prints the text of the sample document {@code c://11.doc}.
     *
     * @param args ignored
     * @throws Exception if the document cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c://11.doc"));
    }

    /**
     * Reads the document at the given path and returns its text.
     *
     * @param s path of the .doc file
     * @return the text reported by {@code WordExtractor.getText()}
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        InputStream in = new java.io.FileInputStream(s);
        try {
            return getContent(in);
        } finally {
            // The stream is opened here, so it must be released here.
            in.close();
        }
    }

    /**
     * Reads the given document file and returns its text.
     *
     * @param f the .doc file
     * @return the text reported by {@code WordExtractor.getText()}
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream in = new java.io.FileInputStream(f);
        try {
            return getContent(in);
        } finally {
            // The stream is opened here, so it must be released here.
            in.close();
        }
    }

    /**
     * Extracts the text of a Word document supplied as a stream.
     * The stream is not closed by this method; the caller keeps ownership of it.
     *
     * @param is stream positioned at the start of a .doc file
     * @return the text reported by {@code WordExtractor.getText()}
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        WordExtractor ex = new WordExtractor(is);
        return ex.getText();
    }
}
|
第三:处理Excel(Excel.java)
import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFCell; import java.io.File; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.Date;
public class Excel { public static void main(String[] args) throws Exception { System.out.println(getContent("c://22.xls")); }
public static String getContent(String s) throws Exception { return getContent(new java.io.FileInputStream(s)); }
public static String getContent(File f) throws Exception { return getContent(new java.io.FileInputStream(f)); }
public static String getContent(InputStream is) throws Exception { StringBuffer content = new StringBuffer(); HSSFWorkbook workbook = new HSSFWorkbook(is); for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) { HSSFSheet aSheet = workbook.getSheetAt(numSheets);// 获得一个sheet content.append("/n"); if (null == aSheet) { continue; } for (int rowNum = 0; rowNum <= aSheet.getLastRowNum(); rowNum++) { content.append("/n"); HSSFRow aRow = aSheet.getRow(rowNum); if (null == aRow) { continue; } for (short cellNum = 0; cellNum <= aRow.getLastCellNum(); cellNum++) {
HSSFCell aCell = aRow.getCell(cellNum); if (null == aCell) { continue; } if (aCell.getCellType() == HSSFCell.CELL_TYPE_STRING) { content.append(aCell.getRichStringCellValue() .getString()); } else if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { boolean b = HSSFDateUtil.isCellDateFormatted(aCell); if (b) { Date date = aCell.getDateCellValue(); SimpleDateFormat df = new SimpleDateFormat( "yyyy-MM-dd"); content.append(df.format(date)); } } } } } return content.toString(); } }
|
第四:处理PowerPoint(PowerPoint.java)
import java.io.File; import java.io.InputStream; import org.apache.poi.hslf.HSLFSlideShow; import org.apache.poi.hslf.model.TextRun; import org.apache.poi.hslf.model.Slide; import org.apache.poi.hslf.usermodel.SlideShow;
/**
 * Extracts the textual content of a legacy PowerPoint (.ppt) presentation using Apache POI's
 * HSLF API.
 */
public class PowerPoint {

    /**
     * Demo entry point: prints the text of the sample presentation {@code c://33.ppt}.
     *
     * @param args ignored
     * @throws Exception if the presentation cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c://33.ppt"));
    }

    /**
     * Reads the presentation at the given path and returns its text.
     *
     * @param s path of the .ppt file
     * @return the concatenated slide text, see {@link #getContent(InputStream)}
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        InputStream in = new java.io.FileInputStream(s);
        try {
            return getContent(in);
        } finally {
            // The stream is opened here, so it must be released here.
            in.close();
        }
    }

    /**
     * Reads the given presentation file and returns its text.
     *
     * @param f the .ppt file
     * @return the concatenated slide text, see {@link #getContent(InputStream)}
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream in = new java.io.FileInputStream(f);
        try {
            return getContent(in);
        } finally {
            // The stream is opened here, so it must be released here.
            in.close();
        }
    }

    /**
     * Extracts the text of every slide of a presentation supplied as a stream.
     *
     * <p>For each slide, the text of all its text runs is appended, followed by the slide
     * title when the slide has one. The stream is not closed by this method; the caller
     * keeps ownership of it.
     *
     * @param is stream positioned at the start of a .ppt file
     * @return the concatenated slide text
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        StringBuffer content = new StringBuffer("");
        SlideShow ss = new SlideShow(new HSLFSlideShow(is));
        Slide[] slides = ss.getSlides();
        for (int i = 0; i < slides.length; i++) {
            TextRun[] t = slides[i].getTextRuns();
            for (int j = 0; j < t.length; j++) {
                content.append(t[j].getText());
            }
            // Appending a null String would emit the literal text "null", so skip
            // slides without a title.
            // NOTE(review): the title is presumably also one of the text runs above and may
            // therefore appear twice - confirm whether that is intended.
            String title = slides[i].getTitle();
            if (title != null) {
                content.append(title);
            }
        }
        return content.toString();
    }
}
|