// 1. Read the content of a Word document using the POI library

import java.io.File;
import java.io.FileInputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;

/**
 * Extracts the text of a Word (.doc) file through POI's HWPF API.
 *
 * @author Administrator
 */
public class DocFile extends AFile {

    /**
     * Returns the text of the whole document range of the given Word file.
     *
     * @param f the Word file to read
     * @return the text reported by the document's overall {@link Range}
     * @throws Exception if the file cannot be opened or parsed
     */
    @Override
    public String getContent(File f) throws Exception {
        FileInputStream fis = new FileInputStream(f);
        try {
            HWPFDocument doc = new HWPFDocument(fis);
            Range range = doc.getRange();
            return range.text();
        } finally {
            // Close the stream even when parsing fails, so the file handle is not leaked.
            fis.close();
        }
    }
}

// 2. Read the content of an Excel document using the jxl library

import java.io.File;
import java.io.FileInputStream;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

/**
 * Extracts the cell contents of an Excel (.xls) file through the jxl API.
 *
 * @author Administrator
 */
public class XlsFile extends AFile {

    /**
     * Concatenates the contents of every cell of every sheet, sheet by sheet and
     * row by row, in the order jxl reports them.
     *
     * @param f the Excel file to read
     * @return all cell contents appended together without any separator
     * @throws Exception if the file cannot be opened or parsed
     */
    @Override
    public String getContent(File f) throws Exception {
        FileInputStream fis = new FileInputStream(f);
        try {
            Workbook workbook = Workbook.getWorkbook(fis);
            try {
                StringBuilder sb = new StringBuilder();
                Sheet[] sheets = workbook.getSheets();
                for (int i = 0; i < sheets.length; i++) {
                    Sheet sheet = sheets[i];
                    int rowCount = sheet.getRows();
                    for (int j = 0; j < rowCount; j++) {
                        Cell[] cells = sheet.getRow(j);
                        for (int k = 0; k < cells.length; k++) {
                            // NOTE(review): adjacent cells are joined with no separator, so
                            // "foo" and "bar" become "foobar". Kept as-is to preserve the
                            // returned text; confirm whether callers want a delimiter.
                            sb.append(cells[k].getContents());
                        }
                    }
                }
                return sb.toString();
            } finally {
                // The workbook was never closed before; release it once the text is copied out.
                workbook.close();
            }
        } finally {
            // Close the stream even when reading fails, so the file handle is not leaked.
            fis.close();
        }
    }
}

// 3. Read the content of a PDF document using PDFBox

import com.wb.lucene.bo.DocDetailBean;
import java.io.File;
import java.io.FileInputStream;
import org.apache.lucene.index.IndexWriter;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Extracts the text of a PDF file through PDFBox.
 *
 * @author Administrator
 */
public class PdfFile extends AFile {

    /**
     * Parses the given PDF, strips its text and passes it through the
     * superclass's {@code toHtml} (defined outside this snippet).
     *
     * @param f the PDF file to read
     * @return the result of {@code toHtml} applied to the stripped text
     * @throws Exception if the file cannot be opened, parsed or stripped
     */
    @Override
    public String getContent(File f) throws Exception {
        FileInputStream fis = new FileInputStream(f);
        try {
            PDFParser parser = new PDFParser(fis);
            parser.parse();
            PDDocument pdd = parser.getPDDocument();
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                return super.toHtml(stripper.getText(pdd));
            } finally {
                // Close the document even when text extraction fails.
                pdd.close();
            }
        } finally {
            // Stream is closed after the document, matching the original close order.
            fis.close();
        }
    }
}