目标:使用Apache POI读取doc、docx类型Word文件的内容
jar包:
poi-3.13-20150929.jar
poi-ooxml-3.13-20150929.jar
poi-ooxml-schemas-3.13-20150929.jar
poi-scratchpad-3.13-20150929.jar
xmlbeans-2.6.0.jar
编写函数,传入文件名称,得到该Word文件的段落数,再按段落序号逐段输出文本。
类的代码如下:
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Helpers for reading paragraph text out of Word files with Apache POI.
 *
 * <p>Files whose name ends in {@code .docx} are read through XWPF, files whose
 * name ends in {@code .doc} through HWPF. The extension is matched without
 * regard to case. Any other file name is treated as unsupported.
 *
 * <p>Every method opens the file, extracts what it needs and releases the
 * underlying file handle before returning.
 */
public class DocumentUtil {

    /** Extension of OOXML Word files, handled through XWPF. */
    private static final String DOCX_SUFFIX = ".docx";

    /** Extension of binary Word files, handled through HWPF. */
    private static final String DOC_SUFFIX = ".doc";

    /** Returns true when {@code fileName} ends with {@code suffix}, ignoring case. */
    private static boolean hasSuffix(String fileName, String suffix) {
        // regionMatches returns false for a negative offset, so names shorter
        // than the suffix need no separate length check.
        int offset = fileName.length() - suffix.length();
        return fileName.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /**
     * Opens a .docx file. The returned document keeps the package (and the
     * file behind it) open; callers must pass it to {@link #releaseDocx}.
     */
    private static XWPFDocument readDocx(String fileName) throws IOException {
        return new XWPFDocument(POIXMLDocument.openPackage(fileName));
    }

    /** Releases the package behind a document returned by {@link #readDocx}. */
    private static void releaseDocx(XWPFDocument doc) {
        // openPackage opens the file read-write; OPCPackage.close() would save
        // the package back to disk. revert() closes it without saving, which
        // is what a read-only utility wants.
        doc.getPackage().revert();
    }

    /** Loads a .doc file and closes the input stream before returning. */
    private static HWPFDocument readDoc(String fileName) throws IOException {
        InputStream in = new FileInputStream(fileName);
        try {
            // The HWPFDocument(InputStream) constructor loads the document
            // from the stream, so the stream is not needed afterwards.
            return new HWPFDocument(in);
        } finally {
            in.close();
        }
    }

    /**
     * Counts the paragraphs of a Word file.
     *
     * @param fileName path of a .doc or .docx file
     * @return the number of paragraphs, or 0 when the extension is neither
     *         .doc nor .docx
     * @throws IOException if the file cannot be opened or read
     */
    public static int paragraphNums(String fileName) throws IOException {
        if (hasSuffix(fileName, DOCX_SUFFIX)) {
            XWPFDocument doc = readDocx(fileName);
            try {
                return doc.getParagraphs().size();
            } finally {
                releaseDocx(doc);
            }
        }
        if (hasSuffix(fileName, DOC_SUFFIX)) {
            return readDoc(fileName).getRange().numParagraphs();
        }
        return 0;
    }

    /**
     * Returns the text of a single paragraph of a Word file.
     *
     * <p>The file is opened and parsed on every call; use
     * {@link #paragraphTexts(String)} to read all paragraphs in one pass.
     *
     * @param fileName path of a .doc or .docx file
     * @param numb zero-based paragraph index
     * @return the paragraph text, or {@code null} when the extension is
     *         neither .doc nor .docx
     * @throws IOException if the file cannot be opened or read
     */
    public static String paragraphText(String fileName, int numb) throws IOException {
        if (hasSuffix(fileName, DOCX_SUFFIX)) {
            XWPFDocument doc = readDocx(fileName);
            try {
                return doc.getParagraphs().get(numb).getText();
            } finally {
                releaseDocx(doc);
            }
        }
        if (hasSuffix(fileName, DOC_SUFFIX)) {
            return readDoc(fileName).getRange().getParagraph(numb).text();
        }
        return null;
    }

    /**
     * Reads the text of every paragraph of a Word file, opening the file once.
     *
     * @param fileName path of a .doc or .docx file
     * @return the paragraph texts in document order; an empty array when the
     *         extension is neither .doc nor .docx
     * @throws IOException if the file cannot be opened or read
     */
    public static String[] paragraphTexts(String fileName) throws IOException {
        if (hasSuffix(fileName, DOCX_SUFFIX)) {
            XWPFDocument doc = readDocx(fileName);
            try {
                int count = doc.getParagraphs().size();
                String[] texts = new String[count];
                for (int i = 0; i < count; i++) {
                    texts[i] = doc.getParagraphs().get(i).getText();
                }
                return texts;
            } finally {
                releaseDocx(doc);
            }
        }
        if (hasSuffix(fileName, DOC_SUFFIX)) {
            HWPFDocument doc = readDoc(fileName);
            int count = doc.getRange().numParagraphs();
            String[] texts = new String[count];
            for (int i = 0; i < count; i++) {
                texts[i] = doc.getRange().getParagraph(i).text();
            }
            return texts;
        }
        return new String[0];
    }

    /**
     * Prints every paragraph of a Word file, each followed by a separator line.
     *
     * @param args optional: {@code args[0]} is the file to read; when absent
     *             the built-in sample path is used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String fileName = args.length > 0
                ? args[0]
                : "e:\\衡水市局关于国地税联合办税进一步改进服务的通知的落实情况.docx";
        // Read once instead of re-opening the file for every paragraph.
        for (String text : paragraphTexts(fileName)) {
            System.out.println(text);
            System.out.println("--------");
        }
    }
}