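需要的jar包:
poi-3.17.jar
poi-scratchpad-3.17.jar
poi-ooxml-3.17.jar
poi-ooxml-schemas-3.17.jar
注意:下面代码中导入的 org.apache.poi.ooxml.* 包从 POI 4.0 开始才存在;如果使用 3.17 版本,对应的类位于 org.apache.poi 包下(例如 org.apache.poi.POIXMLDocument),请保持 jar 包版本与 import 一致。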
下载地址:Apache POI 官方归档 https://archive.apache.org/dist/poi/release/bin/
package jichu.test_simple;
import java.io.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
 * Minimal demo that reads the text of a legacy binary Word ({@code .doc}) file
 * with Apache POI's HWPF API and prints it to standard output.
 *
 * <p>Two ways of obtaining the text are shown: directly from the
 * {@link HWPFDocument}, and through its {@link Range}.
 *
 * @author sks
 * @since 2018/1/24
 */
public class poi_doc {

    /** Path used when no command-line argument is supplied. */
    private static final String DEFAULT_PATH = "C:\\YQ\\123.doc";

    /**
     * Opens the document, prints its text, and releases all resources.
     *
     * @param args optional; {@code args[0]} is the path of the {@code .doc} file
     *             to read. When absent, {@link #DEFAULT_PATH} is used.
     */
    public static void main(String[] args) {
        File file = new File(args.length > 0 ? args[0] : DEFAULT_PATH);

        // try-with-resources closes both the document and the underlying stream,
        // and keeps all use of `doc` inside the block so a failed open can never
        // lead to a null dereference afterwards.
        try (InputStream in = new FileInputStream(file);
             HWPFDocument doc = new HWPFDocument(in)) {

            // Approach 1: get the text directly from the document object.
            StringBuilder sb = doc.getText();
            System.out.println(sb.toString());

            // Approach 2: get the text through the document's Range.
            // Kept for demonstration only; it is not printed so the output
            // is not duplicated.
            Range range = doc.getRange();
            String text = range.text();
        } catch (IOException e) {
            System.err.println("Failed to read " + file + ": " + e.getMessage());
            e.printStackTrace();
        }
    }
}
缺点:上述两种方式(HWPFDocument.getText() 和 Range.text())虽然都能读取文本,但只支持 .doc 文件,无法读取 .docx 文件;读取 .docx 需要使用 XWPF 相关的类(例如 XWPFWordExtractor)。