读取一个 Word(.doc)文档里的内容。只能读取纯文字:文档里不能有图片、表格等,否则图片和表格会变成乱码。输出结果显示在控制台,代码如下:
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Prints the text of a Word document to standard output.
 *
 * <p>The text is whatever {@code WordExtractor.getText()} returns for the
 * document; this class does no further processing of it.
 */
public class world {

    /**
     * Opens {@code d:\a.doc}, extracts its text and prints it.
     *
     * <p>Failures are reported by printing a stack trace; they are not
     * propagated to the caller.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the stream on every path, including
        // when the extractor fails part-way through reading the document.
        try (FileInputStream file = new FileInputStream("d:\\a.doc")) {
            WordExtractor extractor = new WordExtractor(file);
            String text = extractor.getText();
            System.out.println(text);
        } catch (FileNotFoundException e) {
            // The input file does not exist or cannot be opened.
            e.printStackTrace();
        } catch (IOException e) {
            // Reading the document (or closing the stream) failed.
            e.printStackTrace();
        }
    }
}
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Prints the text of a Word document to standard output.
 *
 * <p>The text is whatever {@code WordExtractor.getText()} returns for the
 * document; this class does no further processing of it.
 */
public class world {

    /**
     * Opens {@code d:\a.doc}, extracts its text and prints it.
     *
     * <p>Failures are reported by printing a stack trace; they are not
     * propagated to the caller.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The stream is declared as a resource so that it is closed on
        // every exit path, whether extraction succeeds or throws.
        try (FileInputStream file = new FileInputStream("d:\\a.doc")) {
            WordExtractor extractor = new WordExtractor(file);
            String text = extractor.getText();
            System.out.println(text);
        } catch (FileNotFoundException e) {
            // The input file does not exist or cannot be opened.
            e.printStackTrace();
        } catch (IOException e) {
            // Reading the document (or closing the stream) failed.
            e.printStackTrace();
        }
    }
}