使用 tm-extractors-0.4.jar 来读取 Word 文件：
/**
 * Opens the file at {@code fileName}, passes it to
 * {@code WordExtractor.extractText}, and prints the returned text to
 * standard output.
 *
 * <p>Before extraction, the value of {@code in.available()} for the opened
 * stream is printed as a diagnostic line. Failures are reported with
 * {@code printStackTrace()} and are not propagated to the caller: if the file
 * cannot be opened, or the extractor throws, the method returns without
 * printing any text. The input stream is always closed before returning.
 *
 * @param fileName path of the Word document to read
 */
public static void getContentFromWord(String fileName){
    FileInputStream in;
    try {
        in = new FileInputStream(fileName);
    } catch (FileNotFoundException e) {
        // Without an open stream there is nothing to extract; stop here
        // instead of dereferencing a null stream further down.
        e.printStackTrace();
        return;
    }

    try {
        try {
            // Diagnostic output only; a failure here does not prevent extraction.
            System.out.println(in.available());
        } catch (IOException e) {
            e.printStackTrace();
        }

        WordExtractor extractor = new WordExtractor();
        String str;
        try {
            str = extractor.extractText(in);
        } catch (Exception ex) {
            // Report the failure rather than silently printing "null".
            ex.printStackTrace();
            return;
        }
        System.out.println(str);
    } finally {
        // Release the file handle on every path, including early returns.
        try {
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}