读取文件信息所需依赖
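<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.12</version>
</dependency>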
读取doc文件内容
/**
 * Reads the text content of the Word document stored at the given path.
 *
 * <p>Failures are reported via {@code printStackTrace()} and signalled to the
 * caller by a {@code null} return value; no exception is propagated.
 *
 * @param name path of the file to read
 * @return the text extracted by {@code WordExtractor}, or {@code null} if the
 *         file could not be opened or its content could not be extracted
 */
public static String readWord(String name)
{
    String text = null;
    // try-with-resources closes both the extractor and the file stream on every
    // path (success or failure); previously neither was ever closed.
    try (FileInputStream in = new FileInputStream(name);
         WordExtractor extractor = new WordExtractor(in))
    {
        text = extractor.getText();
    }
    catch (Exception e)
    {
        // A single handler is enough: FileNotFoundException is an Exception and
        // was handled identically. The broad catch keeps the "return null on any
        // failure" contract callers may rely on.
        e.printStackTrace();
    }
    return text;
}
读取上传的doc文件内容(MultipartFile;WordExtractor仅支持.doc格式,不支持.docx)
/**
 * Extracts the text content of an uploaded Word document.
 *
 * @param file the uploaded file; may be {@code null} or empty
 * @return the text extracted by {@code WordExtractor}, or an empty string when
 *         {@code file} is {@code null} or empty, or when reading it fails with
 *         an {@link IOException}
 */
public static String readDoc(MultipartFile file) {
    if (file == null || file.isEmpty()) {
        return "";
    }
    String text = "";
    // try-with-resources closes the extractor and the upload stream on every path.
    try (InputStream inputStream = file.getInputStream();
         WordExtractor wordExtractor = new WordExtractor(inputStream)) {
        text = wordExtractor.getText();
    } catch (IOException e) {
        // Previously the extractor stayed null here and the following getText()
        // call threw a NullPointerException; now the failure is logged once
        // (with its stack trace) and the empty-string result is returned.
        log.warn("Failed to read Word document: " + e, e);
    }
    return text;
}
读取xls文件内容
public static