通过 Apache POI 读取 Word 2003(.doc)和 Word 2007(.docx)文档的内容:文件以二进制流的形式读入,只能提取纯文本,无法保留原有格式。
// Decode the request and encode the plain-text response as GBK.
request.setCharacterEncoding("gbk");
response.setContentType("text/plain;charset=gbk");
String file = "test.docx";
// Resolve the web application's root directory on disk.
String rootPath = this.getServletConfig().getServletContext().getRealPath("/");
// getRealPath returns null when the container cannot translate the virtual
// path to a real one (e.g. the application is not unpacked on disk). Fail with
// a clear message instead of trying to open the literal path "nulltest.docx".
if (rootPath == null) {
    throw new java.io.FileNotFoundException(
            "Cannot resolve the web application root to a file system path; unable to locate " + file);
}
// Join through File so the result is correct whether or not rootPath ends
// with a path separator (containers differ on this).
String path = new java.io.File(rootPath, file).getPath();
// Log the path before opening it so it is visible even when opening fails.
System.out.println(path);
// Left non-final on purpose: later code may wrap the stream and reassign it.
InputStream in = new FileInputStream(path);
String bodyText = "";
try {
// 转换成 PushbackinputStream
if (!in.markSupported()) {
in = new PushbackInputStream(in, 8);
}
// 其他word版本
if(POIFSFileSystem.hasPOIFSHeader(in))
{
HWPFDocument document = new HWPFDocument(in);
WordExtractor extractor = new WordExtractor(document);
bodyText = extractor.getText();
response.getWriter().write(bodyText);
return ;