对于 .docx 文件,可以使用 mammoth 库,它可以从 .docx 文件中提取纯文本。
// Extract the plain text of a .docx file with mammoth and print it.
const mammoth = require("mammoth");

mammoth
  .extractRawText({ path: "./routes/aaa.docx" })
  .then((result) => {
    // result.value holds the raw text extracted from the document.
    console.log(result.value);
    // Dump the whole result object as well, so result.messages is visible next to the text.
    console.log(result);
  })
  .catch((error) => {
    // Report failures explicitly instead of relying on the non-standard, deprecated .done().
    console.error(error);
  });
读取 PDF 文件(Word 文档也可以先转换成 PDF 文件,再按下面的方式读取):
// Read a PDF from disk and print the text chunks extracted by pdf-text.
const fs = require('fs');
const pdfText = require('pdf-text');

// The PDF path is built from this script's own directory.
const pathToPdf = `${__dirname}/info.pdf`;
const buffer = fs.readFileSync(pathToPdf);

pdfText(buffer, (err, chunks) => {
  if (err) {
    // Surface the failure instead of ignoring err and logging chunks regardless.
    console.error(err);
    return;
  }
  console.log(chunks);
});
mammoth 文档 : https://www.npmjs.com/package/mammoth
参考文章:https://stackoverflow.com/questions/9038231/can-i-read-pdf-or-word-docs-with-node-js