pdfjs的版本为2.5.207。尝试过其他版本，但是导入时会报错，不知道什么原因。
在package.json里写
"pdfjs-dist": "^2.5.207",
然后在终端输入 npm i 就可以安装了
npm i
html部分
<!-- Hidden file input restricted to PDF files; a change event invokes handleFile. -->
<input
  type="file"
  id="fileXlsKocPdf"
  accept=".pdf,.PDF"
  @change="handleFile"
  style="display: none"
/>
<!--
  Visible trigger for the hidden input above. The input is looked up by id
  explicitly instead of through the implicit window.fileXlsKocPdf named-element
  global, which a same-named global variable would shadow.
-->
<button
  onclick="event.preventDefault();document.getElementById('fileXlsKocPdf').click()"
>
  上传pdf文件
</button>
js部分
import * as PDFJS from "pdfjs-dist/es5/build/pdf.js";
// Configure the pdf.js worker. The dynamic import() below evaluates to a
// Promise, and that Promise object itself is what gets assigned to workerSrc.
// NOTE(review): pdf.js describes workerSrc as the worker script location
// (a string) — presumably this setup only works because loading the worker
// entry module has side effects; confirm, and consider a static import of the
// worker entry instead.
PDFJS.GlobalWorkerOptions.workerSrc = import(
"pdfjs-dist/build/pdf.worker.entry"
);
/**
 * Change handler for the PDF file input.
 *
 * Takes the first selected file, reads it as an ArrayBuffer and passes its
 * bytes, wrapped in a Uint8Array, to extractTextFromPDF once loading has
 * finished. Does nothing when no file was selected.
 *
 * @param {Event} event - change event whose target exposes a `files` list.
 */
const handleFile = (event) => {
  const pickedFile = event.target.files[0];
  if (!pickedFile) {
    return;
  }

  const fileReader = new FileReader();
  fileReader.onload = (loadEvent) => {
    // The load result is an ArrayBuffer; expose it as a byte array.
    extractTextFromPDF(new Uint8Array(loadEvent.target.result));
  };
  fileReader.readAsArrayBuffer(pickedFile);
};
/**
 * Extracts the text of every page of a PDF and logs the resulting payload.
 *
 * Pages are loaded concurrently, but their text is assembled in page order
 * only after every page has finished. The logged content is therefore always
 * complete and correctly ordered, regardless of which page resolves first.
 * Within a page the text items are concatenated without a separator, and each
 * page is terminated by a newline.
 *
 * @param {Uint8Array} data - raw bytes of the PDF document.
 * @returns {Promise<{content: string, aiapi: *, model: *}|undefined>}
 *   resolves to the logged payload, or to undefined when reading the PDF
 *   failed (the failure is reported through console.error).
 */
const extractTextFromPDF = async (data) => {
  try {
    const pdf = await PDFJS.getDocument(data).promise;
    // pdf.js page numbers are 1-based.
    const pageNumbers = Array.from(
      { length: pdf.numPages },
      (_, index) => index + 1
    );
    // Promise.all keeps results in input order, so the text stays in page
    // order even when pages finish loading out of order.
    const pageTexts = await Promise.all(
      pageNumbers.map(async (pageNumber) => {
        const page = await pdf.getPage(pageNumber);
        const content = await page.getTextContent();
        return `${content.items.map((item) => item.str).join("")}\n`;
      })
    );
    const test = {
      content: pageTexts.join(""),
      aiapi: FormAllData.value.aiapi,
      model: FormAllData.value.model,
    };
    // Final extracted data.
    console.log(test);
    return test;
  } catch (error) {
    console.error("Failed to extract text from PDF:", error);
    return undefined;
  }
};