在线查看PDF文件,pdf.js使用方法

 

PDF.js可以实现在html下直接浏览pdf文档,是一款开源的pdf文档读取解析插件,非常强大,能将PDF文件渲染成Canvas。PDF.js主要包含两个库文件,一个pdf.js和一个pdf.worker.js,一个负责API解析,一个负责核心解析。 

首先引入pdf.js文件<script type="text/javascript" src='pdf.js'></script> 
PDF.js大部分用法都是基于Promise的,PDFJS.getDocument(url)方法返回的就是一个Promise:

PDFJS.getDocument('helloworld.pdf').then(function(pdf) {
});

PDF的解析工作需要通过pdf.getPage(page)去执行,这个方法返回的也是一个Promise,因此可以去逐页解析PDF:

pdf.getPage(1).then(function(page) { 
});

官网地址:http://mozilla.github.io/pdf.js/

渲染页面

PDF页面有它自己的视窗,它定义了像素大小(n.72dpi和初始旋转。默认情况下,该窗口将缩放到PDF但是通过修改视图可以更改此操作。当创建了视图时,还会创建一个初始转换矩阵,它考虑到期望的规模、旋转,并转换坐标系统(0点)PDF文档底部左边,而画布0是 左。

var scale = 1.5;
var viewport = page.getViewport(scale);

var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;

var renderContext = {
  canvasContext: context,
  viewport: viewport
};
page.render(renderContext);
还可以自定义canvas大小:
var desiredWidth = 100;
var viewport = page.getViewport(1);
var scale = desiredWidth / viewport.width;
var scaledViewport = page.getViewport(scale);

 官方给出的示例:

ar url = '//cdn.mozilla.net/pdfjs/helloworld.pdf';
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

var loadingTask = PDFJS.getDocument(url);
loadingTask.promise.then(function(pdf) {
  console.log('PDF loaded');
  
  var pageNumber = 1;
  pdf.getPage(pageNumber).then(function(page) {
    console.log('Page loaded');
    
    var scale = 1.5;
    var viewport = page.getViewport(scale);

    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    var renderContext = {
      canvasContext: context,
      viewport: viewport
    };
    var renderTask = page.render(renderContext);
    renderTask.then(function () {
      console.log('Page rendered');
    });
  });
}, function (reason) {
  console.error(reason);
});

 

另外较大的PDF文件可以用base 64编码方式加载,例如:

var pdfData = atob(
  'JVBERi0xLjcKCjEgMCBvYmogICUgZW50cnkgcG9pbnQKPDwKICAvVHlwZSAvQ2F0YWxvZwog' +
  'IC9QYWdlcyAyIDAgUgo+PgplbmRvYmoKCjIgMCBvYmoKPDwKICAvVHlwZSAvUGFnZXMKICAv' +
  'TWVkaWFCb3ggWyAwIDAgMjAwIDIwMCBdCiAgL0NvdW50IDEKICAvS2lkcyBbIDMgMCBSIF0K' +
  'Pj4KZW5kb2JqCgozIDAgb2JqCjw8CiAgL1R5cGUgL1BhZ2UKICAvUGFyZW50IDIgMCBSCiAg' +
  'L1Jlc291cmNlcyA8PAogICAgL0ZvbnQgPDwKICAgICAgL0YxIDQgMCBSIAogICAgPj4KICA+' +
  'PgogIC9Db250ZW50cyA1IDAgUgo+PgplbmRvYmoKCjQgMCBvYmoKPDwKICAvVHlwZSAvRm9u' +
  'dAogIC9TdWJ0eXBlIC9UeXBlMQogIC9CYXNlRm9udCAvVGltZXMtUm9tYW4KPj4KZW5kb2Jq' +
  'Cgo1IDAgb2JqICAlIHBhZ2UgY29udGVudAo8PAogIC9MZW5ndGggNDQKPj4Kc3RyZWFtCkJU' +
  'CjcwIDUwIFRECi9GMSAxMiBUZgooSGVsbG8sIHdvcmxkISkgVGoKRVQKZW5kc3RyZWFtCmVu' +
  'ZG9iagoKeHJlZgowIDYKMDAwMDAwMDAwMCA2NTUzNSBmIAowMDAwMDAwMDEwIDAwMDAwIG4g' +
  'CjAwMDAwMDAwNzkgMDAwMDAgbiAKMDAwMDAwMDE3MyAwMDAwMCBuIAowMDAwMDAwMzAxIDAw' +
  'MDAwIG4gCjAwMDAwMDAzODAgMDAwMDAgbiAKdHJhaWxlcgo8PAogIC9TaXplIDYKICAvUm9v' +
  'dCAxIDAgUgo+PgpzdGFydHhyZWYKNDkyCiUlRU9G');

PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js'; var loadingTask = PDFJS.getDocument({data: pdfData}); loadingTask.promise.then(function(pdf) { console.log('PDF loaded'); var pageNumber = 1; pdf.getPage(pageNumber).then(function(page) { console.log('Page loaded'); var scale = 1.5; var viewport = page.getViewport(scale); var canvas = document.getElementById('the-canvas'); var context = canvas.getContext('2d'); canvas.height = viewport.height; canvas.width = viewport.width; var renderContext = { canvasContext: context, viewport: viewport }; var renderTask = page.render(renderContext); renderTask.then(function () { console.log('Page rendered'); }); }); }, function (reason) { console.error(reason); });

 

pdf翻页处理:

// If absolute URL from the remote server is provided, configure the CORS
// header on that server.
var url = '//cdn.mozilla.net/pdfjs/tracemonkey.pdf';

// The workerSrc property shall be specified.
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

var pdfDoc = null,
    pageNum = 1,
    pageRendering = false,
    pageNumPending = null,
    scale = 0.8,
    canvas = document.getElementById('the-canvas'),
    ctx = canvas.getContext('2d');

/**
 * Get page info from document, resize canvas accordingly, and render page.
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;
pdfDoc.getPage(num).then(function(page) { var viewport = page.getViewport(scale); canvas.height = viewport.height; canvas.width = viewport.width; var renderContext = { canvasContext: ctx, viewport: viewport }; var renderTask = page.render(renderContext);
renderTask.promise.then(function() { pageRendering = false; if (pageNumPending !== null) { renderPage(pageNumPending); pageNumPending = null; } }); }); document.getElementById('page_num').textContent = num; }
function queueRenderPage(num) { if (pageRendering) { pageNumPending = num; } else { renderPage(num); } }
function onPrevPage() { if (pageNum <= 1) { return; } pageNum--; queueRenderPage(pageNum); } document.getElementById('prev').addEventListener('click', onPrevPage);
function onNextPage() { if (pageNum >= pdfDoc.numPages) { return; } pageNum++; queueRenderPage(pageNum); } document.getElementById('next').addEventListener('click', onNextPage);

PDFJS.getDocument(url).then(function(pdfDoc_) { pdfDoc = pdfDoc_; document.getElementById('page_count').textContent = pdfDoc.numPages; renderPage(pageNum); });

 

 

关于page方式的使用:

解析结果,我们可以看下这个对象提供的方法:

方法返回
getAnnotationsA promise that is resolved with an {Array} of the annotation objects.
getTextContentThat is resolved a TextContent object that represent the page text content.
getViewportContains ‘width’ and ‘height’ properties along with transforms required for rendering.
renderAn object that contains the promise, which is resolved when the page finishes rendering.

我们可以试试调用getTextContent方法,并将其结果打印出来:

pdf.getPage(1).then(function(page) { 
    console.log(page);
});

输入格式大致如下:

{
    "items": [
        {
            "str": "xxx",
            "dir": "xxx",
            "width": xxx,
            "height": xxx,
            "transform": [
                48,
                0,
                0,
                48,
                45.32495,
                679.04
            ],
            "fontName": "g_d0_f1"
        },
        {
            "str": " ",
            "dir": "ltr",
            "width": 9.600000000000001,
            "height": 2304,
            "transform": [
                48,
                0,
                0,
                48,
                285.325,
                679.04
            ],
            "fontName": "g_d0_f2"
        }
      ],
    "styles": {
        "g_d0_f1": {
            "fontFamily": "monospace",
            "ascent": 1.05810546875,
            "descent": -0.26171875,
            "vertical": false
        },
        "g_d0_f2": {
            "fontFamily": "sans-serif",
            "ascent": 0.74365234375,
            "descent": -0.25634765625
        }
    }
 }

PDF.js能将每页文本的字符串、位置、字体都解析出来。

官网用的viewer.js:http://mozilla.github.io/pdf.js/web/viewer.html,首先底图是一个Canvas,内容和PDF一样(通过下面介绍的page.render方法可以得到),底图之上是一个textLayer,这一层就是通过page.getTextContent()得到了字体的位置和样式,再覆盖在Canvas上。

我们可以直接使用官网view.html的demo,然后修改样式去掉用不掉的功能,简单粗暴。只需要在跳转链接后面加上参数就行,例:http://xxxx/viewer.html?file='xxxx.pdf';

 

转载于:https://www.cnblogs.com/zhanggf/p/8504317.html

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值