Pdf.js与文本选择
pdf.js with text selection
如何使PDF中的文本可选择?
我尝试了下面的代码。PDF 可以正常渲染,但无法选择其中的文本。
https://github.com/mozilla/pdf.js
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js
'use strict';

// Load file.pdf, paint its first page onto the canvas and build a text layer
// on top of it (the overlay is what is meant to make the text selectable).
PDFJS.getDocument('file.pdf').then(function (doc) {
  var page_num = 1;

  doc.getPage(page_num).then(function (pdfPage) {
    var zoom = 1.5;
    var pageViewport = pdfPage.getViewport(zoom);

    // Resize the canvas to the viewport dimensions.
    var canvasEl = document.getElementById('the-canvas');
    var ctx2d = canvasEl.getContext('2d');
    canvasEl.height = pageViewport.height;
    canvasEl.width = pageViewport.width;

    // Give the overlay the viewport size and the offset jQuery reports
    // for the canvas.
    var canvasPos = $(canvasEl).offset();
    var overlayCss = {
      height : pageViewport.height+'px',
      width : pageViewport.width+'px',
      top : canvasPos.top,
      left : canvasPos.left
    };
    var $overlay = $('#text-layer').css(overlayCss);

    // Kick off the bitmap rendering; its completion is not awaited here.
    var renderParams = {
      canvasContext : ctx2d,
      viewport : pageViewport
    };
    pdfPage.render(renderParams);

    // Fetch the page text and hand it to the text layer builder.
    pdfPage.getTextContent().then(function (pageText) {
      var builderOptions = {
        textLayerDiv : $overlay.get(0),
        pageIndex : page_num - 1,
        viewport : pageViewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(pageText);
      builder.render();
    });
  });
});
<body>
<!-- Wrapper holding the rendered page and its text overlay. -->
<div>
<!-- The script draws the PDF page into this canvas. -->
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<!-- The script sizes and positions this div from the canvas and passes it to TextLayerBuilder. -->
<div id="text-layer" class="textLayer"></div>
</div>
</body>
在 pdf.js 2.8.61 版本中,被采纳的答案已不再适用:renderTextLayer() 已经集成到 pdf.js 中,因此不再需要额外的外部脚本,也不需要 jQuery。
以下代码将使PDF文本可选择。它将加载以下PDF文档作为示例,请将其替换为您自己的文档:
https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf
它主要使用两个html元素:
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
canvas 用于显示不可选择的文档,.textLayer div 用于显示可选择的文本。.textLayer div 上的文本都是透明的,所以不可见,它只提供选择效果。
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=no">
<!-- PDF.js library build; the inline script below reads its exports from window['pdfjs-dist/build/pdf']. -->
<script src="//mozilla.github.io/pdf.js/build/pdf.js" crossorigin="anonymous"></script>
<!-- Viewer stylesheet; presumably supplies the .textLayer rules for the overlay div (confirm against the deployed viewer.css). -->
<link href="//mozilla.github.io/pdf.js/web/viewer.css" rel="stylesheet" type="text/css" />
<style type="text/css">
#the-canvas {
border: 1px solid black;
direction: ltr;
}
</style>
</head>
<body>
<h1>PDF.js Previous/Next example</h1>
<!-- Paging controls; the inline script attaches click handlers to the buttons and fills both counters by id. -->
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<!-- The current page is drawn into this canvas. -->
<canvas id="the-canvas"></canvas>
<!-- Overlay that the script sizes and positions from the canvas and passes to renderTextLayer. -->
<div class="textLayer"></div>
<script>
// When the PDF is fetched from another server by absolute URL, that server
// has to send suitable CORS headers.
var url = '//raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';

// PDF.js was loaded via a <script> tag, so take its exports from the global object.
var pdfjsLib = window['pdfjs-dist/build/pdf'];

// The workerSrc property has to be specified.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Shared viewer state read and written by the functions below.
var pdfDoc = null;            // set once getDocument() resolves
var pageNum = 1;              // page currently shown
var pageRendering = false;    // true while renderPage() is in flight
var pageNumPending = null;    // page requested while another one was rendering
var scale = 1;
var canvas = document.getElementById('the-canvas');
var ctx = canvas.getContext('2d');
/**
 * Get page info from document, resize canvas accordingly, and render the
 * page together with its text layer.
 *
 * Uses the shared state `pdfDoc`, `scale`, `canvas` and `ctx`. Sets
 * `pageRendering` while work is in flight; once the page AND its text layer
 * are done (or any step failed) it clears the flag and starts the page
 * stored in `pageNumPending`, if there is one.
 *
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;

  // Using promise to fetch the page
  pdfDoc.getPage(num).then(function(page) {
    var viewport = page.getViewport({scale: scale});
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render PDF page into canvas context
    var renderTask = page.render({
      canvasContext: ctx,
      viewport: viewport
    });

    // Wait for rendering to finish, then fetch the text contents of the page
    return renderTask.promise.then(function() {
      return page.getTextContent();
    }).then(function(textContent) {
      var textLayer = document.querySelector(".textLayer");

      // renderTextLayer only adds to the container, so the text of the
      // previously shown page has to be removed first; otherwise every page
      // change stacks another set of text on top of the canvas.
      textLayer.textContent = '';

      // Align the text layer with the canvas
      textLayer.style.left = canvas.offsetLeft + 'px';
      textLayer.style.top = canvas.offsetTop + 'px';
      textLayer.style.height = canvas.offsetHeight + 'px';
      textLayer.style.width = canvas.offsetWidth + 'px';

      // Pass the data to the method for rendering of text over the pdf canvas.
      var textLayerTask = pdfjsLib.renderTextLayer({
        textContent: textContent,
        container: textLayer,
        viewport: viewport,
        textDivs: []
      });

      // Keep the chain pending until the text layer is complete, when the
      // returned task exposes a promise.
      return textLayerTask && textLayerTask.promise;
    });
  }).catch(function(reason) {
    // Report the failure; the step below still runs so paging keeps working.
    console.error('Failed to render page ' + num, reason);
  }).then(function() {
    pageRendering = false;
    if (pageNumPending !== null) {
      // A new page was requested in the meantime. It is started only now, so
      // the text layer of this page can never be drawn over the next one.
      var nextNum = pageNumPending;
      pageNumPending = null;
      renderPage(nextNum);
    }
  });

  // Update page counters
  document.getElementById('page_num').textContent = num;
}
/**
 * Renders the given page right away when nothing is being rendered.
 * Otherwise remembers it in `pageNumPending` so that it is rendered once
 * the current rendering has finished.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}
/**
 * Steps back one page and queues it for rendering.
 * Leaves everything untouched when the first page is already shown.
 */
function onPrevPage() {
  var atFirstPage = pageNum <= 1;
  if (!atFirstPage) {
    pageNum -= 1;
    queueRenderPage(pageNum);
  }
}
// Run onPrevPage when the element with id "prev" (the Previous button) is clicked.
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
 * Displays next page.
 * Does nothing while no document is loaded or when the last page is
 * already shown.
 */
function onNextPage() {
  // pdfDoc stays null until getDocument() has resolved; a click that arrives
  // earlier must not dereference it.
  if (!pdfDoc || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
// Run onNextPage when the element with id "next" (the Next button) is clicked.
document.getElementById('next').addEventListener('click', onNextPage);
/**
 * Starts the asynchronous download of the PDF. Once it is available the
 * document is stored in `pdfDoc`, the page count is shown and the current
 * page is rendered.
 */
pdfjsLib.getDocument(url).promise.then(function onDocumentLoaded(loadedDoc) {
  pdfDoc = loadedDoc;

  var pageCountEl = document.getElementById('page_count');
  pageCountEl.textContent = loadedDoc.numPages;

  // Initial/first page rendering
  renderPage(pageNum);
});
</script>
</body>
</html>
你的javascript代码是完美的。你只需要包含文本图层生成器所依赖的UI实用程序:
https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js
或者在 HTML 中引入:
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
如果你在没有 ui_utils 的情况下运行你的代码并查看调试控制台,你会看到 ReferenceError: CustomStyle is not defined。在 PDF.js 的代码仓库中快速搜索一下,你会发现它是在 ui_utils.js 中定义的。
这是我的最小但完整的代码供您参考。我在这里使用PDFjs的演示pdf。注意,在生产环境中,你不应该链接到raw.github.
<!DOCTYPE html><meta charset="utf-8">
<!-- Load order: text layer stylesheet, jQuery, ui_utils.js, text_layer_builder.js, then the PDF.js build. -->
<!-- NOTE(review): the raw.githubusercontent.com links track the master branch; the surrounding answer itself says not to link there in production. -->
<link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<body>
<div>
<!-- The inline script draws the PDF page into this canvas. -->
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<!-- The inline script sizes and positions this div from the canvas and passes it to TextLayerBuilder. -->
<div id="text-layer" class="textLayer"></div>
</div>
<script>
'use strict';

// Load file.pdf, paint its first page onto the canvas and build a text layer
// on top of it (the overlay is what is meant to make the text selectable).
PDFJS.getDocument('file.pdf').then(function (doc) {
  var page_num = 1;

  doc.getPage(page_num).then(function (pdfPage) {
    var zoom = 1.5;
    var pageViewport = pdfPage.getViewport(zoom);

    // Resize the canvas to the viewport dimensions.
    var canvasEl = $('#the-canvas')[0];
    var ctx2d = canvasEl.getContext('2d');
    canvasEl.height = pageViewport.height;
    canvasEl.width = pageViewport.width;

    // Give the overlay the viewport size and the offset jQuery reports
    // for the canvas.
    var canvasPos = $(canvasEl).offset();
    var overlayCss = {
      height : pageViewport.height+'px',
      width : pageViewport.width+'px',
      top : canvasPos.top,
      left : canvasPos.left
    };
    var $overlay = $('#text-layer').css(overlayCss);

    // Kick off the bitmap rendering; its completion is not awaited here.
    var renderParams = {
      canvasContext : ctx2d,
      viewport : pageViewport
    };
    pdfPage.render(renderParams);

    // Fetch the page text, log it, and hand it to the text layer builder.
    pdfPage.getTextContent().then(function (pageText) {
      console.log( pageText );
      var builderOptions = {
        textLayerDiv : $overlay.get(0),
        pageIndex : page_num - 1,
        viewport : pageViewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(pageText);
      builder.render();
    });
  });
});
</script>
经过几个小时的努力,我发现这篇文章对于实现文本选择以及在不使用 Node.js 的情况下使用 pdf.js 非常有帮助:《使用 Mozilla 的 PDF.js 在 JavaScript 中自定义 PDF 渲染》
您好,您已经在HTML内容中创建了画布。
画布将不支持文本选择,所以你需要改变画布为另一种方式
- 用于选择/文本框操作的JavaScript
- 以编程方式选择文本Mobile Safari
- Firefox 输入框在选择文本时无法滚动
- 如何在javascript中取消选择文本框
- 如何在html中打印选择文本
- SVG元素——处理和选择文本
- 选择文本并添加到本地存储
- 选择“p文本jquery”
- 为什么火狐在我更改 innerHTML 时会选择文本
- 根据换行符选择文本
- 如何按从当前单词到段落中该句子(.)结尾的范围选择文本
- 单击按钮时选择文本
- JavaScript 触发器事件在 Android 上选择文本
- .val() 选择文本而不是值字符串,当网页动态编辑源 html
- 为什么不用特殊字符替换选择文本,以及如何删除此特殊字符
- 使用链接多次选择文本
- 使用 jQuery 在获得焦点时选中文本框内容,在移动浏览器中不起作用
- Safari存在文本输入问题,用户输入时会选择文本,导致文本丢失
- Bootstrap下拉菜单-仅复选框不选择文本
- 使用.on的多个下拉菜单选择文本仅适用于第一个下拉菜单