使用PDFJS从单个PDF页面提取图像
Extract images from a single PDF page using PDFJS
是否有办法使用 PDF.js 框架从单个 PDF 页面中提取图像?经过一番谷歌搜索,我找到了一个相关讨论,但遗憾的是它讲得不够具体:作者提到要把一个 imageLayer 传给 CanvasGraphics 对象,但我不明白他具体指的是什么。
// Walk the page's operator list and store every painted image as a blob in the
// `images` folder of `zip`. Relies on `page`, `pdfjsLib`, `zip`,
// `putBinaryImageData` and `i` (presumably the current page number — it is
// only used in the file name) being provided by the surrounding code.
let imgIndex = 1;
let opList = await page.getOperatorList();
for (let k = 0; k < opList.fnArray.length; k++) {
  const op = opList.fnArray[k];
  const isImageOp =
    op === pdfjsLib.OPS.paintJpegXObject || op === pdfjsLib.OPS.paintImageXObject;
  if (!isImageOp) {
    continue;
  }

  const objId = opList.argsArray[k][0];
  try {
    let img = null;
    try {
      // First try the page-level object store.
      img = page.objs.get(objId);
    } catch (err) {
      // Ids starting with "g_" are looked up in page.commonObjs instead;
      // anything else is a genuine failure for this image.
      if (typeof objId !== "string" || !objId.startsWith("g_")) {
        throw err;
      }
      img = page.commonObjs.get(objId);
    }
    if (!img) {
      // Nothing to draw; skip instead of failing on img.width below.
      continue;
    }

    // Building an ImageData directly from img.data does not give a proper
    // image (and sometimes throws), so the data is painted through
    // putBinaryImageData, which uses img.kind to decode it.
    const canvas = document.createElement("canvas");
    canvas.width = img.width;
    canvas.height = img.height;
    const ctx = canvas.getContext("2d");
    putBinaryImageData(ctx, img);

    // toBlob is callback-based, so adapt it to a promise and await it.
    const blob = await new Promise((resolve) => {
      canvas.toBlob(resolve);
    });
    if (blob === null) {
      // toBlob reports failure by passing null; do not add an empty entry.
      console.log(`could not encode image ${objId}`);
      continue;
    }

    zip.folder('images').file(`page-${i}-image-${imgIndex}`, blob, { base64: false });
    imgIndex++;
  } catch (err) {
    // Best effort: log the failure and carry on with the next image.
    console.log(err);
  }
}
重要:下面的函数取自 PDF.js 的源代码,用于把图像数据转换成可以正确显示的图像。PDF.js 内置图像接口返回的数据是 Uint8ClampedArray,不能直接当作图像使用,需要先用下面的函数进行处理。
/**
 * Paint decoded image data onto a 2D canvas context.
 *
 * The image is drawn in horizontal strips of at most 16 rows: one scratch
 * ImageData (from `ctx.createImageData`) is filled and handed to
 * `ctx.putImageData` once per strip.
 *
 * @param {CanvasRenderingContext2D} ctx - Target context; only
 *   `createImageData` and `putImageData` are used.
 * @param {{kind: number, width: number, height: number, data: Uint8Array|Uint8ClampedArray}|ImageData} imgData
 *   Source image. `kind` selects the layout of `data`: 1 = 1 bit per pixel
 *   grayscale, 2 = 24-bit RGB, 3 = 32-bit RGBA. A real `ImageData` instance is
 *   drawn directly.
 * @param {Array|null} [transferMaps=null] - Optional lookup tables indexed by
 *   channel value: one table (used for every channel) or four tables
 *   (red, green, blue, gray). Other lengths are ignored.
 * @throws {Error} If `imgData.kind` is not one of the three supported kinds.
 */
export function putBinaryImageData(ctx, imgData, transferMaps = null) {
  const FULL_CHUNK_HEIGHT = 16;
  const ImageKind = {
    GRAYSCALE_1BPP: 1,
    RGB_24BPP: 2,
    RGBA_32BPP: 3,
  };

  if (typeof ImageData !== "undefined" && imgData instanceof ImageData) {
    ctx.putImageData(imgData, 0, 0);
    return;
  }

  const height = imgData.height;
  const width = imgData.width;
  const partialChunkHeight = height % FULL_CHUNK_HEIGHT;
  const fullChunks = (height - partialChunkHeight) / FULL_CHUNK_HEIGHT;
  const totalChunks = partialChunkHeight === 0 ? fullChunks : fullChunks + 1;
  const chunkImgData = ctx.createImageData(width, FULL_CHUNK_HEIGHT);
  const src = imgData.data;
  const dest = chunkImgData.data;
  let srcPos = 0;

  let transferMapRed;
  let transferMapGreen;
  let transferMapBlue;
  let transferMapGray;
  if (transferMaps) {
    switch (transferMaps.length) {
      case 1:
        transferMapRed = transferMaps[0];
        transferMapGreen = transferMaps[0];
        transferMapBlue = transferMaps[0];
        transferMapGray = transferMaps[0];
        break;
      case 4:
        transferMapRed = transferMaps[0];
        transferMapGreen = transferMaps[1];
        transferMapBlue = transferMaps[2];
        transferMapGray = transferMaps[3];
        break;
    }
  }
  const hasTransferMaps = !!(transferMapRed || transferMapGreen || transferMapBlue);

  // Remap the R, G and B bytes of the first `length` bytes of the scratch
  // buffer through the transfer maps; alpha is left untouched.
  const applyTransferMaps = (length) => {
    if (!hasTransferMaps) {
      return;
    }
    for (let k = 0; k < length; k += 4) {
      if (transferMapRed) {
        dest[k + 0] = transferMapRed[dest[k + 0]];
      }
      if (transferMapGreen) {
        dest[k + 1] = transferMapGreen[dest[k + 1]];
      }
      if (transferMapBlue) {
        dest[k + 2] = transferMapBlue[dest[k + 2]];
      }
    }
  };

  if (imgData.kind === ImageKind.GRAYSCALE_1BPP) {
    // Pixels are written as whole 32-bit words, so the byte order of the
    // platform decides which word encodes opaque black. Detect it locally
    // rather than relying on a helper from the PDF.js bundle that is not
    // available in this file.
    const endianProbe = new Uint8Array(4);
    endianProbe[0] = 1;
    const isLittleEndian = new Uint32Array(endianProbe.buffer, 0, 1)[0] === 1;

    const srcLength = src.byteLength;
    const dest32 = new Uint32Array(dest.buffer, 0, dest.byteLength >> 2);
    const dest32DataLength = dest32.length;
    const fullSrcDiff = (width + 7) >> 3; // bytes per source row
    let white = 0xffffffff;
    let black = isLittleEndian ? 0xff000000 : 0x000000ff;
    if (transferMapGray) {
      // An inverting gray map swaps the two colours.
      if (transferMapGray[0] === 0xff && transferMapGray[0xff] === 0) {
        [white, black] = [black, white];
      }
    }

    for (let i = 0; i < totalChunks; i++) {
      const thisChunkHeight = i < fullChunks ? FULL_CHUNK_HEIGHT : partialChunkHeight;
      let destPos = 0;
      for (let j = 0; j < thisChunkHeight; j++) {
        const srcDiff = srcLength - srcPos;
        let k = 0;
        const kEnd = srcDiff > fullSrcDiff ? width : srcDiff * 8 - 7;
        const kEndUnrolled = kEnd & ~7;
        let mask = 0;
        let srcByte = 0;
        // Expand whole source bytes, eight pixels at a time.
        for (; k < kEndUnrolled; k += 8) {
          srcByte = src[srcPos++];
          dest32[destPos++] = srcByte & 128 ? white : black;
          dest32[destPos++] = srcByte & 64 ? white : black;
          dest32[destPos++] = srcByte & 32 ? white : black;
          dest32[destPos++] = srcByte & 16 ? white : black;
          dest32[destPos++] = srcByte & 8 ? white : black;
          dest32[destPos++] = srcByte & 4 ? white : black;
          dest32[destPos++] = srcByte & 2 ? white : black;
          dest32[destPos++] = srcByte & 1 ? white : black;
        }
        // Remaining pixels of the row, one bit at a time.
        for (; k < kEnd; k++) {
          if (mask === 0) {
            srcByte = src[srcPos++];
            mask = 128;
          }
          dest32[destPos++] = srcByte & mask ? white : black;
          mask >>= 1;
        }
      }
      // Clear whatever is left of the scratch buffer from a previous strip.
      while (destPos < dest32DataLength) {
        dest32[destPos++] = 0;
      }
      ctx.putImageData(chunkImgData, 0, i * FULL_CHUNK_HEIGHT);
    }
  } else if (imgData.kind === ImageKind.RGBA_32BPP) {
    let destY = 0;
    let elemsInThisChunk = width * FULL_CHUNK_HEIGHT * 4;
    for (let i = 0; i < fullChunks; i++) {
      dest.set(src.subarray(srcPos, srcPos + elemsInThisChunk));
      srcPos += elemsInThisChunk;
      applyTransferMaps(elemsInThisChunk);
      ctx.putImageData(chunkImgData, 0, destY);
      destY += FULL_CHUNK_HEIGHT;
    }
    if (fullChunks < totalChunks) {
      // Trailing strip shorter than FULL_CHUNK_HEIGHT.
      elemsInThisChunk = width * partialChunkHeight * 4;
      dest.set(src.subarray(srcPos, srcPos + elemsInThisChunk));
      applyTransferMaps(elemsInThisChunk);
      ctx.putImageData(chunkImgData, 0, destY);
    }
  } else if (imgData.kind === ImageKind.RGB_24BPP) {
    let thisChunkHeight = FULL_CHUNK_HEIGHT;
    let pixelsInThisChunk = width * thisChunkHeight;
    for (let i = 0; i < totalChunks; i++) {
      if (i >= fullChunks) {
        thisChunkHeight = partialChunkHeight;
        pixelsInThisChunk = width * thisChunkHeight;
      }
      let destPos = 0;
      for (let j = pixelsInThisChunk; j--; ) {
        dest[destPos++] = src[srcPos++];
        dest[destPos++] = src[srcPos++];
        dest[destPos++] = src[srcPos++];
        dest[destPos++] = 255; // source has no alpha channel: fully opaque
      }
      applyTransferMaps(destPos);
      ctx.putImageData(chunkImgData, 0, i * FULL_CHUNK_HEIGHT);
    }
  } else {
    throw new Error(`bad image kind: ${imgData.kind}`);
  }
}
相关文章:
- 使用javascript从HTML网页中提取图像url
- 是否可以创建一个画布来提取图像数据,而无需实际渲染画布
- 我可以使用 javascript 从类别中提取图像吗?
- 使用 AngularJS 从 RSS 提要中提取图像
- 提取图像src并使其成为另一个图像的src
- 当源是IE/EDGE上的加密令牌视频时,Canvas未从azure媒体播放器中提取图像
- 如何使用动力学JS从其他图像中提取图像
- 从9GAG中提取图像
- 我需要什么类型的正则表达式来从markdown中提取图像url
- Jquery.backstretch.min.js从数据库或变量中提取图像
- 使用Javascript从HTML代码中提取图像url
- 使用PDFJS从单个PDF页面提取图像
- 如何使用jQuery从字符串中提取图像标签
- 从远程源Javascript中提取图像
- 从网页中提取图像源,其中img标签可能是在页面渲染时由javascript等动态添加的
- Angular或Three.js不会从服务器中提取图像,而是从浏览器缓存中提取图像
- 如何从数组中提取图像并放入列表中
- 如何创建Adobe Air应用程序,从应用程序外的计算机上的文件夹中提取图像
- Regex从xml中提取图像
- 从API中提取图像