How to convert PDF to XLSX from uploaded file (node for PDF to excel API in JavaScript and PDF.co Web API

What is PDF.co Web API? It is the Rest API that provides set of data extraction functions, tools for documents manipulation, splitting and merging of pdf files. Includes built-in OCR, images recognition, can generate and read barcodes from images, scans and pdf.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

app.js

      
/*jshint esversion: 6 */ var https = require("https"); var path = require("path"); var fs = require("fs"); // `request` module is required for file upload. // Use "npm install request" command to install. var request = require("request"); // The authentication key (API Key). // Get your own by registering at https://app.pdf.co/documentation/api const API_KEY = "***********************************"; // Source PDF file const SourceFile = "./sample.pdf"; // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. const Pages = ""; // PDF document password. Leave empty for unprotected documents. const Password = ""; // Destination XLSX file name const DestinationFile = "./result.xlsx"; // 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE. getPresignedUrl(API_KEY, SourceFile) .then(([uploadUrl, uploadedFileUrl]) => { // 2. UPLOAD THE FILE TO CLOUD. uploadFile(API_KEY, SourceFile, uploadUrl) .then(() => { // 3. CONVERT UPLOADED PDF FILE TO XLSX convertPdfToXlsx(API_KEY, uploadedFileUrl, Password, Pages, DestinationFile); }) .catch(e => { console.log(e); }); }) .catch(e => { console.log(e); }); function getPresignedUrl(apiKey, localFile) { return new Promise(resolve => { // Prepare request to `Get Presigned URL` API endpoint let queryPath = `/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${path.basename(SourceFile)}`; let reqOptions = { host: "api.pdf.co", path: encodeURI(queryPath), headers: { "x-api-key": API_KEY } }; // Send request https.get(reqOptions, (response) => { response.on("data", (d) => { let data = JSON.parse(d); if (data.error == false) { // Return presigned url we received resolve([data.presignedUrl, data.url]); } else { // Service reported error console.log("getPresignedUrl(): " + data.message); } }); }) .on("error", (e) => { // Request error console.log("getPresignedUrl(): " + e); }); }); } function uploadFile(apiKey, localFile, uploadUrl) { return new Promise(resolve => { fs.readFile(SourceFile, (err, data) => { request({ method: "PUT", url: uploadUrl, body: data, headers: { "Content-Type": "application/octet-stream" } }, (err, res, body) => { if (!err) { resolve(); } else { console.log("uploadFile() request error: " + e); } }); }); }); } function convertPdfToXlsx(apiKey, uploadedFileUrl, password, pages, destinationFile) { // Prepare request to `PDF To XLSX` API endpoint var queryPath = `/v1/pdf/convert/to/xlsx`; // JSON payload for api request var jsonPayload = JSON.stringify({ name: path.basename(destinationFile), password: password, pages: pages, url: uploadedFileUrl, async: true }); var reqOptions = { host: "api.pdf.co", method: "POST", path: queryPath, headers: { "x-api-key": apiKey, "Content-Type": "application/json", "Content-Length": Buffer.byteLength(jsonPayload, 'utf8') } }; // Send request var postRequest = https.request(reqOptions, (response) => { response.on("data", (d) => { response.setEncoding("utf8"); // Parse JSON response let data = JSON.parse(d); if (data.error == false) { console.log(`Job #${data.jobId} has been created!`); checkIfJobIsCompleted(data.jobId, data.url, destinationFile); } else { // Service reported error console.log("convertPdfToXlsx(): " + data.message); } }); }) .on("error", (e) => { // Request error console.log("convertPdfToXlsx(): " + e); }); // Write request data postRequest.write(jsonPayload); postRequest.end(); } function checkIfJobIsCompleted(jobId, resultFileUrl, destinationFile) { let queryPath = `/v1/job/check`; // JSON payload for api request let jsonPayload = JSON.stringify({ jobid: jobId }); let reqOptions = { host: "api.pdf.co", path: queryPath, method: "POST", headers: { "x-api-key": API_KEY, "Content-Type": "application/json", "Content-Length": Buffer.byteLength(jsonPayload, 'utf8') } }; // Send request var postRequest = https.request(reqOptions, (response) => { response.on("data", (d) => { response.setEncoding("utf8"); // Parse JSON response let data = JSON.parse(d); console.log(`Checking Job #${jobId}, Status: ${data.status}, Time: ${new Date().toLocaleString()}`); if (data.status == "working") { // Check again after 3 seconds setTimeout(function () { checkIfJobIsCompleted(jobId, resultFileUrl, destinationFile); }, 3000); } else if (data.status == "success") { // Download XLSX file var file = fs.createWriteStream(destinationFile); https.get(resultFileUrl, (response2) => { response2.pipe(file) .on("close", () => { console.log(`Generated XLSX file saved as "${destinationFile}" file.`); }); }); } else { console.log(`Operation ended with status: "${data.status}".`); } }) }); // Write request data postRequest.write(jsonPayload); postRequest.end(); }

package.json

      
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Related Samples: