How to Convert PDF to XLS from the Uploaded File (Node.js) for PDF to Excel API in JavaScript and PDF.co Web API
PDF.co Web API is a flexible Web API that includes a full set of functions, from e-signature requests to data extraction, OCR, image recognition, PDF splitting and PDF merging. It can also generate barcodes and read barcodes from images, scans and PDFs.
This sample consists of the source code for converting PDF to XLS. Full source code is also available on our GitHub repository at this link.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
Let’s review the source code and its output first, then we’ll analyze the source code.
Source Code
app.js
/*jshint esversion: 6 */

const https = require("https");
const path = require("path");
const fs = require("fs");

// `request` module is required for file upload.
// Use "npm install request" command to install.
const request = require("request");

// The authentication key (API Key).
// Get your own by registering at https://app.pdf.co/documentation/api
const API_KEY = "***********************************";

// Source PDF file
const SourceFile = "./sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Destination XLS file name
const DestinationFile = "./result.xls";

// 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.
getPresignedUrl(API_KEY, SourceFile)
    .then(([uploadUrl, uploadedFileUrl]) => {
        // 2. UPLOAD THE FILE TO CLOUD.
        return uploadFile(API_KEY, SourceFile, uploadUrl).then(() => uploadedFileUrl);
    })
    .then((uploadedFileUrl) => {
        // 3. CONVERT UPLOADED PDF FILE TO XLS
        convertPdfToXls(API_KEY, uploadedFileUrl, Password, Pages, DestinationFile);
    })
    .catch((e) => {
        // Any failure in steps 1-2 ends up here because both helpers reject on error.
        console.log(e);
    });

/**
 * Collects the complete body of an HTTP response and parses it as JSON.
 *
 * The body is buffered until the "end" event because a response may arrive
 * in several "data" chunks; parsing a single chunk can fail on valid JSON.
 *
 * @param {object} response - Response object passed to the `https` callback.
 * @param {function(?Error, *)} callback - Called once with `(err)` on a stream
 *     or parse error, or with `(null, parsedBody)` on success.
 */
function readJsonResponse(response, callback) {
    const chunks = [];
    response.on("data", (chunk) => {
        chunks.push(chunk);
    });
    response.on("error", (e) => {
        callback(e);
    });
    response.on("end", () => {
        let parsed;
        try {
            parsed = JSON.parse(Buffer.concat(chunks).toString("utf8"));
        } catch (e) {
            callback(e);
            return;
        }
        callback(null, parsed);
    });
}

/**
 * Requests a presigned upload URL from the `Get Presigned URL` API endpoint.
 *
 * @param {string} apiKey - Value sent in the `x-api-key` header.
 * @param {string} localFile - Path of the file to upload; only its base name is sent.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` taken from the
 *     service response. Rejects when the request fails, the response is not
 *     valid JSON, or the service reports an error.
 */
function getPresignedUrl(apiKey, localFile) {
    return new Promise((resolve, reject) => {
        // Prepare request to `Get Presigned URL` API endpoint.
        // The file name is component-encoded so characters such as `&` or `#`
        // cannot break the query string.
        const fileName = encodeURIComponent(path.basename(localFile));
        const queryPath = `/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${fileName}`;
        const reqOptions = {
            host: "api.pdf.co",
            path: queryPath,
            headers: { "x-api-key": apiKey }
        };

        // Send request
        https.get(reqOptions, (response) => {
            readJsonResponse(response, (err, data) => {
                if (err) {
                    reject(new Error("getPresignedUrl(): " + err));
                    return;
                }
                if (data.error === false) {
                    // Return presigned url we received
                    resolve([data.presignedUrl, data.url]);
                } else {
                    // Service reported error
                    reject(new Error("getPresignedUrl(): " + data.message));
                }
            });
        }).on("error", (e) => {
            // Request error
            reject(new Error("getPresignedUrl(): " + e));
        });
    });
}

/**
 * Uploads a local file to the given presigned URL with an HTTP PUT.
 *
 * @param {string} apiKey - Kept for signature compatibility; the upload request
 *     itself sends no API key header.
 * @param {string} localFile - Path of the file to read and upload.
 * @param {string} uploadUrl - Presigned URL to PUT the file contents to.
 * @returns {Promise<void>} Resolves when the upload request completes with a 2xx
 *     status. Rejects when the file cannot be read, the request fails, or the
 *     server answers with a non-2xx status.
 */
function uploadFile(apiKey, localFile, uploadUrl) {
    return new Promise((resolve, reject) => {
        fs.readFile(localFile, (readErr, data) => {
            if (readErr) {
                reject(new Error("uploadFile() read error: " + readErr));
                return;
            }
            request({
                method: "PUT",
                url: uploadUrl,
                body: data,
                headers: { "Content-Type": "application/octet-stream" }
            }, (err, res) => {
                if (err) {
                    reject(new Error("uploadFile() request error: " + err));
                    return;
                }
                if (res && (res.statusCode < 200 || res.statusCode >= 300)) {
                    reject(new Error("uploadFile() request error: HTTP status " + res.statusCode));
                    return;
                }
                resolve();
            });
        });
    });
}

/**
 * Calls the `PDF To XLS` API endpoint for an uploaded file and saves the
 * resulting XLS file locally. Progress and errors are reported via `console.log`.
 *
 * @param {string} apiKey - Value sent in the `x-api-key` header.
 * @param {string} uploadedFileUrl - URL of the previously uploaded PDF file.
 * @param {string} password - PDF password; empty for unprotected documents.
 * @param {string} pages - Comma-separated page indices/ranges; empty for all pages.
 * @param {string} destinationFile - Local path the XLS file is written to; its
 *     base name is also sent as the `name` parameter.
 */
function convertPdfToXls(apiKey, uploadedFileUrl, password, pages, destinationFile) {
    // Prepare request to `PDF To XLS` API endpoint
    const queryPath = `/v1/pdf/convert/to/xls`;

    // JSON payload for api request
    const jsonPayload = JSON.stringify({
        name: path.basename(destinationFile),
        password: password,
        pages: pages,
        url: uploadedFileUrl
    });

    const reqOptions = {
        host: "api.pdf.co",
        method: "POST",
        path: queryPath,
        headers: {
            "x-api-key": apiKey,
            "Content-Type": "application/json",
            "Content-Length": Buffer.byteLength(jsonPayload, 'utf8')
        }
    };

    // Send request
    const postRequest = https.request(reqOptions, (response) => {
        readJsonResponse(response, (err, data) => {
            if (err) {
                console.log("convertPdfToXls(): " + err);
                return;
            }
            if (data.error === false) {
                // Download XLS file
                https.get(data.url, (response2) => {
                    const file = fs.createWriteStream(destinationFile);
                    file.on("error", (e) => {
                        console.log("convertPdfToXls(): " + e);
                    });
                    response2.pipe(file)
                        .on("close", () => {
                            console.log(`Generated XLS file saved as "${destinationFile}" file.`);
                        });
                }).on("error", (e) => {
                    // Download error
                    console.log("convertPdfToXls(): " + e);
                });
            } else {
                // Service reported error
                console.log("convertPdfToXls(): " + data.message);
            }
        });
    }).on("error", (e) => {
        // Request error
        console.log("convertPdfToXls(): " + e);
    });

    // Write request data. This must happen inside the function, where
    // `postRequest` and `jsonPayload` are in scope; otherwise the POST is never sent.
    postRequest.write(jsonPayload);
    postRequest.end();
}
package.json
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }
Output
Now that we’ve reviewed the source code and its output, let’s analyze the code briefly.
Initially, we’re preparing all necessary inputs such as API key, source file location, page numbers that we want to convert, the output file path, etc. Once the code is ready, we proceed with conversion. We can logically divide the code into two parts. First, we’re uploading the file to PDF.co cloud and getting a temporary public URL and secondly, we’re performing conversion to Excel by using that URL.
In order to upload the input PDF file to PDF.co cloud, we’re first getting its pre-signed URL. The PDF.co endpoint /v1/file/upload/get-presigned-url is used to perform this task. This request primarily returns two URLs as output, pre-signed URL (data.presignedUrl) and public URL (data.url). This pre-signed URL is used to actually perform file upload to the server, whereas public URL points to the uploaded file. Please note, this public URL is temporary and only available for a few minutes.
The PDF.co endpoint /v1/pdf/convert/to/xls is used to convert PDF to XLS. Input parameters are the source file URL, the pages to be converted, the name of the output file, etc. If the input file is large, the operation may time out. In these cases, we can execute this request in asynchronous mode. Please refer to the API documentation for more information regarding the different parameters available. The output of this request contains the URL of the converted XLS file.
Please try this sample on your machine to get the most out of this article. Thank you for reading!
VIDEO
ON-PREMISE OFFLINE SDK
See also:
ON-DEMAND REST WEB API
Get Your API Key
See also: