How to convert PDF to HTML from URL (node for PDF to HTML API in JavaScript using PDF.co Web API

PDF.co Web API is the Rest API that provides set of data extraction functions, tools for documents manipulation, splitting and merging of pdf files. Includes built-in OCR, images recognition, can generate and read barcodes from images, scans and pdf.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

app.js

      
var https = require("https"); var path = require("path"); var fs = require("fs"); // The authentication key (API Key). // Get your own by registering at https://app.pdf.co/documentation/api const API_KEY = "***********************************"; // Direct URL of source PDF file. const SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-html/sample.pdf"; // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. const Pages = ""; // PDF document password. Leave empty for unprotected documents. const Password = ""; // Destination HTML file name const DestinationFile = "./result.html"; // Set to `true` to get simplified HTML without CSS. Default is the rich HTML keeping the document design. const PlainHtml = false; // Set to `true` if your document has the column layout like a newspaper. const ColumnLayout = false; // Prepare request to `PDF To HTML` API endpoint var queryPath = `/v1/pdf/convert/to/html?name=${path.basename(DestinationFile)}&password=${Password}&pages=${Pages}` + `&simple=${PlainHtml}&columns=${ColumnLayout}&url=${SourceFileUrl}&async=True`; var reqOptions = { host: "api.pdf.co", path: encodeURI(queryPath), headers: { "x-api-key": API_KEY } }; // Send request https.get(reqOptions, (response) => { response.on("data", (d) => { // Parse JSON response var data = JSON.parse(d); if (data.error == false) { console.log(`Job #${data.jobId} has been created!`); // Process returned job checkIfJobIsCompleted(data.jobId, data.url); } else { // Service reported error console.log(data.message); } }); }).on("error", (e) => { // Request error console.log(e); }); function checkIfJobIsCompleted(jobId, resultFileUrl) { let queryPath = `/v1/job/check?jobid=${jobId}`; let reqOptions = { host: "api.pdf.co", path: encodeURI(queryPath), method: "GET", headers: { "x-api-key": API_KEY } }; https.get(reqOptions, (response) => { response.on("data", (d) => { response.setEncoding("utf8"); // Parse JSON response let data = JSON.parse(d); console.log(`Checking Job #${jobId}, Status: ${data.status}, Time: ${new Date().toLocaleString()}`); if (data.status == "working") { // Check again after 3 seconds setTimeout(function(){ checkIfJobIsCompleted(jobId, resultFileUrl); }, 3000); } else if (data.status == "success") { // Download HTML file var file = fs.createWriteStream(DestinationFile); https.get(resultFileUrl, (response2) => { response2.pipe(file) .on("close", () => { console.log(`Generated HTML file saved as "${DestinationFile}" file.`); }); }); } else { console.log(`Operation ended with status: "${data.status}".`); } }) }); }

package.json

      
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Related Samples: