Extract Bold Text from PDF in JavaScript using PDF.co Web API
In this tutorial, we will show you how to extract bold text from PDF in JavaScript using PDF.co Web API.
Step 1: Source Code
Open your text editor and create a JavaScript file with the following code:
/*jshint esversion: 6 */
var https = require("https");
var path = require("path");
var fs = require("fs");
// `request` module is required for file upload.
// Use "npm install request" command to install.
var request = require("request");
// The authentication key (API Key), sent to PDF.co in the `x-api-key` request header.
// Get your own by registering at https://app.pdf.co
const API_KEY = "***********************************";
// Path of the local source PDF file that is uploaded and converted.
const SourceFile = "./sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Path of the destination XML file; its base name is also sent to the API as the result name.
const DestinationFile = "./result.xml";
// Pipeline: presigned URL -> upload -> conversion.
// A failure in any promise-returning step is printed by the single handler at the end.
getPresignedUrl(API_KEY, SourceFile)
  // 1. The service hands back where to upload and where the file will live afterwards.
  .then(([uploadUrl, uploadedFileUrl]) =>
    // 2. Upload the local file, then pass the uploaded file's URL down the chain.
    uploadFile(API_KEY, SourceFile, uploadUrl).then(() => uploadedFileUrl)
  )
  // 3. Convert the uploaded PDF to XML and save it locally.
  .then((uploadedFileUrl) => {
    convertPdfToXml(API_KEY, uploadedFileUrl, Password, Pages, DestinationFile);
  })
  .catch((e) => {
    console.log(e);
  });
/**
 * Asks the PDF.co `Get Presigned URL` endpoint for a URL the file can be uploaded to.
 *
 * @param {string} apiKey - PDF.co API key, sent in the `x-api-key` header.
 * @param {string} localFile - Path of the file to upload; only its base name is sent.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` from the service reply.
 *   Rejects with an Error when the request fails, the reply is not valid JSON,
 *   or the service reports an error.
 */
function getPresignedUrl(apiKey, localFile) {
  return new Promise((resolve, reject) => {
    // Encode the name as a query *component* so characters such as `&` or `#`
    // in the file name cannot corrupt the query string.
    const fileName = encodeURIComponent(path.basename(localFile));
    const reqOptions = {
      host: "api.pdf.co",
      path: `/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${fileName}`,
      headers: { "x-api-key": apiKey }
    };

    // Send request
    https.get(reqOptions, (response) => {
      // The body may arrive in several chunks; parse only once it is complete.
      let body = "";
      response.setEncoding("utf8");
      response.on("data", (chunk) => {
        body += chunk;
      });
      response.on("error", (e) => {
        reject(new Error("getPresignedUrl(): " + e));
      });
      response.on("end", () => {
        let data;
        try {
          data = JSON.parse(body);
        } catch (e) {
          reject(new Error("getPresignedUrl(): invalid JSON response: " + e.message));
          return;
        }
        if (data.error === false) {
          // Return presigned url we received
          resolve([data.presignedUrl, data.url]);
        } else {
          // Service reported error
          reject(new Error("getPresignedUrl(): " + data.message));
        }
      });
    }).on("error", (e) => {
      // Request error
      reject(new Error("getPresignedUrl(): " + e));
    });
  });
}
/**
 * Uploads a local file to a presigned URL with an HTTP PUT.
 *
 * @param {string} apiKey - Accepted for signature symmetry with the other steps;
 *   it is not sent with the PUT request.
 * @param {string} localFile - Path of the file whose bytes are uploaded.
 * @param {string} uploadUrl - Presigned URL to PUT the file to.
 * @returns {Promise<void>} Resolves once the upload request completes without error.
 *   Rejects with the file-system error if the file cannot be read, or with an
 *   Error if the request fails or answers with an HTTP status >= 400.
 */
function uploadFile(apiKey, localFile, uploadUrl) {
  return new Promise((resolve, reject) => {
    fs.readFile(localFile, (readErr, data) => {
      if (readErr) {
        // Do not attempt an upload with no data.
        reject(readErr);
        return;
      }
      request({
        method: "PUT",
        url: uploadUrl,
        body: data,
        headers: {
          "Content-Type": "application/octet-stream"
        }
      }, (err, res, body) => {
        if (err) {
          reject(new Error("uploadFile() request error: " + err));
          return;
        }
        if (res && res.statusCode >= 400) {
          // The transport succeeded but the storage service refused the upload.
          reject(new Error("uploadFile() request error: HTTP " + res.statusCode));
          return;
        }
        resolve();
      });
    });
  });
}
/**
 * Calls the PDF.co `PDF To XML` endpoint for an uploaded PDF and downloads the
 * generated XML to a local file. Progress and errors are reported with
 * `console.log`; nothing is returned.
 *
 * @param {string} apiKey - PDF.co API key, sent in the `x-api-key` header.
 * @param {string} uploadedFileUrl - URL of the uploaded source PDF.
 * @param {string} password - PDF password; empty for unprotected documents.
 * @param {string} pages - Page indices/ranges to process; empty for all pages.
 * @param {string} destinationFile - Local path the XML is written to; its base
 *   name is also sent as the `name` of the result.
 */
function convertPdfToXml(apiKey, uploadedFileUrl, password, pages, destinationFile) {
  // Prepare request to `PDF To XML` API endpoint
  const queryPath = `/v1/pdf/convert/to/xml`;

  // JSON payload for api request
  const jsonPayload = JSON.stringify({
    name: path.basename(destinationFile),
    password: password,
    pages: pages,
    url: uploadedFileUrl
  });

  const reqOptions = {
    host: "api.pdf.co",
    method: "POST",
    path: queryPath,
    headers: {
      "x-api-key": apiKey,
      "Content-Type": "application/json",
      "Content-Length": Buffer.byteLength(jsonPayload, 'utf8')
    }
  };

  // Downloads the generated XML and reports the outcome.
  const downloadResult = (resultUrl) => {
    https.get(resultUrl, (response2) => {
      // Create the output file only once there is a response to write into it.
      const file = fs.createWriteStream(destinationFile);
      // A failed stream may still emit "close"; do not report success then.
      let failed = false;
      const onStreamError = (e) => {
        failed = true;
        console.log("convertPdfToXml(): " + e);
      };
      response2.on("error", onStreamError);
      file.on("error", onStreamError);
      response2.pipe(file)
        .on("close", () => {
          if (!failed) {
            console.log(`Generated XML file saved as "${destinationFile}" file.`);
          }
        });
    })
      .on("error", (e) => {
        // Download request error
        console.log("convertPdfToXml(): " + e);
      });
  };

  // Send request
  const postRequest = https.request(reqOptions, (response) => {
    // Set the encoding before any data is read, and parse the JSON only once
    // the whole body has arrived (it may span several chunks).
    response.setEncoding("utf8");
    let body = "";
    response.on("data", (chunk) => {
      body += chunk;
    });
    response.on("end", () => {
      let data;
      try {
        // Parse JSON response
        data = JSON.parse(body);
      } catch (e) {
        console.log("convertPdfToXml(): " + e);
        return;
      }
      if (data.error === false) {
        // Download XML file
        downloadResult(data.url);
      } else {
        // Service reported error
        console.log("convertPdfToXml(): " + data.message);
      }
    });
  })
    .on("error", (e) => {
      // Request error
      console.log("convertPdfToXml(): " + e);
    });

  // Write request data
  postRequest.write(jsonPayload);
  postRequest.end();
}
Step 2: Install Request Module
Next, install the request module, which is used for the file upload. Type npm install request
in the Terminal.
Step 3: PDF.co API Key
On line 13 (the API_KEY constant)
, add your API key inside the double quotes. You can get the PDF.co API Key from your dashboard.
Step 4: Source and Destination Files
On line 17 (the SourceFile constant)
, enter the path of your source PDF file, and on line 23 (the DestinationFile constant) type in your desired XML output file name
.
Step 5: Run the Program
Save your JavaScript file as app.js
, then in your Terminal just do:
node app.js