How to Parse from URL(Node for Document Parser API in JavaScript and PDF.co Web API

PDF.co Web API: the Rest API that provides a set of data extraction functions, and tools for document manipulation, splitting, and merging of PDF files. Includes built-in OCR, and image recognition, and can generate and read barcodes from images, scans, and PDF.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

MultiPageTable-template1.yml

      
templateName: Multipage Table Test templateVersion: 4 templatePriority: 0 detectionRules: keywords: - Sample document with multi-page table objects: - name: total objectType: field fieldProperties: fieldType: macros expression: TOTAL{{Spaces}}({{Number}}) regex: true dataType: decimal - name: table1 objectType: table tableProperties: start: expression: Item{{Spaces}}Description{{Spaces}}Price regex: true end: expression: TOTAL{{Spaces}}{{Number}} regex: true row: expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})' regex: true columns: - name: itemNo dataType: integer - name: description dataType: string - name: price dataType: decimal - name: qty dataType: integer - name: extPrice dataType: decimal multipage: true

app.js

      
/*jshint esversion: 6 */ var https = require("https"); var fs = require("fs"); // `request` module is required for file upload. // Use "npm install request" command to install. var request = require("request"); // The authentication key (API Key). // Get your own by registering at https://app.pdf.co/documentation/api const API_KEY = "***********************************"; // Source PDF file const SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/document-parser/MultiPageTable.pdf"; // Destination PDF file name const DestinationFile = "./result.json"; // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html) // to create templates. // Read template from file: var templateText = fs.readFileSync("./MultiPageTable-template1.yml", "utf-8"); // URL for `Document Parser` API call var query = `https://api.pdf.co/v1/pdf/documentparser`; var jsonRequestObject = { url: SourceFileUrl, template: templateText }; request( { url: query, headers: { "x-api-key": API_KEY }, method: "POST", json: true, body: jsonRequestObject }, function (error, response, body) { if (error) { return console.error("Error: ", error); } // Parse JSON response let data = JSON.parse(JSON.stringify(body)); if (data.error == false) { //Download generated file var file = fs.createWriteStream(DestinationFile); https.get(data.url, (response2) => { response2.pipe(file) .on("close", () => { console.log(`Generated result file saved as "${DestinationFile}" file.`); }); }); } else { // Service reported error console.log("Error: " + data.message); } } );

package.json

      
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also: