document parser API in Salesforce and PDF.co Web API
PDF.co Web API is a REST API that provides a set of data extraction functions and tools for document manipulation, including splitting and merging PDF files. It includes built-in OCR and image recognition, and can generate and read barcodes from images, scans, and PDFs.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
DocumentParserOutputAsJSON.cls
/**
 * Sends a PDF to the PDF.co document parser endpoint and keeps the raw JSON
 * response body when the call succeeds.
 */
public class DocumentParserOutputAsJSON {

    // The authentication key (API Key).
    // Get your own by registering at https://app.pdf.co/documentation/api
    static String API_KEY = '***********************';

    // Direct URL of source PDF file.
    static string SourceFileUrl = 'https://bytescout-com.s3-us-west-2.amazonaws.com/files/demo-files/cloud-api/document-parser/sample-invoice.pdf';

    // PDF document password. Leave empty for unprotected documents.
    static string Password = '';

    // Raw response body of the last successful call; stays null when the
    // HTTP status is not 200, the response's "error" flag is not false,
    // or an exception was caught.
    @TestVisible
    String jsonOutput;

    /**
     * Builds the JSON request payload, POSTs it to the document parser
     * endpoint and, on HTTP 200 with "error": false in the response,
     * stores the response body in jsonOutput.
     *
     * Failures (non-200 status, "error" flag not false, or any exception)
     * are written to the debug log only; nothing is thrown to the caller.
     */
    public void parseDocumentAsJSON() {
        try {
            // Create HTTP client instance
            Http http = new Http();
            HttpRequest request = new HttpRequest();

            // Set API Key
            request.setHeader('x-api-key', API_KEY);

            Boolean async = false;
            String inline = 'true';
            String profiles = '';
            Boolean encrypt = false;
            Boolean storeResult = false;

            // Prepare request params as JSON
            // See documentation: https://apidocs.pdf.co
            JSONGenerator gen = JSON.createGenerator(true);
            gen.writeStartObject();
            gen.writeStringField('url', SourceFileUrl);
            gen.writeStringField('outputFormat', 'JSON');
            gen.writeStringField('templateId', '1');
            gen.writeBooleanField('async', async);
            gen.writeBooleanField('encrypt', encrypt);
            gen.writeStringField('inline', inline);
            gen.writeStringField('password', Password);
            gen.writeStringField('profiles', profiles);
            // Use the local so that changing it above actually affects the payload.
            gen.writeBooleanField('storeResult', storeResult);
            gen.writeEndObject();

            // Serialize the generated params to a JSON string
            String jsonPayload = gen.getAsString();

            // URL of the document parser endpoint
            String url = 'https://api.pdf.co/v1/pdf/documentparser';
            request.setEndpoint(url);
            request.setHeader('Content-Type', 'application/json;charset=UTF-8');
            request.setMethod('POST');
            request.setBody(jsonPayload);

            // Execute request
            HttpResponse response = http.send(request);

            if (response.getStatusCode() == 200) {
                // Parse JSON response. The local is deliberately not named
                // "json": Apex identifiers are case-insensitive and that name
                // would coincide with the system JSON class.
                Map<String, Object> parsedResponse =
                    (Map<String, Object>) JSON.deserializeUntyped(response.getBody());

                if ((Boolean) parsedResponse.get('error') == false) {
                    jsonOutput = response.getBody();
                    System.debug('JSON ' + jsonOutput);
                } else {
                    // HTTP 200 but the "error" flag is not false: log the body
                    // instead of dropping the failure silently.
                    System.debug('Error Response ' + response.getBody());
                }
            } else {
                System.debug('Error Response ' + response.getBody());
                System.debug(' Status ' + response.getStatus());
                System.debug(' Status Code' + response.getStatusCode());
                System.debug(' Response String' + response.toString());
            }
        } catch (Exception ex) {
            // Best-effort sample: report the failure in the debug log and return.
            String errorBody = 'Message: ' + ex.getMessage()
                + ' -- Cause: ' + ex.getCause()
                + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.debug(errorBody);
        }
    }
}
DocumentParserOutputAsJSONTest.cls
/**
 * Tests for DocumentParserOutputAsJSON.parseDocumentAsJSON, using a canned
 * HTTP response in place of a real callout.
 */
@isTest
public class DocumentParserOutputAsJSONTest {

    /**
     * With the callout mock registered, the response body returned by the
     * mock is expected to end up in jsonOutput.
     */
    @isTest
    private static void testparseDocumentAsJSON() {
        Test.setMock(HttpCalloutMock.class, new DocumentParserOutputAsJSONTest.DocumentCreationMock());

        DocumentParserOutputAsJSON docParse = new DocumentParserOutputAsJSON();
        docParse.parseDocumentAsJSON();

        // Guard first so a missing output fails as an assertion rather than
        // as a NullPointerException on the contains() call below.
        System.assertNotEquals(null, docParse.jsonOutput, 'jsonOutput should be populated from the mocked response');
        System.assert(docParse.jsonOutput.contains('John A. Doe'), 'jsonOutput should contain the mocked response body');
    }

    /**
     * No callout mock is registered here. The test expects the call to fail
     * inside parseDocumentAsJSON (presumably because the platform rejects
     * unmocked callouts in tests) and jsonOutput to remain null.
     */
    @isTest
    private static void testparseDocumentAsJSONException() {
        DocumentParserOutputAsJSON docParse = new DocumentParserOutputAsJSON();
        docParse.parseDocumentAsJSON();

        System.assertEquals(null, docParse.jsonOutput, 'jsonOutput should stay null when the callout fails');
    }

    /**
     * Callout mock that answers every request with HTTP 200 and a fixed
     * JSON body whose "error" field is false.
     */
    public class DocumentCreationMock implements HttpCalloutMock {

        /**
         * @param req the outgoing request (not inspected)
         * @return a 200 response carrying the fixed JSON body
         */
        public HTTPResponse respond(HTTPRequest req) {
            HttpResponse res = new HttpResponse();
            String testBody = '{"hash":"John A. Doe","url":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c336bfcef1a473d98492bda25d8da03/newDocument.pdf?X-Amz-Expires=3600&x-amz-security-token=FwoGZXIvYXdzEO7%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDHWK1dY4d4lOgsheliKBATwE%2FZewASPTEnPxTn%2BOdYhP4h3gljAJfqbRvQptDX7wdWLmrBS7Tg4qTU6pAbxIdXChGPjBWpSbtiADJKmqkmyhkUmE8GSM1%2FGtJO6bga2pgzvFLXmzxjTf3%2BFNqwYOvbyApIZdVLoPpEKY6PlCflQtLTd30dhelm6xpB8pitbdhSjdz8KCBjIobVy%2Fjwybwp6OQgB%2FT6QkIo2dU07gtFREdn5jhRyvnS5lkccweBV1%2Bw%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHMV5P3JOS/20210316/us-west-2/s3/aws4_request&X-Amz-Date=20210316T124309Z&X-Amz-SignedHeaders=host;x-amz-security-token&X-Amz-Signature=95287bf3c007fed4c2c5aeea1ce75c846cc6c68b22aaf35175ebe41a105f54e1","pageCount":1,"error":false,"status":200,"name":"newDocument","remainingCredits":9913694,"credits":3}';
            res.setHeader('Content-Type', 'application/json');
            res.setBody(testBody);
            res.setStatusCode(200);
            return res;
        }
    }
}
VIDEO
ON-PREMISE OFFLINE SDK
See also:
ON-DEMAND REST WEB API
Get Your API Key
See also:
printable version:
PDF-co-Web-API-Salesforce-Document-Parser-Demo.pdf
PDF-co-Web-API-Salesforce-Document-Parser-Demo.pdf