How to parse a simple document with the Document Parser API in PHP using PDF.co Web API

PDF.co Web API: a flexible Web API that includes a full set of functions, from e-signature requests to data extraction, OCR, image recognition, PDF splitting and PDF merging. It can also generate barcodes and read barcodes from images, scans and PDFs.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

AmazonAWS.yml

      
# Document Parser template: Amazon Web Services Invoice.
# NOTE(review): nesting was reconstructed from a whitespace-collapsed copy;
# keys and values are unchanged — confirm the layout against the original file.
templateName: Amazon Web Services Invoice
templateVersion: 4
templatePriority: 0

# Keywords listed under the template's detection rules.
detectionRules:
  keywords:
    - Amazon Web Services
    - ATTN
    - Invoice

objects:
  # Value in parentheses after "TOTAL AMOUNT DUE ON ... $".
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: TOTAL AMOUNT DUE ON{{Anything}}{{Dollar}}({{Number}})
      regex: true
      dataType: decimal

  # Value in parentheses on the line starting with "Charges".
  - name: subTotal
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: '{{LineStart}}{{Spaces}}Charges{{Spaces}}{{Dollar}}({{Number}})'
      regex: true
      dataType: decimal

  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Date:{{Spaces}}({{Anything}}){{LineEnd}}
      regex: true
      dataType: date
      dateFormat: MMMM d , yyyy

  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Number:{{Spaces}}({{Digits}})
      regex: true

  # Static fields carry a fixed expression rather than a search pattern.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Amazon Web Services, Inc.
      regex: true

  - name: companyWebsite
    objectType: field
    fieldProperties:
      fieldType: static
      expression: aws.amazon.com
      regex: true

  # Rectangle field on page index 0.
  # NOTE(review): the four numbers are presumably x, y, width, height — confirm.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      expression: Bill to Address:{{ToggleSingleLineMode}}({{AnythingGreedy}})
      regex: true
      rectangle:
        - 33
        - 115.5
        - 213.75
        - 72.75
      pageIndex: 0

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Table bounded by the "Detail" line and the end of the page; the row
  # pattern defines the named groups "description" and "unitPrice".
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: '{{LineStart}}{{Spaces}}Detail{{LineEnd}}'
        regex: true
      end:
        expression: '{{EndOfPage}}'
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}}){{LineEnd}}'
        regex: true
      columns:
        - name: unitPrice
          dataType: decimal

DigitalOcean.yml

      
# Document Parser template: DigitalOcean Invoice.
# NOTE(review): nesting was reconstructed from a whitespace-collapsed copy;
# keys and values are unchanged — confirm the layout against the original file.
templateName: DigitalOcean Invoice
templateVersion: 4
templatePriority: 0

# Keywords listed under the template's detection rules.
detectionRules:
  keywords:
    - DigitalOcean
    - 101 Avenue of the Americas
    - Invoice Number

objects:
  # Static fields carry a fixed expression rather than a search pattern.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: DigitalOcean
      regex: true

  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Invoice Number: ({{Digits}})'
      regex: true

  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Date Issued: ({{SmartDate}})'
      regex: true
      dataType: date
      dateFormat: auto-mdy

  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Total: {{Dollar}}({{Number}})'
      regex: true
      dataType: decimal

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Table bounded by the "Description ... Hours" header and the "Total:" line;
  # the row pattern defines the named groups "description", "hours", "start",
  # "end" and "unitPrice".
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Hours
        regex: true
      end:
        expression: 'Total:'
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})'
        regex: true
      columns:
        - name: hours
          dataType: integer
        - name: unitPrice
          dataType: decimal

Google.yml

      
# Document Parser template: Google Invoice.
# NOTE(review): nesting was reconstructed from a whitespace-collapsed copy;
# keys and values are unchanged — confirm the layout against the original file.
templateName: Google Invoice
templateVersion: 4
templatePriority: 0

# Keywords listed under the template's detection rules.
detectionRules:
  keywords:
    - Google
    - 77-0493581
    - Invoice

objects:
  # Fields below that omit "fieldType" do so in the original template as well.
  - name: invoiceId
    objectType: field
    fieldProperties:
      expression: Invoice number:{{Spaces}}({{Digits}})
      regex: true

  - name: dateIssued
    objectType: field
    fieldProperties:
      expression: Issue date:{{Spaces}}({{SmartDate}})
      regex: true
      dataType: date
      dateFormat: MMM d, yyyy

  # NOTE(review): unlike invoiceId, the next four expressions have no
  # parenthesised group around the value — confirm this is intended.
  - name: total
    objectType: field
    fieldProperties:
      expression: Amount due in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal

  - name: subTotal
    objectType: field
    fieldProperties:
      expression: Subtotal in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal

  - name: taxRate
    objectType: field
    fieldProperties:
      expression: State sales tax {{OpeningParenthesis}}{{Digits}}{{Percent}}{{ClosingParenthesis}}
      regex: true
      dataType: integer

  - name: tax
    objectType: field
    fieldProperties:
      expression: State sales tax{{Anything}}{{Number}}{{LineEnd}}
      regex: true
      dataType: decimal

  # Static fields carry a fixed expression rather than a search pattern.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Google LLC
      regex: true

  # Rectangle field on page index 0, with no expression.
  # NOTE(review): the four numbers are presumably x, y, width, height — confirm.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      regex: true
      rectangle:
        - 0
        - 152
        - 280
        - 72
      pageIndex: 0

  - name: billingId
    objectType: field
    fieldProperties:
      expression: Billing ID:{{Spaces}}({{DigitsOrSymbols}})
      regex: true

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Table bounded by the column-header line and the "Subtotal in USD" line;
  # the row pattern defines the named groups "description", "interval",
  # "quantity" and "amount".
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount
        regex: true
      end:
        expression: Subtotal in USD
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})'
        regex: true
      columns:
        - name: quantity
          dataType: integer
        - name: amount
          dataType: decimal

program.php

      
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document Parse Results</title>
</head>
<body>

<?php

// Form handler for the Document Parser demo.
//
// Expects a POST with:
//   - apiKey       : PDF.co API key (register at https://app.pdf.co)
//   - fileInput    : the PDF document to parse (file upload)
//   - fileTemplate : the parsing template (file upload)
//
// Flow: request a presigned upload URL, PUT the PDF to it, then start an
// asynchronous document-parser job and poll it until it finishes.
// Everything that originates from the request or from a remote response is
// HTML-escaped before being written to the page.

// Get submitted form data
$apiKey = (isset($_POST["apiKey"]) && is_string($_POST["apiKey"])) ? trim($_POST["apiKey"]) : "";
$localFile = UploadedFilePath("fileInput");
$templateFile = UploadedFilePath("fileTemplate");

if ($apiKey === "" || strpbrk($apiKey, "\r\n") !== false) {
    // A key containing line breaks could inject extra HTTP headers.
    echo "<p>Error: a valid API key is required.</p>";
} elseif ($localFile === null || $templateFile === null) {
    echo "<p>Error: both the PDF file and the template file must be uploaded.</p>";
} else {
    // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
    // * If you already have the direct PDF file link, go to the step 3.

    // Create URL
    $url = "https://api.pdf.co/v1/file/upload/get-presigned-url" .
        "?contenttype=application/octet-stream";

    // Create request
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    // Execute request
    $result = curl_exec($curl);

    if (curl_errno($curl) != 0) {
        // Display CURL error
        echo "Error: " . EscapeHtml(curl_error($curl));
    } else {
        $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        $json = ($status_code == 200) ? json_decode($result, true) : null;

        if ($status_code != 200) {
            // Display service reported error
            echo "<p>Status code: " . EscapeHtml($status_code) . "</p>";
            echo "<p>" . EscapeHtml($result) . "</p>";
        } elseif (!is_array($json) || !isset($json["presignedUrl"], $json["url"])) {
            // A 200 response without the expected keys cannot be used.
            echo "<p>Error: " . EscapeHtml(ServiceErrorMessage($json)) . "</p>";
        } else {
            // Get URL to use for the file upload
            $uploadFileUrl = $json["presignedUrl"];
            // Get URL of uploaded file to use with later API calls
            $uploadedFileUrl = $json["url"];

            // 2. UPLOAD THE FILE TO CLOUD.
            $fileHandle = fopen($localFile, "rb");

            if ($fileHandle === false) {
                echo "<p>Error: unable to read the uploaded PDF file.</p>";
            } else {
                // The same handle is reused, so CURLOPT_RETURNTRANSFER stays on.
                curl_setopt($curl, CURLOPT_URL, $uploadFileUrl);
                curl_setopt($curl, CURLOPT_HTTPHEADER, array("content-type: application/octet-stream"));
                curl_setopt($curl, CURLOPT_PUT, true);
                curl_setopt($curl, CURLOPT_INFILE, $fileHandle);
                curl_setopt($curl, CURLOPT_INFILESIZE, filesize($localFile));

                // Execute request; keep the upload response so a failure
                // reports this request rather than the previous one.
                $uploadResult = curl_exec($curl);

                fclose($fileHandle);

                if (curl_errno($curl) != 0) {
                    // Display CURL error
                    echo "Error: " . EscapeHtml(curl_error($curl));
                } else {
                    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);

                    if ($status_code != 200) {
                        // Display request error
                        echo "<p>Status code: " . EscapeHtml($status_code) . "</p>";
                        echo "<p>" . EscapeHtml($uploadResult) . "</p>";
                    } else {
                        // Read all template texts
                        $templateText = file_get_contents($templateFile);

                        if ($templateText === false) {
                            echo "<p>Error: unable to read the uploaded template file.</p>";
                        } else {
                            // 3. PARSE UPLOADED PDF DOCUMENT
                            ParseDocument($apiKey, $uploadedFileUrl, $templateText);
                        }
                    }
                }
            }
        }
    }

    // Cleanup (on every path, including CURL errors)
    curl_close($curl);
}

/**
 * Escapes a value for safe inclusion in HTML text or a quoted attribute.
 *
 * @param mixed $text Value to escape; it is cast to string first
 *                    (false and null become an empty string).
 * @return string The escaped text.
 */
function EscapeHtml($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
}

/**
 * Returns the temporary path of a successfully uploaded form file.
 *
 * @param string $field Name of the file input in the submitted form.
 * @return string|null Temporary file path, or null when the field is missing,
 *                     the upload reported an error, or the path is not an
 *                     uploaded file.
 */
function UploadedFilePath($field)
{
    if (!isset($_FILES[$field]["tmp_name"], $_FILES[$field]["error"])) {
        return null;
    }

    $path = $_FILES[$field]["tmp_name"];

    if ($_FILES[$field]["error"] !== UPLOAD_ERR_OK || !is_string($path) || !is_uploaded_file($path)) {
        return null;
    }

    return $path;
}

/**
 * Extracts the "message" entry from a decoded service response.
 *
 * @param mixed $json Result of json_decode(..., true).
 * @return string The message, or a generic text when none is present.
 */
function ServiceErrorMessage($json)
{
    if (is_array($json) && isset($json["message"]) && is_scalar($json["message"])) {
        return (string) $json["message"];
    }

    return "Unexpected response from the service.";
}

/**
 * Starts an asynchronous document-parser job and polls it until it ends.
 *
 * Writes progress, the link to the result file, or an error to the page.
 *
 * @param string $apiKey          PDF.co API key.
 * @param string $uploadedFileUrl URL of the uploaded PDF document.
 * @param string $templateText    Contents of the parsing template.
 * @return void
 */
function ParseDocument($apiKey, $uploadedFileUrl, $templateText)
{
    // (!) Make asynchronous job
    $async = true;

    // Prepare URL for Document parser API call.
    // See documentation: https://apidocs.pdf.co/?#1-pdfdocumentparser
    $url = "https://api.pdf.co/v1/pdf/documentparser";

    // Prepare requests params
    $parameters = array();
    $parameters["url"] = $uploadedFileUrl;
    $parameters["template"] = $templateText;
    $parameters["async"] = $async;

    // Create Json payload
    $data = json_encode($parameters);

    if ($data === false) {
        // json_encode fails e.g. on a template that is not valid UTF-8.
        echo "<p>Error: " . EscapeHtml(json_last_error_msg()) . "</p>";
        return;
    }

    // Create request
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);

    // Execute request
    $result = curl_exec($curl);

    // Capture the outcome, then release the handle before the polling loop.
    $errorNumber = curl_errno($curl);
    $errorText = curl_error($curl);
    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // Show the raw response (for demo purposes)
    echo EscapeHtml($result) . "<br/>";

    if ($errorNumber != 0) {
        // Display CURL error
        echo "Error: " . EscapeHtml($errorText);
        return;
    }

    if ($status_code != 200) {
        // Display request error
        echo "<p>Status code: " . EscapeHtml($status_code) . "</p>";
        echo "<p>" . EscapeHtml($result) . "</p>";
        return;
    }

    $json = json_decode($result, true);

    if (!is_array($json) || (isset($json["error"]) && $json["error"] != false)) {
        // Display service reported error
        echo "<p>Error: " . EscapeHtml(ServiceErrorMessage($json)) . "</p>";
        return;
    }

    if (!isset($json["url"], $json["jobId"])) {
        echo "<p>Error: " . EscapeHtml(ServiceErrorMessage(null)) . "</p>";
        return;
    }

    // URL of generated JSON file that will available after the job completion
    $resultFileUrl = $json["url"];
    // Asynchronous job ID
    $jobId = $json["jobId"];

    // Check the job status in a loop
    do {
        $status = CheckJobStatus($jobId, $apiKey); // Possible statuses: "working", "failed", "aborted", "success".

        // Display timestamp and status (for demo purposes)
        echo "<p>" . date(DATE_RFC2822) . ": " . EscapeHtml($status) . "</p>";

        if ($status == "success") {
            // Display link to JSON file with information about parsed fields
            $safeUrl = EscapeHtml($resultFileUrl);
            echo "<div><h2>Parsing Result:</h2><a href='" . $safeUrl . "' target='_blank'>" . $safeUrl . "</a></div>";
            break;
        } elseif ($status == "working") {
            // Pause for a few seconds
            sleep(3);
        } else {
            // Any other status (including a failed status check) ends polling.
            echo EscapeHtml($status) . "<br/>";
            break;
        }
    } while (true);
}

/**
 * Queries the status of an asynchronous job.
 *
 * Errors are written to the page.
 *
 * @param string $jobId  ID of the job to check.
 * @param string $apiKey PDF.co API key.
 * @return string|null The reported status, or null when the check failed.
 */
function CheckJobStatus($jobId, $apiKey)
{
    $status = null;

    // Create URL
    $url = "https://api.pdf.co/v1/job/check";

    // Prepare requests params
    $parameters = array();
    $parameters["jobid"] = $jobId;

    // Create Json payload
    $data = json_encode($parameters);

    // Create request
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);

    // Execute request
    $result = curl_exec($curl);

    if (curl_errno($curl) == 0) {
        $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);

        if ($status_code == 200) {
            $json = json_decode($result, true);

            if (is_array($json) && (!isset($json["error"]) || $json["error"] == false)) {
                // A response without "status" leaves the result as null.
                $status = isset($json["status"]) ? $json["status"] : null;
            } else {
                // Display service reported error
                echo "<p>Error: " . EscapeHtml(ServiceErrorMessage($json)) . "</p>";
            }
        } else {
            // Display request error
            echo "<p>Status code: " . EscapeHtml($status_code) . "</p>";
            echo "<p>" . EscapeHtml($result) . "</p>";
        }
    } else {
        // Display CURL error
        echo "Error: " . EscapeHtml(curl_error($curl));
    }

    // Cleanup
    curl_close($curl);

    return $status;
}

?>

</body>
</html>

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also: