How to Parse Document and Get Output in CSV format with Salesforce Apex and PDF.co

Sep 2, 2024·9 Minutes Read

In this step-by-step guide, we’ll walk through how to parse a PDF invoice document and get the output in CSV format with Salesforce Apex and PDF.co. First, we’ll go through the basic steps of the Salesforce integration. Then we’ll look at the code and a demo video. Let’s get started!

Step 1: Create Remote Site Settings

Create remote site settings in the Salesforce Org like below.

Remote Site Settings

Please note that the URL is “https://api.pdf.co”.

Step 2: Enter API Key

You should have an “API” class in your org that holds your PDF.co API key (see API.cls in Step 6).

API Key

Step 3: Create an Apex Class in Salesforce

Create an apex class in Salesforce like below and paste the code there.
Once you log in to the Salesforce org, you will see a screen like the one below. Click on “Developer Console”.

Developer Console

Create an Apex class. To do this, click on “File”, then “New”, then “Apex Class”.

Apex Class

Write the class name “DocumentParserCSV” and click “Ok”. Now copy the DocumentParserCSV code in this file.

Similarly, create a new file with the name “DocumentParserCSVTest” and copy the code.

Step 4: Verify Code

To verify the code, open the Execute Anonymous window and call the method below, for example: new DocumentParserCSV().startProcessing();

 Execute Anonymous window
Enter Apex Code

Then Click on “Execute”.

Step 5: See the Result

Now you can see the result in the Debug Logs.

Step 6: Source Code Files:

API.cls

/**
 * Small helper for making HTTP callouts that carry the API key.
 *
 * Every request built here sends API_KEY in the 'x-api-key' header and uses
 * the same timeout. The three public methods differ only in HTTP verb and in
 * how the request body is supplied.
 */
public class API {

    // API key sent in the 'x-api-key' header of every request.
    // Replace the placeholder with your own key.
    // NOTE(review): a key kept in source code is visible to anyone who can read the
    // class; consider a Named Credential or protected custom metadata instead.
    public static String  API_KEY = '********************************';

    // Timeout, in milliseconds, applied to every callout made through this class.
    private static final Integer TIMEOUT_MS = 60000;

    /**
     * Sends a GET request.
     *
     * @param endPointURL URL to invoke
     * @return the response returned by Http.send
     */
    public static HttpResponse  getCall(String endPointURL)
    {
        HttpRequest request = buildRequest('GET', endPointURL);
        return new Http().send(request);
    }

    /**
     * Sends a POST request with a String body.
     *
     * @param endPointURL URL to invoke
     * @param body        request body
     * @param contentType value for the 'Content-Type' header
     * @return the response returned by Http.send
     */
    public static HttpResponse  postCall(String endPointURL, String body, String contentType)
    {
        HttpRequest request = buildRequest('POST', endPointURL);
        request.setHeader('Content-Type', contentType);
        request.setBody(body);
        return new Http().send(request);
    }

    /**
     * Sends a PUT request with a Blob body.
     *
     * @param endPointURL URL to invoke
     * @param body        request body as a Blob
     * @param contentType value for the 'Content-Type' header
     * @return the response returned by Http.send
     */
    public static HttpResponse  putCall(String endPointURL, Blob body, String contentType)
    {
        HttpRequest request = buildRequest('PUT', endPointURL);
        request.setHeader('Content-Type', contentType);
        request.setBodyAsBlob(body);
        return new Http().send(request);
    }

    // Builds a request with the settings shared by all verbs:
    // API key header, endpoint, HTTP method and timeout.
    private static HttpRequest buildRequest(String method, String endPointURL)
    {
        HttpRequest request = new HttpRequest();
        request.setHeader('x-api-key', API_KEY);
        request.setEndpoint(endPointURL);
        request.setMethod(method);
        request.setTimeout(TIMEOUT_MS);
        return request;
    }
}

APITest.cls

/**
 * Unit tests for the API callout helper.
 *
 * Each test installs APIMock, performs one call and then checks both the
 * mocked response and the request that API actually built (verb, endpoint,
 * headers and body), so a regression in request construction fails the test.
 */
@isTest
public class APITest {

    // getCall must issue a GET to the given endpoint with the API key header.
    @isTest
    private static void getCallTest()
    {
        APITest.APIMock mock = new APITest.APIMock();
        Test.setMock(HttpCalloutMock.class, mock);
        Test.startTest();
        HttpResponse resp = API.getCall('https://www.google.com/');
        Test.stopTest();
        System.assertNotEquals(null, resp);
        System.assertEquals(200, resp.getStatusCode());
        System.assertNotEquals(null, mock.lastRequest);
        System.assertEquals('GET', mock.lastRequest.getMethod());
        System.assertEquals('https://www.google.com/', mock.lastRequest.getEndpoint());
        System.assertEquals(API.API_KEY, mock.lastRequest.getHeader('x-api-key'));
    }

    // postCall must issue a POST carrying the String body and Content-Type.
    @isTest
    private static void postCallTest()
    {
        APITest.APIMock mock = new APITest.APIMock();
        Test.setMock(HttpCalloutMock.class, mock);
        Test.startTest();
        HttpResponse res = API.postCall('https://www.google.com/', 'jsonPayload', 'application/json');
        Test.stopTest();
        System.assertNotEquals(null, res);
        System.assertEquals(200, res.getStatusCode());
        System.assertNotEquals(null, mock.lastRequest);
        System.assertEquals('POST', mock.lastRequest.getMethod());
        System.assertEquals('https://www.google.com/', mock.lastRequest.getEndpoint());
        System.assertEquals(API.API_KEY, mock.lastRequest.getHeader('x-api-key'));
        System.assertEquals('application/json', mock.lastRequest.getHeader('Content-Type'));
        System.assertEquals('jsonPayload', mock.lastRequest.getBody());
    }

    // putCall must issue a PUT carrying the Blob body and Content-Type.
    @isTest
    private static void putCallTest()
    {
        APITest.APIMock mock = new APITest.APIMock();
        Test.setMock(HttpCalloutMock.class, mock);
        Test.startTest();
        HttpResponse res = API.putCall('https://www.google.com/', Blob.valueOf('sourceFile'),'application/octet-stream');
        Test.stopTest();
        System.assertNotEquals(null, res);
        System.assertEquals(200, res.getStatusCode());
        System.assertNotEquals(null, mock.lastRequest);
        System.assertEquals('PUT', mock.lastRequest.getMethod());
        System.assertEquals('https://www.google.com/', mock.lastRequest.getEndpoint());
        System.assertEquals(API.API_KEY, mock.lastRequest.getHeader('x-api-key'));
        System.assertEquals('application/octet-stream', mock.lastRequest.getHeader('Content-Type'));
        System.assertEquals('sourceFile', mock.lastRequest.getBodyAsBlob().toString());
    }

    /**
     * Callout mock that always answers HTTP 200 with a fixed JSON body and
     * remembers the most recent request so tests can inspect it.
     */
    public class APIMock implements HttpCalloutMock {
        // Most recent request passed to respond(); null until a callout is made.
        public HttpRequest lastRequest;

        public HTTPResponse respond(HTTPRequest req) {
            lastRequest = req;
            HttpResponse res = new HttpResponse();
            String testBody = '{"presignedUrl":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=900&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIZJDPLX6D7EHVCKA/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host&X-Amz-Signature=8650913644b6425ba8d52b78634698e5fc8970157d971a96f0279a64f4ba87fc","url":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=3600&x-amz-security-token=FwoGZXIvYXdzEGgaDA9KaTOXRjkCdCqSTCKBAW9tReCLk1fVTZBH9exl9VIbP8Gfp1pE9hg6et94IBpNamOaBJ6%2B9Vsa5zxfiddlgA%2BxQ4tpd9gprFAxMzjN7UtjU%2B2gf%2FKbUKc2lfV18D2wXKd1FEhC6kkGJVL5UaoFONG%2Fw2jXfLxe3nCfquMEDo12XzcqIQtNFWXjKPWBkQEvmii4tfTyBTIot4Na%2BAUqkLshH0R7HVKlEBV8btqa0ctBjwzwpWkoU%2BF%2BCtnm8Lm4Eg%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHEGHTOA4W/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host;x-amz-security-token&X-Amz-Signature=243419ac4a9a315eebc2db72df0817de6a261a684482bbc897f0e7bb5d202bb9","error":false,"status":200,"name":"test.pdf","remainingCredits":98145}';
            res.setHeader('Content-Type', 'application/json');
            res.setBody(testBody);
            res.setStatusCode(200);
            return res;
        }
    }
}

DocumentParserCSV.cls

/**
 * Sends a sample invoice PDF to the PDF.co Document Parser endpoint,
 * requesting CSV output, and writes the outcome to the debug log.
 */
public class DocumentParserCSV {

    // Document Parser endpoint that the request is posted to.
    private static final String PARSER_ENDPOINT = 'https://api.pdf.co/v1/pdf/documentparser';

    /**
     * Builds the request payload, posts it through API.postCall and logs the
     * result. Nothing is returned and no exception escapes: any exception is
     * caught and written to the debug log.
     */
    public void startProcessing()
    {
        try
        {
            HttpResponse response =  API.postCall(PARSER_ENDPOINT, buildPayload(), 'application/json');

            // Check the status first: a non-200 body is not guaranteed to be JSON,
            // and parsing it up front would skip these diagnostics.
            if (response.getStatusCode() != 200)
            {
                System.debug('Error Response ' + response.getBody());
                System.Debug(' Status ' + response.getStatus());
                System.Debug(' Status Code' + response.getStatusCode());
                System.Debug(' Response String' + response.toString());
                return;
            }

            // Named 'parsed' rather than 'json': Apex identifiers are case-insensitive,
            // so a local called 'json' collides with the JSON system class.
            Map<String, Object> parsed = (Map<String, Object>)JSON.deserializeUntyped(response.getBody());
            Boolean hasError = (Boolean)parsed.get('error');

            if (hasError == false)
            {
                // 'body' field of the response. Presumably this is the CSV text,
                // since the request sets inline=true — confirm against the PDF.co docs.
                String jsonBody =(String)parsed.get('body');
                System.debug(jsonBody);
            }
            else
            {
                // HTTP 200 but the 'error' flag is true or absent: log instead of dropping it.
                System.debug('Error Response ' + response.getBody());
            }
        }
        catch (Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.Debug(errorBody);
        }
    }

    // Serializes the Document Parser request parameters to a JSON string.
    // NOTE(review): some flags are written as strings ('true'/'false') and others as
    // booleans; kept exactly as before — confirm which form the API expects.
    private String buildPayload()
    {
        JSONGenerator gen = JSON.createGenerator(true);
        gen.writeStartObject();
        gen.writeStringField('url', 'https://bytescout-com.s3-us-west-2.amazonaws.com/files/demo-files/cloud-api/document-parser/sample-invoice.pdf');
        gen.writeStringField('templateId', '1');
        gen.writeStringField('outputFormat', 'CSV');
        gen.writeStringField('generateCsvHeaders', 'true');

        gen.writeBooleanField('async', false);
        gen.writeStringField('encrypt', 'false');
        gen.writeStringField('inline', 'true');
        gen.writeStringField('password', '');
        gen.writeBooleanField('storeResult', false);

        gen.writeEndObject();
        return gen.getAsString();
    }
}

DocumentParserCSVTest.cls

/**
 * Unit tests for DocumentParserCSV.
 *
 * startProcessing() returns nothing and only writes to the debug log, so the
 * tests verify the request it sends (captured by the mocks) and that no
 * exception escapes on the error paths.
 */
@isTest
private class DocumentParserCSVTest
{
    // Success path: HTTP 200 with error=false. Verifies the outgoing request.
    @isTest
    private static void testDocumentParserCSV()
    {
        DocumentParserCSVTest.DocumentCreationMock mock = new DocumentParserCSVTest.DocumentCreationMock();
        Test.setMock(HttpCalloutMock.class, mock);
        DocumentParserCSV dc = new DocumentParserCSV();
        Test.startTest();
        dc.startProcessing();
        Test.stopTest();

        System.assertNotEquals(null, mock.lastRequest, 'A callout should have been made');
        System.assertEquals('POST', mock.lastRequest.getMethod());
        System.assertEquals('https://api.pdf.co/v1/pdf/documentparser', mock.lastRequest.getEndpoint());
        System.assertEquals('application/json', mock.lastRequest.getHeader('Content-Type'));

        Map<String, Object> payload = (Map<String, Object>)JSON.deserializeUntyped(mock.lastRequest.getBody());
        System.assertEquals('CSV', payload.get('outputFormat'));
        System.assertEquals('1', payload.get('templateId'));
        System.assertEquals(false, payload.get('async'));
    }

    // Exception path: no mock is registered on purpose, so the callout cannot be
    // served inside the test. The test passes only if startProcessing() handles
    // that itself and lets nothing propagate.
    @isTest
    private static void testDocumentParserCSVError()
    {
        DocumentParserCSV dc = new DocumentParserCSV();
        Test.startTest();
        dc.startProcessing();
        Test.stopTest();
    }

    // HTTP error path: the service answers 500. The request must still have been
    // sent and no exception may escape.
    @isTest
    private static void testDocumentParserCSVHttpError()
    {
        DocumentParserCSVTest.ServerErrorMock mock = new DocumentParserCSVTest.ServerErrorMock();
        Test.setMock(HttpCalloutMock.class, mock);
        DocumentParserCSV dc = new DocumentParserCSV();
        Test.startTest();
        dc.startProcessing();
        Test.stopTest();

        System.assertNotEquals(null, mock.lastRequest, 'A callout should have been made');
        System.assertEquals('POST', mock.lastRequest.getMethod());
    }

    /**
     * Callout mock that always answers HTTP 200 with a fixed JSON body
     * (error=false) and remembers the most recent request.
     */
    public class DocumentCreationMock implements HttpCalloutMock {
        // Most recent request passed to respond(); null until a callout is made.
        public HttpRequest lastRequest;

        public HTTPResponse respond(HTTPRequest req) {
            lastRequest = req;
            HttpResponse res = new HttpResponse();
            String testBody = '{"presignedUrl":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=900&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIZJDPLX6D7EHVCKA/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host&X-Amz-Signature=8650913644b6425ba8d52b78634698e5fc8970157d971a96f0279a64f4ba87fc","url":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=3600&x-amz-security-token=FwoGZXIvYXdzEGgaDA9KaTOXRjkCdCqSTCKBAW9tReCLk1fVTZBH9exl9VIbP8Gfp1pE9hg6et94IBpNamOaBJ6%2B9Vsa5zxfiddlgA%2BxQ4tpd9gprFAxMzjN7UtjU%2B2gf%2FKbUKc2lfV18D2wXKd1FEhC6kkGJVL5UaoFONG%2Fw2jXfLxe3nCfquMEDo12XzcqIQtNFWXjKPWBkQEvmii4tfTyBTIot4Na%2BAUqkLshH0R7HVKlEBV8btqa0ctBjwzwpWkoU%2BF%2BCtnm8Lm4Eg%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHEGHTOA4W/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host;x-amz-security-token&X-Amz-Signature=243419ac4a9a315eebc2db72df0817de6a261a684482bbc897f0e7bb5d202bb9","error":false,"status":200,"name":"test.pdf","remainingCredits":98145}';
            res.setHeader('Content-Type', 'application/json');
            res.setBody(testBody);
            res.setStatusCode(200);
            return res;
        }
    }

    /**
     * Callout mock that always answers HTTP 500 with a small JSON error body
     * and remembers the most recent request.
     */
    public class ServerErrorMock implements HttpCalloutMock {
        // Most recent request passed to respond(); null until a callout is made.
        public HttpRequest lastRequest;

        public HTTPResponse respond(HTTPRequest req) {
            lastRequest = req;
            HttpResponse res = new HttpResponse();
            res.setHeader('Content-Type', 'application/json');
            res.setBody('{"error":true,"status":500,"message":"Internal Server Error"}');
            res.setStatus('Internal Server Error');
            res.setStatusCode(500);
            return res;
        }
    }
}

Video Guide

Related Tutorials

See Related Tutorials