How to Convert Scanned PDF to Searchable PDF in Salesforce using PDF.co

Sep 2, 2024·13 Minutes Read

In this article, we’ll see how to convert unsearchable (scanned) PDFs to searchable PDFs in Salesforce. The code is written in Salesforce Apex, and PDF.co is used to perform the conversion.

Files stored in Salesforce are used as the input. After the conversion, the output file is stored back in the Salesforce file system.

The following are the step-by-step instructions for the whole process.

IN THIS TUTORIAL

Create Remote Site Settings

Create Apex Class in Salesforce

Add the Key

Upload PDF to Salesforce

Verify the Code

Search Files from the “App Launcher”

Source Code

Step 1: Create Remote Site Settings

Create two remote site settings in the Salesforce org, as shown below.

The URLs used are as follows:

https://api.pdf.co
https://pdf-temp-files.s3.us-west-2.amazonaws.com

Step 2: Create Apex Class in Salesforce

Create an Apex class in Salesforce as shown below and paste the code into it. Once you log in to your Salesforce org, you will see the screen below; click on “Developer Console”.

Create an Apex class. To do this, click on “File”, then “New”, then “Apex Class”.

Next, enter the class name “MakeSearchablePDFWithFileUpload” and click “OK”. Then paste the full source code into the new class. The complete source code is given in a later section of this article.

Step 3: Add the Key

In the MakeSearchablePDFWithFileUpload class, add your PDF.co API key by replacing the ‘***********************’ placeholder assigned to API_KEY.

Next, set DestinationFile to the name under which the output PDF will be stored.

Step 4: Upload PDF to Salesforce

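We must upload a PDF file to the Salesforce file system. To do so, open the App Launcher and click on “Files”. You will then see the “Upload File” button, which you can use to upload the file. The title of the uploaded file must match an entry in the fileName array of the class (‘sampleScannedPDF’ by default), because the code looks the file up by its title.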

Step 5: Verify the Code

To verify the code, open the Execute Anonymous window and call the method below, for example: new MakeSearchablePDFWithFileUpload().startProcessing();

Then click on “Execute”.

Step 6: Search Files from the “App Launcher”

Now, search for “Files” in the App Launcher and open it; the converted file is listed there.

You will see your PDF file there.

Step 7: Source Code

The following is the source code used in this article.

MakeSearchablePDFWithFileUpload.cls

/**
 * Converts scanned PDFs stored as Salesforce Files into searchable PDFs
 * through the PDF.co web API and stores the result back as a Salesforce File.
 *
 * Flow: request a presigned upload URL -> PUT the file bytes to that URL ->
 * start an asynchronous "makesearchable" job -> poll the job status ->
 * download the result and insert it as a new ContentVersion.
 */
public class MakeSearchablePDFWithFileUpload {
    // PDF.co API key; replace the placeholder before running.
    public static String API_KEY = '**************************';
    // Title of the ContentVersion created for the converted PDF ('.pdf' is appended for PathOnClient).
    public static string DestinationFile = 'Searchable PDF(Using File Upload)';    
    // Delay between two job-status checks, in milliseconds.
    private static final Integer POLL_INTERVAL_MS = 3000;
    // Titles of the ContentVersion records to upload.
    String[] fileName = new String[] {'sampleScannedPDF'};
    // URLs (as returned by PDF.co) of the files that were uploaded successfully.
    List<String> urlList = new List<String>();
    
    /**
     * Uploads every file listed in fileName to PDF.co and, when at least one
     * upload succeeded, starts the conversion via makePdfTextSearchable.
     * Files that cannot be found or uploaded are logged and skipped.
     * Exceptions are caught and written to the debug log.
     */
    public void startProcessing()
    {
        try
        {
            for(String fname : fileName)
            {
                // Query into a list: assigning to a single sObject throws when no row matches.
                List<ContentVersion> versions = [select Title, VersionData from ContentVersion where Title = :fname order by CreatedDate desc limit 1];
                if(versions.isEmpty())
                {
                    System.debug('No ContentVersion found with Title ' + fname);
                    continue;
                }
                Blob SourceFile = versions[0].VersionData;
                
                // 1. Prepare URL for "Get Presigned URL" API call.
                // The file name is concatenated and URL-encoded; a ":fname" bind token
                // only works inside SOQL, not inside a string literal.
                String endpoint = 'https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name='
                    + EncodingUtil.urlEncode(fname, 'UTF-8');
                HttpRequest req = new HttpRequest();
                req.setHeader('x-api-key', API_KEY);
                req.setEndpoint(endpoint);
                req.setMethod('GET');
                req.setTimeout(60000);
                HTTPResponse res = new Http().send(req);
                if(res.getStatusCode() != 200)
                {
                    logFailure(res);
                    continue;
                }
                System.Debug('res ' + res);
                Map<String, Object> deserializedBody = (Map<String, Object>)JSON.deserializeUntyped(res.getBody());
                if(isErrorResponse(deserializedBody))
                {
                    System.debug('Error Response ' + res.getBody());
                    continue;
                }
                // Get URL to use for the file upload
                String uploadUrl = (String)deserializedBody.get('presignedUrl');
                // Get URL of uploaded file to use with later API calls
                String uploadedFileUrl = (String)deserializedBody.get('url');
                System.debug('uploadedFileUrl :: ' + uploadedFileUrl);
                if(String.isBlank(uploadUrl) || String.isBlank(uploadedFileUrl))
                {
                    System.debug('Error Response ' + res.getBody());
                    continue;
                }
                // 2. UPLOAD THE FILE TO CLOUD.
                if(uploadFile(API_KEY, uploadUrl, SourceFile))
                {
                    urlList.add(uploadedFileUrl);
                    System.debug('urlList ' + urlList);
                }
            }
            if(!urlList.isEmpty())
            {
                makePdfTextSearchable(urlList);
            }
        }
        catch(Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.Debug(errorBody);
        }
    }   
    
    /**
     * PUTs the file bytes to the given (presigned) upload URL.
     *
     * @param API_KEY    API key sent in the x-api-key header
     * @param url        upload URL obtained from the "get presigned URL" call
     * @param sourceFile file content to upload
     * @return true when the server answered with HTTP 200, false otherwise
     */
    @TestVisible
    public static boolean uploadFile(String API_KEY, String url, Blob sourceFile)
    {
        HttpRequest req = new HttpRequest();
        req.setHeader('x-api-key', API_KEY);
        req.setHeader('Content-Type', 'application/octet-stream');
        req.setEndpoint(url);
        req.setMethod('PUT');
        req.setTimeout(60000);
        req.setBodyAsBlob(sourceFile);
        HTTPResponse res = new Http().send(req);
        if(res.getStatusCode() == 200) 
        {
            System.Debug(res.getBody());
            return true;
        }
        logFailure(res);
        return false;
    }
    
    /**
     * Starts an asynchronous "makesearchable" job for the FIRST url in the list,
     * polls the job until it leaves the 'working' state and, on 'success',
     * downloads the result into a new Salesforce File named DestinationFile.
     *
     * Polling stops early, with a debug message, when continuing would exceed
     * the transaction's callout or CPU-time governor limits.
     * Exceptions are caught and written to the debug log.
     *
     * @param uploadedFileUrl URLs of uploaded source files; only element 0 is processed
     */
    public static void makePdfTextSearchable(List<String> uploadedFileUrl)
    {
        if(uploadedFileUrl == null || uploadedFileUrl.isEmpty())
        {
            System.debug('makePdfTextSearchable: no uploaded file URL supplied');
            return;
        }
        System.debug('uploadedFileUrlList[0] :: ' + uploadedFileUrl[0]);
        String SourceFileUrl = uploadedFileUrl[0]; 
        Map<String, Object> parameters = new Map<String, Object>();
        parameters.put('async', 'true');
        parameters.put('password', '');
        parameters.put('name', 'result.pdf');
        parameters.put('pages', '');        
        parameters.put('lang', 'eng');
        parameters.put('url', SourceFileUrl);
        String jsonPayload = Json.serialize(parameters); 
        try
        {    
            HttpRequest req = new HttpRequest();
            req.setBody(jsonPayload);
            req.setHeader('x-api-key', API_KEY);
            req.setHeader('Content-Type', 'application/json');
            req.setEndpoint('https://api.pdf.co/v1/pdf/makesearchable');
            req.setMethod('POST');
            req.setTimeout(60000);
            HTTPResponse res = new Http().send(req);
            if(res.getStatusCode() != 200)
            {
                logFailure(res);
                return;
            }
            System.Debug('res.getBody()' + res.getBody());
            Map<String, Object> deserializedBody = (Map<String, Object>)JSON.deserializeUntyped(res.getBody());
            if(isErrorResponse(deserializedBody))
            {
                System.debug('Error Response ' + res.getBody());
                return;
            }
            String urlVal = (String)deserializedBody.get('url');
            String jobId = (String)deserializedBody.get('jobId');
            System.debug('urlVal >>> ' + urlVal);
            
            while(true)
            {
                // One callout is needed for the status check and one more for the download.
                if(Limits.getLimitCallouts() - Limits.getCallouts() < 2)
                {
                    System.debug('Stopped polling job ' + jobId + ': callout limit reached');
                    break;
                }
                String statusVal = checkJobStatus(jobId);
                if(statusVal == 'success')
                {
                    downloadFile(urlVal, DestinationFile);   
                    break;
                } 
                if(statusVal != 'working')
                {
                    // Failed/aborted job, or the status could not be read.
                    System.debug('Stopped polling job ' + jobId + ': status ' + statusVal);
                    break;
                }
                // sleep() spins the CPU, so stop before the wait itself would blow the CPU limit.
                if(Limits.getCpuTime() + POLL_INTERVAL_MS >= Limits.getLimitCpuTime())
                {
                    System.debug('Stopped polling job ' + jobId + ': CPU time limit would be exceeded');
                    break;
                }
                sleep(POLL_INTERVAL_MS);
            }
        }
        catch(Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.Debug(errorBody);
        }        
    }    
    
    
    /**
     * Asks PDF.co for the state of a background job.
     *
     * @param jobId id of the job returned when the job was started
     * @return the 'status' value of the response, or null when the call failed,
     *         threw, or the response carried no 'status' entry
     */
    public static String checkJobStatus(String jobId)
    {
        String statusVal;
        try
        {            
            HttpRequest req = new HttpRequest();
            req.setHeader('x-api-key', API_KEY);
            req.setHeader('Content-Type', 'application/json');
            req.setEndpoint('https://api.pdf.co/v1/job/check?jobid=' + jobId);
            req.setMethod('POST');
            req.setTimeout(60000);
            HTTPResponse res = new Http().send(req);
            if(res.getStatusCode() == 200) 
            {
                System.Debug('res ' + res.getBody());
                Map<String, Object> deserializedBody = (Map<String, Object>)JSON.deserializeUntyped(res.getBody());
                statusVal = (String)deserializedBody.get('status');                
            }
            else
            {
                logFailure(res);
            }            
        }
        catch(Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.Debug(errorBody);
        }
        return statusVal;
    }
    
    /**
     * Downloads the file at extFileUrl and stores it as a new Salesforce File.
     *
     * @param extFileUrl      URL of the file to download; spaces are percent-encoded before use
     * @param DestinationFile title of the new ContentVersion ('.pdf' is appended for PathOnClient)
     */
    @TestVisible
    private static void downloadFile(String extFileUrl, String DestinationFile)
    {
        if(String.isBlank(extFileUrl))
        {
            System.debug('downloadFile: no result URL to download');
            return;
        }
        HttpRequest req = new HttpRequest(); 
        req.setEndpoint(extFileUrl.replace(' ', '%20')); 
        req.setMethod('GET'); 
        req.setHeader('Content-Type', 'application/pdf');
        req.setCompressed(true); 
        req.setTimeout(60000); 
        //Now Send HTTP Request
        HttpResponse res = new Http().send(req); 
        if(res.getStatusCode() != 200) 
        {
            logFailure(res);
            return;
        }
        ContentVersion conVer = new ContentVersion();
        conVer.ContentLocation = 'S'; // 'S' = document stored in Salesforce, 'E' = external file
        conVer.PathOnClient = DestinationFile + '.pdf'; // the extension matters: it enables the file preview
        conVer.Title = DestinationFile; // display name of the file
        conVer.VersionData = res.getBodyAsBlob();
        insert conVer;
        System.Debug('Success');
    }
    
    /**
     * Blocks the current transaction for slightly more than the given number
     * of milliseconds by spinning on the system clock.
     * The loop runs on the CPU for the whole wait, so keep the total waiting
     * time short.
     *
     * @param milliseconds minimum time to wait
     */
    public static void sleep(integer milliseconds) 
    {
        Long timeDiff = 0;
        DateTime firstTime = System.now();
        do {
            timeDiff = System.now().getTime() - firstTime.getTime();
        }
        while(timeDiff <= milliseconds);      
    }
    
    // Returns true when the parsed response body carries a truthy 'error' entry; a missing entry counts as "no error".
    private static Boolean isErrorResponse(Map<String, Object> body)
    {
        Object flag = body.get('error');
        return flag != null && Boolean.valueOf(flag);
    }
    
    // Writes body and status details of an unsuccessful HTTP response to the debug log.
    private static void logFailure(HttpResponse res)
    {
        System.debug('Error Response ' + res.getBody());
        System.Debug(' Status ' + res.getStatus());
        System.Debug(' Status Code' + res.getStatusCode());
        System.Debug(' Response String' + res.toString());
    }
}

MakeSearchablePDFWithFileUploadTest.cls

/**
 * Tests for MakeSearchablePDFWithFileUpload. All HTTP traffic is served by the
 * HttpCalloutMock implementations declared at the bottom of this class.
 */
@isTest
private class MakeSearchablePDFWithFileUploadTest {

    // Title of the source file the class under test looks up.
    private static final String SOURCE_TITLE = 'sampleScannedPDF';

    // Inserts the Salesforce File used as conversion input.
    private static void insertSourceFile()
    {
        ContentVersion con = new ContentVersion();
        con.Title = SOURCE_TITLE;
        con.VersionData = Blob.valueOf('Test Document');
        con.PathOnClient = SOURCE_TITLE + '.pdf';
        insert con;
    }

    // Number of files stored under the destination title of the class under test.
    private static Integer countResultFiles()
    {
        return [select count() from ContentVersion where Title = :MakeSearchablePDFWithFileUpload.DestinationFile];
    }

    // Full flow with a mock that answers every callout with the "presigned URL" payload.
    @isTest static void testStartProcessing()
    {
        insertSourceFile();
        
        Test.startTest();
        Test.setMock(HttpCalloutMock.class, new MakeSearchablePDFCallOutMock());
        MakeSearchablePDFWithFileUpload searchablePDF = new MakeSearchablePDFWithFileUpload();
        MakeSearchablePDFWithFileUpload.API_KEY = 'testapikey';
        searchablePDF.startProcessing();
        Test.stopTest();
        
        // Filter by title instead of reading element 0 of an unordered query.
        List<ContentVersion> cv = [select Id, Title from ContentVersion where Title = :SOURCE_TITLE];
        System.assertEquals(1, cv.size(), 'The source file must be left untouched');
        System.assertEquals(SOURCE_TITLE, cv[0].Title);
    }
    
    // No mock registered: the callout fails and startProcessing must swallow the failure.
    @isTest static void testStartProcessingForCatch()
    {
        insertSourceFile();
        
        Test.startTest();
        MakeSearchablePDFWithFileUpload searchablePDF = new MakeSearchablePDFWithFileUpload();
        MakeSearchablePDFWithFileUpload.API_KEY = 'testapikey';
        searchablePDF.startProcessing();
        Test.stopTest();
        
        List<ContentVersion> cv = [select Id, Title from ContentVersion where Title = :SOURCE_TITLE];
        System.assertEquals(1, cv.size(), 'The source file must be left untouched');
        System.assertEquals(0, countResultFiles(), 'No result file may be created when the callout fails');
    }
    
    // The mock reports status "success", so the result must be downloaded and stored.
    @isTest static void testmakePdfTextSearchableJobSuccess()
    {
        List<String> urlList = new List<String>();
        urlList.add('test');
        
        Test.startTest();
        Test.setMock(HttpCalloutMock.class, new MakeSearchablePDFCallOutMockForCheckStatusSuccess());
        MakeSearchablePDFWithFileUpload.makePdfTextSearchable(urlList);
        Test.stopTest();
        
        System.assertEquals(1, countResultFiles(), 'The converted PDF must be stored as a Salesforce File');
    }
    
    // No mock registered: makePdfTextSearchable must swallow the callout failure.
    @isTest static void testmakePdfTextSearchableJobSuccessForCatch()
    {
        List<String> urlList = new List<String>();
        urlList.add('test');
        
        Test.startTest();
        MakeSearchablePDFWithFileUpload.makePdfTextSearchable(urlList);
        Test.stopTest();
        
        System.assertEquals(0, countResultFiles(), 'No result file may be created when the callout fails');
    }
    
    // sleep() must not return before the requested time has elapsed.
    @isTest static void testSleep()
    {
        Integer waitMs = 50; // short wait: sleep() spins the CPU for the whole duration
        Test.startTest();
        Long startedAt = System.now().getTime();
        MakeSearchablePDFWithFileUpload.sleep(waitMs);
        Long elapsed = System.now().getTime() - startedAt;
        Test.stopTest();
        
        System.assert(elapsed >= waitMs, 'sleep returned after ' + elapsed + ' ms');
    }
    
    // Answers every request with HTTP 200 and a "get presigned URL" style payload (no lowercase "status" entry).
    public class MakeSearchablePDFCallOutMock implements HttpCalloutMock {
        
        public HTTPResponse respond(HTTPRequest request) {
            // Create a fake response
            HttpResponse response = new HttpResponse();
            response.setHeader('Content-Type', 'application/json');
            response.setBody('{"presignedUrl":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=900&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIZJDPLX6D7EHVCKA/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host&X-Amz-Signature=8650913644b6425ba8d52b78634698e5fc8970157d971a96f0279a64f4ba87fc","url": "https://pdf-temp-files.s3.amazonaws.com/a0d52f35504e47148d1771fce875db7b/test.pdf", "pageCount": 1, "error": false, "Status": 200, "name": "test.pdf", "remainingCredits": 99033681, "credits": 35 } ');
            response.setStatusCode(200);
            return response; 
        }
    }
    
    // Answers every request with HTTP 200 and a payload whose "status" is "success".
    public class MakeSearchablePDFCallOutMockForCheckStatusSuccess implements HttpCalloutMock {
        
        public HTTPResponse respond(HTTPRequest request) {
            // Create a fake response
            HttpResponse response = new HttpResponse();
            response.setHeader('Content-Type', 'application/json');
            response.setBody('{ "status": "success", "remainingCredits": 60227,"url": "https://pdf-temp-files.s3.amazonaws.com/a0d52f35504e47148d1771fce875db7b/result.pdf" } ');
            response.setStatusCode(200);
            return response; 
        }
    }
}