Convert PDF Invoice to Google Sheet using PDF.co and Google Apps Script

Imagine a situation where you need to write down a PDF invoice in Google Sheets for further processing. This is indeed a very common practice where people tend to maintain all invoice data in cloud-based storage, especially Google Sheets.

Isn’t it a great idea to customize your Google Sheet to have this conversion right into it? Yes, it is a nice feature to have and this article is all about that. We’ll be using Google Apps Script and PDF.co API to build this solution.

Before diving into the solution, let’s take time and look into the input invoice PDF and final output.

Input Invoice PDF

The following image shows the input invoice PDF. The invoice is in a standard format. Furthermore, it contains the most commonly used components such as product/quantity/price-wise information, invoice no, total amount, due date, etc.

Sample Invoice PDF
Sample Invoice PDF

Use this link to get a hold of the demo invoice PDF used here.

Output Spreadsheet

The final flow will be like this:

  • We have a new menu named “PDF.co”. Inside that sub-menu named “Get Invoice Information”.
  • Now, upon clicking on the “Get Invoice Information” sub-menu, it’ll open a prompt asking for a PDF Invoice URL.
PDF Invoice URL
PDF Invoice URL
  • Finally, after entering the invoice URL it’ll fill Google Sheet with invoice data as shown in the below output image.
Output PDF
Output PDF

Where to Write Code

Now, there are many ways we can customize Google Sheets. However, in our case, we are writing our own source code. Hence, “Script editor” is the place where all action happens.

Script Editor

Go to the “Tools -> Script editor” menu. It’ll open a new Script editor project like below. We have to write our source code in this online code file.

Apps Script Editor

You will find this online Code Editor very much useful and interesting. All source code is written in plain JavaScript and we have built-in support for google libraries.

Source Code

Please review the following source code, and we’ll analyze important pieces of it in the next section.

/**
 * Initial Declaration and References
 */

// Get UI
const ui = SpreadsheetApp.getUi();

// Get the active spreadsheet and the active sheet
const ss = SpreadsheetApp.getActiveSpreadsheet();
const ssid = ss.getId();

/**
 * Add PDF.co Menus in Google Spreadsheet
 */
function onOpen() {
  var menuItems = [
    {name: 'Get Invoice Information', functionName: 'getInvoiceInformation'} 
  ];
  ss.addMenu('PDF.co', menuItems);
}


/**
 * Function which gets Invoice Information using PDF.co
 */
function getInvoiceInformation() {
  
  let invoiceUrlPromptResp = ui.prompt("Please Provide Invoice URL:");
  let invoiceUrl = invoiceUrlPromptResp.getResponseText();
  
  if(invoiceUrlPromptResp.getSelectedButton() == ui.Button.OK && invoiceUrl && invoiceUrl.trim() !== ""){
      // Prepare Payload
      var data = {
        "url": invoiceUrl, //"https://bytescout-com.s3-us-west-2.amazonaws.com/files/demo-files/cloud-api/document-parser/sample-invoice.pdf",
        "outputFormat": "JSON",
        "templateId": "1",
        "async": false,
        "encrypt": "false",
        "inline": "true",
        "password": "",
        "profiles": "",
        "storeResult": false
      };

      // Prepare Request Options
      var options = {
        'method' : 'post',
        'contentType': 'application/json',
        'headers': {
          "x-api-key": "--enter-your-pdf-co-api-key-here--"
        },
        // Convert the JavaScript object to a JSON string.
        'payload' : JSON.stringify(data)
      };
      
      // Get Response
      // https://developers.google.com/apps-script/reference/url-fetch
      var pdfCoResponse = UrlFetchApp.fetch('https://api.pdf.co/v1/pdf/documentparser', options);

      var pdfCoRespText = pdfCoResponse.getContentText();
      var pdfCoRespJson = JSON.parse(pdfCoRespText);

      // Display Result
      if(!pdfCoRespJson.error){
        // Upload file to Google Drive
        showInvoiceResult(pdfCoRespJson.body);    
      }
      else{
        resultUrlCell.setValue(pdfCoRespJson.message);    
      }
  }
  else{
    ui.alert("Please Provide Invoice URL");
  }
}

/**
 * Render Invoice Data to Spreadsheet
 */
function showInvoiceResult(invResultBody){
  var cmpName = getObjectValue(invResultBody, "companyName");
  var invName = getObjectValue(invResultBody, "companyName2");
  var invoiceId = getObjectValue(invResultBody, "invoiceId");
  var issuedDate = getObjectValue(invResultBody, "dateIssued");
  var dueDate = getObjectValue(invResultBody, "dateDue");
  var bankAccount = getObjectValue(invResultBody, "bankAccount");
  var total = getObjectValue(invResultBody, "total");
  var subTotal = getObjectValue(invResultBody, "subTotal");
  var tax = getObjectValue(invResultBody, "tax");

  var tableData = getTableData(invResultBody, "table");

  var cellIndex = 1;

  if(cmpName && cmpName !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Company Name");
    ss.getRange(`B${cellIndex}`).setValue(cmpName);
    cellIndex++;
  }

  if(invName && invName !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Invoice Name");
    ss.getRange(`B${cellIndex}`).setValue(invName);
    cellIndex++;
  }

  if(invoiceId && invoiceId !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Invoice #");
    ss.getRange(`B${cellIndex}`).setValue(invoiceId);
    cellIndex++;
  }

  if(issuedDate && issuedDate !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Issued Date");
    ss.getRange(`B${cellIndex}`).setValue(issuedDate);
    cellIndex++;
  }

  if(dueDate && dueDate !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Due Date");
    ss.getRange(`B${cellIndex}`).setValue(dueDate);
    cellIndex++;
  }

  if(bankAccount && bankAccount !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Bank Account");
    ss.getRange(`B${cellIndex}`).setValue(bankAccount);
    cellIndex++;
  }

  if(total && total !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Total");
    ss.getRange(`B${cellIndex}`).setValue(total);
    cellIndex++;
  }

  if(subTotal && subTotal !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Sub Total");
    ss.getRange(`B${cellIndex}`).setValue(subTotal);
    cellIndex++;
  }

  if(tax && tax !== ""){
    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Tax");
    ss.getRange(`B${cellIndex}`).setValue(tax);
    cellIndex++;
  }

  // Render Table
  if(tableData && tableData.length > 0){
    cellIndex++;

    ss.getRange(`A${cellIndex}`).setFontWeight("bold").setValue("Product Name");
    ss.getRange(`B${cellIndex}`).setFontWeight("bold").setValue("Item Price");
    ss.getRange(`C${cellIndex}`).setFontWeight("bold").setValue("Qty");
    ss.getRange(`D${cellIndex}`).setFontWeight("bold").setValue("Total Price");
    cellIndex++;

    for(var iTableData = 0; iTableData < tableData.length; iTableData++){ ss.getRange(`A${cellIndex}`).setValue(tableData[iTableData].prodName); ss.getRange(`B${cellIndex}`).setValue(tableData[iTableData].itmPrice); ss.getRange(`C${cellIndex}`).setValue(tableData[iTableData].qty); ss.getRange(`D${cellIndex}`).setValue(tableData[iTableData].totalPrice); cellIndex++; } } } /** * Get Json Object Value */ function getObjectValue(jsonBody, fieldName){ var oRet = ""; if(jsonBody && jsonBody.objects && jsonBody.objects.length > 0){
    var findObjField = jsonBody.objects.filter(x => x.name === fieldName && x.objectType === "field");
    if(findObjField && findObjField.length > 0){
      oRet = findObjField[0].value;
    }
  }

  return oRet;
}

/**
 * Get Table formatted data from input Json
 */
function getTableData(jsonBody, fieldName){
  var oRet = [];

  if(jsonBody && jsonBody.objects && jsonBody.objects.length > 0){
    var findObjTable = jsonBody.objects.filter(x => x.name === fieldName && x.objectType === "table");
    if(findObjTable && findObjTable.length > 0 && findObjTable[0].rows && findObjTable[0].rows.length > 0){
      var tableRows = findObjTable[0].rows;

      for(var iRow = 0; iRow < tableRows.length; iRow++){
        var qty = tableRows[iRow].column1.value;
        var prodName = tableRows[iRow].column2.value;
        var itmPrice = tableRows[iRow].column3.value;
        var totalPrice = tableRows[iRow].column4.value;

        oRet.push({ qty: qty, prodName: prodName, itmPrice: itmPrice, totalPrice: totalPrice });
      }
    }
  }

  return oRet;
}

Analyze Source Code

We can logically divide source code into the following sections.

  • Initial Declaration and References
  • Create PDF.co Menu
  • Integrate with PDF.co Endpoint
  • Render Google Sheet with Invoice dat

Initial Declaration and References

// Get UI
const ui = SpreadsheetApp.getUi();

// Get the active spreadsheet and the active sheet
const ss = SpreadsheetApp.getActiveSpreadsheet();
const ssid = ss.getId();
...

At the start of the program, we are declaring variables and assign them with spreadsheet references. These variables are widely used throughout the program. Therefore, it’s better to declare it once and use it more!

Create PDF.co Menu

Event “onOpen” contains code for generating a custom PDF.co menu. As the name suggests, This function is called each time a spreadsheet is opened.

function onOpen() {
  var menuItems = [
    {name: 'Get Invoice Information', functionName: 'getInvoiceInformation'} 
  ];
  ss.addMenu('PDF.co', menuItems);

Integrate with PDF.co Endpoint

This is the engine of the whole program! We’re consuming PDF.co Endpoint ‘/v1/pdf/documentparser’. Now, this Endpoint returns invoice data in JSON format. All heavy lifting is done at PDF.co side, and all we have to do is configure Endpoint with appropriate data.

All PDF.co Endpoint requires an API key in header. This is crucial for authenticating requests on the PDF.co side. You can easily get the PDF.co API key by signing up at this link.

      // Prepare Payload
      var data = {
        "url": invoiceUrl, //"https://bytescout-com.s3-us-west-2.amazonaws.com/files/demo-files/cloud-api/document-parser/sample-invoice.pdf",
        "outputFormat": "JSON",
        "templateId": "1",
        "async": false,
        "encrypt": "false",
        "inline": "true",
        "password": "",
        "profiles": "",
        "storeResult": false
      };

      // Prepare Request Options
      var options = {
        'method' : 'post',
        'contentType': 'application/json',
        'headers': {
          "x-api-key": "--enter-your-pdf-co-api-key-here--"
        },
        // Convert the JavaScript object to a JSON string.
        'payload' : JSON.stringify(data)
      };
      
      // Get Response
      // https://developers.google.com/apps-script/reference/url-fetch
      var pdfCoResponse = UrlFetchApp.fetch('https://api.pdf.co/v1/pdf/documentparser', options);

      var pdfCoRespText = pdfCoResponse.getContentText();
      var pdfCoRespJson = JSON.parse(pdfCoRespText);

Render Google Sheet with Invoice data

At this stage, we have all invoice data in JSON format. Now, all we need to do is fill Google Sheet cells with this information. It’ll give a nice user interface as shown in the demo GIF.

Function “showInvoiceResult” contains all logic to render a spreadsheet. Please review this method for more details.

This article is written by taking into account that users should be able to implement this right away. Please try to implement this at your side for better exposure. Thank you!

Video Guide