Extract Invoice Information with SharePoint and PDF.co

Sep 2, 2024·8 Minutes Read

In this short article, we’ll walk through the source code for extracting invoice information from a PDF using SharePoint and the PDF.co Web API. Let’s get into the source code.

Extract Invoices – Source Code Snippets

To get started with invoice extraction, let’s analyze the code first. Take a look at the markup for the Web Part’s visual control.

VisualWebPart1UserControl.ascx

<%@ Assembly Name="$SharePoint.Project.AssemblyFullName$" %>
<%@ Assembly Name="Microsoft.Web.CommandUI, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="SharePoint" Namespace="Microsoft.SharePoint.WebControls" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="Utilities" Namespace="Microsoft.SharePoint.Utilities" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="asp" Namespace="System.Web.UI" Assembly="System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" %>
<%@ Import Namespace="Microsoft.SharePoint" %> 
<%@ Register Tagprefix="WebPartPages" Namespace="Microsoft.SharePoint.WebPartPages" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Control Language="C#" AutoEventWireup="true" CodeBehind="VisualWebPart1UserControl.ascx.cs" Inherits="ParseSimpleDocumentWebPart.VisualWebPart1.VisualWebPart1UserControl" %>
<%-- PDF file to upload; the code-behind reads it through FileUpload1.HasFile, FileName and FileBytes. --%>
Choose source file<br />
<asp:FileUpload ID="FileUpload1" runat="server" Width="600px" />
<br />
<br />
<%-- Document Parser template text; sent as the "template" field of the parse request. --%>
Template<br />
<asp:TextBox ID="TemplateTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
<%-- Starts the upload / parse / poll sequence implemented in StartButton_Click. --%>
<asp:Button ID="StartButton" runat="server" OnClick="StartButton_Click" Text="Convert to CSV" style="width: 610px; padding-left: 0px; margin-left: 0px; padding-right: 0px;"/>
<br />
<br />
<%-- Progress and error messages appended by the code-behind. --%>
Log<br />
<asp:TextBox ID="LogTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
<%-- Parsed output downloaded from the result file URL once the job reports "success". --%>
Result<br />
<asp:TextBox ID="ResultTextBox" runat="server" Height="500px" TextMode="MultiLine" Width="600px"></asp:TextBox>

This is the code-behind for the Web Part user control.

VisualWebPart1UserControl.ascx.cs

using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;

namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Code-behind for the invoice-extraction Web Part. On button click it uploads the
    /// selected file to PDF.co, starts a Document Parser job with the entered template,
    /// polls the job status, and writes the downloaded result into the page.
    /// </summary>
    public partial class VisualWebPart1UserControl : UserControl
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co/documentation/api
        string API_KEY = Utils.API_KEY;
        // PDF document password. Leave empty for unprotected documents.
        // NOTE(review): declared but not sent with any request made by this class.
        const string Password = "";

        // (!) Make asynchronous job
        const bool Async = true;

        // Pause between two consecutive job status checks, in milliseconds.
        const int StatusCheckDelayMs = 3000;

        // Upper bound on status checks so a job that never leaves "working"
        // cannot block the request thread forever.
        const int MaxStatusChecks = 100;

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Handles the start button: validates the inputs, uploads the file, submits the
        /// parse job and displays its result. Failures are appended to the log text box
        /// instead of being rethrown.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html)
            // to create templates.

            // Both inputs are required, so reject the request when either one is missing.
            if (!FileUpload1.HasFile || String.IsNullOrWhiteSpace(TemplateTextBox.Text))
            {
                LogTextBox.Text += "Select file and template \n";
                return;
            }

            ServicePointManager.Expect100Continue = true;
            // Enable TLS 1.2 without switching off protocols other code in this process may rely on.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            // `using` guarantees the client is disposed on every exit path.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
                // * If you already have a direct file URL, skip to the step 3.

                // Only the file name is escaped, and as a data component, so characters such as
                // '&', '#' or '+' in the name cannot alter the query string.
                string query =
                    "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name=" +
                    Uri.EscapeDataString(FileUpload1.FileName);

                try
                {
                    // Execute request
                    string response = webClient.DownloadString(query);

                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    if (!json["error"].ToObject<bool>())
                    {
                        // Get URL to use for the file upload
                        string uploadUrl = json["presignedUrl"].ToString();
                        string uploadedFileUrl = json["url"].ToString();

                        // 2. UPLOAD THE FILE TO CLOUD.

                        webClient.Headers.Add("content-type", "application/octet-stream");
                        webClient.UploadData(uploadUrl, "PUT", FileUpload1.FileBytes);
                        webClient.Headers.Remove("content-type");

                        // 3. PARSE UPLOADED PDF DOCUMENT

                        // URL of `Document Parser` API call
                        string url = "https://api.pdf.co/v1/pdf/documentparser";

                        Dictionary<string, object> requestBody = new Dictionary<string, object>();
                        requestBody.Add("template", TemplateTextBox.Text);
                        requestBody.Add("name", FileUpload1.FileName);
                        requestBody.Add("url", uploadedFileUrl);
                        requestBody.Add("async", Async);

                        // Convert dictionary of params to JSON
                        string jsonPayload = JsonConvert.SerializeObject(requestBody);

                        // Execute request
                        response = webClient.UploadString(url, "POST", jsonPayload);

                        // Parse JSON response
                        json = JObject.Parse(response);

                        if (!json["error"].ToObject<bool>())
                        {
                            // Asynchronous job ID
                            string jobId = json["jobId"].ToString();
                            // Get URL of generated JSON file
                            string resultFileUrl = json["url"].ToString();

                            WaitForJobAndShowResult(webClient, jobId, resultFileUrl);
                        }
                        else
                        {
                            LogTextBox.Text += json["message"].ToString() + " \n";
                        }
                    }
                    else
                    {
                        LogTextBox.Text += json["message"].ToString() + " \n";
                    }
                }
                catch (Exception ex)
                {
                    // Demo-style handling: surface any failure in the log box.
                    LogTextBox.Text += ex.ToString() + " \n";
                }
            }

            LogTextBox.Text += "\n";
            LogTextBox.Text += "Done...\n";
        }

        /// <summary>
        /// Polls the job status until it stops being "working" or the check limit is reached,
        /// logging each status; on "success" downloads the result file into the result box.
        /// </summary>
        /// <param name="webClient">Client used to download the result file.</param>
        /// <param name="jobId">Identifier of the asynchronous job to poll.</param>
        /// <param name="resultFileUrl">URL of the generated result file.</param>
        private void WaitForJobAndShowResult(WebClient webClient, string jobId, string resultFileUrl)
        {
            // The polling blocks the current thread. If that is not acceptable, rework the code
            // to do the status checking and completion on a separate thread.
            for (int attempt = 0; attempt < MaxStatusChecks; attempt++)
            {
                string status = CheckJobStatus(jobId); // Possible statuses: "working", "failed", "aborted", "success".

                // Display timestamp and status (for demo purposes)
                LogTextBox.Text += DateTime.Now.ToLongTimeString() + ": " + status + "\n";

                if (status == "success")
                {
                    // Download JSON result
                    string result = webClient.DownloadString(resultFileUrl);

                    LogTextBox.Text += "Generated JSON.\n";
                    ResultTextBox.Text += result;
                    return;
                }

                if (status != "working")
                {
                    // Any other status ends the polling; report it.
                    LogTextBox.Text += status + " \n";
                    return;
                }

                // Pause for a few seconds before the next check
                Thread.Sleep(StatusCheckDelayMs);
            }

            LogTextBox.Text += "Job did not finish in time; stopped checking its status. \n";
        }

        /// <summary>
        /// Requests the current status of a job from the job-check endpoint.
        /// </summary>
        /// <param name="jobId">Identifier of the job to check.</param>
        /// <returns>The "status" value of the JSON response, converted to a string.</returns>
        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // Escape the id so it cannot alter the query string.
                string url = "https://api.pdf.co/v1/job/check?jobid=" + Uri.EscapeDataString(jobId);

                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);

                return Convert.ToString(json["status"]);
            }
        }
    }
}

Source Code at GitHub

You can explore the full source code for this sample at this GitHub link.

Extract Invoices – Sample Screenshots

Step 1
Step 1
Step 2
Step 2
Step 3
Step 3
Step 4
Step 4
Step 5
Step 5

I hope this tutorial and code snippet are useful to you. Please try it yourself to learn more.

Video Guide

Related Tutorials

See Related Tutorials