In this short article, we’ll demonstrate source code for how to extract invoice information from PDF using SharePoint and PDF.co. Let’s get into source code.

  1. Extract Invoices – Source Code Snippets
  2. Source Code at GitHub
  3. Extract Invoices – Demo Video
  4. Extract Invoices – Sample Screenshots

Extract Invoices – Source Code Snippets

Take a look at the markup for Web Part visual control.

VisualWebPart1UserControl.ascx

<%@ Assembly Name="$SharePoint.Project.AssemblyFullName$" %>
<%@ Assembly Name="Microsoft.Web.CommandUI, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="SharePoint" Namespace="Microsoft.SharePoint.WebControls" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="Utilities" Namespace="Microsoft.SharePoint.Utilities" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="asp" Namespace="System.Web.UI" Assembly="System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" %>
<%@ Import Namespace="Microsoft.SharePoint" %> 
<%@ Register Tagprefix="WebPartPages" Namespace="Microsoft.SharePoint.WebPartPages" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Control Language="C#" AutoEventWireup="true" CodeBehind="VisualWebPart1UserControl.ascx.cs" Inherits="ParseSimpleDocumentWebPart.VisualWebPart1.VisualWebPart1UserControl" %>
Chose source file<br />
<asp:FileUpload ID="FileUpload1" runat="server" Width="600px" />
<br />
<br />
Template<br />
<asp:TextBox ID="TemplateTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
<asp:Button ID="StartButton" runat="server" OnClick="StartButton_Click" Text="Convert to CSV" style="width: 610px; padding-left: 0px; margin-left: 0px; padding-right: 0px; padding-right: 0px;"/>
<br />
<br />
Log<br />
<asp:TextBox ID="LogTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
Result<br />
<asp:TextBox ID="ResultTextBox" runat="server" Height="500px" TextMode="MultiLine" Width="600px"></asp:TextBox>

This is code behind for Web Part User Control.

VisualWebPart1UserControl.ascx.cs

using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;

namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    public partial class VisualWebPart1UserControl : UserControl
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co/documentation/api
        string API_KEY = Utils.API_KEY;
        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";

        // (!) Make asynchronous job
        const bool Async = true;

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html)
            // to create templates.

            // Create standard .NET web client instance
            ServicePointManager.Expect100Continue = true;
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
            WebClient webClient = new WebClient();

            if (!FileUpload1.HasFile && String.IsNullOrWhiteSpace(TemplateTextBox.Text))
            {
                LogTextBox.Text += "Select file and template \n";
                return;
            }

            // Set API Key
            webClient.Headers.Add("x-api-key", API_KEY);

            // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
            // * If you already have a direct file URL, skip to the step 3.

            // Prepare URL for `Get Presigned URL` API call
            string query = Uri.EscapeUriString(string.Format(
                "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name={0}",
                FileUpload1.FileName));

            try
            {
                // Execute request
                string response = webClient.DownloadString(query);

                // Parse JSON response
                JObject json = JObject.Parse(response);

                if (json["error"].ToObject() == false)
                {
                    // Get URL to use for the file upload
                    string uploadUrl = json["presignedUrl"].ToString();
                    string uploadedFileUrl = json["url"].ToString();

                    // 2. UPLOAD THE FILE TO CLOUD.

                    webClient.Headers.Add("content-type", "application/octet-stream");
                    webClient.UploadData(uploadUrl, "PUT", FileUpload1.FileBytes);
                    webClient.Headers.Remove("content-type");

                    // 3. PARSE UPLOADED PDF DOCUMENT

                    // URL of `Document Parser` API call
                    string url = "https://api.pdf.co/v1/pdf/documentparser";

                    Dictionary<string, object> requestBody = new Dictionary<string, object>();
                    requestBody.Add("template", TemplateTextBox.Text);
                    requestBody.Add("name", FileUpload1.FileName);
                    requestBody.Add("url", uploadedFileUrl);
                    requestBody.Add("async", Async);

                    // Convert dictionary of params to JSON
                    string jsonPayload = JsonConvert.SerializeObject(requestBody);

                    // Execute request
                    response = webClient.UploadString(url, "POST", jsonPayload);

                    // Parse JSON response
                    json = JObject.Parse(response);

                    if (json["error"].ToObject() == false)
                    {
                        // Asynchronous job ID
                        string jobId = json["jobId"].ToString();
                        // Get URL of generated JSON file
                        string resultFileUrl = json["url"].ToString();

                        // Check the job status in a loop. 
                        // If you don't want to pause the main thread you can rework the code 
                        // to use a separate thread for the status checking and completion.
                        do
                        {
                            string status = CheckJobStatus(jobId); // Possible statuses: "working", "failed", "aborted", "success".

                            // Display timestamp and status (for demo purposes)
                            LogTextBox.Text += DateTime.Now.ToLongTimeString() + ": " + status + "\n";

                            if (status == "success")
                            {
                                // Download JSON result
                                var result = webClient.DownloadString(resultFileUrl);

                                LogTextBox.Text += "Generated JSON.\n";
                                ResultTextBox.Text += result;
                                break;
                            }
                            else if (status == "working")
                            {
                                // Pause for a few seconds
                                Thread.Sleep(3000);
                            }
                            else
                            {
                                LogTextBox.Text += status + " \n";
                                break;
                            }
                        }
                        while (true);
                    }
                    else
                    {
                        LogTextBox.Text += json["message"].ToString() + " \n";
                    }
                }
                else
                {
                    LogTextBox.Text += json["message"].ToString() + " \n";
                }
            }
            catch (Exception ex)
            {
                LogTextBox.Text += ex.ToString() + " \n";
            }

            webClient.Dispose();

            LogTextBox.Text += "\n";
            LogTextBox.Text += "Done...\n";
        }

        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                string url = "https://api.pdf.co/v1/job/check?jobid=" + jobId;

                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);

                return Convert.ToString(json["status"]);
            }
        }
    }
}

Source Code at GitHub

You can explore the full source code for this sample at this GitHub link.

Extract Invoices – Demo Video

Extract Invoices – Sample Screenshots

 

I hope this code snippet is useful to you. Please try it yourself to get to know more. Thank You!

Related Pages:

Related Samples: