Extract Invoice Information with SharePoint and PDF.co
Sep 2, 2024·8 Minutes Read
In this short article, we’ll walk through the source code for extracting invoice information from a PDF using SharePoint and the PDF.co Web API. Let’s get into the source code.
Extract Invoices – Source Code Snippets
To get started with invoice extraction, let’s analyze the code first. Take a look at the markup for the Web Part visual control.
VisualWebPart1UserControl.ascx
<%@ Assembly Name="$SharePoint.Project.AssemblyFullName$" %>
<%@ Assembly Name="Microsoft.Web.CommandUI, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="SharePoint" Namespace="Microsoft.SharePoint.WebControls" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="Utilities" Namespace="Microsoft.SharePoint.Utilities" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="asp" Namespace="System.Web.UI" Assembly="System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" %>
<%@ Import Namespace="Microsoft.SharePoint" %>
<%@ Register Tagprefix="WebPartPages" Namespace="Microsoft.SharePoint.WebPartPages" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Control Language="C#" AutoEventWireup="true" CodeBehind="VisualWebPart1UserControl.ascx.cs" Inherits="ParseSimpleDocumentWebPart.VisualWebPart1.VisualWebPart1UserControl" %>
<%-- Source document to parse. The code-behind reads it through FileUpload1. --%>
Choose source file<br />
<asp:FileUpload ID="FileUpload1" runat="server" Width="600px" />
<br />
<br />
<%-- Document Parser template text; sent as the "template" field of the parse request. --%>
Template<br />
<asp:TextBox ID="TemplateTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
<%-- Starts the upload + parse workflow (StartButton_Click in the code-behind). --%>
<%-- NOTE(review): the label says "CSV" but the code-behind logs "Generated JSON." - confirm the intended wording. --%>
<asp:Button ID="StartButton" runat="server" OnClick="StartButton_Click" Text="Convert to CSV" style="width: 610px; padding-left: 0px; margin-left: 0px; padding-right: 0px;"/>
<br />
<br />
<%-- Progress and error messages appended by the code-behind. --%>
Log<br />
<asp:TextBox ID="LogTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
<%-- Downloaded parse result is appended here on success. --%>
Result<br />
<asp:TextBox ID="ResultTextBox" runat="server" Height="500px" TextMode="MultiLine" Width="600px"></asp:TextBox>
This is the code-behind for the Web Part user control.
VisualWebPart1UserControl.ascx.cs
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Code-behind for the Web Part user control. On button click it uploads the selected
    /// file to PDF.co, starts a Document Parser job with the template typed by the user,
    /// polls the job status and appends the downloaded result to the result text box.
    /// Progress and errors are appended to the log text box.
    /// </summary>
    public partial class VisualWebPart1UserControl : UserControl
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co/documentation/api
        string API_KEY = Utils.API_KEY;

        // PDF document password. Leave empty for unprotected documents.
        // NOTE(review): nothing in this class currently sends this value to the API.
        const string Password = "";

        // (!) Make asynchronous job
        const bool Async = true;

        // Pause between two job status checks, in milliseconds.
        const int StatusPollIntervalMs = 3000;

        // Upper bound on status checks so a job that never leaves "working"
        // cannot block the page request indefinitely.
        const int MaxStatusChecks = 100;

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler of the start button: validates the inputs, then runs the
        /// upload -> parse -> poll workflow. Any exception is written to the log text box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Both the file and the template are required, so reject the request when
            // EITHER one is missing (the message below asks for both).
            if (!FileUpload1.HasFile || String.IsNullOrWhiteSpace(TemplateTextBox.Text))
            {
                LogTextBox.Text += "Select file and template \n";
                return;
            }

            ServicePointManager.Expect100Continue = true;
            // Enable TLS 1.2 without switching off protocols that are already enabled
            // for the rest of the process.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            try
            {
                // `using` guarantees disposal on every exit path.
                using (WebClient webClient = new WebClient())
                {
                    // Send and read text as UTF-8 so non-ASCII template text is not mangled.
                    webClient.Encoding = System.Text.Encoding.UTF8;

                    // Set API Key
                    webClient.Headers.Add("x-api-key", API_KEY);

                    // 1-2. Get a presigned URL and upload the file to it.
                    string uploadedFileUrl = UploadSourceFile(webClient);
                    if (uploadedFileUrl != null)
                    {
                        // 3. Start the parse job and wait for its result.
                        JObject job = StartParseJob(webClient, uploadedFileUrl);
                        if (job != null)
                        {
                            // Asynchronous job ID and URL of the generated JSON file.
                            string jobId = job["jobId"].ToString();
                            string resultFileUrl = job["url"].ToString();
                            WaitForResult(webClient, jobId, resultFileUrl);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                LogTextBox.Text += ex.ToString() + " \n";
            }

            LogTextBox.Text += "\n";
            LogTextBox.Text += "Done...\n";
        }

        /// <summary>
        /// Requests a presigned upload URL for the selected file and PUTs the file bytes to it.
        /// </summary>
        /// <param name="webClient">Client that already carries the API key header.</param>
        /// <returns>
        /// The URL of the uploaded file as reported by the API, or <c>null</c> when the API
        /// reported an error (the error message is written to the log).
        /// </returns>
        private string UploadSourceFile(WebClient webClient)
        {
            // Escape the file name as a query VALUE so characters such as '&', '#' or '+'
            // cannot break the query string.
            string query =
                "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name="
                + Uri.EscapeDataString(FileUpload1.FileName);

            JObject json = JObject.Parse(webClient.DownloadString(query));
            if (IsError(json))
            {
                LogApiMessage(json);
                return null;
            }

            // Get URL to use for the file upload
            string uploadUrl = json["presignedUrl"].ToString();
            string uploadedFileUrl = json["url"].ToString();

            webClient.Headers.Add("content-type", "application/octet-stream");
            try
            {
                webClient.UploadData(uploadUrl, "PUT", FileUpload1.FileBytes);
            }
            finally
            {
                // Do not leak the upload content type into the following requests.
                webClient.Headers.Remove("content-type");
            }

            return uploadedFileUrl;
        }

        /// <summary>
        /// Posts the Document Parser request for an already uploaded file.
        /// </summary>
        /// <param name="webClient">Client that already carries the API key header.</param>
        /// <param name="uploadedFileUrl">URL of the uploaded source file.</param>
        /// <returns>
        /// The parsed JSON response, or <c>null</c> when the API reported an error
        /// (the error message is written to the log).
        /// </returns>
        private JObject StartParseJob(WebClient webClient, string uploadedFileUrl)
        {
            // URL of `Document Parser` API call
            string url = "https://api.pdf.co/v1/pdf/documentparser";

            Dictionary<string, object> requestBody = new Dictionary<string, object>();
            requestBody.Add("template", TemplateTextBox.Text);
            requestBody.Add("name", FileUpload1.FileName);
            requestBody.Add("url", uploadedFileUrl);
            requestBody.Add("async", Async);

            // Convert dictionary of params to JSON
            string jsonPayload = JsonConvert.SerializeObject(requestBody);

            string response;
            // Declare the body as JSON for this request only.
            webClient.Headers.Add("content-type", "application/json");
            try
            {
                response = webClient.UploadString(url, "POST", jsonPayload);
            }
            finally
            {
                webClient.Headers.Remove("content-type");
            }

            JObject json = JObject.Parse(response);
            if (IsError(json))
            {
                LogApiMessage(json);
                return null;
            }

            return json;
        }

        /// <summary>
        /// Polls the job status until it leaves the "working" state or the poll limit is
        /// reached. On "success" the result file is downloaded and appended to the result box.
        /// This blocks the calling thread between checks.
        /// </summary>
        /// <param name="webClient">Client used to download the result file.</param>
        /// <param name="jobId">Asynchronous job ID returned by the parse request.</param>
        /// <param name="resultFileUrl">URL of the generated JSON file.</param>
        private void WaitForResult(WebClient webClient, string jobId, string resultFileUrl)
        {
            for (int attempt = 0; attempt < MaxStatusChecks; attempt++)
            {
                // Possible statuses: "working", "failed", "aborted", "success".
                string status = CheckJobStatus(jobId);

                // Display timestamp and status (for demo purposes)
                LogTextBox.Text += DateTime.Now.ToLongTimeString() + ": " + status + "\n";

                if (status == "success")
                {
                    // Download JSON result
                    string result = webClient.DownloadString(resultFileUrl);
                    LogTextBox.Text += "Generated JSON.\n";
                    ResultTextBox.Text += result;
                    return;
                }

                if (status != "working")
                {
                    // Any other status ends the wait.
                    LogTextBox.Text += status + " \n";
                    return;
                }

                // Pause for a few seconds before the next check
                Thread.Sleep(StatusPollIntervalMs);
            }

            LogTextBox.Text += "Job is still working after " + MaxStatusChecks + " status checks; giving up.\n";
        }

        /// <summary>
        /// Reads the "error" flag of an API response. A missing flag is treated as "no error".
        /// </summary>
        private static bool IsError(JObject json)
        {
            return json.Value<bool>("error");
        }

        /// <summary>
        /// Appends the "message" field of an API response to the log (empty when absent).
        /// </summary>
        private void LogApiMessage(JObject json)
        {
            LogTextBox.Text += Convert.ToString(json["message"]) + " \n";
        }

        /// <summary>
        /// Queries the job status endpoint for the given job.
        /// </summary>
        /// <param name="jobId">Asynchronous job ID.</param>
        /// <returns>The "status" field of the response as a string.</returns>
        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // Escape the id so it is always a single, well-formed query value.
                string url = "https://api.pdf.co/v1/job/check?jobid=" + Uri.EscapeDataString(jobId);
                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);
                return Convert.ToString(json["status"]);
            }
        }
    }
}
Source Code at GitHub
You can explore the full source code for this sample at this GitHub link.
Extract Invoices – Sample Screenshots
I hope this tutorial and code snippet are useful to you. Please try it yourself to learn more.
Video Guide
Related Tutorials
Quick Start with Document Parser Template Editor: How To Create a Template
Sep 2, 2024·5 Minutes Read