Convert PDF to CSV with SharePoint and PDF.co

In this short article, we’ll walk through the source code for converting a PDF to CSV format using SharePoint and PDF.co. Let’s get into the source code.

Convert PDF to CSV – Source Code Snippets

Check out the markup for the Web Part visual control.

VisualWebPart1UserControl.ascx

<%@ Assembly Name="$SharePoint.Project.AssemblyFullName$" %>
<%@ Assembly Name="Microsoft.Web.CommandUI, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="SharePoint" Namespace="Microsoft.SharePoint.WebControls" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %> 
<%@ Register Tagprefix="Utilities" Namespace="Microsoft.SharePoint.Utilities" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="asp" Namespace="System.Web.UI" Assembly="System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" %>
<%@ Import Namespace="Microsoft.SharePoint" %> 
<%@ Register Tagprefix="WebPartPages" Namespace="Microsoft.SharePoint.WebPartPages" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Control Language="C#" AutoEventWireup="true" CodeBehind="VisualWebPart1UserControl.ascx.cs" Inherits="PDFcoWebPart.VisualWebPart1.VisualWebPart1UserControl" %>
<%-- Upload form for the PDF-to-CSV Web Part: file picker, convert button, log box and result box. --%>
Choose file to convert<br />
<asp:FileUpload ID="FileUpload1" runat="server" Width="600px" />
<br />
<br />
<%-- Posts back to StartButton_Click in the code-behind, which runs the conversion. --%>
<asp:Button ID="StartButton" runat="server" OnClick="StartButton_Click" Text="Convert to CSV" style="width: 610px; padding-left: 0px; margin-left: 0px; padding-right: 0px;"/>
<br />
<br />
Log<br />
<%-- Progress and error messages are appended here by the code-behind. --%>
<asp:TextBox ID="LogTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
Result<br />
<%-- Receives the text of the generated CSV file. --%>
<asp:TextBox ID="ResultTextBox" runat="server" Height="500px" TextMode="MultiLine" Width="600px"></asp:TextBox>

This is the code-behind for the Web Part user control.

VisualWebPart1UserControl.ascx.cs

using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

namespace PDFcoWebPart.VisualWebPart1
{
    /// <summary>
    /// Code-behind for the PDF-to-CSV Web Part user control. Uploads the file
    /// selected in <c>FileUpload1</c> to PDF.co, starts an asynchronous
    /// "PDF to CSV" job, polls the job status and writes the resulting CSV text
    /// to <c>ResultTextBox</c>. Progress and errors are appended to <c>LogTextBox</c>.
    /// </summary>
    public partial class VisualWebPart1UserControl : UserControl
    {
        // PDF.co API key, sent as the "x-api-key" header on every API request.
        readonly string API_KEY = Utils.API_KEY;

        // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
        const string Pages = "";
        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";
        // Run the conversion as an asynchronous job that is polled for completion.
        const bool Async = true;

        // Delay between two consecutive job status checks, in milliseconds.
        const int StatusPollIntervalMs = 3000;
        // Upper bound on status checks (about 5 minutes in total) so that a stuck
        // job cannot block the request thread forever.
        const int MaxStatusChecks = 100;

        /// <summary>
        /// Page load handler. Intentionally empty; kept because the control uses AutoEventWireup.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler of the "Convert to CSV" button. Runs the whole
        /// upload / convert / poll / download sequence synchronously.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Nothing to convert if the user did not select a file.
            if (!FileUpload1.HasFile)
            {
                AppendLog("Please choose a PDF file to convert.");
                return;
            }

            var fileData = FileUpload1.FileBytes;

            ServicePointManager.Expect100Continue = true;
            // Enable TLS 1.2 in addition to whatever is already enabled: this setting is
            // process-wide, so overwriting it could affect other code in the same process.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            // Create standard .NET web client instance
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
                // * If you already have a direct file URL, skip to the step 3.

                // Prepare URL for `Get Presigned URL` API call.
                // Only the file name is escaped (as a query-string value), so characters
                // such as '&', '#' or '+' in the name cannot break the query string.
                string query =
                    "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name="
                    + Uri.EscapeDataString(FileUpload1.FileName);

                try
                {
                    // Execute request
                    string response = webClient.DownloadString(query);

                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    if (json["status"].ToString() != "error")
                    {
                        // Get URL to use for the file upload
                        string uploadUrl = json["presignedUrl"].ToString();
                        string uploadedFileUrl = json["url"].ToString();

                        // 2. UPLOAD THE FILE TO CLOUD.
                        webClient.Headers.Add("content-type", "application/octet-stream");
                        webClient.UploadData(uploadUrl, "PUT", fileData);
                        // The content type applies to the upload only; remove it for later requests.
                        webClient.Headers.Remove("content-type");

                        // 3. CONVERT UPLOADED PDF FILE TO CSV

                        // URL for `PDF To CSV` API call
                        var url = "https://api.pdf.co/v1/pdf/convert/to/csv";

                        // Prepare requests params as JSON
                        Dictionary<string, object> parameters = new Dictionary<string, object>();
                        parameters.Add("name", FileUpload1.FileName);
                        parameters.Add("password", Password);
                        parameters.Add("pages", Pages);
                        parameters.Add("url", uploadedFileUrl);
                        parameters.Add("async", Async);

                        // Convert dictionary of params to JSON
                        string jsonPayload = JsonConvert.SerializeObject(parameters);

                        try
                        {
                            // Execute POST request with JSON payload
                            response = webClient.UploadString(url, jsonPayload);

                            // Parse JSON response
                            json = JObject.Parse(response);

                            if (json["status"].ToString() != "error")
                            {
                                // Asynchronous job ID
                                string jobId = json["jobId"].ToString();
                                // URL of generated CSV file that will be available after the job completion
                                string resultFileUrl = json["url"].ToString();

                                // Check the job status in a bounded loop.
                                // If you don't want to pause the request thread you can rework the code
                                // to use a separate thread for the status checking and completion.
                                bool finished = false;
                                for (int attempt = 0; attempt < MaxStatusChecks && !finished; attempt++)
                                {
                                    // Possible statuses: "working", "failed", "aborted", "success".
                                    string status = CheckJobStatus(jobId);

                                    // Display timestamp and status (for demo purposes)
                                    AppendLog(DateTime.Now.ToLongTimeString() + ": " + status);

                                    if (status == "success")
                                    {
                                        // Download CSV file
                                        var csvText = webClient.DownloadString(resultFileUrl);

                                        AppendLog("Generated CSV.");
                                        ResultTextBox.Text += csvText;
                                        finished = true;
                                    }
                                    else if (status == "working")
                                    {
                                        // Pause for a few seconds before the next check
                                        Thread.Sleep(StatusPollIntervalMs);
                                    }
                                    else
                                    {
                                        // Any other status is terminal and means no CSV was produced.
                                        AppendLog("Conversion job ended with status: " + status);
                                        finished = true;
                                    }
                                }

                                if (!finished)
                                {
                                    AppendLog("Timed out waiting for the conversion job to complete.");
                                }
                            }
                            else
                            {
                                AppendLog(json["message"].ToString());
                            }
                        }
                        catch (WebException ex)
                        {
                            AppendLog(ex.ToString());
                        }
                    }
                    else
                    {
                        AppendLog(json["message"].ToString());
                    }
                }
                catch (Exception ex)
                {
                    // Report any failure (network, JSON parsing, missing fields) in the log box
                    // instead of letting it surface as an unhandled page error.
                    AppendLog(ex.ToString());
                }
            }

            AppendLog("Done...");
        }

        /// <summary>
        /// Queries the PDF.co job status endpoint for the given job.
        /// </summary>
        /// <param name="jobId">ID of the asynchronous job returned by the conversion call.</param>
        /// <returns>The value of the "status" field of the JSON response, converted to a string.</returns>
        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // Escape the ID so it is always a well-formed query-string value.
                string url = "https://api.pdf.co/v1/job/check?jobid=" + Uri.EscapeDataString(jobId);

                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);

                return Convert.ToString(json["status"]);
            }
        }

        /// <summary>
        /// Appends one message to the log text box, terminated by a line break so
        /// that consecutive messages do not run together.
        /// </summary>
        /// <param name="message">Text to append.</param>
        private void AppendLog(string message)
        {
            LogTextBox.Text += message + Environment.NewLine;
        }
    }
}

Source Code at GitHub

You can explore the full source code for this sample at this GitHub link.

Convert PDF to CSV – Sample Screenshots

Step 1
Step 1
Step 2
Step 2
Step 3
Step 3
Step 4
Step 4
Step 5
Step 5
Step 6
Step 6
Step 7
Step 7

I hope this tutorial and code snippet are useful to you. Please try it yourself to learn more. Thank you!

Video Guide