Document Parser API in C# using PDF.co Web API
What is PDF.co Web API? It is the Rest API that provides set of data extraction functions, and tools for document manipulation, splitting and merging of PDF files. Includes built-in OCR, and image recognition, and can generate and read barcodes from images, scans, and PDFs.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
BloodTestResultsToJson.csproj
<?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProjectGuid>{1E1C2C34-017E-4605-AE2B-55EA3313BE51}</ProjectGuid> <OutputType>Exe</OutputType> <RootNamespace>BloodTestResultsToJson</RootNamespace> <AssemblyName>BloodTestResultsToJson</AssemblyName> <TargetFrameworkVersion>v4.0</TargetFrameworkVersion> <FileAlignment>512</FileAlignment> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <Optimize>false</Optimize> <OutputPath>bin\Debug\</OutputPath> <DefineConstants>DEBUG;TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugType>pdbonly</DebugType> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DefineConstants>TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <ItemGroup> <Reference Include="Newtonsoft.Json, Version=10.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL"> <HintPath>packages\Newtonsoft.Json.10.0.3\lib\net40\Newtonsoft.Json.dll</HintPath> <Private>True</Private> </Reference> <Reference Include="System" /> <Reference Include="System.Core" /> <Reference Include="System.Xml.Linq" /> <Reference Include="System.Data" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="Program.cs" /> </ItemGroup> <ItemGroup> <None Include="SampleBloodReport.yml"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> <None Include="SampleBloodReport.pdf"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> <None Include="packages.config" /> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>
BloodTestResultsToJson.sln
Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 VisualStudioVersion = 12.0.40629.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BloodTestResultsToJson", "BloodTestResultsToJson.csproj", "{1E1C2C34-017E-4605-AE2B-55EA3313BE51}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Debug|Any CPU.Build.0 = Debug|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Release|Any CPU.ActiveCfg = Release|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection EndGlobal
Program.cs
using Newtonsoft.Json; using Newtonsoft.Json.Linq; using System; using System.Collections.Generic; using System.IO; using System.Net; using System.Threading; namespace BloodTestResultsToJson { class Program { // The authentication key (API Key). // Get your own by registering at https://app.pdf.co/documentation/api const String API_KEY = "***********************************"; // Sample document containing blood test report // Report consists of sample patient information such as patinet name, DOB, patientID // Report information such as report type, collection date and time // Test components such as complete blood count, Hemoglobin, White Blood Cell (WBC), Red Blood Cell (RBC), etc. const string SourceFile = @".\SampleBloodReport.pdf"; // PDF document password. Leave empty for unprotected documents. const string Password = ""; // Destination TXT file name const string DestinationFile = @".\result.json"; // (!) Make asynchronous job const bool Async = true; static void Main(string[] args) { // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html) // to create templates. // Read template from file: // Sample template which primarily extracts patient name, report date and test result table. String templateText = File.ReadAllText(@".\SampleBloodReport.yml"); // Create standard .NET web client instance WebClient webClient = new WebClient(); // Set API Key webClient.Headers.Add("x-api-key", API_KEY); // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE. // * If you already have a direct file URL, skip to the step 3. // Prepare URL for `Get Presigned URL` API call string query = Uri.EscapeUriString(string.Format( "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name={0}", Path.GetFileName(SourceFile))); try { // Execute request string response = webClient.DownloadString(query); // Parse JSON response JObject json = JObject.Parse(response); if (json["error"].ToObject<bool>() == false) { // Get URL to use for the file upload string uploadUrl = json["presignedUrl"].ToString(); string uploadedFileUrl = json["url"].ToString(); // 2. UPLOAD THE FILE TO CLOUD. webClient.Headers.Add("content-type", "application/octet-stream"); webClient.UploadFile(uploadUrl, "PUT", SourceFile); // You can use UploadData() instead if your file is byte[] or Stream webClient.Headers.Remove("content-type"); // 3. PARSE UPLOADED PDF DOCUMENT // URL of `Document Parser` API call string url = "https://api.pdf.co/v1/pdf/documentparser"; // Prepare requests params as JSON Dictionary<string, object> requestBody = new Dictionary<string, object>(); requestBody.Add("template", templateText); requestBody.Add("name", Path.GetFileName(DestinationFile)); requestBody.Add("url", uploadedFileUrl); requestBody.Add("async", Async); // Convert dictionary of params to JSON string jsonPayload = JsonConvert.SerializeObject(requestBody); // Execute request response = webClient.UploadString(url, "POST", jsonPayload); // Parse JSON response json = JObject.Parse(response); if (json["error"].ToObject<bool>() == false) { // Asynchronous job ID string jobId = json["jobId"].ToString(); // Get URL of generated JSON file string resultFileUrl = json["url"].ToString(); // Check the job status in a loop. // If you don't want to pause the main thread you can rework the code // to use a separate thread for the status checking and completion. do { string status = CheckJobStatus(webClient, jobId); // Possible statuses: "working", "failed", "aborted", "success". // Display timestamp and status (for demo purposes) Console.WriteLine(DateTime.Now.ToLongTimeString() + ": " + status); if (status == "success") { // Download JSON file webClient.DownloadFile(resultFileUrl, DestinationFile); Console.WriteLine("Generated JSON file saved as \"{0}\" file.", DestinationFile); break; } else if (status == "working") { // Pause for a few seconds Thread.Sleep(3000); } else { Console.WriteLine(status); break; } } while (true); } else { Console.WriteLine(json["message"].ToString()); } } else { Console.WriteLine(json["message"].ToString()); } } catch (WebException e) { Console.WriteLine(e.ToString()); } webClient.Dispose(); Console.WriteLine(); Console.WriteLine("Press any key..."); Console.ReadKey(); } /// <summary> /// Check job status /// </summary> static string CheckJobStatus(WebClient webClient, string jobId) { string url = "https://api.pdf.co/v1/job/check?jobid=" + jobId; string response = webClient.DownloadString(url); JObject json = JObject.Parse(response); return Convert.ToString(json["status"]); } } }
SampleBloodReport.yml
templateName: BloodTestTemplate templateVersion: 4 templatePriority: 0 detectionRules: keywords: [] objects: - name: PatientName objectType: field fieldProperties: fieldType: rectangle rectangle: - 177.75 - 123.75 - 62.25 - 12.75 pageIndex: 0 - name: ReportName objectType: field fieldProperties: fieldType: rectangle expression: '{{SmartDate}}' dataType: date rectangle: - 335.25 - 94.5 - 65.25 - 12 pageIndex: 0 - name: TestResults objectType: table tableProperties: start: y: 261.75 pageIndex: 0 end: y: 712.5 pageIndex: 0 left: 41.25 right: 573.75 rowMergingRule: byBorders
packages.config
<?xml version="1.0" encoding="utf-8"?> <packages> <package id="Newtonsoft.Json" version="10.0.3" targetFramework="net40" /> </packages>
VIDEO
ON-PREMISE OFFLINE SDK
See also:
ON-DEMAND REST WEB API
Get Your API Key
See also:
printable version:
PDF-co-Web-API-C-sharp-Blood-Test-Results-to-JSON.pdf
PDF-co-Web-API-C-sharp-Blood-Test-Results-to-JSON.pdf