PDF to text API in C# with PDF.co Web API

How to use PDF.co Web API for PDF to text API in C#

ByteScout tutorials explain the material for programmers who use C#. PDF.co Web API was made to help with PDF to text API in C#. PDF.co Web API is the Rest API that provides set of data extraction functions, tools for documents manipulation, splitting and merging of pdf files. Includes built-in OCR, images recognition, can generate and read barcodes from images, scans and pdf.

The SDK samples like this one below explain how to quickly make your application do PDF to text API in C# with the help of PDF.co Web API. C# sample code is all you need: copy and paste the code to your C# application’s code editor, add a reference to PDF.co Web API (if you haven’t added yet) and you are ready to go! Use of PDF.co Web API in C# is also explained in the documentation included along with the product.

PDF.co Web API free trial version is available for download from our website. Free trial also includes programming tutorials along with source code samples.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

ByteScoutWebApiExample.csproj

      
<?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProjectGuid>{1E1C2C34-017E-4605-AE2B-55EA3313BE51}</ProjectGuid> <OutputType>Exe</OutputType> <RootNamespace>ByteScoutWebApiExample</RootNamespace> <AssemblyName>ByteScoutWebApiExample</AssemblyName> <TargetFrameworkVersion>v4.0</TargetFrameworkVersion> <FileAlignment>512</FileAlignment> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <Optimize>false</Optimize> <OutputPath>bin\Debug\</OutputPath> <DefineConstants>DEBUG;TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugType>pdbonly</DebugType> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DefineConstants>TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <ItemGroup> <Reference Include="Newtonsoft.Json, Version=10.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL"> <HintPath>packages\Newtonsoft.Json.10.0.3\lib\net40\Newtonsoft.Json.dll</HintPath> <Private>True</Private> </Reference> <Reference Include="System" /> <Reference Include="System.Core" /> <Reference Include="System.Xml.Linq" /> <Reference Include="System.Data" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="Program.cs" /> </ItemGroup> <ItemGroup> <None Include="packages.config" /> <None Include="profile.json"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>

ByteScoutWebApiExample.sln

      
Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 VisualStudioVersion = 12.0.40629.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ByteScoutWebApiExample", "ByteScoutWebApiExample.csproj", "{1E1C2C34-017E-4605-AE2B-55EA3313BE51}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Debug|Any CPU.Build.0 = Debug|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Release|Any CPU.ActiveCfg = Release|Any CPU {1E1C2C34-017E-4605-AE2B-55EA3313BE51}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection EndGlobal

Program.cs

      
using System; using System.IO; using System.Net; using Newtonsoft.Json.Linq; namespace ByteScoutWebApiExample { class Program { // The authentication key (API Key). // Get your own by registering at https://app.pdf.co/documentation/api const String API_KEY = "***********************************"; // Direct URL of source PDF file. const string SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"; // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. const string Pages = ""; // PDF document password. Leave empty for unprotected documents. const string Password = ""; // Destination TXT file name const string DestinationFile = @".\result.txt"; /* Some of advanced options available through profiles: (JSON can be single/double-quoted and contain comments.) { "profiles": [ { "profile1": { "ExtractInvisibleText": true, // Invisible text extraction. Values: true / false "ExtractShadowLikeText": true, // Shadow-like text extraction. Values: true / false "ExtractAnnotations": true, // Whether to extract PDF annotations. "CheckPermissions": true, // Ignore document permissions. Values: true / false "DetectNewColumnBySpacesRatio": 1.2, // A ratio affecting number of spaces between words. } } ] } */ // Sample profile that sets advanced conversion options // Advanced options are properties of TextExtractor class from ByteScout Text Extractor SDK used in the back-end: // https://cdn.bytescout.com/help/BytescoutPDFExtractorSDK/html/8a2bae5a-346f-8338-b5aa-6f3522dca0d4.htm static string Profiles = File.ReadAllText("profile.json"); static void Main(string[] args) { // Create standard .NET web client instance WebClient webClient = new WebClient(); // Set API Key webClient.Headers.Add("x-api-key", API_KEY); // Prepare URL for `PDF To TXT` API call string query = Uri.EscapeUriString(string.Format( "https://api.pdf.co/v1/pdf/convert/to/text?name={0}&password={1}&pages={2}&url={3}&profiles={4}", Path.GetFileName(DestinationFile), Password, Pages, SourceFileUrl, Profiles)); try { // Execute request string response = webClient.DownloadString(query); // Parse JSON response JObject json = JObject.Parse(response); if (json["error"].ToObject<bool>() == false) { // Get URL of generated TXT file string resultFileUrl = json["url"].ToString(); // Download TXT file webClient.DownloadFile(resultFileUrl, DestinationFile); Console.WriteLine("Generated TXT file saved as \"{0}\" file.", DestinationFile); } else { Console.WriteLine(json["message"].ToString()); } } catch (WebException e) { Console.WriteLine(e.ToString()); } webClient.Dispose(); Console.WriteLine(); Console.WriteLine("Press any key..."); Console.ReadKey(); } } }

packages.config

      
<?xml version="1.0" encoding="utf-8"?> <packages> <package id="Newtonsoft.Json" version="10.0.3" targetFramework="net40" /> </packages>

profile.json

      
{ "profiles": [ { "profile1": { "TrimSpaces": "False", "PreserveFormattingOnTextExtraction": "True", "Unwrap": "True" } } ] }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Related Samples: