How to PDF make searchable API for advanced conversation options in Python using PDF.co Web API
What is PDF.co Web API? It is the flexible Web API that includes full set of functions from e-signature requests to data extraction, OCR, images recognition, pdf splitting and pdf splitting. Can also generate barcodes and read barcodes from images, scans and pdf.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
MakeSearchablePdfFromUrl.py
import os import requests # pip install requests # The authentication key (API Key). # Get your own by registering at https://app.pdf.co/documentation/api API_KEY = "******************************************" # Base URL for PDF.co Web API requests BASE_URL = "https://api.pdf.co/v1" # Direct URL of source PDF file. SourceFileURL = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-make-searchable/sample.pdf" # Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. Pages = "" # PDF document password. Leave empty for unprotected documents. Password = "" # OCR language. "eng", "fra", "deu", "spa" supported currently. Let us know if you need more. Language = "eng" # Destination PDF file name DestinationFile = ".\\result.pdf" # Advanced Conversation Options Profiles = "{ 'profiles': [ { 'profile1': { 'OCRLanguage': 'eng', 'OCRResolution': 300 } } ] }" def main(args = None): makeSearchablePDF(SourceFileURL, DestinationFile) def makeSearchablePDF(uploadedFileUrl, destinationFile): """Make Uploaded PDF file Searchable using PDF.co Web API""" # Prepare requests params as JSON # See documentation: https://apidocs.pdf.co parameters = {} parameters["name"] = os.path.basename(destinationFile) parameters["password"] = Password parameters["pages"] = Pages parameters["lang"] = Language parameters["url"] = uploadedFileUrl parameters["profiles"] = Profiles # Prepare URL for 'Make Searchable PDF' API request url = "{}/pdf/makesearchable".format(BASE_URL) # Execute request and get response as JSON response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY }) if (response.status_code == 200): json = response.json() if json["error"] == False: # Get URL of result file resultFileUrl = json["url"] # Download result file r = requests.get(resultFileUrl, stream=True) if (r.status_code == 200): with open(destinationFile, 'wb') as file: for chunk in r: file.write(chunk) print(f"Result file saved as \"{destinationFile}\" file.") else: print(f"Request error: {response.status_code} {response.reason}") else: # Show service reported error print(json["message"]) else: print(f"Request error: {response.status_code} {response.reason}") def uploadFile(fileName): """Uploads file to the cloud""" # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE. # Prepare URL for 'Get Presigned URL' API request url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format( BASE_URL, os.path.basename(fileName)) # Execute request and get response as JSON response = requests.get(url, headers={ "x-api-key": API_KEY }) if (response.status_code == 200): json = response.json() if json["error"] == False: # URL to use for file upload uploadUrl = json["presignedUrl"] # URL for future reference uploadedFileUrl = json["url"] # 2. UPLOAD FILE TO CLOUD. with open(fileName, 'rb') as file: requests.put(uploadUrl, data=file, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" }) return uploadedFileUrl else: # Show service reported error print(json["message"]) else: print(f"Request error: {response.status_code} {response.reason}") return None if __name__ == '__main__': main()
VIDEO
ON-PREMISE OFFLINE SDK
See also:
ON-DEMAND REST WEB API
Get Your API Key
See also:
printable version:
PDF-co-Web-API-Python-Advanced-Conversation-Options.pdf
PDF-co-Web-API-Python-Advanced-Conversation-Options.pdf