How to convert PDF to HTML from uploaded file asynchronously for PDF to HTML API in Python using PDF.co Web API

Follow this simple tutorial to learn convert PDF to HTML from uploaded file asynchronously to have PDF to HTML API in Python

Writing of the code to convert PDF to HTML from uploaded file asynchronously in Python can be done by developers of any level using PDF.co Web API. PDF.co Web API was designed to assist PDF to HTML API in Python. PDF.co Web API is the Rest API that provides set of data extraction functions, tools for documents manipulation, splitting and merging of pdf files. Includes built-in OCR, images recognition, can generate and read barcodes from images, scans and pdf.

This simple and easy to understand sample source code in Python for PDF.co Web API contains different functions and options you should do calling the API to implement PDF to HTML API. This sample code in Python is all you need. Just copy-paste it to the code editor, then add a reference to PDF.co Web API and you are ready to try it! Check Python sample code examples to see if they respond to your needs and requirements for the project.

Free! Free! Free! ByteScout free trial version is available for FREE download from our website. Programming tutorials along with source code samples are assembled.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

ConvertPdfToHtmlFromUploadedFileAsynchronously.py

      
""" Cloud API asynchronous "PDF To Text" job example. Allows to avoid timeout errors when processing huge or scanned PDF documents. """ import os import requests # pip install requests import time import datetime # The authentication key (API Key). # Get your own by registering at https://app.pdf.co/documentation/api API_KEY = "******************************************" # Base URL for PDF.co Web API requests BASE_URL = "https://api.pdf.co/v1" # Source PDF file SourceFile = ".\\sample.pdf" # Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. Pages = "" # PDF document password. Leave empty for unprotected documents. Password = "" # Destination Html file name DestinationFile = ".\\result.html" # Set to $true to get simplified HTML without CSS. Default is the rich HTML keeping the document design. PlainHtml = False # Set to $true if your document has the column layout like a newspaper. ColumnLayout = False # (!) Make asynchronous job Async = True def main(args = None): uploadedFileUrl = uploadFile(SourceFile) if (uploadedFileUrl != None): convertPdfToHtml(uploadedFileUrl, DestinationFile) def convertPdfToHtml(uploadedFileUrl, destinationFile): """Converts PDF To Html using PDF.co Web API""" # Prepare requests params as JSON # See documentation: https://apidocs.pdf.co parameters = {} parameters["async"] = Async parameters["name"] = os.path.basename(destinationFile) parameters["password"] = Password parameters["pages"] = Pages parameters["simple"] = PlainHtml parameters["columns"] = ColumnLayout parameters["url"] = uploadedFileUrl # Prepare URL for 'PDF To Html' API request url = "{}/pdf/convert/to/html".format(BASE_URL) # Execute request and get response as JSON response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY }) if (response.status_code == 200): json = response.json() if json["error"] == False: # Asynchronous job ID jobId = json["jobId"] # URL of the result file resultFileUrl = json["url"] # Check the job status in a loop. # If you don't want to pause the main thread you can rework the code # to use a separate thread for the status checking and completion. while True: status = checkJobStatus(jobId) # Possible statuses: "working", "failed", "aborted", "success". # Display timestamp and status (for demo purposes) print(datetime.datetime.now().strftime("%H:%M.%S") + ": " + status) if status == "success": # Download result file r = requests.get(resultFileUrl, stream=True) if (r.status_code == 200): with open(destinationFile, 'wb') as file: for chunk in r: file.write(chunk) print(f"Result file saved as \"{destinationFile}\" file.") else: print(f"Request error: {response.status_code} {response.reason}") break elif status == "working": # Pause for a few seconds time.sleep(3) else: print(status) break else: # Show service reported error print(json["message"]) else: print(f"Request error: {response.status_code} {response.reason}") def checkJobStatus(jobId): """Checks server job status""" url = f"{BASE_URL}/job/check?jobid={jobId}" response = requests.get(url, headers={ "x-api-key": API_KEY }) if (response.status_code == 200): json = response.json() return json["status"] else: print(f"Request error: {response.status_code} {response.reason}") return None def uploadFile(fileName): """Uploads file to the cloud""" # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE. # Prepare URL for 'Get Presigned URL' API request url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format( BASE_URL, os.path.basename(fileName)) # Execute request and get response as JSON response = requests.get(url, headers={ "x-api-key": API_KEY }) if (response.status_code == 200): json = response.json() if json["error"] == False: # URL to use for file upload uploadUrl = json["presignedUrl"] # URL for future reference uploadedFileUrl = json["url"] # 2. UPLOAD FILE TO CLOUD. with open(fileName, 'rb') as file: requests.put(uploadUrl, data=file, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" }) return uploadedFileUrl else: # Show service reported error print(json["message"]) else: print(f"Request error: {response.status_code} {response.reason}") return None if __name__ == '__main__': main()

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also: