|
"""Test client for Azure Form Recognizer |
|
|
|
Usage: |
|
forms.py [-v | --verbose] analyze <filename> |
|
forms.py [-v | --verbose] train <source> |
|
|
|
|
|
Options: |
|
-h --help show this help message |
|
-v --verbose show more output |
|
--version show version number and exit |
|
|
|
You should use the following environment variables to provide required values to the program: |
|
* FORMS_API_URL: Base path to the Azure Cognitive services, e.g. https://exampleorg.cognitiveservices.azure.com/formrecognizer/v1.0-preview/ |
|
* FORMS_API_KEY: API key for Form Recognizer API
|
* FORMS_MODEL_ID: Model id for custom form |
|
|
|
Conveniently, you can use the python-dotenv module for this :) |
|
|
|
""" |
|
import os |
|
import sys |
|
import json |
|
|
|
import requests |
|
from dotenv import load_dotenv |
|
from docopt import docopt |
|
|
|
|
|
def main():
    """Entry point of the command-line client.

    Loads variables from a ``.env`` file (if any) into the environment,
    parses the command line against the module docstring and runs the
    requested sub-command (``analyze`` or ``train``).
    """
    load_dotenv()
    options = docopt(__doc__, version='0.1')

    # Look the flag up under both spellings of the option name.
    be_verbose = options.get('-v', False) or options.get('--verbose', False)

    if options['analyze']:
        analyze_form(options['<filename>'], be_verbose)
        return

    if options['train']:
        train_model(options['<source>'], be_verbose)
|
|
|
|
|
def analyze_form(form_file_name, verbose=False):
    """Analyze a form with the trained custom model.

    Reads the form file, posts its bytes to the ``analyze`` operation of the
    model named by ``FORMS_MODEL_ID`` and writes the JSON response to
    ``<form_file_name>.json`` (the input name with ``.json`` appended).

    See documentation here: https://westus2.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/AnalyzeWithCustomModel

    Args:
        form_file_name: Path of the form to analyze. The request is always
            sent with the ``application/pdf`` content type.
        verbose: When true, print progress messages.

    Raises:
        KeyError: If ``FORMS_API_KEY``, ``FORMS_API_URL`` or
            ``FORMS_MODEL_ID`` is missing from the environment.
        OSError: If the form cannot be read or the result cannot be written.
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    if verbose:
        print(f'Analyzing form {form_file_name}')

    headers = {
        'Content-Type': 'application/pdf',
        'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
    }

    with open(form_file_name, 'rb') as form_file:
        form_data = form_file.read()

    # The documented FORMS_API_URL example ends with "/"; strip it so the
    # joined path does not contain an empty segment ("...//models/...").
    base_url = os.environ['FORMS_API_URL'].rstrip('/')
    model_id = os.environ['FORMS_MODEL_ID']
    endpoint_url = f'{base_url}/models/{model_id}/analyze'

    # Without a timeout, requests waits forever on an unresponsive server.
    # (connect timeout, read timeout) in seconds; the read timeout is
    # generous because the analysis result is returned in this response.
    response = requests.post(
        endpoint_url,
        headers=headers,
        data=form_data,
        timeout=(10, 300),
    )
    response.raise_for_status()
    response_json = response.json()

    output_file_name = f'{form_file_name}.json'
    with open(output_file_name, 'w') as form_json_file:
        if verbose:
            print(f'Saving to file {output_file_name}')
        json.dump(response_json, form_json_file, indent=4)
|
|
|
|
|
def train_model(source, verbose=False):
    """Train the form recognition model.

    Currently only supports the blob storage + SAS version. Posts a training
    request to the ``train`` operation and prints the JSON response.

    See documentation here: https://northeurope.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/TrainCustomModel

    Args:
        source: Value sent as the ``source`` field of the JSON request body.
        verbose: When true, print progress messages, including the endpoint
            URL being called.

    Raises:
        KeyError: If ``FORMS_API_KEY`` or ``FORMS_API_URL`` is missing from
            the environment.
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    if verbose:
        print('Training model')

    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
    }

    # The documented FORMS_API_URL example ends with "/"; strip it so the
    # joined path does not contain an empty segment ("...//train").
    base_url = os.environ['FORMS_API_URL'].rstrip('/')
    endpoint_url = f'{base_url}/train'
    if verbose:
        # Diagnostic output only; keep it behind the verbose flag.
        print(endpoint_url)

    train_data = {
        'source': source,
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    # (connect timeout, read timeout) in seconds.
    response = requests.post(
        endpoint_url,
        headers=headers,
        json=train_data,
        timeout=(10, 300),
    )
    response.raise_for_status()
    response_json = response.json()

    # The response is the only result of this command, so always print it.
    print(response_json)
|
|
|
|
|
# Run the command-line client only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()