tgranqvist · August 25, 2019 21:37
diff --git a/.env b/.env
 FORMS_API_URL = _your api endpoint_
 FORMS_API_KEY = _your api key_
 FORMS_MODEL_ID = _your model id_
diff --git a/README.md b/README.md
diff --git a/forms.py b/forms.py
 """Test client for Azure Form Recognizer

 Usage:
 	forms.py [-v | --verbose] analyze <filename>
 	forms.py [-v | --verbose] train <source>


 Options:
 	-h --help	show this help message
 	-v --verbose show more output
 	--version	show version number and exit
 	
 You should use the following environment variables to provide required values to the program:
 * FORMS_API_URL: Base path to the Azure Cognitive services, e.g. https://exampleorg.cognitiveservices.azure.com/formrecognizer/v1.0-preview/
 * FORMS_API_KEY: API key for Form Recognizer API
 * FORMS_MODEL_ID: Model id for custom form

 Conveniently, you can use the python-dotenv module for this :) 

 """
 import os
 import sys
 import json

 import requests
 from dotenv import load_dotenv
 from docopt import docopt


 def main():
 	"""Entry point of the command line client.

 	Calls ``load_dotenv()``, parses the command line against the module
 	docstring with docopt and dispatches to the handler of the selected
 	sub-command (``analyze`` or ``train``).

 	"""
 	load_dotenv()
 	cli_args = docopt(__doc__, version='0.1')

 	# Either spelling of the flag switches verbose output on.
 	short_flag = cli_args.get('-v', False)
 	verbose = short_flag or cli_args.get('--verbose', False)

 	if cli_args['analyze']:
 		analyze_form(cli_args['<filename>'], verbose)
 		return

 	if cli_args['train']:
 		train_model(cli_args['<source>'], verbose)


 def analyze_form(form_file_name, verbose=False):
 	"""Analyze a form with the trained custom model.

 	Reads the form file, posts its bytes to the
 	``<FORMS_API_URL>/models/<FORMS_MODEL_ID>/analyze`` endpoint and writes
 	the JSON response to ``<form_file_name>.json`` (indented by 4).
 	See documentation here: https://westus2.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/AnalyzeWithCustomModel

 	Args:
 		form_file_name: Path of the form file to analyze. The request
 			content type is guessed from the file name; anything that is
 			not recognised as PDF, JPEG or PNG is sent as
 			``application/pdf``.
 		verbose: When truthy, print progress messages.

 	Raises:
 		KeyError: If ``FORMS_API_KEY``, ``FORMS_API_URL`` or
 			``FORMS_MODEL_ID`` is missing from the environment.
 		OSError: If the form cannot be read or the result cannot be written.
 		requests.HTTPError: If the service answers with an error status.

 	"""
 	# Imported locally so the function does not depend on a module-level
 	# import that the file does not have.
 	import mimetypes

 	if verbose:
 		print(f'Analyzing form {form_file_name}')

 	# Content types accepted by the analyze operation according to the API
 	# documentation linked above; everything else keeps the PDF default.
 	supported_types = ('application/pdf', 'image/jpeg', 'image/png')
 	guessed_type, _ = mimetypes.guess_type(form_file_name)
 	content_type = guessed_type if guessed_type in supported_types else 'application/pdf'

 	headers = {
 		'Content-Type': content_type,
 		'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
 	}

 	with open(form_file_name, 'rb') as form_file:
 		form_data = form_file.read()

 	# The documented example base URL ends with '/', so strip trailing
 	# slashes to avoid building '...//models/...'.
 	base_url = os.environ['FORMS_API_URL'].rstrip('/')
 	endpoint_url = f'{base_url}/models/{os.environ["FORMS_MODEL_ID"]}/analyze'

 	response = requests.post(endpoint_url, headers=headers, data=form_data)
 	response.raise_for_status()
 	response_json = response.json()

 	with open(f'{form_file_name}.json', 'w') as form_json_file:
 		if verbose:
 			print(f'Saving to file {form_file_name}.json')
 		json.dump(response_json, form_json_file, indent=4)
 	

 def train_model(source, verbose=False):
 	"""Train the form recognition model.

 	Posts ``{'source': source}`` as JSON to ``<FORMS_API_URL>/train`` and
 	prints the decoded JSON response.
 	Currently only supports the blob storage + SAS version.
 	See documentation here: https://northeurope.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/TrainCustomModel

 	Args:
 		source: Value sent as the ``source`` field of the training request.
 		verbose: When truthy, print progress messages, including the
 			endpoint URL being called.

 	Raises:
 		KeyError: If ``FORMS_API_KEY`` or ``FORMS_API_URL`` is missing from
 			the environment.
 		requests.HTTPError: If the service answers with an error status.

 	"""
 	if verbose:
 		print('Training model')

 	headers = {
 		'Content-Type': 'application/json',
 		'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
 	}

 	# The documented example base URL ends with '/', so strip trailing
 	# slashes to avoid building '...//train'.
 	base_url = os.environ['FORMS_API_URL'].rstrip('/')
 	endpoint_url = f'{base_url}/train'
 	# Diagnostic output only; gated like the other progress messages.
 	if verbose:
 		print(endpoint_url)

 	train_data = {
 		'source': source
 	}
 	response = requests.post(endpoint_url, headers=headers, json=train_data)
 	response.raise_for_status()
 	response_json = response.json()
 	# The response is the command's result, so it is always printed.
 	print(response_json)
 	

 # Run the command line client only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == '__main__':
 	main()
diff --git a/Requirements.txt b/Requirements.txt
 requests
 docopt
 python-dotenv
	FORMS_API_URL = _your api endpoint_
	FORMS_API_KEY = _your api key_
	FORMS_MODEL_ID = _your model id_
	"""Test client for Azure Form Recognizer

	Usage:
	forms.py [-v \| --verbose] analyze <filename>
	forms.py [-v \| --verbose] train <source>


	Options:
	-h --help show this help message
	-v --verbose show more output
	--version show version number and exit

	You should use the following environment variables to provide required values to the program:
	* FORMS_API_URL: Base path to the Azure Cognitive services, e.g. https://exampleorg.cognitiveservices.azure.com/formrecognizer/v1.0-preview/
	* FORMS_API_KEY: API key for Form Recognizer API
	* FORMS_MODEL_ID: Model id for custom form

	Conveniently, you can use the python-dotenv module for this :)

	"""
	import os
	import sys
	import json

	import requests
	from dotenv import load_dotenv
	from docopt import docopt


	def main():
		"""Entry point of the command line client.

		Calls ``load_dotenv()``, parses the command line against the module
		docstring with docopt and dispatches to the handler of the selected
		sub-command (``analyze`` or ``train``).

		"""
		load_dotenv()
		cli_args = docopt(__doc__, version='0.1')

		# Either spelling of the flag switches verbose output on.
		short_flag = cli_args.get('-v', False)
		verbose = short_flag or cli_args.get('--verbose', False)

		if cli_args['analyze']:
			analyze_form(cli_args['<filename>'], verbose)
			return

		if cli_args['train']:
			train_model(cli_args['<source>'], verbose)


	def analyze_form(form_file_name, verbose=False):
		"""Analyze a form with the trained custom model.

		Reads the form file, posts its bytes to the
		``<FORMS_API_URL>/models/<FORMS_MODEL_ID>/analyze`` endpoint and writes
		the JSON response to ``<form_file_name>.json`` (indented by 4).
		See documentation here: https://westus2.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/AnalyzeWithCustomModel

		Args:
			form_file_name: Path of the form file to analyze. The request
				content type is guessed from the file name; anything that is
				not recognised as PDF, JPEG or PNG is sent as
				``application/pdf``.
			verbose: When truthy, print progress messages.

		Raises:
			KeyError: If ``FORMS_API_KEY``, ``FORMS_API_URL`` or
				``FORMS_MODEL_ID`` is missing from the environment.
			OSError: If the form cannot be read or the result cannot be written.
			requests.HTTPError: If the service answers with an error status.

		"""
		# Imported locally so the function does not depend on a module-level
		# import that the file does not have.
		import mimetypes

		if verbose:
			print(f'Analyzing form {form_file_name}')

		# Content types accepted by the analyze operation according to the API
		# documentation linked above; everything else keeps the PDF default.
		supported_types = ('application/pdf', 'image/jpeg', 'image/png')
		guessed_type, _ = mimetypes.guess_type(form_file_name)
		content_type = guessed_type if guessed_type in supported_types else 'application/pdf'

		headers = {
			'Content-Type': content_type,
			'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
		}

		with open(form_file_name, 'rb') as form_file:
			form_data = form_file.read()

		# The documented example base URL ends with '/', so strip trailing
		# slashes to avoid building '...//models/...'.
		base_url = os.environ['FORMS_API_URL'].rstrip('/')
		endpoint_url = f'{base_url}/models/{os.environ["FORMS_MODEL_ID"]}/analyze'

		response = requests.post(endpoint_url, headers=headers, data=form_data)
		response.raise_for_status()
		response_json = response.json()

		with open(f'{form_file_name}.json', 'w') as form_json_file:
			if verbose:
				print(f'Saving to file {form_file_name}.json')
			json.dump(response_json, form_json_file, indent=4)


	def train_model(source, verbose=False):
		"""Train the form recognition model.

		Posts ``{'source': source}`` as JSON to ``<FORMS_API_URL>/train`` and
		prints the decoded JSON response.
		Currently only supports the blob storage + SAS version.
		See documentation here: https://northeurope.dev.cognitive.microsoft.com/docs/services/form-recognizer-api/operations/TrainCustomModel

		Args:
			source: Value sent as the ``source`` field of the training request.
			verbose: When truthy, print progress messages, including the
				endpoint URL being called.

		Raises:
			KeyError: If ``FORMS_API_KEY`` or ``FORMS_API_URL`` is missing from
				the environment.
			requests.HTTPError: If the service answers with an error status.

		"""
		if verbose:
			print('Training model')

		headers = {
			'Content-Type': 'application/json',
			'Ocp-Apim-Subscription-Key': os.environ['FORMS_API_KEY'],
		}

		# The documented example base URL ends with '/', so strip trailing
		# slashes to avoid building '...//train'.
		base_url = os.environ['FORMS_API_URL'].rstrip('/')
		endpoint_url = f'{base_url}/train'
		# Diagnostic output only; gated like the other progress messages.
		if verbose:
			print(endpoint_url)

		train_data = {
			'source': source
		}
		response = requests.post(endpoint_url, headers=headers, json=train_data)
		response.raise_for_status()
		response_json = response.json()
		# The response is the command's result, so it is always printed.
		print(response_json)


	# Run the command line client only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	main()