@kennethleungty · Created August 21, 2025 03:44
import langextract as lx
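
# NOTE: PDFProcessor is a custom helper used in this walkthrough, not part of
# LangExtract. A minimal sketch (assuming the pypdf package) could look like:
from pypdf import PdfReader

class PDFProcessor:
    """Parses a PDF file and concatenates the extracted text of all pages."""

    def __init__(self, pdf_path: str):
        self.reader = PdfReader(pdf_path)

    def get_all_text(self) -> str:
        # Join each page's extracted text into a single string for downstream extraction
        return "\n".join(page.extract_text() or "" for page in self.reader.pages)
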
# Run PDF processor on insurance policy PDF and get concatenated parsed text
processor = PDFProcessor("data/input/driveshield_specimen_policy_value_plan.pdf")
input_text = processor.get_all_text()
# Run extraction. `prompt` and `examples` are the task description and
# few-shot examples defined earlier in this walkthrough.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemma3:4b",
    # Endpoint URL for the self-hosted model; the default Ollama server URL is used here
    model_url="http://localhost:11434",
    # Whether to expect/generate fenced output (```json or ```yaml). When True,
    # the model is prompted to generate fenced output and the resolver expects it;
    # when False, raw JSON/YAML is expected.
    fence_output=False,
    # LangExtract doesn't implement schema constraints for Ollama models yet
    use_schema_constraints=False,
    # Max number of characters per inference chunk
    max_char_buffer=2000,
    # Number of sequential extraction passes to improve recall and find additional
    # entities. Defaults to 1 (single extraction); when > 1, the system performs
    # multiple independent extractions and merges non-overlapping results.
    extraction_passes=2,
    temperature=0.0,
)
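
# A quick way to review the output (a sketch based on LangExtract's documented
# result object): each extraction carries its class, the exact source text span,
# and any attributes captured via the prompt examples.
for extraction in result.extractions:
    print(f"{extraction.extraction_class}: {extraction.extraction_text}")
    if extraction.attributes:
        print(f"  attributes: {extraction.attributes}")

# Optionally persist the annotated document and render LangExtract's interactive
# HTML visualization (file names here are illustrative)
lx.io.save_annotated_documents([result], output_name="extraction_results.jsonl", output_dir=".")
html_content = lx.visualize("extraction_results.jsonl")
with open("visualization.html", "w") as f:
    f.write(html_content.data if hasattr(html_content, "data") else html_content)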