Skip to content

Instantly share code, notes, and snippets.

@El3k0n
Created August 28, 2025 13:49
Show Gist options
  • Select an option

  • Save El3k0n/5dd1f9b5febf421212d7550e6d605c73 to your computer and use it in GitHub Desktop.

Select an option

Save El3k0n/5dd1f9b5febf421212d7550e6d605c73 to your computer and use it in GitHub Desktop.
from docling.document_converter import DocumentConverter
import os
from pathlib import Path
'''
This script uses the docling library to convert every .pdf document in the 'input' folder into a .md (Markdown) document in an 'output' folder.
'''
def process_document_folder(input_folder, output_folder):
    """Convert every PDF in ``input_folder`` to a Markdown file in ``output_folder``.

    Each ``<name>.pdf`` found directly inside ``input_folder`` (no recursion)
    is converted with docling's ``DocumentConverter`` and written to
    ``output_folder/<name>.md`` as UTF-8 text. Progress and per-file errors
    are reported on stdout.

    Args:
        input_folder: Path (``str`` or path-like) of the folder holding the PDFs.
        output_folder: Path (``str`` or path-like) of the folder receiving the
            Markdown files. It is created, including missing parent
            directories, if it does not exist yet.

    Returns:
        None.
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    # parents=True so that a nested, not-yet-existing output path also works.
    output_path.mkdir(parents=True, exist_ok=True)

    # Compare the suffix case-insensitively so "REPORT.PDF" is picked up on
    # case-sensitive filesystems too, skip directories that merely look like
    # PDFs, and sort so the processing order is deterministic.
    pdf_files = sorted(
        candidate
        for candidate in input_path.glob("*")
        if candidate.is_file() and candidate.suffix.lower() == ".pdf"
    )
    print(f"Found {len(pdf_files)} PDF files to process...")
    if not pdf_files:
        return

    # Only build the converter once there is something to convert.
    converter = DocumentConverter()

    for pdf_file in pdf_files:
        # Best-effort batch: a failure on one document is reported and must
        # not prevent the remaining documents from being converted.
        try:
            print(f"Processing: {pdf_file.name}")
            result = converter.convert(str(pdf_file))
            markdown = result.document.export_to_markdown()

            output_file = output_path / f"{pdf_file.stem}.md"
            output_file.write_text(markdown, encoding="utf-8")
            print(f" Saved: {output_file.name}")
        except Exception as e:
            print(f" Error processing {pdf_file.name}: {e}")
# Script entry point: convert the PDFs in ./input into Markdown files in ./output.
if __name__ == "__main__":
    process_document_folder(
        "./input",
        "./output",
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment