|
#!/usr/bin/env python3 |
|
""" |
|
Generate an invoice PDF from a template DOCX and invoice data. |
|
|
|
Usage: |
|
python generate_invoice.py <invoice_data.json> <output.pdf> |
|
|
|
The invoice data JSON should contain: |
|
{ |
|
"invoice_number": "INV-2025-1002", |
|
"date": "2025-12-01", |
|
"due_date": "2025-12-31", |
|
"from": { |
|
"company": "Your Company Name", |
|
"address_line1": "123 Street", |
|
"address_line2": "City, State 12345", |
|
"contact": "Your Name", |
|
"email": "[email protected]" |
|
}, |
|
"to": { |
|
"company": "Client Company", |
|
"address_line1": "456 Avenue", |
|
"address_line2": "City, State 67890", |
|
"contact": "Client Name", |
|
"email": "[email protected]" |
|
}, |
|
"items": [ |
|
{"description": "Service 1", "quantity": 10, "unit_price": 100.00}, |
|
{"description": "Service 2", "quantity": 1, "unit_price": 500.00} |
|
], |
|
"vat_rate": 0.10, |
|
"payment_terms": "Net 30 Days" |
|
} |
|
""" |
|
|
|
import json
import re
import shutil
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path
from xml.sax.saxutils import escape
|
|
|
|
|
def format_currency(amount: float) -> str:
    """Format *amount* as a US-dollar string with thousands separators.

    The value is rounded to two decimals. Negative amounts carry the sign in
    front of the currency symbol (``-$50.00`` rather than ``$-50.00``), and a
    negative value that rounds to zero is shown as plain ``$0.00``.

    Args:
        amount: The monetary value to format.

    Returns:
        The formatted string, e.g. ``"$1,234.50"``.
    """
    magnitude = f"{abs(amount):,.2f}"
    # Decide on the sign after rounding so that -0.001 does not become "-$0.00".
    is_negative = amount < 0 and magnitude != "0.00"
    return f"-${magnitude}" if is_negative else f"${magnitude}"
|
|
|
|
|
def unpack_docx(docx_path: Path, output_dir: Path) -> None:
    """Unzip the DOCX at *docx_path* into *output_dir*.

    The target directory (and any missing parents) is created before the
    archive is opened; an already existing directory is reused.
    """
    output_dir.mkdir(exist_ok=True, parents=True)
    archive = zipfile.ZipFile(docx_path)
    with archive:
        archive.extractall(path=output_dir)
|
|
|
|
|
def pack_docx(input_dir: Path, output_path: Path) -> None:
    """Zip every file below *input_dir* into a DOCX at *output_path*.

    Archive member names are the file paths relative to *input_dir*;
    directories themselves are not stored. The parent directory of
    *output_path* is created when it does not exist yet.
    """
    output_path.parent.mkdir(exist_ok=True, parents=True)
    archive = zipfile.ZipFile(
        output_path, mode="w", compression=zipfile.ZIP_DEFLATED
    )
    with archive:
        members = (entry for entry in input_dir.rglob("*") if entry.is_file())
        for member in members:
            archive.write(member, arcname=member.relative_to(input_dir))
|
|
|
|
|
# Line-item rows exactly as they appear in the template:
# (description, unit price, amount).
_TEMPLATE_ROWS = (
    ("Project Consulting Hours", "150.00", "$6,000.00"),
    ("Software License Fee (Annual)", "999.00", "$999.00"),
    ("Custom Feature Development", "2,500.00", "$2,500.00"),
)

# Quantity cell text of those rows, in the same order.
_TEMPLATE_QUANTITIES = ("40", "1", "1")

# Markup of a text run. Quantities such as "1" are far too common to be
# searched for on their own, so they are matched together with this markup.
_TEXT_RUN = '<w:t xml:space="preserve">{}</w:t>'

# Well-known install locations tried when "soffice" is not on PATH.
_SOFFICE_FALLBACK_PATHS = (
    "/Applications/LibreOffice.app/Contents/MacOS/soffice",  # macOS
    "/usr/bin/soffice",
    "/usr/local/bin/soffice",
)


def _row_cells(item) -> tuple:
    """Return (description, quantity, unit price, amount) text for one row.

    A row without an item (``None``) is rendered as four empty strings.
    """
    if item is None:
        return "", "", "", ""
    amount = item["quantity"] * item["unit_price"]
    return (
        str(item["description"]),
        str(item["quantity"]),
        f"{item['unit_price']:,.2f}",
        format_currency(amount),
    )


def _substitute(content: str, replacements: list, quantities: list) -> str:
    """Replace all template placeholders in a single pass over *content*.

    Every placeholder is located in the pristine template text, so a value
    that has just been inserted can never be mistaken for another placeholder
    (which happens when ``str.replace`` calls are chained). Inserted values
    are XML-escaped.

    Args:
        content: Text of the template's ``word/document.xml``.
        replacements: ``(placeholder, value)`` pairs for literal text.
        quantities: New quantity texts, one per template row, in row order.

    Returns:
        The document text with all placeholders substituted.
    """
    literal = {}
    for old, new in replacements:
        if old:
            # First mapping wins, matching what successive str.replace calls did.
            literal.setdefault(old, escape(str(new)))

    # Rows two and three share the same quantity text, so every distinct
    # quantity run gets a queue that is consumed in document order.
    pending = {}
    for template_qty, new_qty in zip(_TEMPLATE_QUANTITIES, quantities):
        pending.setdefault(_TEXT_RUN.format(template_qty), []).append(
            _TEXT_RUN.format(escape(str(new_qty)))
        )
    queues = {run: iter(values) for run, values in pending.items()}

    # Longest first, so a placeholder is never shadowed by one of its prefixes.
    keys = sorted([*literal, *queues], key=len, reverse=True)
    pattern = re.compile("|".join(map(re.escape, keys)))

    def replace(match):
        found = match.group(0)
        if found in queues:
            # Occurrences beyond the known rows are left untouched.
            return next(queues[found], found)
        return literal[found]

    return pattern.sub(replace, content)


def _find_soffice():
    """Return the LibreOffice executable to run, or ``None`` if none is found."""
    on_path = shutil.which("soffice")
    if on_path:
        return on_path
    for candidate in _SOFFICE_FALLBACK_PATHS:
        if Path(candidate).exists():
            return candidate
    return None


def _save_docx_fallback(output_docx: Path, output_pdf: Path, reason: str) -> None:
    """Copy the filled DOCX next to the requested PDF path and report why."""
    fallback_docx = output_pdf.with_suffix(".docx")
    shutil.copy(output_docx, fallback_docx)
    print(f"{reason} Invoice saved as DOCX: {fallback_docx}")


def process_invoice(template_path: Path, data: dict, output_pdf: Path) -> None:
    """Fill the invoice template with *data* and write the result as a PDF.

    The template's ``word/document.xml`` is edited as text: its sample values
    are swapped for the values in *data*, the document is re-packed and then
    converted with LibreOffice. When LibreOffice is missing or the conversion
    fails, the filled DOCX is saved next to *output_pdf* (same name, ``.docx``
    suffix) instead.

    Args:
        template_path: Path of the DOCX template.
        data: Invoice data. Reads ``items``, ``invoice_number``, ``date``,
            ``due_date``, ``from`` and ``to``; ``vat_rate`` (default 0.10)
            and ``payment_terms`` (default "Net 30 Days") are optional.
        output_pdf: Destination of the generated PDF.

    Raises:
        KeyError: If a required key is missing from *data*.
    """
    items = data["items"]
    subtotal = sum(item["quantity"] * item["unit_price"] for item in items)
    vat_rate = data.get("vat_rate", 0.10)
    vat_amount = subtotal * vat_rate
    total = subtotal + vat_amount

    extra_items = len(items) - len(_TEMPLATE_ROWS)
    if extra_items > 0:
        print(
            f"Warning: the template lists only {len(_TEMPLATE_ROWS)} line items; "
            f"{extra_items} additional item(s) count toward the totals "
            "but are not shown.",
            file=sys.stderr,
        )

    # One entry per template row; rows without an item are blanked out.
    rows = list(items[: len(_TEMPLATE_ROWS)])
    rows += [None] * (len(_TEMPLATE_ROWS) - len(rows))

    replacements = []
    quantities = []
    for (old_desc, old_price, old_amount), item in zip(_TEMPLATE_ROWS, rows):
        description, quantity, price, amount = _row_cells(item)
        replacements += [
            (old_desc, description),
            (old_amount, amount),
            (old_price, price),
        ]
        quantities.append(quantity)

    replacements += [
        # Totals
        ("$10,448.90", format_currency(total)),
        ("$9,499.00", format_currency(subtotal)),
        ("$949.90", format_currency(vat_amount)),
        # ":g" avoids float truncation (0.29 -> 29, not 28) and keeps 7.5.
        ("VAT (10%)", f"VAT ({vat_rate * 100:g}%)"),
        # Invoice metadata
        ("INV-2025-1001", data["invoice_number"]),
        ("2025-11-30", data["date"]),
        ("2025-12-30", data["due_date"]),
        # Addresses
        ("123 Tech Drive, Suite 400", data["from"]["address_line1"]),
        ("456 Commerce Blvd, Floor 10", data["to"]["address_line1"]),
        ("Anytown, CA 90210", data["from"]["address_line2"]),
        ("Metropolis, NY 10001", data["to"]["address_line2"]),
        # NOTE(review): both e-mail placeholders are the same literal in this
        # source, so only the sender mapping can ever apply; confirm the
        # actual addresses used in the template.
        ("[email protected]", data["from"]["email"]),
        ("[email protected]", data["to"]["email"]),
        # Company and contact names
        ("Innovate Solutions", data["from"]["company"]),
        ("Global Marketing Co.", data["to"]["company"]),
        ("Jane Doe", data["from"]["contact"]),
        ("John Smith", data["to"]["contact"]),
        # Payment terms
        ("Net 30 Days", data.get("payment_terms", "Net 30 Days")),
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        unpacked_dir = temp_path / "unpacked"
        output_docx = temp_path / "invoice.docx"

        # Fill the template.
        unpack_docx(template_path, unpacked_dir)
        doc_xml_path = unpacked_dir / "word" / "document.xml"
        content = doc_xml_path.read_text(encoding="utf-8")
        content = _substitute(content, replacements, quantities)
        doc_xml_path.write_text(content, encoding="utf-8")
        pack_docx(unpacked_dir, output_docx)

        output_pdf.parent.mkdir(parents=True, exist_ok=True)

        soffice_cmd = _find_soffice()
        if soffice_cmd is None:
            _save_docx_fallback(output_docx, output_pdf, "LibreOffice not found.")
            print("Install LibreOffice to enable PDF generation.")
            return

        # Convert inside the temporary directory: LibreOffice names the result
        # after the input file, which must not clobber an unrelated
        # "invoice.pdf" in the destination directory, and a PDF left over
        # from an earlier run must not be mistaken for a fresh result.
        try:
            result = subprocess.run(
                [
                    soffice_cmd,
                    "--headless",
                    "--convert-to",
                    "pdf",
                    "--outdir",
                    str(temp_path),
                    str(output_docx),
                ],
                capture_output=True,
                text=True,
                timeout=60,
            )
        except FileNotFoundError:
            _save_docx_fallback(output_docx, output_pdf, "LibreOffice not found.")
            return
        except subprocess.TimeoutExpired:
            _save_docx_fallback(output_docx, output_pdf, "PDF generation failed.")
            print("Error: LibreOffice conversion timed out")
            return

        generated_pdf = output_docx.with_suffix(".pdf")
        if not generated_pdf.exists():
            _save_docx_fallback(output_docx, output_pdf, "PDF generation failed.")
            print(f"Error: {result.stderr}")
            return

        shutil.move(str(generated_pdf), str(output_pdf))
        print(f"Invoice generated: {output_pdf}")
|
|
|
|
|
def main():
    """Command-line entry point: build an invoice PDF from a JSON data file.

    Expects exactly two arguments, the invoice data JSON and the output PDF
    path. The template is looked up at ``assets/invoice-template.docx`` in
    the directory above the one containing this script.

    Raises:
        SystemExit: With status 1 when the argument count is wrong.
        FileNotFoundError: If the template or the data file does not exist.
    """
    if len(sys.argv) != 3:
        print("Usage: python generate_invoice.py <invoice_data.json> <output.pdf>")
        sys.exit(1)

    data_file = Path(sys.argv[1])
    output_pdf = Path(sys.argv[2])

    # Template is in the assets directory relative to this script.
    script_dir = Path(__file__).parent
    template_path = script_dir.parent / "assets" / "invoice-template.docx"

    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    if not data_file.exists():
        raise FileNotFoundError(f"Data file not found: {data_file}")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(data_file, encoding="utf-8") as f:
        data = json.load(f)

    process_invoice(template_path, data, output_pdf)


if __name__ == "__main__":
    main()