Skip to content

Instantly share code, notes, and snippets.

@MKyhos
Created November 23, 2025 14:28
Show Gist options
  • Select an option

  • Save MKyhos/fe3dcc5d41887aa45a9371476f5f06ac to your computer and use it in GitHub Desktop.

Select an option

Save MKyhos/fe3dcc5d41887aa45a9371476f5f06ac to your computer and use it in GitHub Desktop.
PrivatBank24 Account Statement Parser
#!/bin/python3
import pdfplumber
import polars as pl
from tqdm import tqdm
def clean_numeric_column(col_name: str) -> pl.Expr:
    """Build an expression that parses a text amount column into ``Float64``.

    The column is expected to hold numbers written with a decimal comma and
    whitespace as the thousands separator (e.g. ``"1 234 567,89"``).

    Args:
        col_name: Name of the string column to convert.

    Returns:
        A polars expression yielding the parsed values under the same
        column name.
    """
    return (
        pl.col(col_name)
        # replace_all, not replace: `str.replace` only substitutes the first
        # match, which leaves a separator behind in values >= 1 000 000 and
        # makes the cast below fail. The regex `\s` removes every whitespace
        # character, including line breaks inside a cell.
        .str.replace_all(r"\s", "")
        .str.replace_all(",", ".", literal=True)
        .cast(pl.Float64)
        .alias(col_name)
    )
def extract_tables(pdf_path: str) -> pl.DataFrame:
    """Extract every table from a statement PDF into one typed DataFrame.

    The first row of each extracted table is treated as a header and
    discarded; the remaining rows are labelled with a fixed set of eight
    column names. The ``timestamp`` column is parsed as
    ``%d.%m.%Y %H:%M`` and the amount/balance columns are converted to
    floats with ``clean_numeric_column``. The ``amount_transaction_cur``
    column is read but not part of the result.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A DataFrame with the columns ``timestamp``, ``account``,
        ``details``, ``amount_card_cur``, ``amount_charge``,
        ``amount_discount`` and ``balance_after_transaction``. If the PDF
        contains no tables, an empty DataFrame with these columns is
        returned.
    """
    schema_names = [
        "timestamp",
        "account",
        "details",
        "amount_transaction_cur",
        "amount_card_cur",
        "amount_charge",
        "amount_discount",
        "balance_after_transaction",
    ]
    numeric_columns = [
        "amount_card_cur",
        "amount_charge",
        "amount_discount",
        "balance_after_transaction",
    ]
    output_columns = ["timestamp", "account", "details", *numeric_columns]

    # Every raw column is read as text. Declaring the dtype up front keeps
    # the string parsing below valid even when a column contains only empty
    # cells, and avoids building the frame from the PDF's own header text.
    raw_schema = {name: pl.String for name in schema_names}

    tables = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in tqdm(pdf.pages):
            for table in page.extract_tables():
                if not table:
                    continue
                tables.append(
                    pl.DataFrame(
                        data=table[1:],  # row 0 is the header row
                        schema=raw_schema,
                        orient="row",
                    )
                )

    if not tables:
        # pl.concat raises on an empty list; return an empty frame with the
        # final schema so callers can simply check the row count.
        return pl.DataFrame(
            schema={
                "timestamp": pl.Datetime,
                "account": pl.String,
                "details": pl.String,
                **{name: pl.Float64 for name in numeric_columns},
            }
        )

    # Data parsing
    data = pl.concat(tables, how="diagonal_relaxed")
    return data.with_columns(
        pl.col("timestamp")
        .str.to_datetime(format="%d.%m.%Y %H:%M")
        .alias("timestamp"),
        *[clean_numeric_column(name) for name in numeric_columns],
    ).select(output_columns)
if __name__ == "__main__":
    # Command-line entry point: convert the PDF given as the first argument
    # into "extracted_<pdf name>.csv" in the current working directory.
    import os
    import sys

    args = sys.argv
    if len(args) < 2:
        # Diagnostics go to stderr so stdout only carries progress output.
        print("Usage: python parse_privatbank.py <pdf_file>", file=sys.stderr)
        sys.exit(1)

    pdf_path = args[1]
    # isfile rather than exists: a directory path would otherwise pass this
    # check and only fail later inside the PDF reader.
    if not os.path.isfile(pdf_path):
        print(f"Error: File '{pdf_path}' not found", file=sys.stderr)
        sys.exit(1)

    try:
        print(f"Processing PDF: {pdf_path}")
        data = extract_tables(pdf_path)
        if data.shape[0] == 0:
            print("No tables found in the PDF")
            # SystemExit is not an Exception subclass, so it is not caught
            # by the handler below.
            sys.exit(0)
        pdf_stem = os.path.splitext(os.path.basename(pdf_path))[0]
        output_path = f"extracted_{pdf_stem}.csv"
        data.write_csv(output_path)
        print(f"Wrote {data.shape[0]} rows to {output_path}")
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"Error processing PDF: {e}", file=sys.stderr)
        sys.exit(1)
[project]
name = "parse_privatbank"
version = "0.1.0"
description = "Extracts CSV from PrivatBank24 account statements"
requires-python = ">=3.11"
dependencies = [
"pdfplumber>=0.11.7",
"polars>=1.33.1",
"tqdm>=4.67.1",
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment