Created
January 16, 2026 11:11
-
-
Save GGontijo/462c7f4920a102e738056d0e4dd2ddcb to your computer and use it in GitHub Desktop.
verificar lançamentos duplicados organizze
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| from datetime import datetime, timedelta | |
| from itertools import combinations | |
def parse_date(s: str):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    The transaction export is expected to use this format in its "date"
    field. ``datetime.strptime`` raises ``ValueError`` when the text does
    not match the format and ``TypeError`` when *s* is not a string.
    """
    parsed = datetime.strptime(s, "%Y-%m-%d")
    return parsed.date()
def cents_to_brl_str(cents: int) -> str:
    """Format an amount in cents as a readable BRL string, e.g. ``-R$ 12,34``.

    Used only for friendly printing: no thousands separator is inserted,
    and the minus sign (if any) goes before the currency symbol.
    """
    reais, centavos = divmod(abs(cents), 100)
    prefix = "-" if cents < 0 else ""
    return f"{prefix}R$ {reais},{centavos:02d}"
def find_possible_duplicates(
    data,
    *,
    max_days_diff=1,  # day tolerance (0 = same day, 1 = up to 1 day apart, ...)
    max_amount_diff_cents=0,  # amount tolerance (0 = equal; e.g. 50 = up to 50 cents)
    same_sign_only=True,  # do not compare an expense against an income
    require_same_account=False,  # if True, only compare within one accountUUID
):
    """Find pairs of transactions that look like duplicates of each other.

    Every pair of entries in ``data["transactions"]`` is compared; a pair is
    reported when both its date distance and its amount distance fall within
    the given tolerances.

    Args:
        data: Mapping with an optional "transactions" list. Each transaction
            must provide "amountInCents" and a parseable "date" (or, as a
            fallback, "firstOccurrenceDate"); "id", "uuid", "description"
            and "accountUUID" are read when present.
        max_days_diff: Maximum absolute difference in days between the two
            dates.
        max_amount_diff_cents: Maximum absolute difference between the two
            amounts, in cents.
        same_sign_only: When true, skip pairs in which exactly one amount is
            negative (zero counts as non-negative).
        require_same_account: When true, skip pairs whose "accountUUID"
            values differ.

    Returns:
        A ``(rows, suspects)`` tuple. ``rows`` holds one normalized dict per
        transaction; ``suspects`` holds one dict per suspicious pair, sorted
        by amount difference and then by day difference (closest first).

    Raises:
        KeyError: If a transaction has no "amountInCents".
        TypeError, ValueError: If neither "date" nor "firstOccurrenceDate"
            can be parsed, or "amountInCents" is not convertible to int.
    """
    rows = []
    for t in data.get("transactions", []):
        # Basic fields (adjust if your JSON has variations).
        try:
            d = parse_date(t["date"])
        except (KeyError, TypeError, ValueError):
            # "date" is missing or malformed: try firstOccurrenceDate instead.
            # Only these three errors are expected here; anything else is a
            # real bug and should surface rather than be masked.
            d = parse_date(t.get("firstOccurrenceDate"))
        rows.append(
            {
                "id": t.get("id"),
                "uuid": t.get("uuid"),
                "date": d,
                "amountInCents": int(t["amountInCents"]),
                "description": (t.get("description") or "").strip(),
                "accountUUID": t.get("accountUUID"),
            }
        )

    suspects = []
    for a, b in combinations(rows, 2):
        # Filters
        if same_sign_only and ((a["amountInCents"] < 0) != (b["amountInCents"] < 0)):
            continue
        if require_same_account and a["accountUUID"] != b["accountUUID"]:
            continue

        days_diff = abs((a["date"] - b["date"]).days)
        amt_diff = abs(a["amountInCents"] - b["amountInCents"])
        if days_diff > max_days_diff or amt_diff > max_amount_diff_cents:
            continue

        suspects.append(
            {
                "a_id": a["id"],
                "b_id": b["id"],
                "a_date": a["date"].isoformat(),
                "b_date": b["date"].isoformat(),
                "a_amount": a["amountInCents"],
                "b_amount": b["amountInCents"],
                "amount_diff_cents": amt_diff,
                "days_diff": days_diff,
                # Descriptions are truncated to keep the report compact.
                "a_desc": a["description"][:140],
                "b_desc": b["description"][:140],
                # Account of the first transaction of the pair only.
                "accountUUID": a["accountUUID"],
            }
        )

    # Most "equal" pairs first. The sort is stable, so ties keep pair order.
    suspects.sort(key=lambda x: (x["amount_diff_cents"], x["days_diff"]))
    return rows, suspects
def group_suspects_by_proximity(suspects):
    """Group suspect pairs into connected components.

    If A pairs with B and B pairs with C, then A, B and C end up in the
    same group, even when A and C were never reported as a pair.

    Args:
        suspects: Iterable of dicts carrying "a_id" and "b_id" keys. It is
            traversed twice, so it must be re-iterable (e.g. a list).

    Returns:
        A list of groups, each a sorted list of ids. Groups appear in the
        order in which their first pair occurs in ``suspects``.
    """
    # Simple union-find: maps each id to its parent; a root is its own parent.
    parent = {}

    def find(x):
        # Iterative on purpose: unions can build a parent chain as long as
        # the input, and a recursive walk over it would exceed Python's
        # recursion limit.
        root = parent.setdefault(x, x)
        while parent[root] != root:
            root = parent[root]
        # Path compression: point every visited node straight at the root.
        while x != root:
            next_node = parent[x]
            parent[x] = root
            x = next_node
        return root

    def union(x, y):
        rx, ry = find(x), find(y)
        if rx != ry:
            parent[ry] = rx

    for s in suspects:
        union(s["a_id"], s["b_id"])

    groups = {}
    for s in suspects:
        members = groups.setdefault(find(s["a_id"]), set())
        members.update((s["a_id"], s["b_id"]))
    return [sorted(members) for members in groups.values()]
if __name__ == "__main__":
    # Step 1: paste the exported JSON between the triple quotes below.
    json_text = r"""PASTE_JSON_AQUI"""
    data = json.loads(json_text)

    # Step 2: tune the tolerances used by the duplicate search.
    rows, suspects = find_possible_duplicates(
        data,
        max_days_diff=1,  # e.g. 0 = same day; 1 = up to one day apart
        max_amount_diff_cents=50,  # e.g. up to 50 cents of difference
        same_sign_only=True,
        require_same_account=False,  # set to True to compare per account only
    )

    print(f"Transações analisadas: {len(rows)}")
    print(f"Pares suspeitos: {len(suspects)}\n")

    # Cap the report at the first 200 pairs to keep the output readable.
    for pair in suspects[:200]:
        ids_part = f"- IDs {pair['a_id']} x {pair['b_id']} | "
        dates_part = (
            f"{pair['a_date']} vs {pair['b_date']} (Δ{pair['days_diff']}d) | "
        )
        amounts_part = (
            f"{cents_to_brl_str(pair['a_amount'])} vs {cents_to_brl_str(pair['b_amount'])} "
            f"(Δ{cents_to_brl_str(pair['amount_diff_cents'])})"
        )
        print(ids_part + dates_part + amounts_part)
        print(f" A: {pair['a_desc']}")
        print(f" B: {pair['b_desc']}\n")

    # Chains of pairs (A~B, B~C) are shown as a single group of ids.
    groups = group_suspects_by_proximity(suspects)
    if groups:
        print("Grupos (possíveis duplicidades em cadeia):")
        for group in groups:
            print(" -", group)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment