Skip to content

Instantly share code, notes, and snippets.

@GGontijo
Created January 16, 2026 11:11
Show Gist options
  • Select an option

  • Save GGontijo/462c7f4920a102e738056d0e4dd2ddcb to your computer and use it in GitHub Desktop.

Select an option

Save GGontijo/462c7f4920a102e738056d0e4dd2ddcb to your computer and use it in GitHub Desktop.
verificar lançamentos duplicados organizze
import json
from datetime import datetime, timedelta
from itertools import combinations
def parse_date(s: str):
    """Parse a ``YYYY-MM-DD`` string and return it as a ``date`` object.

    Per the original author's note, Organizze usually uses this format in
    the "date" field. Anything ``strptime`` rejects for this format raises
    ``ValueError``.
    """
    day_format = "%Y-%m-%d"
    parsed = datetime.strptime(s, day_format)
    return parsed.date()
def cents_to_brl_str(cents: int) -> str:
    """Format an integer amount of cents as a BRL string, for display only.

    The result is ``R$ <units>,<two-digit cents>``; negative amounts get a
    leading ``-`` in front of the currency symbol.
    """
    units, remainder = divmod(abs(cents), 100)
    prefix = "-" if cents < 0 else ""
    return f"{prefix}R$ {units},{remainder:02d}"
def find_possible_duplicates(
    data,
    *,
    max_days_diff=1,  # day tolerance (0 = same day, 1 = up to 1 day apart, etc.)
    max_amount_diff_cents=0,  # amount tolerance (0 = equal amount; e.g. 50 = up to 50 cents)
    same_sign_only=True,  # avoid comparing an expense with an income
    require_same_account=False,  # if True, only compare within the same account (accountUUID)
):
    """Find pairs of transactions that look like duplicates of each other.

    Every pair of transactions is compared; a pair is reported when both the
    date gap and the amount gap are within the given tolerances.

    Args:
        data: Mapping with a "transactions" list. Each transaction needs
            "amountInCents" and either "date" or "firstOccurrenceDate";
            "id", "uuid", "description" and "accountUUID" are read when
            present. A missing or null "transactions" is treated as empty.
        max_days_diff: Largest allowed absolute difference in days.
        max_amount_diff_cents: Largest allowed absolute difference in cents.
        same_sign_only: Skip pairs where exactly one amount is negative
            (zero counts as non-negative).
        require_same_account: Skip pairs whose "accountUUID" values differ.

    Returns:
        A ``(rows, suspects)`` tuple. ``rows`` holds one normalized dict per
        transaction. ``suspects`` holds one dict per matching pair, sorted by
        amount difference and then by day difference; descriptions are cut to
        140 characters and "accountUUID" is taken from the first transaction
        of the pair.

    Raises:
        KeyError: If a transaction has no "amountInCents".
        TypeError, ValueError: If "amountInCents" cannot be converted to
            ``int``, or if neither date field can be parsed by ``parse_date``.
    """
    # `or []` also covers an explicit JSON null for "transactions".
    transactions = data.get("transactions") or []

    rows = []
    for t in transactions:
        # Basic fields (adjust if your JSON has variations).
        try:
            d = parse_date(t["date"])
        except (KeyError, TypeError, ValueError):
            # "date" is missing, null or malformed: fall back to
            # firstOccurrenceDate. Only these errors are caught so that
            # unrelated failures are not silently hidden.
            d = parse_date(t.get("firstOccurrenceDate"))
        amt = int(t["amountInCents"])
        rows.append(
            {
                "id": t.get("id"),
                "uuid": t.get("uuid"),
                "date": d,
                "amountInCents": amt,
                "description": (t.get("description") or "").strip(),
                "accountUUID": t.get("accountUUID"),
            }
        )

    suspects = []
    for a, b in combinations(rows, 2):
        # Filters
        if same_sign_only and ((a["amountInCents"] < 0) != (b["amountInCents"] < 0)):
            continue
        if require_same_account and (a["accountUUID"] != b["accountUUID"]):
            continue

        days_diff = abs((a["date"] - b["date"]).days)
        amt_diff = abs(a["amountInCents"] - b["amountInCents"])
        if days_diff > max_days_diff or amt_diff > max_amount_diff_cents:
            continue

        suspects.append(
            {
                "a_id": a["id"],
                "b_id": b["id"],
                "a_date": a["date"].isoformat(),
                "b_date": b["date"].isoformat(),
                "a_amount": a["amountInCents"],
                "b_amount": b["amountInCents"],
                "amount_diff_cents": amt_diff,
                "days_diff": days_diff,
                "a_desc": a["description"][:140],
                "b_desc": b["description"][:140],
                "accountUUID": a["accountUUID"],
            }
        )

    # Sort so that the most "equal" pairs come first.
    suspects.sort(key=lambda x: (x["amount_diff_cents"], x["days_diff"]))
    return rows, suspects
def group_suspects_by_proximity(suspects):
    """Group suspect pairs into connected components.

    If A~B and B~C are both suspect pairs, A, B and C end up in one group.

    Args:
        suspects: Iterable of dicts with "a_id" and "b_id" keys. It is
            traversed twice, so pass a list rather than a one-shot iterator.
            The ids are used as dict keys and are sorted within each group,
            so they must be hashable and mutually orderable.

    Returns:
        A list of groups, each a sorted list of the ids it contains.
    """
    # Simple union-find
    parent = {}

    def find(x):
        # Iterative on purpose: parent chains can grow as long as the number
        # of pairs, and a recursive walk would hit Python's recursion limit.
        parent.setdefault(x, x)
        root = x
        while parent[root] != root:
            root = parent[root]
        # Path compression: point every node on the walked path at the root.
        while parent[x] != root:
            next_node = parent[x]
            parent[x] = root
            x = next_node
        return root

    def union(x, y):
        rx, ry = find(x), find(y)
        if rx != ry:
            parent[ry] = rx

    for s in suspects:
        union(s["a_id"], s["b_id"])

    groups = {}
    for s in suspects:
        root = find(s["a_id"])
        groups.setdefault(root, set()).update((s["a_id"], s["b_id"]))

    return [sorted(members) for members in groups.values()]
if __name__ == "__main__":
    # 1) Paste your JSON here as a string:
    json_text = r"""PASTE_JSON_AQUI"""
    data = json.loads(json_text)

    # Adjust the tolerances here:
    rows, suspects = find_possible_duplicates(
        data,
        max_days_diff=1,  # e.g. 0 = same day; 1 = up to 1 day apart
        max_amount_diff_cents=50,  # e.g. up to 50 cents of difference
        same_sign_only=True,
        require_same_account=False,  # set to True to restrict matches to one account
    )

    print(f"Transações analisadas: {len(rows)}")
    print(f"Pares suspeitos: {len(suspects)}\n")

    # Only the first 200 pairs are printed, to keep the output manageable.
    for pair in suspects[:200]:
        ids_part = f"- IDs {pair['a_id']} x {pair['b_id']} | "
        dates_part = f"{pair['a_date']} vs {pair['b_date']} (Δ{pair['days_diff']}d) | "
        amounts_part = (
            f"{cents_to_brl_str(pair['a_amount'])} vs {cents_to_brl_str(pair['b_amount'])} "
        )
        delta_part = f"(Δ{cents_to_brl_str(pair['amount_diff_cents'])})"
        print(ids_part + dates_part + amounts_part + delta_part)
        print(f" A: {pair['a_desc']}")
        print(f" B: {pair['b_desc']}\n")

    groups = group_suspects_by_proximity(suspects)
    if groups:
        print("Grupos (possíveis duplicidades em cadeia):")
        for group in groups:
            print(" -", group)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment