Skip to content

Instantly share code, notes, and snippets.

@dalepotter
Last active December 4, 2025 22:19
Show Gist options
  • Select an option

  • Save dalepotter/3cf719dbf21ec881e48ffda105bb3553 to your computer and use it in GitHub Desktop.

Select an option

Save dalepotter/3cf719dbf21ec881e48ffda105bb3553 to your computer and use it in GitHub Desktop.
Find which transactions sum closest to a target value (subset sum with dynamic programming)
# Test using `$ pytest closest_subset_sum.py`
# TODO
# - Allow CSV file & target amount to be specified as a command line arg
# - Strip `£` from prices if it exists
# - Normalise case differences in column headers: `date` vs. `Date`
import csv
import sys
import unittest
def find_best_subset(items, target):
"""
Find the subset of items that gets closest to target without exceeding it.
Uses dynamic programming (0/1 knapsack approach).
Args:
items: List of dicts with 'date', 'description', 'amount'
target: Target value (float)
Returns:
Tuple of (best_total, set of indices to include)
"""
# Work in pence to avoid floating point issues
target_pence = round(target * 100)
num_items = len(items)
# Convert amounts to pence
amounts_pence = [round(item['amount'] * 100) for item in items]
# Dynamic programming (DP) table: dp[i][w] = maximum value achievable using first i items with capacity w
dp = [[0 for _ in range(target_pence + 1)] for _ in range(num_items + 1)]
# Build the DP table
for item_index in range(1, num_items + 1):
item_value = amounts_pence[item_index - 1]
for capacity in range(target_pence + 1):
# Don't include this item
dp[item_index][capacity] = dp[item_index - 1][capacity]
# Try including this item
if item_value <= capacity:
with_item = dp[item_index - 1][capacity - item_value] + item_value
if with_item > dp[item_index][capacity]:
dp[item_index][capacity] = with_item
# Backtrack to find which items were selected
best_value_pence = dp[num_items][target_pence]
selected_indices = set()
remaining_capacity = target_pence
for item_index in range(num_items, 0, -1):
# Check if this item was included
if remaining_capacity >= amounts_pence[item_index - 1] and dp[item_index][remaining_capacity] == dp[item_index - 1][remaining_capacity - amounts_pence[item_index - 1]] + amounts_pence[item_index - 1]:
selected_indices.add(item_index - 1)
remaining_capacity -= amounts_pence[item_index - 1]
best_total = round(best_value_pence / 100, 2)
return best_total, selected_indices
def main():
# Configuration
csv_filename = 'transactions.csv' # Change this to your CSV filename
target = 150.00 # Change this to your target amount
# Read CSV file
items = []
try:
with open(csv_filename, 'r', encoding='utf-8') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
items.append({
'date': row['date'].strip(),
'description': row['description'].strip(),
'amount': float(row['amount'].strip())
})
except FileNotFoundError:
print(f"Error: File '{csv_filename}' not found.")
sys.exit(1)
except Exception as error:
print(f"Error reading CSV: {error}")
sys.exit(1)
if not items:
print("No transactions found in CSV file.")
sys.exit(1)
# Find best subset
best_total, selected_indices = find_best_subset(items, target)
# Display results
print(f"\nTarget: £{target:.2f}")
print(f"Best achievable total: £{best_total:.2f}")
print(f"Under target by: £{target - best_total:.2f}")
print(f"\n{'Date':<15} {'Description':<35} {'Amount':>10} {'Include':>10}")
print("-" * 75)
for index, item in enumerate(items):
include = "Yes" if index in selected_indices else "No"
print(f"{item['date']:<15} {item['description']:<35} £{item['amount']:>8.2f} {include:>10}")
print("-" * 75)
print(f"{'TOTAL':<51} £{best_total:>8.2f}")
print(f"\nTransactions included: {len(selected_indices)} of {len(items)}")
if __name__ == "__main__":
main()
class FindBestSubsetTests(unittest.TestCase):
def test_exact_match_single_item(self):
"""Single item that exactly matches target should be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 50.00}
]
total, indices = find_best_subset(items, 50.00)
self.assertEqual(total, 50.00)
self.assertEqual(indices, {0})
def test_exact_match_multiple_items(self):
"""Multiple items that sum to target exactly should all be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 25.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 100.00)
self.assertEqual(len(indices), 3)
def test_no_items(self):
"""Empty list should return 0 and empty set."""
items = []
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_all_items_exceed_target(self):
"""When all items exceed target, should return 0."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 100.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 200.00}
]
total, indices = find_best_subset(items, 50.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_closest_without_exceeding(self):
"""Should find closest sum without exceeding target."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 40.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00}
]
total, indices = find_best_subset(items, 75.00)
# Best is 30 + 40 = 70 (not 50 + 30 = 80 which exceeds)
self.assertEqual(total, 70.00)
self.assertEqual(len(indices), 2)
def test_single_item_under_target(self):
"""Single item below target should be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 30.00)
self.assertEqual(indices, {0})
def test_floating_point_precision(self):
"""Pence conversion should handle floating point correctly."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.99},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.50},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.51}
]
total, indices = find_best_subset(items, 32.00)
# Best is 10.99 + 20.50 = 31.49
self.assertEqual(total, 31.49)
self.assertEqual(len(indices), 2)
def test_all_items_fit_under_target(self):
"""When sum of all items is less than target, select all."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 30.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 60.00)
self.assertEqual(len(indices), 3)
def test_returns_correct_indices(self):
"""Verify returned indices map to correct items."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.00}
]
total, indices = find_best_subset(items, 40.00)
# Should select items at indices 1 and 2 (25 + 15 = 40)
selected_sum = sum(items[idx]['amount'] for idx in indices)
self.assertEqual(total, selected_sum)
self.assertEqual(total, 40.00)
class EdgeCaseTests(unittest.TestCase):
def test_target_zero(self):
"""Target of 0 should return 0 and empty set."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00}
]
total, indices = find_best_subset(items, 0.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_very_small_amounts(self):
"""Test pence conversion with very small amounts like 0.01."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 0.01},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 0.02},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 0.03}
]
total, indices = find_best_subset(items, 0.05)
# Best is 0.02 + 0.03 = 0.05
self.assertEqual(total, 0.05)
self.assertEqual(len(indices), 2)
def test_single_penny_precision(self):
"""Test that single penny differences are handled correctly."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 1.23},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 4.56},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 7.89}
]
total, indices = find_best_subset(items, 5.79)
# Best is 1.23 + 4.56 = 5.79 (exact match)
self.assertEqual(total, 5.79)
self.assertEqual(len(indices), 2)
def test_many_small_items(self):
"""Test with many small items to ensure algorithm handles larger sets."""
items = [{'date': f'2024-01-{item_num:02d}', 'description': f'Item {item_num}', 'amount': 1.00}
for item_num in range(1, 21)]
total, indices = find_best_subset(items, 15.00)
self.assertEqual(total, 15.00)
self.assertEqual(len(indices), 15)
def test_duplicate_amounts(self):
"""Test with duplicate amounts to ensure correct selection."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 10.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 10.00}
]
total, indices = find_best_subset(items, 25.00)
# Should select 2 items totaling 20.00 (can't reach 25.00)
self.assertEqual(total, 20.00)
self.assertEqual(len(indices), 2)
class IntegrationTests(unittest.TestCase):
def test_realistic_expense_scenario(self):
"""Test with realistic transaction data."""
items = [
{'date': '2024-01-05', 'description': 'Grocery Store', 'amount': 45.67},
{'date': '2024-01-08', 'description': 'Gas Station', 'amount': 52.30},
{'date': '2024-01-12', 'description': 'Restaurant', 'amount': 28.50},
{'date': '2024-01-15', 'description': 'Pharmacy', 'amount': 15.99},
{'date': '2024-01-20', 'description': 'Coffee Shop', 'amount': 8.75},
{'date': '2024-01-25', 'description': 'Bookstore', 'amount': 32.40}
]
target = 150.00
total, indices = find_best_subset(items, target)
# Verify total doesn't exceed target
self.assertLessEqual(total, target)
# Verify all selected items sum correctly
selected_sum = round(sum(items[idx]['amount'] for idx in indices), 2)
self.assertEqual(total, selected_sum)
# For this data, best should be 149.61 (all except one item)
self.assertGreater(total, 140.00)
def test_subset_choice_optimization(self):
"""Verify algorithm chooses optimal subset over greedy approach."""
items = [
{'date': '2024-01-01', 'description': 'Large item', 'amount': 60.00},
{'date': '2024-01-02', 'description': 'Medium 1', 'amount': 35.00},
{'date': '2024-01-03', 'description': 'Medium 2', 'amount': 35.00}
]
target = 70.00
total, indices = find_best_subset(items, target)
# Greedy might pick 60, but optimal is 35 + 35 = 70
self.assertEqual(total, 70.00)
self.assertEqual(len(indices), 2)
def test_mixed_currency_values(self):
"""Test with various realistic currency amounts."""
items = [
{'date': '2024-02-01', 'description': 'Subscription', 'amount': 9.99},
{'date': '2024-02-03', 'description': 'Utilities', 'amount': 123.45},
{'date': '2024-02-07', 'description': 'Insurance', 'amount': 87.50},
{'date': '2024-02-14', 'description': 'Internet', 'amount': 55.00},
{'date': '2024-02-20', 'description': 'Phone', 'amount': 45.00}
]
target = 200.00
total, indices = find_best_subset(items, target)
self.assertLessEqual(total, target)
# Verify indices are valid
for idx in indices:
self.assertLess(idx, len(items))
self.assertGreaterEqual(idx, 0)
# Verify sum
selected_sum = sum(items[idx]['amount'] for idx in indices)
self.assertAlmostEqual(total, selected_sum, places=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment