Last active
December 4, 2025 22:19
-
-
Save dalepotter/3cf719dbf21ec881e48ffda105bb3553 to your computer and use it in GitHub Desktop.
Find which transactions sum closest to a target value (subset sum with dynamic programming)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Test using `$ pytest closest_subset_sum.py` | |
| # TODO | |
| # - Allow CSV file & target amount to be specified as a command line arg | |
| # - Strip `£` from prices if it exists | |
| # - Normalise case differences in column headers: `date` vs. `Date` | |
| import csv | |
| import sys | |
| import unittest | |
| def find_best_subset(items, target): | |
| """ | |
| Find the subset of items that gets closest to target without exceeding it. | |
| Uses dynamic programming (0/1 knapsack approach). | |
| Args: | |
| items: List of dicts with 'date', 'description', 'amount' | |
| target: Target value (float) | |
| Returns: | |
| Tuple of (best_total, set of indices to include) | |
| """ | |
| # Work in pence to avoid floating point issues | |
| target_pence = round(target * 100) | |
| num_items = len(items) | |
| # Convert amounts to pence | |
| amounts_pence = [round(item['amount'] * 100) for item in items] | |
| # Dynamic programming (DP) table: dp[i][w] = maximum value achievable using first i items with capacity w | |
| dp = [[0 for _ in range(target_pence + 1)] for _ in range(num_items + 1)] | |
| # Build the DP table | |
| for item_index in range(1, num_items + 1): | |
| item_value = amounts_pence[item_index - 1] | |
| for capacity in range(target_pence + 1): | |
| # Don't include this item | |
| dp[item_index][capacity] = dp[item_index - 1][capacity] | |
| # Try including this item | |
| if item_value <= capacity: | |
| with_item = dp[item_index - 1][capacity - item_value] + item_value | |
| if with_item > dp[item_index][capacity]: | |
| dp[item_index][capacity] = with_item | |
| # Backtrack to find which items were selected | |
| best_value_pence = dp[num_items][target_pence] | |
| selected_indices = set() | |
| remaining_capacity = target_pence | |
| for item_index in range(num_items, 0, -1): | |
| # Check if this item was included | |
| if remaining_capacity >= amounts_pence[item_index - 1] and dp[item_index][remaining_capacity] == dp[item_index - 1][remaining_capacity - amounts_pence[item_index - 1]] + amounts_pence[item_index - 1]: | |
| selected_indices.add(item_index - 1) | |
| remaining_capacity -= amounts_pence[item_index - 1] | |
| best_total = round(best_value_pence / 100, 2) | |
| return best_total, selected_indices | |
| def main(): | |
| # Configuration | |
| csv_filename = 'transactions.csv' # Change this to your CSV filename | |
| target = 150.00 # Change this to your target amount | |
| # Read CSV file | |
| items = [] | |
| try: | |
| with open(csv_filename, 'r', encoding='utf-8') as csvfile: | |
| reader = csv.DictReader(csvfile) | |
| for row in reader: | |
| items.append({ | |
| 'date': row['date'].strip(), | |
| 'description': row['description'].strip(), | |
| 'amount': float(row['amount'].strip()) | |
| }) | |
| except FileNotFoundError: | |
| print(f"Error: File '{csv_filename}' not found.") | |
| sys.exit(1) | |
| except Exception as error: | |
| print(f"Error reading CSV: {error}") | |
| sys.exit(1) | |
| if not items: | |
| print("No transactions found in CSV file.") | |
| sys.exit(1) | |
| # Find best subset | |
| best_total, selected_indices = find_best_subset(items, target) | |
| # Display results | |
| print(f"\nTarget: £{target:.2f}") | |
| print(f"Best achievable total: £{best_total:.2f}") | |
| print(f"Under target by: £{target - best_total:.2f}") | |
| print(f"\n{'Date':<15} {'Description':<35} {'Amount':>10} {'Include':>10}") | |
| print("-" * 75) | |
| for index, item in enumerate(items): | |
| include = "Yes" if index in selected_indices else "No" | |
| print(f"{item['date']:<15} {item['description']:<35} £{item['amount']:>8.2f} {include:>10}") | |
| print("-" * 75) | |
| print(f"{'TOTAL':<51} £{best_total:>8.2f}") | |
| print(f"\nTransactions included: {len(selected_indices)} of {len(items)}") | |
| if __name__ == "__main__": | |
| main() | |
| class FindBestSubsetTests(unittest.TestCase): | |
| def test_exact_match_single_item(self): | |
| """Single item that exactly matches target should be selected.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 50.00} | |
| ] | |
| total, indices = find_best_subset(items, 50.00) | |
| self.assertEqual(total, 50.00) | |
| self.assertEqual(indices, {0}) | |
| def test_exact_match_multiple_items(self): | |
| """Multiple items that sum to target exactly should all be selected.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 25.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00} | |
| ] | |
| total, indices = find_best_subset(items, 100.00) | |
| self.assertEqual(total, 100.00) | |
| self.assertEqual(len(indices), 3) | |
| def test_no_items(self): | |
| """Empty list should return 0 and empty set.""" | |
| items = [] | |
| total, indices = find_best_subset(items, 100.00) | |
| self.assertEqual(total, 0.00) | |
| self.assertEqual(indices, set()) | |
| def test_all_items_exceed_target(self): | |
| """When all items exceed target, should return 0.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 100.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 200.00} | |
| ] | |
| total, indices = find_best_subset(items, 50.00) | |
| self.assertEqual(total, 0.00) | |
| self.assertEqual(indices, set()) | |
| def test_closest_without_exceeding(self): | |
| """Should find closest sum without exceeding target.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 40.00}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00} | |
| ] | |
| total, indices = find_best_subset(items, 75.00) | |
| # Best is 30 + 40 = 70 (not 50 + 30 = 80 which exceeds) | |
| self.assertEqual(total, 70.00) | |
| self.assertEqual(len(indices), 2) | |
| def test_single_item_under_target(self): | |
| """Single item below target should be selected.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00} | |
| ] | |
| total, indices = find_best_subset(items, 100.00) | |
| self.assertEqual(total, 30.00) | |
| self.assertEqual(indices, {0}) | |
| def test_floating_point_precision(self): | |
| """Pence conversion should handle floating point correctly.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.99}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.50}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.51} | |
| ] | |
| total, indices = find_best_subset(items, 32.00) | |
| # Best is 10.99 + 20.50 = 31.49 | |
| self.assertEqual(total, 31.49) | |
| self.assertEqual(len(indices), 2) | |
| def test_all_items_fit_under_target(self): | |
| """When sum of all items is less than target, select all.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 30.00} | |
| ] | |
| total, indices = find_best_subset(items, 100.00) | |
| self.assertEqual(total, 60.00) | |
| self.assertEqual(len(indices), 3) | |
| def test_returns_correct_indices(self): | |
| """Verify returned indices map to correct items.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.00} | |
| ] | |
| total, indices = find_best_subset(items, 40.00) | |
| # Should select items at indices 1 and 2 (25 + 15 = 40) | |
| selected_sum = sum(items[idx]['amount'] for idx in indices) | |
| self.assertEqual(total, selected_sum) | |
| self.assertEqual(total, 40.00) | |
| class EdgeCaseTests(unittest.TestCase): | |
| def test_target_zero(self): | |
| """Target of 0 should return 0 and empty set.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00} | |
| ] | |
| total, indices = find_best_subset(items, 0.00) | |
| self.assertEqual(total, 0.00) | |
| self.assertEqual(indices, set()) | |
| def test_very_small_amounts(self): | |
| """Test pence conversion with very small amounts like 0.01.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 0.01}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 0.02}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 0.03} | |
| ] | |
| total, indices = find_best_subset(items, 0.05) | |
| # Best is 0.02 + 0.03 = 0.05 | |
| self.assertEqual(total, 0.05) | |
| self.assertEqual(len(indices), 2) | |
| def test_single_penny_precision(self): | |
| """Test that single penny differences are handled correctly.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 1.23}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 4.56}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 7.89} | |
| ] | |
| total, indices = find_best_subset(items, 5.79) | |
| # Best is 1.23 + 4.56 = 5.79 (exact match) | |
| self.assertEqual(total, 5.79) | |
| self.assertEqual(len(indices), 2) | |
| def test_many_small_items(self): | |
| """Test with many small items to ensure algorithm handles larger sets.""" | |
| items = [{'date': f'2024-01-{item_num:02d}', 'description': f'Item {item_num}', 'amount': 1.00} | |
| for item_num in range(1, 21)] | |
| total, indices = find_best_subset(items, 15.00) | |
| self.assertEqual(total, 15.00) | |
| self.assertEqual(len(indices), 15) | |
| def test_duplicate_amounts(self): | |
| """Test with duplicate amounts to ensure correct selection.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00}, | |
| {'date': '2024-01-02', 'description': 'Item 2', 'amount': 10.00}, | |
| {'date': '2024-01-03', 'description': 'Item 3', 'amount': 10.00} | |
| ] | |
| total, indices = find_best_subset(items, 25.00) | |
| # Should select 2 items totaling 20.00 (can't reach 25.00) | |
| self.assertEqual(total, 20.00) | |
| self.assertEqual(len(indices), 2) | |
| class IntegrationTests(unittest.TestCase): | |
| def test_realistic_expense_scenario(self): | |
| """Test with realistic transaction data.""" | |
| items = [ | |
| {'date': '2024-01-05', 'description': 'Grocery Store', 'amount': 45.67}, | |
| {'date': '2024-01-08', 'description': 'Gas Station', 'amount': 52.30}, | |
| {'date': '2024-01-12', 'description': 'Restaurant', 'amount': 28.50}, | |
| {'date': '2024-01-15', 'description': 'Pharmacy', 'amount': 15.99}, | |
| {'date': '2024-01-20', 'description': 'Coffee Shop', 'amount': 8.75}, | |
| {'date': '2024-01-25', 'description': 'Bookstore', 'amount': 32.40} | |
| ] | |
| target = 150.00 | |
| total, indices = find_best_subset(items, target) | |
| # Verify total doesn't exceed target | |
| self.assertLessEqual(total, target) | |
| # Verify all selected items sum correctly | |
| selected_sum = round(sum(items[idx]['amount'] for idx in indices), 2) | |
| self.assertEqual(total, selected_sum) | |
| # For this data, best should be 149.61 (all except one item) | |
| self.assertGreater(total, 140.00) | |
| def test_subset_choice_optimization(self): | |
| """Verify algorithm chooses optimal subset over greedy approach.""" | |
| items = [ | |
| {'date': '2024-01-01', 'description': 'Large item', 'amount': 60.00}, | |
| {'date': '2024-01-02', 'description': 'Medium 1', 'amount': 35.00}, | |
| {'date': '2024-01-03', 'description': 'Medium 2', 'amount': 35.00} | |
| ] | |
| target = 70.00 | |
| total, indices = find_best_subset(items, target) | |
| # Greedy might pick 60, but optimal is 35 + 35 = 70 | |
| self.assertEqual(total, 70.00) | |
| self.assertEqual(len(indices), 2) | |
| def test_mixed_currency_values(self): | |
| """Test with various realistic currency amounts.""" | |
| items = [ | |
| {'date': '2024-02-01', 'description': 'Subscription', 'amount': 9.99}, | |
| {'date': '2024-02-03', 'description': 'Utilities', 'amount': 123.45}, | |
| {'date': '2024-02-07', 'description': 'Insurance', 'amount': 87.50}, | |
| {'date': '2024-02-14', 'description': 'Internet', 'amount': 55.00}, | |
| {'date': '2024-02-20', 'description': 'Phone', 'amount': 45.00} | |
| ] | |
| target = 200.00 | |
| total, indices = find_best_subset(items, target) | |
| self.assertLessEqual(total, target) | |
| # Verify indices are valid | |
| for idx in indices: | |
| self.assertLess(idx, len(items)) | |
| self.assertGreaterEqual(idx, 0) | |
| # Verify sum | |
| selected_sum = sum(items[idx]['amount'] for idx in indices) | |
| self.assertAlmostEqual(total, selected_sum, places=2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment