Skip to content

Instantly share code, notes, and snippets.

@GreatPotato
Last active November 12, 2025 16:36
Show Gist options
  • Select an option

  • Save GreatPotato/5ee67c9ed671d5cdb18a0709168036f8 to your computer and use it in GitHub Desktop.

Select an option

Save GreatPotato/5ee67c9ed671d5cdb18a0709168036f8 to your computer and use it in GitHub Desktop.
import argparse
import csv
import json
from collections import defaultdict
from pathlib import Path
def csv_to_json(input_file, key_col, value_col, valid_keys=None, valid_values=None):
    """Group one CSV column by another and write the result as a JSON file.

    The first row of ``input_file`` is used as the header and the row directly
    after it is discarded (it holds type definitions). For every remaining
    row, the stripped ``value_col`` cell is collected under the stripped
    ``key_col`` cell. Duplicate values are dropped, the values of each key and
    the keys themselves are sorted, and the mapping is written to a file with
    the same path as ``input_file`` but with the suffix replaced by ``.json``.

    A warning is printed for every entry of ``valid_keys`` / ``valid_values``
    that never occurs in the file, and a ``Saved: <path>`` line is printed
    once the JSON file has been written.

    Args:
        input_file: Path of the UTF-8 encoded CSV file to read.
        key_col: Header name of the column whose cells become JSON keys.
        value_col: Header name of the column whose cells become list items.
        valid_keys: Optional collection of keys to keep; rows with any other
            key are skipped. ``None`` or an empty collection disables the
            filter.
        valid_values: Optional collection of values to keep; rows with any
            other value are skipped. ``None`` or an empty collection disables
            the filter.

    Raises:
        KeyError: If ``key_col`` or ``value_col`` is not a column of the CSV
            (raised when the first data row is processed).
    """
    # Build the lookup sets once so membership tests inside the loop are O(1).
    key_filter = set(valid_keys) if valid_keys else None
    value_filter = set(valid_values) if valid_values else None

    grouped = defaultdict(set)
    found_keys = set()
    found_values = set()

    with open(input_file, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        # Skip the second row of the file (type definitions).
        next(reader, None)
        for row in reader:
            # DictReader fills cells that are missing from a short row with
            # None; treat them like empty cells instead of crashing on
            # ``None.strip()``. Indexing (not .get) keeps the KeyError for an
            # unknown column name.
            key = (row[key_col] or "").strip()
            val = (row[value_col] or "").strip()

            key_ok = key_filter is None or key in key_filter
            value_ok = value_filter is None or val in value_filter

            # Each filter is tracked independently: an entry counts as found
            # even when the row is then rejected by the other filter.
            if key_filter is not None and key_ok:
                found_keys.add(key)
            if value_filter is not None and value_ok:
                found_values.add(val)

            if key_ok and value_ok:
                grouped[key].add(val)

    # Warn about filter entries that never appeared in the file.
    if key_filter is not None:
        unused_keys = key_filter - found_keys
        if unused_keys:
            print(f"Warning: The following filter-keys were not found: {', '.join(sorted(unused_keys))}")
    if value_filter is not None:
        unused_values = value_filter - found_values
        if unused_values:
            print(f"Warning: The following filter-values were not found: {', '.join(sorted(unused_values))}")

    # Plain dict with keys in sorted order and each value list sorted.
    sorted_data = {key: sorted(grouped[key]) for key in sorted(grouped)}

    output_file = Path(input_file).with_suffix(".json")
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(sorted_data, f, indent=2)
    print(f"Saved: {output_file}")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the conversion.
    cli = argparse.ArgumentParser(description="Convert CSV to grouped JSON")
    cli.add_argument("input", help="Input CSV file")

    # Mandatory column selectors.
    for flag, text in (
        ("--key", "Column to use as JSON key"),
        ("--value", "Column to use as JSON value"),
    ):
        cli.add_argument(flag, required=True, help=text)

    # Optional filters, each taking one or more items.
    for flag, text in (
        ("--filter-keys", "List of valid keys to include (omit all others)"),
        ("--filter-values", "List of valid values to include (omit all others)"),
    ):
        cli.add_argument(flag, nargs="+", help=text)

    options = cli.parse_args()
    csv_to_json(
        options.input,
        options.key,
        options.value,
        options.filter_keys,
        options.filter_values,
    )

Example usage:

python convert.py fertiliser_production_impact_predefined_ferts_V3.csv --key fertiliser_label --value Region

Output:

Saved: fertiliser_production_impact_predefined_ferts_V3.json

python convert.py <input csv> --key <key> --value <value> [--filter-keys <keys...>] [--filter-values <values...>]

e.g.

python convert.py cattle_feed.csv --key feed_item__name --value region__name --filter-keys "Barley" "Beans" "Brans" --filter-values "Argentina" "Australia" "Austria"

python convert.py fertiliser_production_impact_predefined_ferts_V3.csv --key fertiliser_label --value Region --filter-keys "Ammonium nitrate - 33.5% N (granulated)" "Ammonium nitrate - 33.5% N (prilled)" "Ammonium sulphate - 21% N" --filter-values "Africa 2014" "China 2014" "Europe 2014"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment