|
import argparse |
|
import csv |
|
import json |
|
from collections import defaultdict |
|
from pathlib import Path |
|
|
|
|
|
def csv_to_json(input_file, key_col, value_col, valid_keys=None, valid_values=None):
    """Group the values of one CSV column by another and save them as JSON.

    The CSV is read with ``csv.DictReader``. The line directly below the
    header is skipped because it holds type definitions rather than data.
    Keys and values are whitespace-stripped, de-duplicated per key and
    sorted alphabetically; keys are sorted as well. The result is written
    next to the input file, with its suffix replaced by ``.json``.

    Args:
        input_file: Path of the CSV file to read.
        key_col: Header name of the column whose cells become JSON keys.
        value_col: Header name of the column whose cells are listed under
            each key.
        valid_keys: Optional collection of keys to keep; rows with any
            other key are dropped. ``None`` or an empty collection
            disables the filter.
        valid_values: Optional collection of values to keep; rows with any
            other value are dropped. ``None`` or an empty collection
            disables the filter.

    Raises:
        KeyError: If a data row is read and ``key_col`` or ``value_col`` is
            not one of the CSV's columns.
    """
    # Build the filters once as sets so per-row membership tests are O(1).
    key_filter = set(valid_keys) if valid_keys else None
    value_filter = set(valid_values) if valid_values else None

    # Sets de-duplicate values per key without a linear scan on every row.
    grouped = defaultdict(set)
    found_keys = set()
    found_values = set()

    with open(input_file, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        # Skip the second row (type definitions)
        next(reader, None)
        for row in reader:
            try:
                raw_key = row[key_col]
                raw_val = row[value_col]
            except KeyError as err:
                # Same exception type as a plain lookup, but say which
                # column is missing and what the file actually offers.
                raise KeyError(
                    f"Column {err.args[0]!r} not found in {input_file}; "
                    f"available columns: {reader.fieldnames}"
                ) from None

            # DictReader fills cells missing from short rows with None;
            # treat those like empty cells instead of crashing on .strip().
            key = (raw_key or "").strip()
            val = (raw_val or "").strip()

            key_ok = key_filter is None or key in key_filter
            val_ok = value_filter is None or val in value_filter

            # Track which filter items were actually found. This happens
            # before any skipping, so a key counts as found even when its
            # row is later dropped by the value filter (and vice versa).
            if key_filter is not None and key_ok:
                found_keys.add(key)
            if value_filter is not None and val_ok:
                found_values.add(val)

            if key_ok and val_ok:
                grouped[key].add(val)

    # Warn about unused filter items
    if key_filter is not None:
        unused_keys = key_filter - found_keys
        if unused_keys:
            print(f"Warning: The following filter-keys were not found: {', '.join(sorted(unused_keys))}")

    if value_filter is not None:
        unused_values = value_filter - found_values
        if unused_values:
            print(f"Warning: The following filter-values were not found: {', '.join(sorted(unused_values))}")

    # Plain dict ordered by key, each value list sorted alphabetically.
    sorted_data = {key: sorted(values) for key, values in sorted(grouped.items())}

    output_file = Path(input_file).with_suffix(".json")
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(sorted_data, f, indent=2)
    print(f"Saved: {output_file}")
|
|
|
if __name__ == "__main__":
    # Command-line entry point: one positional CSV path, two required
    # column selectors and two optional allow-lists, all forwarded to
    # csv_to_json().
    cli = argparse.ArgumentParser(description="Convert CSV to grouped JSON")
    cli.add_argument("input", help="Input CSV file")

    # Required column selectors.
    for flag, description in (
        ("--key", "Column to use as JSON key"),
        ("--value", "Column to use as JSON value"),
    ):
        cli.add_argument(flag, required=True, help=description)

    # Optional allow-lists; each takes one or more items when given.
    for flag, description in (
        ("--filter-keys", "List of valid keys to include (omit all others)"),
        ("--filter-values", "List of valid values to include (omit all others)"),
    ):
        cli.add_argument(flag, nargs="+", help=description)

    options = cli.parse_args()

    csv_to_json(
        options.input,
        options.key,
        options.value,
        valid_keys=options.filter_keys,
        valid_values=options.filter_values,
    )