xarical · June 23, 2025 00:38
diff --git a/load_dataset.py b/load_dataset.py
 """
 Dependencies: datasets, huggingface_hub
 Environment variables: HF_API_KEY, DATASET_ID
 """

 import os
 import json

 from datasets import load_dataset
 from huggingface_hub import HfApi

 # Required settings, read once when the module is imported; os.environ[...] raises KeyError if a variable is unset
 dataset_id = os.environ["DATASET_ID"] # ID of the HF dataset, expected type: string
 hf_api_key = os.environ["HF_API_KEY"] # HF API key to access the dataset, expected type: string

 def load_ds() -> list[dict]:
    """
    Load and process the user_data from the HF dataset

    Reads the "train" split of the dataset identified by dataset_id and
    converts its column-oriented dictionary into one dictionary per row,
    keeping only the "name", "category", "date" and "amount" fields.

    Returns:
        A list of row dictionaries. If loading or converting raises any
        exception, a warning is printed and an empty list is returned.
    """

    fields = ("name", "category", "date", "amount")

    # Deliberately best-effort: any failure (the handler assumes the dataset
    # is empty or does not exist) falls back to an empty list
    try:
        # Load the dataset and convert the "train" split to a dict of columns
        dataset = load_dataset(dataset_id, token=hf_api_key)
        columns = dataset["train"].to_dict()

        # Transpose the columns into rows; changes the data from
        # {
        #     "name": ["A", "B", "C"],
        #     ...
        #     "amount": [100, 200, 300],
        # }
        # to
        # [
        #     {"name": "A", ..., "amount": 100},
        #     ...
        #     {"name": "C", ..., "amount": 300},
        # ]
        # A missing column raises KeyError here and is handled below
        user_data = [
            dict(zip(fields, row))
            for row in zip(*(columns[field] for field in fields))
        ]

    except Exception as e:
        print("WARNING: dataset is empty or does not exist(?):", e)
        user_data = []

    return user_data

 def update_ds(user_data: list[dict]) -> None:
    """
    Write user_data to a local data.json file and upload it to the HF dataset

    Args:
        user_data: the row dictionaries to store; serialized as-is with
            json.dump (no filtering is applied here).
    """

    json_path = "data.json"

    # Serialize user_data as indented JSON into the local file
    with open(json_path, 'w') as out:
        json.dump(user_data, out, indent=4)

    # Push the file to the dataset repo under the same file name
    HfApi().upload_file(
        path_or_fileobj=json_path,
        path_in_repo=json_path,
        repo_id=dataset_id,
        repo_type="dataset",
        commit_message="Update data.json 🤖",
        token=hf_api_key,
    )
    print("Database updated!")
	"""
	Dependencies: datasets, huggingface_hub
	Environment variables: HF_API_KEY, DATASET_ID
	"""

	import os
	import json

	from datasets import load_dataset
	from huggingface_hub import HfApi

	# Required settings, read once when the module is imported; os.environ[...] raises KeyError if a variable is unset
	dataset_id = os.environ["DATASET_ID"] # ID of the HF dataset, expected type: string
	hf_api_key = os.environ["HF_API_KEY"] # HF API key to access the dataset, expected type: string

	def load_ds() -> list[dict]:
	    """
	    Load and process the user_data from the HF dataset

	    Reads the "train" split of the dataset identified by dataset_id and
	    converts its column-oriented dictionary into one dictionary per row,
	    keeping only the "name", "category", "date" and "amount" fields.

	    Returns:
	        A list of row dictionaries. If loading or converting raises any
	        exception, a warning is printed and an empty list is returned.
	    """

	    fields = ("name", "category", "date", "amount")

	    # Deliberately best-effort: any failure (the handler assumes the dataset
	    # is empty or does not exist) falls back to an empty list
	    try:
	        # Load the dataset and convert the "train" split to a dict of columns
	        dataset = load_dataset(dataset_id, token=hf_api_key)
	        columns = dataset["train"].to_dict()

	        # Transpose the columns into rows; changes the data from
	        # {
	        #     "name": ["A", "B", "C"],
	        #     ...
	        #     "amount": [100, 200, 300],
	        # }
	        # to
	        # [
	        #     {"name": "A", ..., "amount": 100},
	        #     ...
	        #     {"name": "C", ..., "amount": 300},
	        # ]
	        # A missing column raises KeyError here and is handled below
	        user_data = [
	            dict(zip(fields, row))
	            for row in zip(*(columns[field] for field in fields))
	        ]

	    except Exception as e:
	        print("WARNING: dataset is empty or does not exist(?):", e)
	        user_data = []

	    return user_data

	def update_ds(user_data: list[dict]) -> None:
	    """
	    Write user_data to a local data.json file and upload it to the HF dataset

	    Args:
	        user_data: the row dictionaries to store; serialized as-is with
	            json.dump (no filtering is applied here).
	    """

	    json_path = "data.json"

	    # Serialize user_data as indented JSON into the local file
	    with open(json_path, 'w') as out:
	        json.dump(user_data, out, indent=4)

	    # Push the file to the dataset repo under the same file name
	    HfApi().upload_file(
	        path_or_fileobj=json_path,
	        path_in_repo=json_path,
	        repo_id=dataset_id,
	        repo_type="dataset",
	        commit_message="Update data.json 🤖",
	        token=hf_api_key,
	    )
	    print("Database updated!")
No results found