nielsdejong · July 29, 2025 12:26
diff --git a/spotify-instruments.py b/spotify-instruments.py
 import pandas as pd
 import os
 import io # Import the io module to handle byte streams

 from langchain_core.documents import Document # Still useful for metadata, but not for LLM inference
 from neo4j import GraphDatabase
 from google.colab import files # Import the files module for uploading

 # --- Configuration ---
 # Connection settings for the target Neo4j instance.
 # NOTE(review): the password value looks like a placeholder — supply the real one before running.
 NEO4J_URI = "neo4j+s://46b4811c.databases.neo4j.io"
 NEO4J_USERNAME = "neo4j"
 NEO4J_PASSWORD = "..."
 # Only used in log messages below; the uploaded file may have any name.
 CONCEPTUAL_CSV_NAME = "yacht_rock.csv"

 # --- Script Execution ---
 # Connect to Neo4j
 driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
 try:
     # Fail fast if the database cannot be reached with these credentials.
     driver.verify_connectivity()
     print("Connected to Neo4j.")

     print("LLM and LLMGraphTransformer components are removed for direct CSV parsing.")

     # Load CSV
     print(f"Please upload any CSV file. It will be processed as '{CONCEPTUAL_CSV_NAME}'.")
     # `uploaded` maps each uploaded filename to that file's raw bytes.
     uploaded = files.upload()

     if not uploaded:
         raise FileNotFoundError("No file was uploaded. Please upload a CSV file.")

     # Only one file is expected; take the first key.
     actual_uploaded_filename = next(iter(uploaded))

     # Parse directly from the in-memory bytes rather than from disk.
     df = pd.read_csv(io.BytesIO(uploaded[actual_uploaded_filename]))

     # --- Delete the uploaded file from Colab's temporary storage ---
     # Best effort: a failed delete is reported but does not stop the run.
     try:
         os.remove(actual_uploaded_filename)
         print(f"'{actual_uploaded_filename}' deleted from Colab's temporary storage.")
     except OSError as e:
         print(f"Error deleting file '{actual_uploaded_filename}': {e}")
     # --- End file deletion ---

     # Both columns are read by the ingestion step that follows.
     if not {"Track URI", "Instruments"}.issubset(df.columns):
         raise ValueError("CSV must contain 'Track URI' and 'Instruments' columns.")
     print(f"Loaded CSV from uploaded file: {actual_uploaded_filename} (processed as '{CONCEPTUAL_CSV_NAME}')")
 except Exception:
     # The driver was opened above; release it before propagating so a failed
     # upload or validation does not leave the connection open.
     driver.close()
     raise

 # --- Graph ingestion ---
 # For every CSV row, parse the "Instruments" cell into a list of names and link
 # each name (as an Instrument node) to the existing Song node whose trackUri
 # equals the row's "Track URI". The driver is closed in `finally` so the
 # connection is released even if a query fails part-way through the file.
 try:
     with driver.session() as session:
         print("Skipping deletion of old 'Track' and 'Instrument' nodes in Neo4j.")
         # Index Instrument.name, the property the per-instrument MERGE looks up.
         session.run("CREATE INDEX IF NOT EXISTS FOR (i:Instrument) ON (i.name)")

         # Iterate through the DataFrame rows
         for _, row in df.iterrows():
             track_id = row["Track URI"]
             raw_instruments = row["Instruments"]
             instruments_text = str(raw_instruments)  # Kept for the warning message

             if pd.isna(raw_instruments):
                 # A missing cell would otherwise be stringified to "nan" and
                 # stored as an Instrument with that name.
                 instruments = []
             else:
                 # Clean the instruments text by removing brackets
                 cleaned_instruments_text = instruments_text.replace('[', '').replace(']', '')

                 # Attempt to split by comma first
                 instruments = [
                     part.strip()
                     for part in cleaned_instruments_text.split(',')
                     if part.strip()
                 ]

                 # Fall back to space-separated parsing only when the cell holds no
                 # comma at all; otherwise a cell such as "piano, " would produce
                 # the bogus name "piano,".
                 if (
                     len(instruments) <= 1
                     and ',' not in cleaned_instruments_text
                     and ' ' in cleaned_instruments_text
                 ):
                     instruments = [
                         part.strip()
                         for part in cleaned_instruments_text.split(' ')
                         if part.strip()
                     ]

             # Check if instruments list is empty after all parsing attempts
             if not instruments:
                 print(f"Warning: No valid instruments found for Track '{track_id}' from input: '{instruments_text}'. Skipping relationship creation.")
                 continue # Skip to the next row

             # Track nodes carry the Song label and are only matched, never created.
             # MERGE (not CREATE) on the relationship keeps re-runs and repeated
             # instrument names from producing duplicate FEATURES_INSTRUMENT edges.
             # The aggregate always returns exactly one row, even when nothing matched.
             result = session.run("""
                 UNWIND $instruments AS instrument
                 MATCH (t:Song {trackUri: $track_id})
                 MERGE (i:Instrument {name: instrument})
                 MERGE (t)-[:FEATURES_INSTRUMENT]->(i)
                 RETURN count(DISTINCT t) AS matched_songs
             """, track_id=track_id, instruments=instruments)

             if result.single()["matched_songs"] == 0:
                 # No Song matched, so the query created nothing; do not report success.
                 print(f"Warning: No Song found with trackUri '{track_id}'. Nothing was linked.")
                 continue
             print(f"Linked '{instruments}' to Track '{track_id}'.")
 finally:
     driver.close()
 print("\nScript finished. Check Neo4j.")
	import pandas as pd
	import os
	import io # Import the io module to handle byte streams

	from langchain_core.documents import Document # Still useful for metadata, but not for LLM inference
	from neo4j import GraphDatabase
	from google.colab import files # Import the files module for uploading

	# --- Configuration ---
	# Connection settings for the target Neo4j instance.
	# NOTE(review): the password value looks like a placeholder — supply the real one before running.
	NEO4J_URI = "neo4j+s://46b4811c.databases.neo4j.io"
	NEO4J_USERNAME = "neo4j"
	NEO4J_PASSWORD = "..."
	# Only used in log messages below; the uploaded file may have any name.
	CONCEPTUAL_CSV_NAME = "yacht_rock.csv"

	# --- Script Execution ---
	# Connect to Neo4j
	driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
	try:
	    # Fail fast if the database cannot be reached with these credentials.
	    driver.verify_connectivity()
	    print("Connected to Neo4j.")

	    print("LLM and LLMGraphTransformer components are removed for direct CSV parsing.")

	    # Load CSV
	    print(f"Please upload any CSV file. It will be processed as '{CONCEPTUAL_CSV_NAME}'.")
	    # `uploaded` maps each uploaded filename to that file's raw bytes.
	    uploaded = files.upload()

	    if not uploaded:
	        raise FileNotFoundError("No file was uploaded. Please upload a CSV file.")

	    # Only one file is expected; take the first key.
	    actual_uploaded_filename = next(iter(uploaded))

	    # Parse directly from the in-memory bytes rather than from disk.
	    df = pd.read_csv(io.BytesIO(uploaded[actual_uploaded_filename]))

	    # --- Delete the uploaded file from Colab's temporary storage ---
	    # Best effort: a failed delete is reported but does not stop the run.
	    try:
	        os.remove(actual_uploaded_filename)
	        print(f"'{actual_uploaded_filename}' deleted from Colab's temporary storage.")
	    except OSError as e:
	        print(f"Error deleting file '{actual_uploaded_filename}': {e}")
	    # --- End file deletion ---

	    # Both columns are read by the ingestion step that follows.
	    if not {"Track URI", "Instruments"}.issubset(df.columns):
	        raise ValueError("CSV must contain 'Track URI' and 'Instruments' columns.")
	    print(f"Loaded CSV from uploaded file: {actual_uploaded_filename} (processed as '{CONCEPTUAL_CSV_NAME}')")
	except Exception:
	    # The driver was opened above; release it before propagating so a failed
	    # upload or validation does not leave the connection open.
	    driver.close()
	    raise

	# --- Graph ingestion ---
	# For every CSV row, parse the "Instruments" cell into a list of names and link
	# each name (as an Instrument node) to the existing Song node whose trackUri
	# equals the row's "Track URI". The driver is closed in `finally` so the
	# connection is released even if a query fails part-way through the file.
	try:
	    with driver.session() as session:
	        print("Skipping deletion of old 'Track' and 'Instrument' nodes in Neo4j.")
	        # Index Instrument.name, the property the per-instrument MERGE looks up.
	        session.run("CREATE INDEX IF NOT EXISTS FOR (i:Instrument) ON (i.name)")

	        # Iterate through the DataFrame rows
	        for _, row in df.iterrows():
	            track_id = row["Track URI"]
	            raw_instruments = row["Instruments"]
	            instruments_text = str(raw_instruments)  # Kept for the warning message

	            if pd.isna(raw_instruments):
	                # A missing cell would otherwise be stringified to "nan" and
	                # stored as an Instrument with that name.
	                instruments = []
	            else:
	                # Clean the instruments text by removing brackets
	                cleaned_instruments_text = instruments_text.replace('[', '').replace(']', '')

	                # Attempt to split by comma first
	                instruments = [
	                    part.strip()
	                    for part in cleaned_instruments_text.split(',')
	                    if part.strip()
	                ]

	                # Fall back to space-separated parsing only when the cell holds no
	                # comma at all; otherwise a cell such as "piano, " would produce
	                # the bogus name "piano,".
	                if (
	                    len(instruments) <= 1
	                    and ',' not in cleaned_instruments_text
	                    and ' ' in cleaned_instruments_text
	                ):
	                    instruments = [
	                        part.strip()
	                        for part in cleaned_instruments_text.split(' ')
	                        if part.strip()
	                    ]

	            # Check if instruments list is empty after all parsing attempts
	            if not instruments:
	                print(f"Warning: No valid instruments found for Track '{track_id}' from input: '{instruments_text}'. Skipping relationship creation.")
	                continue # Skip to the next row

	            # Track nodes carry the Song label and are only matched, never created.
	            # MERGE (not CREATE) on the relationship keeps re-runs and repeated
	            # instrument names from producing duplicate FEATURES_INSTRUMENT edges.
	            # The aggregate always returns exactly one row, even when nothing matched.
	            result = session.run("""
	                UNWIND $instruments AS instrument
	                MATCH (t:Song {trackUri: $track_id})
	                MERGE (i:Instrument {name: instrument})
	                MERGE (t)-[:FEATURES_INSTRUMENT]->(i)
	                RETURN count(DISTINCT t) AS matched_songs
	            """, track_id=track_id, instruments=instruments)

	            if result.single()["matched_songs"] == 0:
	                # No Song matched, so the query created nothing; do not report success.
	                print(f"Warning: No Song found with trackUri '{track_id}'. Nothing was linked.")
	                continue
	            print(f"Linked '{instruments}' to Track '{track_id}'.")
	finally:
	    driver.close()
	print("\nScript finished. Check Neo4j.")
No results found