Skip to content

Instantly share code, notes, and snippets.

View zseta's full-sized avatar

Attila Tóth zseta

View GitHub Profile
def semantic_cached_prompt(prompt):
"""Retrieve a response from ScyllaDB or ask OpenAI if it's a new prompt.
Args:
prompt (str): The user prompt.
Returns:
str: The response to the prompt.
"""
embedding = create_embedding(prompt)
cached_response = search_cache(embedding, threshold=0.80)
if cached_response:
def calc_cosine_similarity(self, vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence NumPy can convert to an array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. Returns ``0.0`` when either vector has zero norm
        (including empty vectors), because the similarity is undefined there.
    """
    v1, v2 = np.asarray(vec1), np.asarray(vec2)
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denominator == 0:
        # Dividing by a zero norm would yield NaN plus a RuntimeWarning;
        # report "no similarity" so threshold comparisons stay well-defined.
        return 0.0
    return np.dot(v1, v2) / denominator
def search_cache(self, embedding, threshold=0.80):
"""
Returns the most similar response if it is above the threshold. Otherwise returns `None`.
"""
k = 1
# pip install sentence-transformers
from sentence_transformers import SentenceTransformer
# Load the 'all-MiniLM-L6-v2' model by name.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode `text` (defined outside this snippet) and convert the result with
# .tolist() -- presumably to a plain Python list of floats; confirm.
embedding = embedding_model.encode(text).tolist()
-- Keyspace for the semantic cache: NetworkTopologyStrategy, replication factor 3.
CREATE KEYSPACE semantic_cache WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': '3'};
-- Prompts table: one row per prompt_id (the primary key).
CREATE TABLE semantic_cache.prompts (
prompt_id uuid PRIMARY KEY,
inserted_at timestamp,
prompt_text text,
-- 384-dimensional float vector; presumably the embedding of prompt_text
-- (NOTE(review): confirm 384 matches the embedding model's output size).
prompt_embedding vector<float, 384>,
llm_response text,
updated_at timestamp
);
CREATE KEYSPACE recommend WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': '3'};
CREATE TABLE recommend.movies (
id INT,
release_date TIMESTAMP,
title TEXT,
tagline TEXT,
genre TEXT,
imdb_id TEXT,
poster_url TEXT,
from db.scylladb import ScyllaClient
from embedding_creator import EmbeddingCreator
from models import Movie
class MovieRecommender:
def __init__(self):
self.scylla_client = ScyllaClient()
self.embedding_creator = EmbeddingCreator("all-MiniLM-L6-v2")
from sentence_transformers import SentenceTransformer
class EmbeddingCreator:
def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
# Build the SentenceTransformer named by `model_name` with device='cpu'
# and keep it on the instance as `embedding_model`.
self.embedding_model = SentenceTransformer(model_name, device='cpu')
def create_embedding(self, text: str) -> list[float]:
"""
Get embedding for a single text input using SentenceTransformer.
Returns the embedding vector.
"""
@zseta
zseta / scylla-ttl.cql
Created September 11, 2025 13:42
Code used for this video: https://youtu.be/SXkbu7mFHeA
CREATE KEYSPACE ttl_example WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1};
USE ttl_example;
-----------------------------------------------------------
-- BASIC USAGE
-----------------------------------------------------------
-- Set default TTL with CREATE TABLE
@zseta
zseta / linux_csv.md
Created April 21, 2024 15:53
Working with large CSV files in Linux to generate database schema

Output the first N rows of a CSV file into another file:

head -n 5 data.csv > header.csv

Generate schema from csv

pip install csvkit

csvsql -d "," -q '"' -i postgresql spotifyschema.csv > spotify_schema.sql
@zseta
zseta / backup_docker_ssh.md
Created January 2, 2024 20:07
Backup Docker PostgreSQL database using SSH connection

Backup process with vanilla PostgreSQL or TimescaleDB

Connect to remote instance using SSH:

ssh example.com

List running Docker containers:

docker ps