Output the first N rows of a CSV file into another file:
head -n 5 data.csv > header.csv
Generate a PostgreSQL schema from a CSV file:
pip install csvkit
csvsql -d "," -q '"' -i postgresql spotifyschema.csv > spotify_schema.sql
| def semantic_cached_prompt(prompt): | |
| """Retrieve a response from ScyllaDB or ask OpenAI if it's a new prompt. | |
| Args: | |
| prompt (str): The user prompt. | |
| Returns: | |
| str: The response to the prompt. | |
| """ | |
| embedding = create_embedding(prompt) | |
| cached_response = search_cache(embedding, threshold=0.80) | |
| if cached_response: |
def calc_cosine_similarity(self, vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence NumPy can convert to an array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors, in ``[-1, 1]``.
        Returns ``0.0`` when either vector has zero magnitude, because the
        cosine is undefined there and a plain division would yield NaN
        together with a NumPy RuntimeWarning.
    """
    v1, v2 = np.asarray(vec1), np.asarray(vec2)
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    # Guard the 0/0 case so callers comparing against a threshold never
    # receive NaN.
    if denominator == 0:
        return 0.0
    return np.dot(v1, v2) / denominator
| def search_cache(self, embedding, threshold=0.80): | |
| """ | |
| Returns the most similar response if it is above the threshold. Otherwise returns `None`. | |
| """ | |
| k = 1 |
| # pip install sentence-transformers | |
| from sentence_transformers import SentenceTransformer | |
| embedding_model = SentenceTransformer('all-MiniLM-L6-v2') | |
| embedding = embedding_model.encode(text).tolist() |
| CREATE KEYSPACE semantic_cache WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}; | |
| CREATE TABLE semantic_cache.prompts ( | |
| prompt_id uuid PRIMARY KEY, | |
| inserted_at timestamp, | |
| prompt_text text, | |
| prompt_embedding vector<float, 384>, | |
| llm_response text, | |
| updated_at timestamp | |
| ); |
| CREATE KEYSPACE recommend WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}; | |
| CREATE TABLE recommend.movies ( | |
| id INT, | |
| release_date TIMESTAMP, | |
| title TEXT, | |
| tagline TEXT, | |
| genre TEXT, | |
| imdb_id TEXT, | |
| poster_url TEXT, |
| from db.scylladb import ScyllaClient | |
| from embedding_creator import EmbeddingCreator | |
| from models import Movie | |
| class MovieRecommender: | |
| def __init__(self): | |
| self.scylla_client = ScyllaClient() | |
| self.embedding_creator = EmbeddingCreator("all-MiniLM-L6-v2") | |
| from sentence_transformers import SentenceTransformer | |
| class EmbeddingCreator: | |
| def __init__(self, model_name: str = 'all-MiniLM-L6-v2'): | |
| self.embedding_model = SentenceTransformer(model_name, device='cpu') | |
| def create_embedding(self, text: str) -> list[float]: | |
| """ | |
| Get embedding for a single text input using SentenceTransformer. | |
| Returns the embedding vector. | |
| """ |
| CREATE KEYSPACE ttl_example WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; | |
| USE ttl_example; | |
| ----------------------------------------------------------- | |
| -- BASIC USAGE | |
| ----------------------------------------------------------- | |
| -- Set default TTL with CREATE TABLE |
Output the first N rows of a CSV file into another file:
head -n 5 data.csv > header.csv
Generate a PostgreSQL schema from a CSV file:
pip install csvkit
csvsql -d "," -q '"' -i postgresql spotifyschema.csv > spotify_schema.sql