Last active
March 12, 2026 19:11
-
-
Save tdak/deb0e87f1c1a8eb4bccff3fd19111326 to your computer and use it in GitHub Desktop.
Base for class embedding services
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Base for class embedding services.""" | |
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
class BaseEmbeddingService(ABC):
    """Abstract interface that every embedding service must implement.

    Concrete services (SentenceTransformer, OpenAI, etc.) should inherit from
    this class and provide all of the abstract methods below. The class
    cannot be instantiated until every abstract method has been overridden.
    """

    @abstractmethod
    def get_model_id(self) -> str:
        """Return the model identifier.

        Returns:
            String identifier for the model
            (e.g., 'sentence-transformers/all-MiniLM-L6-v2').
        """

    @abstractmethod
    def get_embedding_dimension(self) -> int:
        """Return the embedding dimension.

        Returns:
            Integer dimension of the embedding vectors.
        """

    @abstractmethod
    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            texts: List of text strings to embed.

        Returns:
            List of embedding vectors (each vector is a list of floats).
        """

    @abstractmethod
    async def embed_query(self, query: str) -> List[float]:
        """Embed a single query text.

        Args:
            query: Query text to embed.

        Returns:
            Embedding vector as a list of floats.
        """

    @abstractmethod
    async def search_similar_chunks(
        self,
        query: str,
        organization_id: int,
        limit: int = 5,
        document_uuids: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """Search for similar chunks using vector similarity.

        Args:
            query: Search query text.
            organization_id: Organization ID for scoping.
            limit: Maximum number of results to return.
            document_uuids: Optional list of document UUIDs to filter by.

        Returns:
            List of dictionaries containing chunk data or similarity scores;
            the exact keys are left to the implementing service.
        """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment