Skip to content

Instantly share code, notes, and snippets.

WITH index_fields AS (
-- Parse each field from the index definitions
SELECT
vectorindexes__name as index_name,
path as field_path,
numdimensions,
type,
CASE
WHEN numdimensions IS NOT NULL THEN 'vector'
ELSE 'filter'
@hweller1
hweller1 / normalize_vectors.py
Created August 13, 2025 17:32
Simple script to normalize new vectors, or to update existing vectors stored in MongoDB as arrays of floats or as binData
import math
from pymongo import MongoClient, UpdateOne
from bson.binary import Binary
def normalize(vector):
    """Scale ``vector`` to unit (Euclidean / L2) length.

    Args:
        vector: Sequence of numbers. It is traversed more than once, so a
            one-shot iterator is not suitable.

    Returns:
        A new list in which every component has been divided by the
        vector's Euclidean magnitude. When the magnitude is not greater
        than zero (for example an empty or all-zero input) no scaling is
        possible, and an unscaled copy of the input is returned instead.
    """
    magnitude = math.sqrt(sum(x ** 2 for x in vector))
    if magnitude > 0:
        return [x / magnitude for x in vector]
    # Return a copy rather than the caller's own object so that every code
    # path hands back a fresh list; mutating the result never alters the input.
    return list(vector)
def normalize_vectors(vectors=None, db_uri="mongodb://localhost:27017", db_name="vectors", collection_name="data", batch_size=1000):
@hweller1
hweller1 / pgvector_filtered_search_query_pattern.py
Created July 10, 2025 19:17
Filtered two-stage vector search using pgvector
`CREATE OR REPLACE FUNCTION two_stage_vector_search_optimized_prefiltered(
query_embedding vector,
match_limit int,
num_candidates int,
filter_category text DEFAULT NULL::text,
max_price double precision DEFAULT NULL::double precision
)
RETURNS TABLE (
id int8,
@hweller1
hweller1 / MRL_truncate_view
Created May 22, 2025 23:48
Example view definition for truncating MRL vectors
db.createView(
"512_embeddings",
"wikipedia-22-12-en 2",
[
{
$addFields: {
"512_embedding": { $slice: ["$embedding", 512] }
}
}
]
@hweller1
hweller1 / gist:5174da29e4eb5e5a5230d26f79902aaf
Created October 25, 2024 16:09
bson_sample_dataset_ingest.py
import pymongo
import json
from bson.binary import Binary, BinaryVectorDtype
connection_str = ""
client = pymongo.MongoClient(connection_str) # mongodb cluster URI
db = client['bsontestdb']
coll = db['embedded_mflix_bson']
import os
import openai
embedding = openai.Embedding.create(input="test query", model="text-embedding-ada-002").data[0].embedding
pipeline = [
{
"$vectorSearch": {
"index":'bson_vector_index',
"path": "plot_embedding",
"queryVector": embedding,
@hweller1
hweller1 / scalar_quantize_openai.py
Created October 9, 2024 18:08
User-space scalar quantization (SQ) of OpenAI text-embedding-ada-002 embeddings (indexing + querying)
# deps
# %pip install pymongo openai numpy
import numpy as np
import os
os.environ["OPENAI_API_KEY"] = '<my-openai-key>'
import pymongo
import openai
"knnBeta": {
"filter": {
"compound": {
"must": [
{
"queryString": {
"query": "TEST_NAME_1 AND TEST_NAME_2~2 NOT TEST_NAME_3",
"defaultPath": "descriptions"
}
},
searchQ.filter = filter;
}
else if(beds || rooms) {
let filter = { "$and" : []}
if (beds) {
filter.$and.push({"beds" : {"$gte" : parseInt(beds) }})
}
if (rooms)
{
{$vectorSearch:{
"index": "default",
"queryVector": embedding,
"path": "doc_embedding",
"k": 100,
"numCandidates": 1000
}
}