Skip to content

Instantly share code, notes, and snippets.

@hweller1
Created October 25, 2024 16:09
Show Gist options
  • Select an option

  • Save hweller1/5174da29e4eb5e5a5230d26f79902aaf to your computer and use it in GitHub Desktop.

Select an option

Save hweller1/5174da29e4eb5e5a5230d26f79902aaf to your computer and use it in GitHub Desktop.
bson_sample_dataset_ingest.py
import pymongo
import json
from bson.binary import Binary, BinaryVectorDtype
# Fill in the MongoDB cluster URI before running the script.
connection_str = ""
client = pymongo.MongoClient(connection_str) # mongodb cluster URI
db = client['bsontestdb']
coll = db['embedded_mflix_bson']

# Load the records to ingest. The encoding is given explicitly so the file is
# decoded as UTF-8 regardless of the platform's locale default.
with open("root_level_same_name_output_2.json", "r", encoding="utf-8") as f:
    json_obj = json.load(f)
def generate_bson_vector(vector) -> Binary:
    """Pack *vector* into a BSON ``Binary`` vector with the FLOAT32 dtype.

    Args:
        vector: The embedding values to pack; passed straight through to
            ``Binary.from_vector``.

    Returns:
        The ``Binary`` value produced by ``Binary.from_vector`` using
        ``BinaryVectorDtype.FLOAT32``.
    """
    return Binary.from_vector(vector, BinaryVectorDtype.FLOAT32)
# Names of the two embedding fields on each record; they do not change between
# records, so they are defined once rather than on every loop iteration.
openai_embed_key = "plot_embedding"
minilm_embed = "plot_embedding_all-miniLM-L6-v2"

# Replace each embedding, in place, with its BSON vector form.
for record in json_obj:
    record[openai_embed_key] = generate_bson_vector(record[openai_embed_key])
    record[minilm_embed] = generate_bson_vector(record[minilm_embed])

# Write all converted records in one batched call instead of one round-trip
# per document. insert_many rejects an empty document list, so skip the call
# when there is nothing to insert.
if json_obj:
    coll.insert_many(json_obj)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment