Created
October 25, 2024 16:09
-
-
Save hweller1/5174da29e4eb5e5a5230d26f79902aaf to your computer and use it in GitHub Desktop.
bson_sample_dataset_ingest.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pymongo | |
| import json | |
| from bson.binary import Binary, BinaryVectorDtype | |
# MongoDB cluster URI. It is an empty string here; supply the real URI.
connection_str = ""
client = pymongo.MongoClient(connection_str)
db = client['bsontestdb']
coll = db['embedded_mflix_bson']

# Load the whole dataset into memory. The encoding is passed explicitly so
# the file is decoded as UTF-8 regardless of the platform's default text
# encoding.
with open("root_level_same_name_output_2.json", "r", encoding="utf-8") as f:
    json_obj = json.load(f)
def generate_bson_vector(vector):
    """Return ``vector`` wrapped as a BSON ``Binary`` vector.

    The conversion is delegated to ``Binary.from_vector`` with the
    ``BinaryVectorDtype.FLOAT32`` element type.
    """
    element_dtype = BinaryVectorDtype.FLOAT32
    return Binary.from_vector(vector, element_dtype)
# Names of the two embedding fields converted on every record. They are the
# same for each record, so they are defined once instead of per iteration.
openai_embed_key = "plot_embedding"
minilm_embed = "plot_embedding_all-miniLM-L6-v2"

for record in json_obj:
    # Replace each embedding in place with its BSON binary vector form.
    record[openai_embed_key] = generate_bson_vector(record[openai_embed_key])
    record[minilm_embed] = generate_bson_vector(record[minilm_embed])

# Insert all converted records with one bulk call rather than one
# insert_one round trip per record. insert_many rejects an empty document
# list, so the call is skipped when there is nothing to insert.
if json_obj:
    coll.insert_many(json_obj)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment