# This example shows how to use Cohere binary embeddings to get a 32x reduction in memory
# and up to a 40x faster search speed.
# You need the Cohere Python SDK as well as faiss:
# pip install cohere faiss-cpu numpy

import faiss
import cohere
import numpy as np
documents = [
    "Alan Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.",
    "Albert Einstein was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time.",
    "Isaac Newton was an English polymath active as a mathematician, physicist, astronomer, alchemist, theologian, and author who was described in his time as a natural philosopher.",
    "Marie Curie was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity"
]
# Step 1: Get your Cohere API key from: www.cohere.com
api_key = "<<YOUR_API_KEY>>"
co = cohere.Client(api_key)
# Step 2: Create a faiss IndexBinaryFlat index
num_dim = 1024  # Use 1024 dimensions for embed-english-v3.0 and 384 for the light models
index = faiss.IndexBinaryFlat(num_dim)
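# Added note: faiss binary indexes take their dimension in *bits*. Vectors are
# added and stored as packed uint8 arrays, so each 1024-bit embedding occupies
# 1024 / 8 = 128 bytes, and search returns Hamming distances. A minimal sanity
# check on the packed layout:
assert index.code_size == num_dim // 8  # 128 bytes per vector for num_dim=1024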
# Step 3: Compute the document embeddings in batches
batch_size = 384
for start_idx in range(0, len(documents), batch_size):
    batch_documents = documents[start_idx:start_idx+batch_size]

    # Compute the binary embeddings of your documents. Set input_type to "search_document"
    # and embedding_types to ["ubinary"] to get packed unsigned binary embeddings.
    embeddings = co.embed(texts=batch_documents, model="embed-english-v3.0", input_type="search_document", embedding_types=["ubinary"]).embeddings.ubinary

    # Cast embeddings to a uint8 numpy array
    embeddings = np.asarray(embeddings, dtype='uint8')

    # Add the embeddings to the faiss index
    index.add(embeddings)

print("Indexing of documents finished")
# Optional: Write index to disk
index_name = "my_index.bin"
faiss.write_index_binary(index, index_name)

# Optional: Load index from disk
index = faiss.read_index_binary(index_name)
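# Added sanity check: the reloaded index should contain one packed vector per document.
assert index.ntotal == len(documents)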
# The following function takes a query and calls the API to get both the binary and the float
# query embeddings. We use the binary query embedding for a quick Hamming-distance search in our
# faiss IndexBinaryFlat index, retrieving 10*top_k candidates. We then re-score these candidates
# with the float query embedding against the (unpacked) binary document embeddings, which gives
# another boost in search quality.
def search(index, query, top_k=3):
    # Make sure to set input_type="search_query"
    query_emb = co.embed(texts=[query], model="embed-english-v3.0", input_type="search_query", embedding_types=["ubinary", "float"]).embeddings
    query_emb_bin = np.asarray(query_emb.ubinary, dtype='uint8')
    query_emb_float = np.asarray(query_emb.float, dtype="float32")

    # Phase I: Search on the binary index with the binary query embedding
    hits_scores, hits_doc_ids = index.search(query_emb_bin, k=min(10*top_k, index.ntotal))

    # Collect the results in a list of hits
    hits = [{'doc_id': doc_id.item(), 'score_bin': score_bin} for doc_id, score_bin in zip(hits_doc_ids[0], hits_scores[0])]

    # Phase II: Re-score the candidates with the float query embedding.
    # Reconstruct the packed binary document embeddings, unpack them to single bits,
    # and map {0, 1} to {-1, 1} so that the dot product with the float query embedding
    # approximates the original similarity.
    binary_doc_emb = np.asarray([index.reconstruct(hit['doc_id']) for hit in hits])
    binary_doc_emb_unpacked = np.unpackbits(binary_doc_emb, axis=-1).astype("int")
    binary_doc_emb_unpacked = 2*binary_doc_emb_unpacked - 1
    scores_cont = query_emb_float[0] @ binary_doc_emb_unpacked.T
    for idx in range(len(scores_cont)):
        hits[idx]['score_cont'] = scores_cont[idx]

    # Sort by largest score_cont
    hits.sort(key=lambda x: x['score_cont'], reverse=True)
    return hits[0:top_k]
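# Added illustration (not part of the search flow): how the Phase II unpacking works.
# np.unpackbits expands each uint8 byte into 8 bits (most significant bit first):
example_bits = np.unpackbits(np.asarray([[0b10100000]], dtype='uint8'), axis=-1)
# example_bits -> [[1 0 1 0 0 0 0 0]]; mapped via 2*x - 1 this becomes
# [[ 1 -1  1 -1 -1 -1 -1 -1]], i.e. each bit acts as a signed +/-1 component.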
# Search in your index
query = "Who discovered x-ray?"
print("Query:", query)

hits = search(index, query)
for hit in hits:
    print(f"{hit['score_cont']:.2f}", documents[hit['doc_id']])