Skip to content

Instantly share code, notes, and snippets.

@mdouze
Created August 1, 2019 15:41
Show Gist options
  • Select an option

  • Save mdouze/e4bdb404dbd976c83fe447e529e5c9dc to your computer and use it in GitHub Desktop.

Select an option

Save mdouze/e4bdb404dbd976c83fe447e529e5c9dc to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import faiss"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"nq = 100\n",
"nb = 1000\n",
"d = 32\n",
"\n",
"xq = faiss.randn((nq, d))\n",
"xb = faiss.randn((nb, d))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# reference IP search\n",
"k = 10\n",
"index = faiss.IndexFlatIP(d)\n",
"index.add(xb)\n",
"Dref, Iref = index.search(xq, k)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
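"# see http://ulrichpaquet.com/Papers/SpeedUp.pdf theorem 5\n",
"# Idea: append sqrt(phi - ||x||^2) to each database vector (phi = max squared norm)\n",
"# and 0 to each query. Then ||q' - x'||^2 = ||q||^2 + phi - 2 <q, x>, so the\n",
"# nearest L2 neighbors of q' are exactly the maximum inner product neighbors of q.\n",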
"\n",
"def get_phi(xb): \n",
" return (xb ** 2).sum(1).max()\n",
"\n",
"def augment_xb(xb, phi=None): \n",
" norms = (xb ** 2).sum(1)\n",
" if phi is None: \n",
" phi = norms.max()\n",
" extracol = np.sqrt(phi - norms)\n",
" return np.hstack((xb, extracol.reshape(-1, 1)))\n",
"\n",
"def augment_xq(xq): \n",
" extracol = np.zeros(len(xq), dtype='float32')\n",
" return np.hstack((xq, extracol.reshape(-1, 1)))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# IP search performed as an L2 search on the augmented vectors\n",
"k = 10\n",
"index = faiss.IndexFlatL2(d + 1)\n",
"index.add(augment_xb(xb))\n",
"D, I = index.search(augment_xq(xq), k)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 12,
"metadata": {
"bento_obj_id": "140595066188536"
},
"output_type": "execute_result"
}
],
"source": [
"np.all(I == Iref)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"bento_stylesheets": {
"bento/extensions/flow/main.css": true,
"bento/extensions/kernel_selector/main.css": true,
"bento/extensions/kernel_ui/main.css": true,
"bento/extensions/new_kernel/main.css": true,
"bento/extensions/system_usage/main.css": true,
"bento/extensions/theme/main.css": true
},
"kernelspec": {
"display_name": "faiss",
"language": "python",
"name": "bento_kernel_faiss"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3rc1+"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@CaucherWang
Copy link

IndexFlatIP finds the minimum IP distance instead of the maximum. What if I want to find maximum IP similarity neighbors? Note that IP can be negative and positive, querying with the opposite vector will return the largest negative ones.

@mdouze
Copy link
Author

mdouze commented Sep 26, 2023

IndexFlatIP returns the neighbors with the maximum inner products (largest IP first), not the minimum.

@ucas010
Copy link

ucas010 commented Dec 3, 2024

Could I use the new xb with faiss.index_factory(768, "PCA128,IVF16384_HNSW64,Flat")
and get similar results?

The old xb has been L2-normalized.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment