Last active
December 11, 2023 04:32
-
-
Save kacperlukawski/2d3a3225f15a4cc5772cd1c81866340d to your computer and use it in GitHub Desktop.
Qdrant tips&tricks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "385fc2ea", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:47.900517Z", | |
| "start_time": "2023-03-13T11:52:47.700693Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import config\n", | |
| "import func\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "51f36c37", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:48.194538Z", | |
| "start_time": "2023-03-13T11:52:47.905511Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from tqdm import tqdm\n", | |
| "from qdrant_client import QdrantClient\n", | |
| "from qdrant_client.http import models as rest" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "a86e5c3f", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:48.271822Z", | |
| "start_time": "2023-03-13T11:52:48.196267Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "client = QdrantClient(\n", | |
| " url=\"http://localhost\",\n", | |
| " prefer_grpc=True,\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "404b2b50", | |
| "metadata": {}, | |
| "source": [ | |
| "# Memory usage" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "e2b63056", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:49.954056Z", | |
| "start_time": "2023-03-13T11:52:48.274622Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
| "51ed01f404f0 qdrant-tips-tricks_qdrant_1 0.25% 655.3MiB / 30.81GiB 2.08% 457MB / 452kB 0B / 1.83GB 54\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!docker stats --no-stream" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "7697736c", | |
| "metadata": {}, | |
| "source": [ | |
| "# On-disk payloads" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "59db6a5d", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:49.984506Z", | |
| "start_time": "2023-03-13T11:52:49.961517Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=50001, indexed_vectors_count=50001, points_count=50001, segments_count=5, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=2048, distance=<Distance.COSINE: 'Cosine'>), shard_number=1, replication_factor=1, write_consistency_factor=1, on_disk_payload=True), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=10000, flush_interval_sec=5, max_optimization_threads=1), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0)), payload_schema={})" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "client.get_collection(collection_name=config.COLLECTION_NAME)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "5388856c", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:52:50.512028Z", | |
| "start_time": "2023-03-13T11:52:49.989242Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "client.recreate_collection(\n", | |
| " collection_name=config.COLLECTION_NAME,\n", | |
| " vectors_config=rest.VectorParams(\n", | |
| " size=config.VECTOR_SIZE,\n", | |
| " distance=rest.Distance.COSINE,\n", | |
| " ),\n", | |
| " on_disk_payload=True,\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "f146d480", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:59:54.998349Z", | |
| "start_time": "2023-03-13T11:53:09.728184Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "51it [06:45, 7.94s/it] \n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "max_num = 50_000\n", | |
| "batch_size = 1000\n", | |
| "\n", | |
| "objects = func.iterate_objects(max_num=max_num)\n", | |
| "batched_objects = func.batchify_objects(objects, n=batch_size)\n", | |
| "for batch in tqdm(batched_objects, total=max_num // batch_size):\n", | |
| " ids, vectors, payloads = batch\n", | |
| " client.upsert(\n", | |
| " collection_name=config.COLLECTION_NAME,\n", | |
| " points=rest.Batch(\n", | |
| " ids=ids,\n", | |
| " vectors=vectors,\n", | |
| " payloads=payloads,\n", | |
| " )\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "1edd657a", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:59:56.685866Z", | |
| "start_time": "2023-03-13T11:59:55.000790Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
| "51ed01f404f0 qdrant-tips-tricks_qdrant_1 3.22% 843.2MiB / 30.81GiB 2.67% 915MB / 854kB 0B / 8.46GB 55\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!docker stats --no-stream" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "f47b41ed", | |
| "metadata": {}, | |
| "source": [ | |
| "# Memmap support" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "b9edc24a", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T11:59:57.274388Z", | |
| "start_time": "2023-03-13T11:59:56.691823Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "client.recreate_collection(\n", | |
| " collection_name=config.COLLECTION_NAME,\n", | |
| " vectors_config=rest.VectorParams(\n", | |
| " size=config.VECTOR_SIZE,\n", | |
| " distance=rest.Distance.COSINE,\n", | |
| " ),\n", | |
| " on_disk_payload=True,\n", | |
| " optimizers_config=rest.OptimizersConfigDiff(\n", | |
| " memmap_threshold=10_000, # 10K KBs (~10 MB)\n", | |
| " ),\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "46b0b9ce", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T12:10:13.764017Z", | |
| "start_time": "2023-03-13T11:59:57.276357Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "51it [10:16, 12.09s/it] \n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "objects = func.iterate_objects(max_num=max_num)\n", | |
| "batched_objects = func.batchify_objects(objects, n=batch_size)\n", | |
| "for batch in tqdm(batched_objects, total=max_num // batch_size):\n", | |
| " ids, vectors, payloads = batch\n", | |
| " client.upsert(\n", | |
| " collection_name=config.COLLECTION_NAME,\n", | |
| " points=rest.Batch(\n", | |
| " ids=ids,\n", | |
| " vectors=vectors,\n", | |
| " payloads=payloads,\n", | |
| " )\n", | |
| " )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "3447c869", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T13:22:18.876054Z", | |
| "start_time": "2023-03-13T13:22:18.865702Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=50001, indexed_vectors_count=50001, points_count=50001, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=2048, distance=<Distance.COSINE: 'Cosine'>), shard_number=1, replication_factor=1, write_consistency_factor=1, on_disk_payload=True), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=10000, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=1), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0)), payload_schema={})" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "client.get_collection(collection_name=config.COLLECTION_NAME)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "55bcff36", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2023-03-13T13:22:22.277016Z", | |
| "start_time": "2023-03-13T13:22:20.576555Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
| "51ed01f404f0 qdrant-tips-tricks_qdrant_1 0.23% 466.2MiB / 30.81GiB 1.48% 1.37GB / 1.31MB 0B / 15GB 54\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!docker stats --no-stream" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "9d0578b8", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment