Skip to content

Instantly share code, notes, and snippets.

@surya501
surya501 / benchmark_annoy.py
Created December 3, 2025 00:06
Annoy vector database benchmark for DINOv2 embeddings (1024D)
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "annoy==1.17.0", # Note: 1.17.3 has a bug with get_nns_by_vector on macOS ARM64
# "numpy",
# ]
# ///
"""
Annoy Vector Database Benchmark
@surya501
surya501 / generate_embeddings_onnx.py
Created November 19, 2025 18:22
ResNet ONNX/PyTorch Embedding Generator - PEP 723 standalone script for image similarity search (1x7 grid with ResNet input visualization)
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "onnxruntime>=1.18.0",
# "numpy>=1.24.0",
# "pillow>=10.0.0",
# "h5py>=3.9.0",
# "torch>=2.0.0",
# "torchvision>=0.15.0",
@surya501
surya501 / check_silence.py
Created May 14, 2025 22:55
Script to visually spot-check audio gaps in WAV files
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = "<=3.12"
# dependencies = [
# "pydub",
# "numpy"
# ]
# ///
import argparse
# PyTorch implementation of LSHash from https://github.com/kayzhu/LSHash
# Homework: try to implement LSHash in PyTorch to speed up mean shift.
# Motivation: why calculate all distances when you only need a few?
import numpy as np
import importlib
import torch_utils
importlib.reload(torch_utils)
from torch_utils import *
@surya501
surya501 / airport-embedding.py
Created March 14, 2017 18:31
Airport embedding in Keras
# %matplotlib inline
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
import os
from keras.models import Model
from keras.layers import Input, Embedding, merge
from keras.layers.core import Flatten, Dense, Dropout, Lambda