Skip to content

Instantly share code, notes, and snippets.

# NOTE(review): the scrape dropped the leading "i" of this import ("mport"),
# which made the line a syntax error — restored here.
import onnxruntime as ort
import numpy as np
import torch

# Report which ONNX Runtime build is in use.
print(f"onnx runtime version:{ort.__version__}")
# Check if PyTorch can see an available GPU \
if torch.cuda.is_available():
import argparse
import os
import onnx
import onnx_safetensors
INPUT_DIR = "onnx-files" # onnx files have to go here
OUTPUT_DIR = "safetensors-files" # output files will be generated here
@aurotripathy
aurotripathy / resnet-on-onnxruntime.py
Last active March 13, 2026 01:17
resnet on onnx runtime
import onnxruntime # to inference ONNX models, we use the ONNX Runtime
import onnx
from onnx import numpy_helper
import urllib.request
import json
import time
import torch
import numpy as np
import onnxruntime
import numpy as np
from ultralytics import YOLO
# Load a pretrained YOLOv8-nano checkpoint (downloads the weights on first run).
model = YOLO('yolov8n.pt')
# Export the loaded network to ONNX format — presumably writes a .onnx file
# next to the weights; verify the output path against the ultralytics docs.
model.export(format='onnx')
import onnx
import onnx_tool
# Load the ONNX model from a file
# Alternative model files kept for quick switching (uncomment one to profile it):
# model_path = "resnet34_1_3_416_640.onnx"
# model_path = "pointpillar_custom.onnx"
model_path = "detr_1_3_512_512.onnx"
# print(sep=" ") yields the same stdout as the original f-string form.
print("Model:", model_path)
# Use onnx.load to get the model proto object
#!/bin/bash
# Require exactly two positional arguments: a results suffix and a port number.
if [[ $# -ne 2 ]]; then
    echo "Usage: $0 <results_suffix> <port_num>"
    exit 1
fi

results_suffix="$1"
port_num="$2"
@aurotripathy
aurotripathy / flowsettings.py
Last active June 4, 2025 21:15
Kotaemon flowsettings.py file for configuring the project
import os
from importlib.metadata import version
from inspect import currentframe, getframeinfo
from pathlib import Path
from decouple import config
from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
from theflow.settings.default import * # noqa
cur_frame = currentframe()  # frame of this module — presumably used further down for path/name introspection; TODO confirm against the full flowsettings.py
# from https://community.openai.com/t/easy-way-to-get-a-context-window-for-a-model/552099/4
# the hard way...
# Makes a gigantic, meaningless OpenAI chat-completion prompt call into the vLLM server
# Parses the (error) return and determines the actual context window supported
# Usage: python context-window-discovery.py --model gpt-4o-mini --base-url http://localhost:8080/v1
from openai import OpenAI
import re
import argparse
@aurotripathy
aurotripathy / RNGD-sample-RAG-App.py
Last active May 7, 2025 15:57
A canonical RAG sample application. Uses (furiosa-llm server + OpenAI embeddings + Chroma Vector DB + LangChain framework)
# A canonical RAG sample application
# Uses (furiosa-llm server + OpenAI embeddings + Chroma Vector DB + LangChain framework)
# 100% cursor-generated code
# Needs a text file in the documents directory,
# I used the doc, https://gist.github.com/wey-gu/75d49362d011a0f0354d39e396404ba2
from typing import List, Dict, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from openai import OpenAI
from dotenv import load_dotenv
import numpy as np
load_dotenv()  # pull environment variables (e.g. the OpenAI API key) from a local .env file into os.environ
def chat_with_gpt(messages):
client = OpenAI()
try:
completion = client.chat.completions.create(
model="gpt-3.5-turbo",