Skip to content

Instantly share code, notes, and snippets.

@eric-czech
eric-czech / http_stream_download_from_url.py
Created November 24, 2025 13:57
Example for streaming HTTP requests with retries
import sys, os, tempfile
from huggingface_hub.utils import http_stream_backoff
url = "https://cdn.mos.cms.futurecdn.net/v2/t:0,l:160,cw:960,ch:720,q:80,w:960/FaWKMJQnr2PFcYCmEyfiTm.jpg"
ext = os.path.splitext(url)[1]
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as f:
with http_stream_backoff(
method="GET",
url=url,
@eric-czech
eric-czech / bedtools_docker_step.py
Created August 1, 2025 01:52
Metaflow example using Docker SDK to invoke bedtools
#!/usr/bin/env python3
"""
Metaflow Example: Simple bedtools sorting with Docker
Demonstrates using biocontainers/bedtools via Docker SDK to sort genomic intervals.
"""
import tempfile
import os
@eric-czech
eric-czech / README.md
Last active January 3, 2025 17:33
Triton build script for ARM/AArch64

This script verifies that Triton can be built and minimally tested on ARM64 systems, specifically on AWS Graviton2 instances. Instructions:

  1. Create a g5g.xlarge instance using the following AWS CLI command, making sure to attach at least 30 GB of storage:
aws ec2 run-instances \
    --instance-type g5g.xlarge \
--image-id ami-02dcfe5d1d39baa4e \
@eric-czech
eric-czech / export_pmdb_pub_dates.py
Created March 4, 2024 16:49
Export PMDB-BQ publication dates
# Export one row per article (pmid plus publication year) from BigQuery to a
# local Parquet file. Articles whose pub_date is NULL are excluded by the query.
(
pd.read_gbq(
"SELECT pmid, EXTRACT(YEAR from pub_date) AS pub_year FROM `pmdb-bq.pmdb.article` WHERE pub_date IS NOT NULL",
# NOTE(review): presumably routes the download through the BigQuery Storage
# API for speed — confirm against the installed pandas / pandas-gbq version.
use_bqstorage_api=True
)
# The output path is relative, so the file lands in the current working directory.
.to_parquet("pub_years.parquet")
)
@eric-czech
eric-czech / ukb_gwas_poc.ipynb
Created January 25, 2024 20:23
UK Biobank sgkit GWAS POC
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@eric-czech
eric-czech / gpt4_linreg.ipynb
Created April 7, 2023 13:52
Linear regression for GWAS created by GPT4
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@eric-czech
eric-czech / mlb_top_hitter_prediction.ipynb
Created April 5, 2023 00:23
MLB Top Hitter Prediction (from ChatGPT)
This file has been truncated, but you can view the full file.
playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
abercda01,1871,1,TRO,NA,1,4,0,0,0,0,0,0,0,0,0,0,,,,,0
addybo01,1871,1,RC1,NA,25,118,30,32,6,0,0,13,8,1,4,0,,,,,0
allisar01,1871,1,CL1,NA,29,137,28,40,4,5,0,19,3,1,2,5,,,,,1
allisdo01,1871,1,WS3,NA,27,133,28,44,10,2,2,27,1,1,0,2,,,,,0
ansonca01,1871,1,RC1,NA,25,120,29,39,11,3,0,16,6,2,2,1,,,,,0
armstbo01,1871,1,FW1,NA,12,49,9,11,2,1,0,5,0,1,0,1,,,,,0
barkeal01,1871,1,RC1,NA,1,4,0,1,0,0,0,2,0,0,1,0,,,,,0
barnero01,1871,1,BS1,NA,31,157,66,63,10,9,0,34,11,6,13,1,,,,,1
barrebi01,1871,1,FW1,NA,1,5,1,1,1,0,0,1,0,0,0,0,,,,,0
@eric-czech
eric-czech / baseball-stats.csv
Created August 29, 2022 17:48
baseball-stats.csv
We can't make this file beautiful and searchable because it's too large.
playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
abercda01,1871,1,TRO,NA,1,4,0,0,0,0,0,0,0,0,0,0,,,,,0
addybo01,1871,1,RC1,NA,25,118,30,32,6,0,0,13,8,1,4,0,,,,,0
allisar01,1871,1,CL1,NA,29,137,28,40,4,5,0,19,3,1,2,5,,,,,1
allisdo01,1871,1,WS3,NA,27,133,28,44,10,2,2,27,1,1,0,2,,,,,0
ansonca01,1871,1,RC1,NA,25,120,29,39,11,3,0,16,6,2,2,1,,,,,0
armstbo01,1871,1,FW1,NA,12,49,9,11,2,1,0,5,0,1,0,1,,,,,0
barkeal01,1871,1,RC1,NA,1,4,0,1,0,0,0,2,0,0,1,0,,,,,0
barnero01,1871,1,BS1,NA,31,157,66,63,10,9,0,34,11,6,13,1,,,,,1
barrebi01,1871,1,FW1,NA,1,5,1,1,1,0,0,1,0,0,0,0,,,,,0
@eric-czech
eric-czech / Top MLB hitters.ipynb
Last active June 13, 2022 00:03
Top MLB hitters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@eric-czech
eric-czech / clustered_heatmap_example.py
Last active December 2, 2024 12:59
Clustered heatmap example
def get_clustered_order(df: pd.DataFrame, metric: str = 'euclidean', method: str = 'average') -> "np.ndarray":
    """Return the hierarchical-clustering leaf order of the rows of ``df``.

    The rows are clustered with ``scipy.cluster.hierarchy.linkage`` and the
    resulting dendrogram leaves are read off left to right, so rows that are
    close under ``metric`` end up next to each other.

    Args:
        df: Observations to cluster, one row per observation. Must not
            contain NaN values (SciPy rejects non-finite input).
        metric: Distance metric forwarded to ``linkage``.
        method: Linkage method forwarded to ``linkage``.

    Returns:
        A 1-D integer array of positional row indices (a permutation of
        ``range(len(df))``), suitable for ``DataFrame.iloc``.
    """
    import numpy as np
    from scipy.cluster.hierarchy import linkage, leaves_list

    # linkage() needs at least two observations; with zero or one row the
    # only possible ordering is the trivial one, so return it rather than
    # letting SciPy raise. The ndim guard leaves 1-D condensed distance
    # vectors (which linkage also accepts) on the normal path.
    if getattr(df, "ndim", 2) == 2 and len(df) < 2:
        return np.arange(len(df), dtype=np.intc)

    return leaves_list(linkage(df, metric=metric, method=method))
def get_clustered_dataframe(df: pd.DataFrame, fill_value: Any=None, **kwargs) -> pd.DataFrame:
    """Reorder the rows and columns of ``df`` by hierarchical clustering.

    Args:
        df: Frame to reorder.
        fill_value: If not ``None``, missing values are replaced with this
            value *for the clustering step only*; the returned frame still
            carries the original (unfilled) values.
        **kwargs: Forwarded to ``get_clustered_order`` for both axes.

    Returns:
        ``df`` with rows and columns permuted into clustered order.
    """
    # Cluster on a filled copy when requested, but always slice the original
    # so that missing cells stay missing in the result.
    if fill_value is None:
        basis = df
    else:
        basis = df.fillna(fill_value)

    row_order = get_clustered_order(basis, **kwargs)
    # Transposing makes the columns the observations being clustered.
    col_order = get_clustered_order(basis.T, **kwargs)
    return df.iloc[row_order, col_order]
import plotly.express as px
# Render the cluster-ordered frame as a heatmap. `df` is not defined in this
# snippet, so it must already exist in the calling session.
px.imshow(get_clustered_dataframe(df))