Skip to content

Instantly share code, notes, and snippets.

@zaemyung
zaemyung / plot_graphs.py
Created October 20, 2025 19:20
plot_graphs.py
import math
import matplotlib.pyplot as plt
import networkx as nx
import regex as re
from common_utils import draw_graph, load_pickle_file
from matplotlib.gridspec import GridSpec
def visualize_graph_motifs(motifs, motif_size, save_path, selected_motif_indices=None, show_edge_label=True):
@zaemyung
zaemyung / mf_idf.py
Created October 14, 2025 01:52
Computing mf-idf
def compute_mfidf_distribution(machines, humans, motif_size):
# document here is either "machine" or "human"
machine_mf = np.array([sample[f'motif_m{motif_size}_hist_norm_by_edges'] for sample in machines])
machine_mf = np.mean(machine_mf, axis=0)
human_mf = np.array([sample[f'motif_m{motif_size}_hist_norm_by_edges'] for sample in humans])
human_mf = np.mean(human_mf, axis=0)
machine_df = np.array([sample[f'motif_m{motif_size}_hist_raw'] for sample in machines])
machine_df = (machine_df > 0).astype(int)
machine_df = np.mean(machine_df, axis=0)
@zaemyung
zaemyung / ct.py
Created October 17, 2023 08:20
custom trainer example
from sklearn.metrics import confusion_matrix
class CustomTrainer(Trainer):
def _inner_training_loop(
self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, \
ignore_keys_for_eval=None):
@zaemyung
zaemyung / download_arxiv_from_amazon_s3.py
Created June 28, 2023 19:45
Download arxiv dump files from S3 bucket - latest files first.
import os
from datetime import datetime
from pathlib import Path
# pip install boto3 python-dotenv beautifulsoup4
import boto3
import dotenv
from bs4 import BeautifulSoup
dotenv.load_dotenv()
@zaemyung
zaemyung / macbook_setup.md
Last active April 8, 2023 03:48
macbook setup
  • iCloud symlink
    • ln -s ~/Library/Mobile\ Documents/com\~apple\~CloudDocs ~/iCloud
  • Map keys: right-option to f18, capslock to control
    • /Users/zaemyung/Library/LaunchAgents/com.local.setting.key.remapping.plist
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
 Label
@zaemyung
zaemyung / radar_plot.py
Created December 8, 2022 11:21
Plotting Radar graph
import math
import textwrap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
class ComplexRadar():
"""
@zaemyung
zaemyung / Docker Cheat Sheet.md
Last active June 1, 2023 00:16
Docker Cheat Sheet: commonly used Docker commands

Docker Cheat Sheet

(Adapted from Docker Cheat Sheet)

Build

Build an image from the Dockerfile in the current directory and tag the image

docker build -f Dockerfile -t {your_name/repo_name:tag} .

To build the zaemyung/ml_non_root image, set UNAME in the Dockerfile accordingly, then build with:

@zaemyung
zaemyung / color_google_sheet_cells.gs
Last active April 9, 2021 21:13
Google Apps Script macro for coloring disjointly selected cells in Google Sheets
/**
 * Sets a green background (#93c47d) on every range in the current selection
 * of the active spreadsheet. The selection may consist of several disjoint
 * ranges; each one is colored.
 *
 * Does nothing when there is no active range list.
 */
function colorSelectedRangeGreen() {
  var rangeList = SpreadsheetApp.getActive().getSelection().getActiveRangeList();
  // getActiveRangeList() is documented to return null when there are no
  // active ranges; bail out instead of throwing on .getRanges().
  if (!rangeList) {
    return;
  }
  var ranges = rangeList.getRanges();
  for (var i = 0; i < ranges.length; i++) {
    ranges[i].setBackground("#93c47d");
  }
}
function colorSelectedRangePink() {
var sel = SpreadsheetApp.getActive().getSelection().getActiveRangeList().getRanges();
for(var i = 0; i < sel.length; i++){
@zaemyung
zaemyung / Liberal Regex Pattern for All URLs
Created March 26, 2019 04:40 — forked from gruber/Liberal Regex Pattern for All URLs
Liberal, Accurate Regex Pattern for Matching All URLs
The regex patterns in this gist are intended to match any URLs,
including "mailto:someone@example.com", "x-whatever://foo", etc. For a
pattern that attempts only to match web URLs (http, https), see:
https://gist.github.com/gruber/8891611
# Single-line version of pattern:
(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))
@zaemyung
zaemyung / multi_bleu.py
Created April 25, 2018 02:38 — forked from basaundi/multi_bleu.py
python rewrite of Moses' multi-bleu.perl; usable as a library
#!/usr/bin/env python
# Ander Martinez Sanchez
from __future__ import division, print_function
from math import exp, log
from collections import Counter
def ngram_count(words, n):
if n <= len(words):