Skip to content

Instantly share code, notes, and snippets.

View stellaraccident's full-sized avatar

Stella Laurenzo stellaraccident

  • Amd
  • Seattle Washington, US
View GitHub Profile
@stellaraccident
stellaraccident / quartz-design.md
Created January 26, 2026 20:58
Quartz Design Document: ROCm CI/CD Dashboard & Orchestration

Quartz Design Document: ROCm CI/CD Dashboard & Orchestration

Executive Summary

This document provides architectural guidance for "Quartz" - a PyTorch HUD-like system for ROCm downstream CI/CD orchestration. The junior engineer's instinct to start with status.json is understandable but insufficient for the stated requirements. A database-first approach is correct.


Requirements Recap

@stellaraccident
stellaraccident / purrfect-crunching-glade.md
Created January 26, 2026 04:10
MoE components plan for frank-models (iree-link composition)

Plan: Add MoE Components (mul_mat_id, moe_ffn_block)

Overview

Extract MoE primitives from /develop/ai-no-fluff/kb/ben/moe_f32_parameterized.mlir:

  • mul_mat_id - Expert-selected matrix multiplication (gather + batch_matmul)
  • moe_ffn_block - Full MoE FFN block composing routing, expert compute, weighted sum

Key challenge: moe_ffn_block depends on mul_mat_id and swiglu, so we need a systematic way to compose these components without manually inlining them.

@stellaraccident
stellaraccident / gist:4d3b5d24077b17ebfb8137bf3ad8135f
Last active January 22, 2026 01:18
Why merge commits are better than pushing hundreds of commits to main

Why Merge Commits Beat Pushing Hundreds of Commits to Main

The Comparison

| Merge Commit | Individual Commits on Main |
| --- | --- |
| One atomic integration point | 500 commits sprawled on main |
| `git revert -m1 <merge>` undoes everything | Good luck reverting |
| `git bisect` can skip the whole merge | Bisect walks through 500 commits |
| Main history is readable | Main history is chaos |
@stellaraccident
stellaraccident / gist:f31fee06c22e6d172335f2ae881d7ff5
Last active January 8, 2026 04:54
TheRock: Developer builds from PRs and PyTorch packages

TheRock: Developer Builds from PRs and PyTorch Packages

Claude Code Prompt: Read the docs and workflows in therock using subagents and tell me how to make developer builds from a PR and get packages w/ pytorch.

Triggering PR Builds

Label-based (simplest)

Add labels to your PR:

@stellaraccident
stellaraccident / procedure.md
Last active January 12, 2022 21:36
IREE LLVM Integration Procedure

Notes on integrating LLVM from the OSS side

Strategy 1: Sync everything to a Google/TensorFlow commit

cd ~/src
git clone https://github.com/tensorflow/tensorflow.git
git clone https://github.com/tensorflow/mlir-hlo.git
module @aqt_matmul {
iree_input.global private @_params$0 = dense<[[0.000000e+00, 5.003000e+02, 1.000600e+03], [1500.8999, 2.001200e+03, 2.501500e+03], [3001.7998, 3502.09985, 4.002400e+03], [4502.69971, 5.003000e+03, 5.503300e+03], [6003.59961, 6503.8999, 7004.1997], [7.504500e+03, 8004.7998, 8.505100e+03]]> : tensor<6x3xf32>
iree_input.global private @_params$1 = dense<5.000000e+00> : tensor<f32>
func @compute_native(%arg0: tensor<5x6xf32>) -> tensor<5x3xf32> {
%0 = iree_input.global.load @_params$0 : tensor<6x3xf32>
%1 = iree_input.global.load @_params$1 : tensor<f32>
%2 = call @main(%0, %1, %arg0) : (tensor<6x3xf32>, tensor<f32>, tensor<5x6xf32>) -> tensor<5x3xf32>
return %2 : tensor<5x3xf32>
}
func private @main(%arg0: tensor<6x3xf32>, %arg1: tensor<f32>, %arg2: tensor<5x6xf32>) -> tensor<5x3xf32> {
@stellaraccident
stellaraccident / vminput.mlir
Created December 13, 2021 05:49
Failing vm initializer
#device_target_vmvx = #hal.device.target<"vmvx", {executable_targets = [#hal.executable.target<"vmvx", "vmvx-bytecode-fb">]}>
module attributes {hal.device.targets = [#device_target_vmvx]} {
util.global private @hoisted_1 : !hal.buffer
util.global private @hoisted_1__offset : index
util.global private @hoisted_1__size : index
util.global private @hoisted_0 : !hal.buffer
util.global private @hoisted : !hal.buffer
util.global private @hoisted__storage_size : index
util.global private @hoisted__offset : index
util.global private @hoisted__size : index
@stellaraccident
stellaraccident / hoisting.mlir
Last active December 12, 2021 05:22
Initializer hoisting
#map0 = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
#map2 = affine_map<() -> ()>
module @aqt_matmul {
util.global private @_params$0 = dense<[[0.000000e+00, 5.003000e+02, 1.000600e+03], [1500.8999, 2.001200e+03, 2.501500e+03], [3001.7998, 3502.09985, 4.002400e+03], [4502.69971, 5.003000e+03, 5.503300e+03], [6003.59961, 6503.8999, 7004.1997], [7.504500e+03, 8004.7998, 8.505100e+03]]> : tensor<6x3xf32>
util.global private @_params$1 = dense<5.000000e+00> : tensor<f32>
func @compute_native(%arg0: tensor<5x6xf32>) -> tensor<5x3xf32> {
%c0_i32 = arith.constant 0 : i32
%cst = arith.constant dense<5.000000e-01> : tensor<5x6xf32>
%cst_0 = arith.constant dense<-1.270000e+02> : tensor<5x6xf32>
@stellaraccident
stellaraccident / debugdump.mlir
Last active December 11, 2021 03:32
Constant Hoisting
CONSTANT ROOT: %cst_0 = arith.constant dense<5.000000e-01> : tensor<6x3xf32>
CONSTANT ROOT: %_params$0 = util.global.load @_params$0 : tensor<6x3xf32>
CONSTANT ROOT: %cst_4 = arith.constant dense<1.270000e+02> : tensor<5x6xf32>
CONSTANT ROOT: %cst_2 = arith.constant dense<-1.270000e+02> : tensor<5x6xf32>
CONSTANT ROOT: %cst_3 = arith.constant dense<1.270000e+02> : tensor<f32>
CONSTANT ROOT: %c0_i32 = arith.constant 0 : i32
CONSTANT ROOT: %cst_1 = arith.constant dense<5.000000e-01> : tensor<5x6xf32>
CONSTANT ROOT: %_params$1 = util.global.load @_params$1 : tensor<f32>
CONSTANT ROOT: %cst = arith.constant 0xFF800000 : f32
EXPAND TO UNKNOWN: %26 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%24, %cst_0 : tensor<6x3xf32>, tensor<6x3xf32>) outs(%25 : tensor<6x3xf32>) {
@stellaraccident
stellaraccident / dense_stack.mlir
Last active December 7, 2021 03:56
IREE Jax AQT Matmul Examples
module @aqt_dense {
iree_input.global private @_params$0 = dense<[[0.000000e+00, 1.000000e-03, 2.000000e-03], [3.000000e-03, 4.000000e-03, 0.00500000035], [6.000000e-03, 7.000000e-03, 8.000000e-03], [0.00900000054, 0.0100000007, 0.0110000009], [1.200000e-02, 1.300000e-02, 1.400000e-02], [0.0150000006, 1.600000e-02, 1.700000e-02]]> : tensor<6x3xf32>
iree_input.global private @_params$1 = dense<[0.000000e+00, 1.000000e+01, 2.000000e+01]> : tensor<3xf32>
iree_input.global private @_params$2 = dense<5.000000e+00> : tensor<f32>
iree_input.global private @_params$3 = dense<[[0.000000e+00, 0.00999999977, 2.000000e-02, 3.000000e-02, 4.000000e-02, 0.049999997, 6.000000e-02, 7.000000e-02, 8.000000e-02], [0.0899999961, 0.099999994, 1.100000e-01, 1.200000e-01, 1.300000e-01, 1.400000e-01, 0.149999991, 1.600000e-01, 1.700000e-01], [0.179999992, 1.900000e-01, 0.199999988, 2.100000e-01, 2.200000e-01, 0.229999989, 2.400000e-01, 2.500000e-01, 2.600000e-01]]> : tensor<3x9xf32>
iree_input.global private @_params$4 = d