This document provides architectural guidance for "Quartz" - a PyTorch HUD-like system for ROCm downstream CI/CD orchestration. The junior engineer's instinct to start with status.json is understandable but insufficient for the stated requirements. A database-first approach is correct.
Extract MoE primitives from /develop/ai-no-fluff/kb/ben/moe_f32_parameterized.mlir:
- `mul_mat_id` - Expert-selected matrix multiplication (gather + batch_matmul)
- `moe_ffn_block` - Full MoE FFN block composing routing, expert compute, weighted sum
Key challenge: `moe_ffn_block` depends on `mul_mat_id` and `swiglu`, so we need a systematic way to compose these primitives without manual inlining.
| Merge Commit | Individual Commits on Main |
|---|---|
| One atomic integration point | 500 commits sprawled on main |
| `git revert -m1 <merge>` undoes everything | Good luck reverting |
| `git bisect` can skip the whole merge | Bisect walks through 500 commits |
| Main history is readable | Main history is chaos |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module @aqt_matmul { | |
| iree_input.global private @_params$0 = dense<[[0.000000e+00, 5.003000e+02, 1.000600e+03], [1500.8999, 2.001200e+03, 2.501500e+03], [3001.7998, 3502.09985, 4.002400e+03], [4502.69971, 5.003000e+03, 5.503300e+03], [6003.59961, 6503.8999, 7004.1997], [7.504500e+03, 8004.7998, 8.505100e+03]]> : tensor<6x3xf32> | |
| iree_input.global private @_params$1 = dense<5.000000e+00> : tensor<f32> | |
| func @compute_native(%arg0: tensor<5x6xf32>) -> tensor<5x3xf32> { | |
| %0 = iree_input.global.load @_params$0 : tensor<6x3xf32> | |
| %1 = iree_input.global.load @_params$1 : tensor<f32> | |
| %2 = call @main(%0, %1, %arg0) : (tensor<6x3xf32>, tensor<f32>, tensor<5x6xf32>) -> tensor<5x3xf32> | |
| return %2 : tensor<5x3xf32> | |
| } | |
| func private @main(%arg0: tensor<6x3xf32>, %arg1: tensor<f32>, %arg2: tensor<5x6xf32>) -> tensor<5x3xf32> { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #device_target_vmvx = #hal.device.target<"vmvx", {executable_targets = [#hal.executable.target<"vmvx", "vmvx-bytecode-fb">]}> | |
| module attributes {hal.device.targets = [#device_target_vmvx]} { | |
| util.global private @hoisted_1 : !hal.buffer | |
| util.global private @hoisted_1__offset : index | |
| util.global private @hoisted_1__size : index | |
| util.global private @hoisted_0 : !hal.buffer | |
| util.global private @hoisted : !hal.buffer | |
| util.global private @hoisted__storage_size : index | |
| util.global private @hoisted__offset : index | |
| util.global private @hoisted__size : index |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #map0 = affine_map<(d0, d1) -> ()> | |
| #map1 = affine_map<(d0, d1) -> (d0, d1)> | |
| #map2 = affine_map<() -> ()> | |
| module @aqt_matmul { | |
| util.global private @_params$0 = dense<[[0.000000e+00, 5.003000e+02, 1.000600e+03], [1500.8999, 2.001200e+03, 2.501500e+03], [3001.7998, 3502.09985, 4.002400e+03], [4502.69971, 5.003000e+03, 5.503300e+03], [6003.59961, 6503.8999, 7004.1997], [7.504500e+03, 8004.7998, 8.505100e+03]]> : tensor<6x3xf32> | |
| util.global private @_params$1 = dense<5.000000e+00> : tensor<f32> | |
| func @compute_native(%arg0: tensor<5x6xf32>) -> tensor<5x3xf32> { | |
| %c0_i32 = arith.constant 0 : i32 | |
| %cst = arith.constant dense<5.000000e-01> : tensor<5x6xf32> | |
| %cst_0 = arith.constant dense<-1.270000e+02> : tensor<5x6xf32> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CONSTANT ROOT: %cst_0 = arith.constant dense<5.000000e-01> : tensor<6x3xf32> | |
| CONSTANT ROOT: %_params$0 = util.global.load @_params$0 : tensor<6x3xf32> | |
| CONSTANT ROOT: %cst_4 = arith.constant dense<1.270000e+02> : tensor<5x6xf32> | |
| CONSTANT ROOT: %cst_2 = arith.constant dense<-1.270000e+02> : tensor<5x6xf32> | |
| CONSTANT ROOT: %cst_3 = arith.constant dense<1.270000e+02> : tensor<f32> | |
| CONSTANT ROOT: %c0_i32 = arith.constant 0 : i32 | |
| CONSTANT ROOT: %cst_1 = arith.constant dense<5.000000e-01> : tensor<5x6xf32> | |
| CONSTANT ROOT: %_params$1 = util.global.load @_params$1 : tensor<f32> | |
| CONSTANT ROOT: %cst = arith.constant 0xFF800000 : f32 | |
| EXPAND TO UNKNOWN: %26 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%24, %cst_0 : tensor<6x3xf32>, tensor<6x3xf32>) outs(%25 : tensor<6x3xf32>) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module @aqt_dense { | |
| iree_input.global private @_params$0 = dense<[[0.000000e+00, 1.000000e-03, 2.000000e-03], [3.000000e-03, 4.000000e-03, 0.00500000035], [6.000000e-03, 7.000000e-03, 8.000000e-03], [0.00900000054, 0.0100000007, 0.0110000009], [1.200000e-02, 1.300000e-02, 1.400000e-02], [0.0150000006, 1.600000e-02, 1.700000e-02]]> : tensor<6x3xf32> | |
| iree_input.global private @_params$1 = dense<[0.000000e+00, 1.000000e+01, 2.000000e+01]> : tensor<3xf32> | |
| iree_input.global private @_params$2 = dense<5.000000e+00> : tensor<f32> | |
| iree_input.global private @_params$3 = dense<[[0.000000e+00, 0.00999999977, 2.000000e-02, 3.000000e-02, 4.000000e-02, 0.049999997, 6.000000e-02, 7.000000e-02, 8.000000e-02], [0.0899999961, 0.099999994, 1.100000e-01, 1.200000e-01, 1.300000e-01, 1.400000e-01, 0.149999991, 1.600000e-01, 1.700000e-01], [0.179999992, 1.900000e-01, 0.199999988, 2.100000e-01, 2.200000e-01, 0.229999989, 2.400000e-01, 2.500000e-01, 2.600000e-01]]> : tensor<3x9xf32> | |
| iree_input.global private @_params$4 = d |
NewerOlder