Skip to content

Instantly share code, notes, and snippets.

@pbsds
Last active September 2, 2025 11:46
Show Gist options
  • Select an option

  • Save pbsds/06c02d08eca5e327a5cbbb2390be53dc to your computer and use it in GitHub Desktop.

Select an option

Save pbsds/06c02d08eca5e327a5cbbb2390be53dc to your computer and use it in GitHub Desktop.
# Fetch sinfo output as JSON from the IDUN login node over ssh and feed it to jq.
ssh idun-login1.hpc.ntnu.no sinfo -N -o %all --json | jq '
# Split the .sinfo records by partition name. The parentheses keep "." bound to
# the whole document for everything that follows the two bindings.
(.sinfo | map(select(.partition.name == "CPUQ"))) as $CPUQ_nodes |
(.sinfo | map(select(.partition.name == "GPUQ"))) as $GPUQ_nodes |
# Sum the counts of every "gpu" entry found in the GRES string given as input.
# The string is treated as comma-separated "name[:type]:count" items, each
# optionally followed by a parenthesised suffix. Both untyped ("gpu:4") and
# typed ("gpu:a100:4") entries are accepted, and several gpu entries in one
# string are added together. The entry must start the string or follow a comma,
# so counts of other resources listed after it are never picked up by mistake.
# Emits nothing when the string contains no gpu entry, which makes callers that
# iterate over nodes skip GPU-less nodes.
def gres_gpu_count:
  [match("(?:^|,)gpu(?::[^:,(]+)?:([0-9]+)"; "g").captures[0].string | tonumber]
  | if length == 0 then empty else add end;

# GPU count parsed from .gres.total of the node record given as input.
# A null value is treated like an empty string (no GPUs) instead of aborting.
def gpus_maximum:
  (.gres.total // "") | gres_gpu_count;

# GPU count parsed from .gres.used of the node record given as input.
# A null value is treated like an empty string (no GPUs) instead of aborting.
def gpus_used:
  (.gres.used // "") | gres_gpu_count;
# Build the GPU availability report for the array of node records given as input.
# Output shape: {exists: {...}, idle: {...}}, where each half contains
#   no_starve_max: the smallest per-GPU share of CPUs and of memory over all nodes
#   xNN_gpu:       how many NN-GPU allocations fit, summed over the nodes
#                  (each node contributes floor(gpus / NN))
# "exists" is computed from .cpus.maximum, .memory.maximum and gpus_maximum.
# "idle" is computed from .cpus.idle, .memory.free.maximum.number and
# gpus_maximum - gpus_used. Note that the idle no_starve_max values are divided
# by the total GPU count of each node, not by its idle GPU count.
# Nodes for which gpus_maximum emits nothing are skipped everywhere.
def nodes_to_gpu_report:
  # Smallest value of resource / GPU count over all nodes; null for an empty list.
  # Nodes reporting zero GPUs are skipped so the division cannot fail.
  def min_per_gpu(resource):
    [.[] | resource / (gpus_maximum | select(. > 0))] | min;

  # Number of $k-GPU allocations that fit, summed over the nodes.
  # Yields 0 rather than null when no node has at least $k GPUs.
  def gpu_slots(gpus; $k):
    [.[] | gpus | select(. >= $k) | . / $k | floor] | add // 0;

  # One row per allocation size, all computed from the same per-node GPU count.
  def gpu_slot_table(gpus):
    {
      x01_gpu: gpu_slots(gpus; 1),
      x02_gpu: gpu_slots(gpus; 2),
      x03_gpu: gpu_slots(gpus; 3),
      x04_gpu: gpu_slots(gpus; 4),
      x06_gpu: gpu_slots(gpus; 6),
      x08_gpu: gpu_slots(gpus; 8),
      x10_gpu: gpu_slots(gpus; 10)
    };

  {
    # nodes: ([.[]| .nodes.nodes] | flatten),
    exists: ({
      no_starve_max: {
        cpus_per_gpu: min_per_gpu(.cpus.maximum),
        memory_per_gpu: min_per_gpu(.memory.maximum)
        # Disabled metrics, kept for reference. The second one needs
        # ([.[]| .cpus.maximum / (gpus_maximum)] | min) bound as $cpus_per_gpu.
        #memory_per_cpu: ([.[]| .memory.maximum / .cpus.total] | min),
        #memory_per_cpus_per_gpu: ([.[]| .memory.maximum / $cpus_per_gpu] | min),
      }
    } + gpu_slot_table(gpus_maximum)),
    idle: ({
      no_starve_max: {
        cpus_per_gpu: min_per_gpu(.cpus.idle),
        memory_per_gpu: min_per_gpu(.memory.free.maximum.number)
        # Disabled metrics, kept for reference. The second one needs
        # ([.[]| .cpus.maximum / (gpus_maximum)] | min) bound as $cpus_per_gpu.
        #memory_per_cpu: ([.[]| .memory.free.maximum.number / .cpus.total] | min),
        #memory_per_cpus_per_gpu: ([.[]| .memory.free.maximum.number / $cpus_per_gpu] | min),
      }
    } + gpu_slot_table(gpus_maximum - gpus_used))
  };
# Number of $k-CPU allocations that fit, summed over the input node array
# (each node contributes floor(cpus / $k)). Yields 0 rather than null when no
# node has at least $k CPUs.
def cpu_slots(cpus; $k):
  [.[] | cpus | select(. >= $k) | . / $k | floor] | add // 0;

# One row per allocation size, all computed from the same per-node CPU count.
def cpu_slot_table(cpus):
  {
    x001_cpu: cpu_slots(cpus; 1),
    x002_cpu: cpu_slots(cpus; 2),
    x004_cpu: cpu_slots(cpus; 4),
    x008_cpu: cpu_slots(cpus; 8),
    x016_cpu: cpu_slots(cpus; 16),
    x032_cpu: cpu_slots(cpus; 32),
    x064_cpu: cpu_slots(cpus; 64),
    x128_cpu: cpu_slots(cpus; 128)
  };

# GPU report restricted to the GPUQ nodes whose .features.total satisfies
# "wanted" (a filter applied to that value, e.g. contains(...) or test(...)).
def gpuq_report(wanted):
  $GPUQ_nodes | map(select(.features.total | wanted)) | nodes_to_gpu_report;

# Final report: one entry for the CPU partition, one for the whole GPU
# partition, one per GPU feature (GPUQ_*), and one per "this feature or any of
# the alternatives listed with it" group (min_GPUQ_*).
{
  CPUQ: ($CPUQ_nodes | {
    # nodes: ([.[]| .nodes.nodes] | flatten),
    no_starve_max: {
      memory_per_cpu: ([.[]| .memory.maximum / .cpus.total] | min)
    },
    exists: cpu_slot_table(.cpus.maximum),
    idle: cpu_slot_table(.cpus.idle)
  }),
  GPUQ: ($GPUQ_nodes | nodes_to_gpu_report),
  GPUQ_p100: gpuq_report(contains("p100")),
  GPUQ_v100: gpuq_report(contains("v100")),
  GPUQ_a100: gpuq_report(contains("a100")),
  GPUQ_h100: gpuq_report(contains("h100")),
  GPUQ_gpu16g: gpuq_report(contains("gpu16g")),
  GPUQ_gpu32g: gpuq_report(contains("gpu32g")),
  GPUQ_gpu40g: gpuq_report(contains("gpu40g")),
  GPUQ_gpu80g: gpuq_report(contains("gpu80g")),
  GPUQ_nvlink: gpuq_report(contains("nvlink")),
  min_GPUQ_p100: gpuq_report(test("h100|a100|v100|p100")),
  min_GPUQ_v100: gpuq_report(test("h100|a100|v100")),
  min_GPUQ_a100: gpuq_report(test("h100|a100")),
  min_GPUQ_h100: gpuq_report(test("h100")),
  min_GPUQ_gpu16g: gpuq_report(test("gpu80g|gpu40g|gpu32g|gpu16g")),
  min_GPUQ_gpu32g: gpuq_report(test("gpu80g|gpu40g|gpu32g")),
  min_GPUQ_gpu40g: gpuq_report(test("gpu80g|gpu40g")),
  min_GPUQ_gpu80g: gpuq_report(test("gpu80g"))
}
# transpose
#| [to_entries[]
# | .key as $key2
# | .value
# | [to_entries[] | {(.key): {($key2): (.value)}}]
# | add
#]
' | gron
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment