Skip to content

Instantly share code, notes, and snippets.

@leok7v
Last active September 14, 2025 19:18
Show Gist options
  • Select an option

  • Save leok7v/bcea3593bfab962720d28318894e9fbe to your computer and use it in GitHub Desktop.

Select an option

Save leok7v/bcea3593bfab962720d28318894e9fbe to your computer and use it in GitHub Desktop.
Metal/NVML/ROCm SMI/Level Zero/DXGI GPU count and VRAM detection (gpu.h/gpu.c)
/* File: gpu.h */
#pragma once
#ifndef GPU_H
#define GPU_H
#ifdef __cplusplus
extern "C" {
#endif
#include "etc.h"
/* One detected GPU, as recorded by a backend probe in gpu.c. */
struct gpu {
/* Device name; gpu.c copies at most 63 characters, so it is always NUL-terminated. */
char name[64];
/* Total memory reported by the backend (presumably bytes — TODO confirm per backend). */
uint64_t vram_total;
/* Memory currently in use; backends that cannot report it store 0. */
uint64_t vram_used;
/* Memory still available; backends that cannot report it store 0. */
uint64_t vram_free;
/* Backend tag written by gpu.c: "Metal", "NVML", "ROCm", "Level0" or "DXGI". */
char api[16];
};
/* Container of struct gpu; array_of() is provided by etc.h (not visible in this file). */
struct gpus array_of(struct gpu);
/* Runs every backend probe, appending records to *out; returns out->count as int. */
int gpu_probe(struct gpus * out);
#ifdef __cplusplus
}
#endif
#endif // GPU_H
/* EOF: gpu.h */
/* File: gpu.c */
#include "gpu.h"
#define _POSIX_C_SOURCE 200809L
#include <string.h>
#include <stdint.h>
/*
 * Minimal portability layer over the platform's dynamic loader.
 *   DL_HANDLE   - type of a loaded-library handle
 *   DLOPEN(n)   - load library `n`; yields a null handle on failure
 *   DLSYM(h,s)  - look up symbol `s` in `h` as a void*; null when missing
 *   DLCLOSE(h)  - unload `h` when it is non-null (evaluates `h` twice)
 * Arguments and expansions are parenthesized so the macros compose safely
 * inside larger expressions.
 */
#if defined(_WIN32)
#include <windows.h>
#define DL_HANDLE HMODULE
#define DLOPEN(n) LoadLibraryA((n))
#define DLSYM(h,s) ((void*)GetProcAddress((h), (s)))
#define DLCLOSE(h) do { if ((h)) FreeLibrary((h)); } while (0)
#else
#include <dlfcn.h>
#define DL_HANDLE void*
#define DLOPEN(n) dlopen((n), RTLD_LAZY)
#define DLSYM(h,s) (dlsym((h), (s)))
#define DLCLOSE(h) do { if ((h)) dlclose((h)); } while (0)
#endif
/*
 * Appends one GPU record to `out`.
 *
 * out   - destination array; a NULL pointer is rejected (returns 0)
 * api   - backend tag copied into gpu.api (NULL is treated as "")
 * name  - device name copied into gpu.name (NULL is treated as "")
 * total, used, free - memory figures stored verbatim
 *
 * Both strings are truncated to fit their fixed-size fields.
 * Returns 1 when array_push() yields a non-zero result, otherwise 0.
 */
static int push(struct gpus *out, const char *api, const char *name,
                uint64_t total, uint64_t used, uint64_t free) {
    struct gpu g = {0};
    int pushed = 0;
    if (out) {
        /* g is zero-filled and at most size-1 bytes are copied, so both
           fields stay NUL-terminated even when the source is longer */
        strncpy(g.api, api ? api : "", sizeof(g.api) - 1);
        strncpy(g.name, name ? name : "", sizeof(g.name) - 1);
        g.vram_total = total;
        g.vram_used = used;
        g.vram_free = free;
        pushed = array_push(out, &g, 1) ? 1 : 0;
    }
    return pushed;
}
/* ---------------- Metal (Apple) ---------------- */
#if defined(__APPLE__)
#include <objc/objc.h>
#include <objc/runtime.h>
#include <objc/message.h>
/*
 * Calls the zero-argument Objective-C method `selname` on `obj` and returns
 * its result as a 64-bit unsigned integer.
 *
 * Returns 0 when `obj` is nil, the selector cannot be registered, or the
 * object's class does not implement the selector.
 *
 * class_getMethodImplementation() hands back the runtime's forwarding stub
 * rather than NULL for an unimplemented selector, so a non-NULL IMP alone
 * does not prove the method exists (e.g. on older OS releases); the
 * class_respondsToSelector() check is what prevents calling a missing method.
 */
static uint64_t metal_u64(id obj, const char *selname) {
    uint64_t v = 0;
    SEL s = sel_registerName(selname);
    if (obj && s && class_respondsToSelector(object_getClass(obj), s)) {
        IMP imp = class_getMethodImplementation(object_getClass(obj), s);
        if (imp) { typedef uint64_t (*F)(id,SEL); v = ((F)imp)(obj,s); }
    }
    return v;
}
/*
 * Calls the zero-argument Objective-C method `selname` on `obj`, then sends
 * UTF8String to the returned object and returns that C string.
 *
 * Returns "" (never NULL) when `obj` is nil, when either method is not
 * implemented by the receiving class, or when any step yields nil/NULL.
 * The returned pointer is owned by the Objective-C object; callers should
 * copy it right away (presumably it is only valid while that object lives).
 *
 * class_getMethodImplementation() returns the forwarding stub instead of
 * NULL for an unimplemented selector, hence the explicit
 * class_respondsToSelector() checks before each call.
 */
static const char* metal_s(id obj, const char *selname) {
    const char *r = "";
    SEL s = sel_registerName(selname);
    if (obj && s && class_respondsToSelector(object_getClass(obj), s)) {
        IMP imp = class_getMethodImplementation(object_getClass(obj), s);
        if (imp) {
            typedef id (*F)(id,SEL);
            id str = ((F)imp)(obj,s);
            SEL u8 = sel_registerName("UTF8String");
            if (str && u8 && class_respondsToSelector(object_getClass(str), u8)) {
                IMP i2 = class_getMethodImplementation(object_getClass(str), u8);
                if (i2) { typedef const char* (*F2)(id,SEL); r = ((F2)i2)(str,u8); }
            }
        }
    }
    return r ? r : "";
}
static int probe_metal(struct gpus *out) {
DL_HANDLE h = DLOPEN("/System/Library/Frameworks/Metal.framework/Metal");
int found = 0;
if (h) {
id (*CopyAll)(void) = 0;
id (*CreateDef)(void) = 0;
*(void**)(&CopyAll) = DLSYM(h, "MTLCopyAllDevices");
*(void**)(&CreateDef) = DLSYM(h, "MTLCreateSystemDefaultDevice");
if (CopyAll) {
id ds = CopyAll();
if (ds) {
SEL sc = sel_registerName("count");
SEL so = sel_registerName("objectAtIndex:");
IMP ic = class_getMethodImplementation(object_getClass(ds), sc);
IMP io = class_getMethodImplementation(object_getClass(ds), so);
if (ic && io) {
typedef unsigned long (*FC)(id,SEL);
typedef id (*FO)(id,SEL,unsigned long);
unsigned long n = ((FC)ic)(ds, sc);
for (unsigned long i = 0; i < n; i++) {
id d = ((FO)io)(ds, so, i);
uint64_t tot = metal_u64(d, "recommendedMaxWorkingSetSize");
uint64_t usd = metal_u64(d, "currentAllocatedSize");
uint64_t fre = tot > usd ? tot - usd : 0;
found += push(out, "Metal", metal_s(d, "name"), tot, usd, fre);
}
}
}
} else if (CreateDef) {
id d = CreateDef();
if (d) {
uint64_t tot = metal_u64(d, "recommendedMaxWorkingSetSize");
uint64_t usd = metal_u64(d, "currentAllocatedSize");
uint64_t fre = tot > usd ? tot - usd : 0;
found += push(out, "Metal", metal_s(d, "name"), tot, usd, fre);
}
}
DLCLOSE(h);
}
return found;
}
#else
/* Non-Apple build: there is no Metal backend, so nothing is ever added. */
static int probe_metal(struct gpus *out) {
    (void)out; /* parameter intentionally unused */
    return 0;
}
#endif
/* ---------------- NVIDIA (NVML) ---------------- */
/*
 * Enumerates NVIDIA GPUs through NVML, loaded at run time.
 *
 * Every required entry point must resolve, and nvmlInit_v2 must succeed,
 * before any device is queried; nvmlShutdown is called after a successful
 * init. A device is recorded when its handle and memory info can be read.
 *
 * The device name is best-effort: a failing nvmlDeviceGetName (for example
 * because the name does not fit the buffer) no longer drops the device, it
 * is recorded with an empty name instead. The buffer uses NVML's larger
 * "v2" name size (96) so long product names are not rejected; push() then
 * truncates to the record's field size.
 *
 * Returns the number of records appended to `out`.
 */
static int probe_nvml(struct gpus *out) {
#if defined(_WIN32)
    const char *cands[] = { "nvml.dll" };
#else
    const char *cands[] = { "libnvidia-ml.so.1", "libnvidia-ml.so" };
#endif
    DL_HANDLE h = 0;
    size_t k = 0;
    int found = 0;
    for (; k < (sizeof(cands)/sizeof(cands[0])) && !h; k++) { h = DLOPEN(cands[k]); }
    if (h) {
        typedef int (*init_t)(void);
        typedef int (*fini_t)(void);
        typedef int (*cnt_t)(unsigned int*);
        typedef int (*hdl_t)(unsigned int, void**);
        typedef int (*name_t)(void*, char*, unsigned int);
        /* field order mirrors NVML's memory-info structure: total, free, used */
        struct mi { unsigned long long total, free, used; };
        typedef int (*mem_t)(void*, struct mi*);
        init_t init = (init_t)DLSYM(h, "nvmlInit_v2");
        fini_t fini = (fini_t)DLSYM(h, "nvmlShutdown");
        cnt_t gcnt = (cnt_t)DLSYM(h, "nvmlDeviceGetCount_v2");
        hdl_t ghdl = (hdl_t)DLSYM(h, "nvmlDeviceGetHandleByIndex_v2");
        name_t gnam = (name_t)DLSYM(h, "nvmlDeviceGetName");
        mem_t gmem = (mem_t)DLSYM(h, "nvmlDeviceGetMemoryInfo");
        if (init && fini && gcnt && ghdl && gnam && gmem) {
            if (init() == 0) {
                unsigned int n = 0, i = 0;
                if (gcnt(&n) == 0) {
                    for (i = 0; i < n; i++) {
                        void *dev = 0;
                        char name[96] = {0};
                        struct mi m = {0};
                        if (ghdl(i, &dev) == 0 && gmem(dev, &m) == 0) {
                            /* name is optional: keep the device even if it cannot be read */
                            if (gnam(dev, name, (unsigned int)sizeof(name)) != 0) { name[0] = 0; }
                            found += push(out, "NVML", name, (uint64_t)m.total, (uint64_t)m.used, (uint64_t)m.free);
                        }
                    }
                }
                fini();
            }
        }
        DLCLOSE(h);
    }
    return found;
}
/* ---------------- AMD (ROCm SMI, Linux) ---------------- */
/*
 * Enumerates AMD GPUs through the ROCm SMI library, loaded at run time.
 * On Windows this is a stub that reports nothing.
 *
 * All six entry points must resolve and rsmi_init(0) must succeed before
 * devices are queried; rsmi_shut_down is called after a successful init.
 * A device is recorded only when its name, total memory and used memory
 * (both queried with memory type 0) are all read successfully; free memory
 * is derived as total - used, clamped at 0.
 *
 * Returns the number of records appended to `out`.
 */
static int probe_rocm(struct gpus *out) {
#if defined(_WIN32)
    /* ROCm SMI is Linux-only */
    (void)out;
    return 0;
#else
    typedef int (*rsmi_init_fn)(uint64_t);
    typedef int (*rsmi_shutdown_fn)(void);
    typedef int (*rsmi_count_fn)(uint32_t*);
    typedef int (*rsmi_name_fn)(uint32_t,char*,size_t);
    typedef int (*rsmi_mem_fn)(uint32_t,uint32_t,uint64_t*);

    static const char *const libs[] = {
        "librocm_smi64.so", "librocm_smi64.so.6", "librocm_smi64.so.5"
    };
    const size_t nlibs = sizeof(libs) / sizeof(libs[0]);
    DL_HANDLE lib = 0;
    size_t li = 0;
    int added = 0;

    /* first candidate that loads wins */
    while (li < nlibs && !lib) {
        lib = DLOPEN(libs[li]);
        li++;
    }
    if (!lib) {
        return 0;
    }

    rsmi_init_fn     do_init  = (rsmi_init_fn)DLSYM(lib, "rsmi_init");
    rsmi_shutdown_fn do_fini  = (rsmi_shutdown_fn)DLSYM(lib, "rsmi_shut_down");
    rsmi_count_fn    do_count = (rsmi_count_fn)DLSYM(lib, "rsmi_num_monitor_devices");
    rsmi_name_fn     do_name  = (rsmi_name_fn)DLSYM(lib, "rsmi_dev_name_get");
    rsmi_mem_fn      do_total = (rsmi_mem_fn)DLSYM(lib, "rsmi_dev_memory_total_get");
    rsmi_mem_fn      do_used  = (rsmi_mem_fn)DLSYM(lib, "rsmi_dev_memory_usage_get");

    if (do_init && do_fini && do_count && do_name && do_total && do_used
        && do_init(0) == 0) {
        uint32_t ndev = 0;
        if (do_count(&ndev) != 0) {
            ndev = 0; /* count unavailable: enumerate nothing */
        }
        for (uint32_t dev = 0; dev < ndev; dev++) {
            char label[64] = {0};
            uint64_t total = 0;
            uint64_t used = 0;
            if (do_name(dev, label, sizeof(label)) != 0) { continue; }
            if (do_total(dev, 0, &total) != 0) { continue; }
            if (do_used(dev, 0, &used) != 0) { continue; }
            uint64_t avail = 0;
            if (total >= used) { avail = total - used; }
            added += push(out, "ROCm", label, total, used, avail);
        }
        do_fini();
    }
    DLCLOSE(lib);
    return added;
#endif
}
/* ---------------- Intel oneAPI Level Zero ---------------- */
/*
 * Enumerates devices through the oneAPI Level Zero loader, loaded at run time.
 *
 * For each driver (up to 8) and each device (up to 32) the device name is
 * read with zeDeviceGetProperties and the total memory is the sum of
 * totalSize over the device's memory properties (up to 8). Level Zero's core
 * API is not asked for usage here, so used and free are recorded as 0.
 *
 * The property structures are declared locally (no Level Zero headers are
 * needed) and must mirror the library's ABI exactly: the driver writes the
 * whole structure, so a shorter or reordered declaration corrupts the stack
 * and reads name/totalSize from the wrong offsets. Each structure's stype is
 * set before the call, as the API requires.
 *
 * Returns the number of records appended to `out`.
 */
static int probe_l0(struct gpus *out) {
#if defined(_WIN32)
    const char *cands[] = { "ze_loader.dll" };
#else
    const char *cands[] = { "libze_loader.so.1", "libze_loader.so" };
#endif
    enum {
        L0_STYPE_DEVICE_PROPERTIES = 0x3,        /* ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES */
        L0_STYPE_DEVICE_MEMORY_PROPERTIES = 0x7, /* ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES */
        L0_MAX_DRIVERS = 8,
        L0_MAX_DEVICES = 32,
        L0_MAX_MEMORIES = 8,
        L0_MAX_NAME = 256                        /* ZE_MAX_DEVICE_NAME */
    };
    DL_HANDLE h = 0;
    size_t k = 0;
    int found = 0;
    for (; k < (sizeof(cands)/sizeof(cands[0])) && !h; k++) { h = DLOPEN(cands[k]); }
    if (h) {
        typedef uint32_t ze_result_t;
        typedef struct _ze_driver_handle_t* ze_driver_handle_t;
        typedef struct _ze_device_handle_t* ze_device_handle_t;
        /* mirrors ze_device_properties_t field-for-field */
        typedef struct {
            uint32_t stype;
            const void *pNext;
            uint32_t type;
            uint32_t vendorId;
            uint32_t deviceId;
            uint32_t flags;
            uint32_t subdeviceId;
            uint32_t coreClockRate;
            uint64_t maxMemAllocSize;
            uint32_t maxHardwareContexts;
            uint32_t maxCommandQueuePriority;
            uint32_t numThreadsPerEU;
            uint32_t physicalEUSimdWidth;
            uint32_t numEUsPerSubslice;
            uint32_t numSubslicesPerSlice;
            uint32_t numSlices;
            uint64_t timerResolution;
            uint32_t timestampValidBits;
            uint32_t kernelTimestampValidBits;
            uint8_t uuid[16];
            char name[L0_MAX_NAME];
        } ze_device_properties_t;
        /* mirrors ze_device_memory_properties_t field-for-field */
        typedef struct {
            uint32_t stype;
            const void *pNext;
            uint32_t flags;
            uint32_t maxClockRate;
            uint32_t maxBusWidth;
            uint64_t totalSize;
            char name[L0_MAX_NAME];
        } ze_device_memory_properties_t;
        typedef ze_result_t (*zeInit_t)(uint32_t);
        typedef ze_result_t (*zeDriverGet_t)(uint32_t*, ze_driver_handle_t*);
        typedef ze_result_t (*zeDeviceGet_t)(ze_driver_handle_t, uint32_t*, ze_device_handle_t*);
        typedef ze_result_t (*zeDeviceGetProperties_t)(ze_device_handle_t, ze_device_properties_t*);
        typedef ze_result_t (*zeDeviceGetMemoryProperties_t)(ze_device_handle_t, uint32_t*, ze_device_memory_properties_t*);
        zeInit_t zeInit = (zeInit_t)DLSYM(h, "zeInit");
        zeDriverGet_t zeDriverGet = (zeDriverGet_t)DLSYM(h, "zeDriverGet");
        zeDeviceGet_t zeDeviceGet = (zeDeviceGet_t)DLSYM(h, "zeDeviceGet");
        zeDeviceGetProperties_t zeDeviceGetProperties = (zeDeviceGetProperties_t)DLSYM(h, "zeDeviceGetProperties");
        zeDeviceGetMemoryProperties_t zeDeviceGetMemoryProperties = (zeDeviceGetMemoryProperties_t)DLSYM(h, "zeDeviceGetMemoryProperties");
        if (zeInit && zeDriverGet && zeDeviceGet && zeDeviceGetProperties && zeDeviceGetMemoryProperties) {
            if (zeInit(0) == 0) {
                ze_driver_handle_t drvs[L0_MAX_DRIVERS] = {0};
                uint32_t nd = L0_MAX_DRIVERS, d = 0;
                if (zeDriverGet(&nd, drvs) == 0) {
                    for (d = 0; d < nd; d++) {
                        uint32_t n = 0;
                        /* first call asks for the count, second fills the (capped) array */
                        if (zeDeviceGet(drvs[d], &n, 0) == 0 && n) {
                            if (n > L0_MAX_DEVICES) { n = L0_MAX_DEVICES; }
                            ze_device_handle_t devs[L0_MAX_DEVICES] = {0};
                            if (zeDeviceGet(drvs[d], &n, devs) == 0) {
                                for (uint32_t i = 0; i < n; i++) {
                                    ze_device_properties_t props = {0};
                                    uint32_t nm = 0;
                                    uint64_t total = 0;
                                    props.stype = L0_STYPE_DEVICE_PROPERTIES;
                                    if (zeDeviceGetProperties(devs[i], &props) == 0) {
                                        if (zeDeviceGetMemoryProperties(devs[i], &nm, 0) == 0 && nm) {
                                            if (nm > L0_MAX_MEMORIES) { nm = L0_MAX_MEMORIES; }
                                            ze_device_memory_properties_t mps[L0_MAX_MEMORIES] = {0};
                                            for (uint32_t j = 0; j < nm; j++) { mps[j].stype = L0_STYPE_DEVICE_MEMORY_PROPERTIES; }
                                            if (zeDeviceGetMemoryProperties(devs[i], &nm, mps) == 0) {
                                                for (uint32_t j = 0; j < nm; j++) { total += mps[j].totalSize; }
                                            }
                                        }
                                        /* push() bounds the copy, so an unterminated name cannot overrun */
                                        found += push(out, "Level0", props.name, total, 0, 0);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        DLCLOSE(h);
    }
    return found;
}
/* ---------------- Windows DXGI (Win10/11) ---------------- */
#if defined(_WIN32)
#include <wchar.h>
#include <dxgi1_4.h>
/*
 * Converts the NUL-terminated wide string `ws` to UTF-8 in `dst`.
 *
 * dst, cap - destination buffer and its size in bytes; nothing is written
 *            when dst is NULL or cap is 0
 * ws       - source string; NULL yields an empty result
 *
 * The result is always NUL-terminated. A string that does not fit is
 * truncated on a UTF-8 character boundary instead of being discarded
 * (WideCharToMultiByte fails outright when the destination is too small,
 * which previously turned long adapter names into empty ones). The
 * conversion goes through a 512-byte scratch buffer; input whose UTF-8 form
 * exceeds that yields an empty string.
 */
static void w2u8(char *dst, size_t cap, const wchar_t *ws) {
    char tmp[512];
    if (!dst || cap == 0) { return; }
    dst[0] = 0;
    if (!ws) { return; }
    int n = WideCharToMultiByte(CP_UTF8, 0, ws, -1, tmp, (int)sizeof(tmp), 0, 0);
    if (n <= 0) { return; }
    size_t len = (size_t)n - 1; /* n counts the terminating NUL */
    if (len >= cap) {
        len = cap - 1;
        /* back up over continuation bytes so no multi-byte sequence is split */
        while (len > 0 && ((unsigned char)tmp[len] & 0xC0) == 0x80) { len--; }
    }
    memcpy(dst, tmp, len);
    dst[len] = 0;
}
static int probe_dxgi(struct gpus *out) {
int found = 0;
DL_HANDLE h = DLOPEN("dxgi.dll");
if (h) {
typedef HRESULT (WINAPI *PFN_CreateDXGIFactory1)(REFIID, void**);
PFN_CreateDXGIFactory1 create = (PFN_CreateDXGIFactory1)DLSYM(h, "CreateDXGIFactory1");
if (create) {
IDXGIFactory1 *f = 0;
if (SUCCEEDED(create(&IID_IDXGIFactory1, (void**)&f)) && f) {
for (UINT i = 0; ; i++) {
IDXGIAdapter1 *a = 0;
if (f->lpVtbl->EnumAdapters1(f, i, &a) == DXGI_ERROR_NOT_FOUND) { break; }
if (a) {
DXGI_ADAPTER_DESC1 d = {0};
char name[64] = {0};
uint64_t total = 0, used = 0, free = 0;
if (SUCCEEDED(a->lpVtbl->GetDesc1(a, &d))) {
w2u8(name, sizeof(name), d.Description);
total = (uint64_t)d.DedicatedVideoMemory;
#if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)
{ /* try IDXGIAdapter3 for live usage */
IDXGIAdapter3 *a3 = 0;
if (SUCCEEDED(a->lpVtbl->QueryInterface(a, &IID_IDXGIAdapter3, (void**)&a3)) && a3) {
DXGI_QUERY_VIDEO_MEMORY_INFO info = {0};
if (SUCCEEDED(a3->lpVtbl->QueryVideoMemoryInfo(a3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info))) {
used = (uint64_t)info.CurrentUsage;
free = (uint64_t)(info.Budget > info.CurrentUsage ? info.Budget - info.CurrentUsage : 0);
}
a3->lpVtbl->Release(a3);
}
}
#endif
found += push(out, "DXGI", name, total, used, free);
}
a->lpVtbl->Release(a);
}
}
f->lpVtbl->Release(f);
}
}
DLCLOSE(h);
}
return found;
}
#else
/* Non-Windows build: there is no DXGI backend, so nothing is ever added. */
static int probe_dxgi(struct gpus *out) {
    (void)out; /* parameter intentionally unused */
    return 0;
}
#endif
/*
 * Runs every backend probe (Metal, NVML, ROCm SMI, Level Zero, DXGI) in that
 * order, appending one record per detected device to `out`.
 *
 * Backends that do not exist on the current platform are stubs that add
 * nothing, so each probe is called unconditionally. The same physical GPU
 * may be reported by more than one backend; no de-duplication happens here.
 *
 * Returns out->count after probing (so entries already present in `out`
 * are included), or 0 when `out` is NULL.
 */
int gpu_probe(struct gpus *out) {
    int total = 0;
    if (out) {
        /* per-probe return values are not needed: the array's count is authoritative */
        (void)probe_metal(out);
        (void)probe_nvml(out);
        (void)probe_rocm(out);
        (void)probe_l0(out);
        (void)probe_dxgi(out);
        total = (int)out->count;
    }
    return total;
}
/* EOF: gpu.c */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment