Last active
September 14, 2025 19:18
-
-
Save leok7v/bcea3593bfab962720d28318894e9fbe to your computer and use it in GitHub Desktop.
Metal/NVML/ROCm/Level Zero/DXGI GPU count and VRAM detection (gpu.h/gpu.c)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* File: gpu.h */ | |
| #pragma once | |
| #ifndef GPU_H | |
| #define GPU_H | |
| #ifdef __cplusplus | |
| extern "C" { | |
| #endif | |
| #include "etc.h" | |
/* One detected GPU, as reported by a single back-end API. */
struct gpu {
    char     name[64];   /* device name, NUL-terminated; gpu.c truncates longer names */
    uint64_t vram_total; /* total device memory as reported by the API (presumably bytes) */
    uint64_t vram_used;  /* memory currently in use; 0 when the back-end gives no figure */
    uint64_t vram_free;  /* memory still available; 0 when the back-end gives no figure */
    char     api[16];    /* back-end tag: "Metal", "NVML", "ROCm", "Level0" or "DXGI" */
};

/*
 * Collection type for probe results. `array_of` comes from etc.h;
 * gpu.c reads a `count` member from it and appends with array_push().
 */
struct gpus array_of(struct gpu);

/*
 * Runs every back-end probe and appends one entry per detected device
 * to `out`. Returns out->count as it stands after probing.
 */
int gpu_probe(struct gpus * out);
| #ifdef __cplusplus | |
| } | |
| #endif | |
| #endif // GPU_H | |
| /* EOF: gpu.h */ | |
| /* File: gpu.c */ | |
| #include "gpu.h" | |
| #define _POSIX_C_SOURCE 200809L | |
| #include <string.h> | |
| #include <stdint.h> | |
| #if defined(_WIN32) | |
| #include <windows.h> | |
| #define DL_HANDLE HMODULE | |
| #define DLOPEN(n) LoadLibraryA(n) | |
| #define DLSYM(h,s) (void*)GetProcAddress(h,s) | |
| #define DLCLOSE(h) do { if (h) FreeLibrary(h); } while (0) | |
| #else | |
| #include <dlfcn.h> | |
| #define DL_HANDLE void* | |
| #define DLOPEN(n) dlopen(n, RTLD_LAZY) | |
| #define DLSYM(h,s) dlsym(h, s) | |
| #define DLCLOSE(h) do { if (h) dlclose(h); } while (0) | |
| #endif | |
| static int push(struct gpus *out, const char *api, const char *name, | |
| uint64_t total, uint64_t used, uint64_t free) { | |
| struct gpu g = {0}; | |
| strncpy(g.api, api, sizeof(g.api) - 1); | |
| strncpy(g.name, name ? name : "", sizeof(g.name) - 1); | |
| g.vram_total = total; | |
| g.vram_used = used; | |
| g.vram_free = free; | |
| return array_push(out, &g, 1) ? 1 : 0; | |
| } | |
| /* ---------------- Metal (Apple) ---------------- */ | |
| #if defined(__APPLE__) | |
| #include <objc/objc.h> | |
| #include <objc/runtime.h> | |
| #include <objc/message.h> | |
| static uint64_t metal_u64(id obj, const char *selname) { | |
| uint64_t v = 0; | |
| SEL s = sel_registerName(selname); | |
| if (obj && s) { | |
| IMP imp = class_getMethodImplementation(object_getClass(obj), s); | |
| if (imp) { typedef uint64_t (*F)(id,SEL); v = ((F)imp)(obj,s); } | |
| } | |
| return v; | |
| } | |
| static const char* metal_s(id obj, const char *selname) { | |
| const char *r = ""; | |
| SEL s = sel_registerName(selname); | |
| if (obj && s) { | |
| IMP imp = class_getMethodImplementation(object_getClass(obj), s); | |
| if (imp) { | |
| typedef id (*F)(id,SEL); | |
| id str = ((F)imp)(obj,s); | |
| if (str) { | |
| SEL u8 = sel_registerName("UTF8String"); | |
| IMP i2 = class_getMethodImplementation(object_getClass(str), u8); | |
| if (i2) { typedef const char* (*F2)(id,SEL); r = ((F2)i2)(str,u8); } | |
| } | |
| } | |
| } | |
| return r ? r : ""; | |
| } | |
| static int probe_metal(struct gpus *out) { | |
| DL_HANDLE h = DLOPEN("/System/Library/Frameworks/Metal.framework/Metal"); | |
| int found = 0; | |
| if (h) { | |
| id (*CopyAll)(void) = 0; | |
| id (*CreateDef)(void) = 0; | |
| *(void**)(&CopyAll) = DLSYM(h, "MTLCopyAllDevices"); | |
| *(void**)(&CreateDef) = DLSYM(h, "MTLCreateSystemDefaultDevice"); | |
| if (CopyAll) { | |
| id ds = CopyAll(); | |
| if (ds) { | |
| SEL sc = sel_registerName("count"); | |
| SEL so = sel_registerName("objectAtIndex:"); | |
| IMP ic = class_getMethodImplementation(object_getClass(ds), sc); | |
| IMP io = class_getMethodImplementation(object_getClass(ds), so); | |
| if (ic && io) { | |
| typedef unsigned long (*FC)(id,SEL); | |
| typedef id (*FO)(id,SEL,unsigned long); | |
| unsigned long n = ((FC)ic)(ds, sc); | |
| for (unsigned long i = 0; i < n; i++) { | |
| id d = ((FO)io)(ds, so, i); | |
| uint64_t tot = metal_u64(d, "recommendedMaxWorkingSetSize"); | |
| uint64_t usd = metal_u64(d, "currentAllocatedSize"); | |
| uint64_t fre = tot > usd ? tot - usd : 0; | |
| found += push(out, "Metal", metal_s(d, "name"), tot, usd, fre); | |
| } | |
| } | |
| } | |
| } else if (CreateDef) { | |
| id d = CreateDef(); | |
| if (d) { | |
| uint64_t tot = metal_u64(d, "recommendedMaxWorkingSetSize"); | |
| uint64_t usd = metal_u64(d, "currentAllocatedSize"); | |
| uint64_t fre = tot > usd ? tot - usd : 0; | |
| found += push(out, "Metal", metal_s(d, "name"), tot, usd, fre); | |
| } | |
| } | |
| DLCLOSE(h); | |
| } | |
| return found; | |
| } | |
| #else | |
| static int probe_metal(struct gpus *out) { (void)out; return 0; } | |
| #endif | |
| /* ---------------- NVIDIA (NVML) ---------------- */ | |
| static int probe_nvml(struct gpus *out) { | |
| #if defined(_WIN32) | |
| const char *cands[] = { "nvml.dll" }; | |
| #else | |
| const char *cands[] = { "libnvidia-ml.so.1", "libnvidia-ml.so" }; | |
| #endif | |
| DL_HANDLE h = 0; | |
| size_t k = 0; | |
| int found = 0; | |
| for (; k < (sizeof(cands)/sizeof(cands[0])) && !h; k++) { h = DLOPEN(cands[k]); } | |
| if (h) { | |
| typedef int (*init_t)(void); | |
| typedef int (*fini_t)(void); | |
| typedef int (*cnt_t)(unsigned int*); | |
| typedef int (*hdl_t)(unsigned int, void**); | |
| typedef int (*name_t)(void*, char*, unsigned int); | |
| struct mi { unsigned long long total, free, used; }; | |
| typedef int (*mem_t)(void*, struct mi*); | |
| init_t init = (init_t)DLSYM(h, "nvmlInit_v2"); | |
| fini_t fini = (fini_t)DLSYM(h, "nvmlShutdown"); | |
| cnt_t gcnt = (cnt_t)DLSYM(h, "nvmlDeviceGetCount_v2"); | |
| hdl_t ghdl = (hdl_t)DLSYM(h, "nvmlDeviceGetHandleByIndex_v2"); | |
| name_t gnam = (name_t)DLSYM(h, "nvmlDeviceGetName"); | |
| mem_t gmem = (mem_t)DLSYM(h, "nvmlDeviceGetMemoryInfo"); | |
| if (init && fini && gcnt && ghdl && gnam && gmem) { | |
| if (init() == 0) { | |
| unsigned int n = 0, i = 0; | |
| if (gcnt(&n) == 0) { | |
| for (i = 0; i < n; i++) { | |
| void *dev = 0; | |
| char name[64] = {0}; | |
| struct mi m = {0}; | |
| if (ghdl(i, &dev) == 0 && gnam(dev, name, sizeof(name)) == 0 && gmem(dev, &m) == 0) { | |
| found += push(out, "NVML", name, (uint64_t)m.total, (uint64_t)m.used, (uint64_t)m.free); | |
| } | |
| } | |
| } | |
| fini(); | |
| } | |
| } | |
| DLCLOSE(h); | |
| } | |
| return found; | |
| } | |
| /* ---------------- AMD (ROCm SMI, Linux) ---------------- */ | |
| static int probe_rocm(struct gpus *out) { | |
| #if defined(_WIN32) | |
| (void)out; return 0; /* ROCm SMI is Linux-only */ | |
| #else | |
| const char *cands[] = { "librocm_smi64.so", "librocm_smi64.so.6", "librocm_smi64.so.5" }; | |
| DL_HANDLE h = 0; | |
| size_t k = 0; | |
| int found = 0; | |
| for (; k < (sizeof(cands)/sizeof(cands[0])) && !h; k++) { h = DLOPEN(cands[k]); } | |
| if (h) { | |
| typedef int (*init_t)(uint64_t); | |
| typedef int (*fini_t)(void); | |
| typedef int (*num_t)(uint32_t*); | |
| typedef int (*nam_t)(uint32_t,char*,size_t); | |
| typedef int (*tot_t)(uint32_t,uint32_t,uint64_t*); | |
| typedef int (*use_t)(uint32_t,uint32_t,uint64_t*); | |
| init_t init = (init_t)DLSYM(h, "rsmi_init"); | |
| fini_t fini = (fini_t)DLSYM(h, "rsmi_shut_down"); | |
| num_t num = (num_t)DLSYM(h, "rsmi_num_monitor_devices"); | |
| nam_t name = (nam_t)DLSYM(h, "rsmi_dev_name_get"); | |
| tot_t tot = (tot_t)DLSYM(h, "rsmi_dev_memory_total_get"); | |
| use_t use = (use_t)DLSYM(h, "rsmi_dev_memory_usage_get"); | |
| if (init && fini && num && name && tot && use) { | |
| if (init(0) == 0) { | |
| uint32_t n = 0, i = 0; | |
| if (num(&n) == 0) { | |
| for (i = 0; i < n; i++) { | |
| char nm[64] = {0}; | |
| uint64_t t = 0, u = 0; | |
| if (name(i, nm, sizeof(nm)) == 0 && tot(i, 0, &t) == 0 && use(i, 0, &u) == 0) { | |
| uint64_t f = t >= u ? t - u : 0; | |
| found += push(out, "ROCm", nm, t, u, f); | |
| } | |
| } | |
| } | |
| fini(); | |
| } | |
| } | |
| DLCLOSE(h); | |
| } | |
| return found; | |
| #endif | |
| } | |
| /* ---------------- Intel oneAPI Level Zero ---------------- */ | |
| static int probe_l0(struct gpus *out) { | |
| #if defined(_WIN32) | |
| const char *cands[] = { "ze_loader.dll" }; | |
| #else | |
| const char *cands[] = { "libze_loader.so.1", "libze_loader.so" }; | |
| #endif | |
| DL_HANDLE h = 0; | |
| size_t k = 0; | |
| int found = 0; | |
| for (; k < (sizeof(cands)/sizeof(cands[0])) && !h; k++) { h = DLOPEN(cands[k]); } | |
| if (h) { | |
| typedef uint32_t ze_result_t; | |
| typedef struct _ze_driver_handle_t* ze_driver_handle_t; | |
| typedef struct _ze_device_handle_t* ze_device_handle_t; | |
| typedef ze_result_t (*zeInit_t)(uint32_t); | |
| typedef ze_result_t (*zeDriverGet_t)(uint32_t*, ze_driver_handle_t*); | |
| typedef ze_result_t (*zeDeviceGet_t)(ze_driver_handle_t, uint32_t*, ze_device_handle_t*); | |
| typedef struct { uint32_t type; const void* pNext; } ze_base_desc_t; | |
| typedef struct { ze_base_desc_t stype; char name[256]; } ze_device_properties_t; | |
| typedef ze_result_t (*zeDeviceGetProperties_t)(ze_device_handle_t, ze_device_properties_t*); | |
| typedef ze_result_t (*zeDeviceGetMemoryProperties_t)(ze_device_handle_t, uint32_t*, void*); | |
| zeInit_t zeInit = (zeInit_t)DLSYM(h, "zeInit"); | |
| zeDriverGet_t zeDriverGet = (zeDriverGet_t)DLSYM(h, "zeDriverGet"); | |
| zeDeviceGet_t zeDeviceGet = (zeDeviceGet_t)DLSYM(h, "zeDeviceGet"); | |
| zeDeviceGetProperties_t zeDeviceGetProperties = (zeDeviceGetProperties_t)DLSYM(h, "zeDeviceGetProperties"); | |
| zeDeviceGetMemoryProperties_t zeDeviceGetMemoryProperties = (zeDeviceGetMemoryProperties_t)DLSYM(h, "zeDeviceGetMemoryProperties"); | |
| if (zeInit && zeDriverGet && zeDeviceGet && zeDeviceGetProperties && zeDeviceGetMemoryProperties) { | |
| if (zeInit(0) == 0) { | |
| ze_driver_handle_t drvs[8] = {0}; | |
| uint32_t nd = 8, d = 0; | |
| if (zeDriverGet(&nd, drvs) == 0) { | |
| for (d = 0; d < nd; d++) { | |
| uint32_t n = 0; | |
| if (zeDeviceGet(drvs[d], &n, 0) == 0 && n) { | |
| if (n > 32) { n = 32; } | |
| ze_device_handle_t devs[32] = {0}; | |
| if (zeDeviceGet(drvs[d], &n, devs) == 0) { | |
| for (uint32_t i = 0; i < n; i++) { | |
| ze_device_properties_t props = {0}; | |
| uint32_t nm = 0; | |
| uint64_t total = 0; | |
| if (zeDeviceGetProperties(devs[i], &props) == 0) { | |
| if (zeDeviceGetMemoryProperties(devs[i], &nm, 0) == 0 && nm) { | |
| if (nm > 8) { nm = 8; } | |
| struct { ze_base_desc_t s; uint64_t totalSize; uint32_t flags; char name[256]; } mps[8] = {0}; | |
| if (zeDeviceGetMemoryProperties(devs[i], &nm, mps) == 0) { | |
| for (uint32_t j = 0; j < nm; j++) { total += mps[j].totalSize; } | |
| } | |
| } | |
| found += push(out, "Level0", props.name, total, 0, 0); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| DLCLOSE(h); | |
| } | |
| return found; | |
| } | |
| /* ---------------- Windows DXGI (Win10/11) ---------------- */ | |
| #if defined(_WIN32) | |
| #include <wchar.h> | |
| #include <dxgi1_4.h> | |
| static void w2u8(char *dst, size_t cap, const wchar_t *ws) { | |
| int n = WideCharToMultiByte(CP_UTF8, 0, ws, -1, dst, (int)cap, 0, 0); | |
| if (n == 0 && cap) { dst[0] = 0; } | |
| } | |
| static int probe_dxgi(struct gpus *out) { | |
| int found = 0; | |
| DL_HANDLE h = DLOPEN("dxgi.dll"); | |
| if (h) { | |
| typedef HRESULT (WINAPI *PFN_CreateDXGIFactory1)(REFIID, void**); | |
| PFN_CreateDXGIFactory1 create = (PFN_CreateDXGIFactory1)DLSYM(h, "CreateDXGIFactory1"); | |
| if (create) { | |
| IDXGIFactory1 *f = 0; | |
| if (SUCCEEDED(create(&IID_IDXGIFactory1, (void**)&f)) && f) { | |
| for (UINT i = 0; ; i++) { | |
| IDXGIAdapter1 *a = 0; | |
| if (f->lpVtbl->EnumAdapters1(f, i, &a) == DXGI_ERROR_NOT_FOUND) { break; } | |
| if (a) { | |
| DXGI_ADAPTER_DESC1 d = {0}; | |
| char name[64] = {0}; | |
| uint64_t total = 0, used = 0, free = 0; | |
| if (SUCCEEDED(a->lpVtbl->GetDesc1(a, &d))) { | |
| w2u8(name, sizeof(name), d.Description); | |
| total = (uint64_t)d.DedicatedVideoMemory; | |
| #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) | |
| { /* try IDXGIAdapter3 for live usage */ | |
| IDXGIAdapter3 *a3 = 0; | |
| if (SUCCEEDED(a->lpVtbl->QueryInterface(a, &IID_IDXGIAdapter3, (void**)&a3)) && a3) { | |
| DXGI_QUERY_VIDEO_MEMORY_INFO info = {0}; | |
| if (SUCCEEDED(a3->lpVtbl->QueryVideoMemoryInfo(a3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info))) { | |
| used = (uint64_t)info.CurrentUsage; | |
| free = (uint64_t)(info.Budget > info.CurrentUsage ? info.Budget - info.CurrentUsage : 0); | |
| } | |
| a3->lpVtbl->Release(a3); | |
| } | |
| } | |
| #endif | |
| found += push(out, "DXGI", name, total, used, free); | |
| } | |
| a->lpVtbl->Release(a); | |
| } | |
| } | |
| f->lpVtbl->Release(f); | |
| } | |
| } | |
| DLCLOSE(h); | |
| } | |
| return found; | |
| } | |
| #else | |
| static int probe_dxgi(struct gpus *out) { (void)out; return 0; } | |
| #endif | |
| int gpu_probe(struct gpus *out) { | |
| int n = 0; | |
| #if defined(__APPLE__) | |
| n += probe_metal(out); | |
| #endif | |
| n += probe_nvml(out); | |
| n += probe_rocm(out); | |
| n += probe_l0(out); | |
| n += probe_dxgi(out); | |
| return (int)out->count; | |
| } | |
| /* EOF: gpu.c */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment