Last active
February 9, 2022 10:33
-
-
Save chr5tphr/209860ce30768f4f58d676f23630f76c to your computer and use it in GitHub Desktop.
Print CUDA info as JSON using the NVIDIA Management Library (NVML) to avoid parsing nvidia-smi output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <unistd.h> | |
| #include <sys/stat.h> | |
| #include <nvml.h> | |
| #include <time.h> | |
| #define MAXINFO 32 | |
| #define MAXCBUF 64 | |
| // compile with: | |
| // gcc cudamemstat.c -I /usr/local/cuda/include -lnvidia-ml -L /usr/lib/nvidia-384 -o cudamemstat | |
| int main(int argc, char* argv[]){ | |
| nvmlDevice_t handle; | |
| nvmlReturn_t retval; | |
| nvmlMemory_t meminfo; | |
| nvmlUtilization_t utilinfo; | |
| nvmlProcessInfo_t pinfo[MAXINFO]; | |
| struct stat sbuf; | |
| char cbuf[MAXCBUF]; | |
| retval = nvmlInit(); | |
| if (retval != NVML_SUCCESS) { | |
| fprintf(stderr, "%s\n",nvmlErrorString(retval)); | |
| return 1; | |
| } | |
| int version; | |
| retval = nvmlSystemGetCudaDriverVersion(&version); | |
| if (retval != NVML_SUCCESS) { | |
| fprintf(stderr, "%s\n",nvmlErrorString(retval)); | |
| return 1; | |
| } | |
| unsigned int numdev = 0; | |
| retval = nvmlDeviceGetCount(&numdev); | |
| if (retval != NVML_SUCCESS) { | |
| fprintf(stderr, "%s\n",nvmlErrorString(retval)); | |
| return 1; | |
| } | |
| gethostname(cbuf, MAXCBUF); | |
| // WARNING: hostname is not escaped here | |
| printf("{\"hostname\": \"%s\"", cbuf); | |
| printf(", \"cuda_version\": %d", version); | |
| printf(", \"time\": %lu", time(0)); | |
| printf(", \"gpu\": ["); | |
| unsigned int i = 0; | |
| for (i=0;i<numdev;i++){ | |
| if (i > 0) { | |
| printf(", "); | |
| } | |
| printf("{\"device\": %u", i); | |
| nvmlDeviceGetHandleByIndex(i,&handle); | |
| // device memory | |
| retval = nvmlDeviceGetMemoryInfo(handle,&meminfo); | |
| if (retval == NVML_SUCCESS) { | |
| printf(", \"memused\": %llu, \"memtotal\": %llu", meminfo.used, meminfo.total); | |
| } | |
| else { | |
| fprintf(stderr, "Error in device memory: %s\n", nvmlErrorString(retval)); | |
| } | |
| // device utilization | |
| retval = nvmlDeviceGetUtilizationRates(handle, &utilinfo); | |
| if (retval == NVML_SUCCESS) { | |
| printf(", \"gpuutil\": %u, \"memutil\": %u", utilinfo.gpu, utilinfo.memory); | |
| } | |
| else { | |
| fprintf(stderr, "Error in device util: %s\n", nvmlErrorString(retval)); | |
| } | |
| // processes on device | |
| printf(", \"proc\": ["); | |
| unsigned int infoCount = MAXINFO; | |
| retval = nvmlDeviceGetComputeRunningProcesses(handle, &infoCount, pinfo); | |
| if (retval == NVML_SUCCESS) { | |
| unsigned int j = 0; | |
| for (j=0;j<infoCount;j++) { | |
| if (j > 0) { | |
| printf(", "); | |
| } | |
| printf("{"); | |
| // pid | |
| printf("\"pid\": %u", pinfo[j].pid); | |
| // uid | |
| snprintf(cbuf, MAXCBUF, "/proc/%u", pinfo[j].pid); | |
| if (!stat(cbuf, &sbuf)) { | |
| printf(", \"uid\": %u", sbuf.st_uid); | |
| } | |
| // mem used | |
| printf(", \"memused\": %llu", pinfo[j].usedGpuMemory); | |
| printf("}"); | |
| } | |
| } | |
| else { | |
| fprintf(stderr, "Error in process info: %s, count %u\n", nvmlErrorString(retval), infoCount); | |
| } | |
| printf("]"); | |
| printf("}"); | |
| } | |
| printf("]"); | |
| printf("}\n"); | |
| nvmlShutdown(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment