Created
November 29, 2025 17:24
-
-
Save skypenguins/ddf06ae8c590e3b7792b95254cd97946 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "4ae13933-1bb6-4ac9-8606-cac172afdec4", | |
| "metadata": {}, | |
| "source": [ | |
| "# Running Julia + Flux.jl on GB10" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Julia Version 1.11.7\n", | |
| "Commit f2b3dbda30a (2025-09-08 12:10 UTC)\n", | |
| "Build Info:\n", | |
| " Official https://julialang.org/ release\n", | |
| "Platform Info:\n", | |
| " OS: Linux (aarch64-linux-gnu)\n", | |
| " CPU: 20 × unknown\n", | |
| " WORD_SIZE: 64\n", | |
| " LLVM: libLLVM-16.0.6 (ORCJIT, generic)\n", | |
| "Threads: 1 default, 0 interactive, 1 GC (on 20 virtual cores)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "versioninfo()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "6a59061b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "XXX XXX XX XX:XX:XX XXXX \n", | |
| "+-----------------------------------------------------------------------------------------+\n", | |
| "| NVIDIA-SMI 580.95.05 Driver Version: 580.95.05 CUDA Version: 13.0 |\n", | |
| "+-----------------------------------------+------------------------+----------------------+\n", | |
| "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
| "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", | |
| "| | | MIG M. |\n", | |
| "|=========================================+========================+======================|\n", | |
| "| 0 NVIDIA GB10 On | XXXXXXXX:XX:XX.X On | N/A |\n", | |
| "| N/A 41C P0 13W / N/A | Not Supported | 2% Default |\n", | |
| "| | | N/A |\n", | |
| "+-----------------------------------------+------------------------+----------------------+\n", | |
| "\n", | |
| "+-----------------------------------------------------------------------------------------+\n", | |
| "| Processes: |\n", | |
| "| GPU GI CI PID Type Process name GPU Memory |\n", | |
| "| ID ID Usage |\n", | |
| "|=========================================================================================|\n", | |
| "| 0 N/A N/A XXXXX G /usr/lib/xorg/Xorg 301MiB |\n", | |
| "| 0 N/A N/A XXXXX G /usr/bin/gnome-shell 207MiB |\n", | |
| "| 0 N/A N/A XXXXX G .../7421/usr/lib/firefox/firefox 554MiB |\n", | |
| "| 0 N/A N/A XXXXX G /usr/bin/nautilus 37MiB |\n", | |
| "+-----------------------------------------------------------------------------------------+\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Process(`\u001b[4mnvidia-smi\u001b[24m`, ProcessExited(0))" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "run(`nvidia-smi`)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "2258be8a", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " total used free shared buff/cache available\n", | |
| "Mem: 119Gi 8.0Gi 102Gi 129Mi 9.9Gi 111Gi\n", | |
| "Swap: 15Gi 0B 15Gi\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Process(`\u001b[4mfree\u001b[24m \u001b[4m-h\u001b[24m`, ProcessExited(0))" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "run(`free -h`)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "44cb78c3", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "using CUDA\n", | |
| "using Flux\n", | |
| "using Flux: onehotbatch, onecold\n", | |
| "using Flux.Losses: logitcrossentropy\n", | |
| "using MLDatasets\n", | |
| "using MLUtils: DataLoader\n", | |
| "using Optimisers\n", | |
| "using Statistics\n", | |
| "using ProgressMeter" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "fcfdf7af", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CUDA toolchain: \n", | |
| "- runtime 13.0, artifact installation\n", | |
| "- driver 580.95.5 for 13.0\n", | |
| "- compiler 13.0\n", | |
| "\n", | |
| "CUDA libraries: \n", | |
| "- CUBLAS: 13.1.0\n", | |
| "- CURAND: 10.4.0\n", | |
| "- CUFFT: 12.0.0\n", | |
| "- CUSOLVER: 12.0.4\n", | |
| "- CUSPARSE: 12.6.3\n", | |
| "- CUPTI: 2025.3.1 (API 13.0.1)\n", | |
| "- NVML: 13.0.0+580.95.5\n", | |
| "\n", | |
| "Julia packages: \n", | |
| "- CUDA: 5.9.5\n", | |
| "- CUDA_Driver_jll: 13.0.2+0\n", | |
| "- CUDA_Compiler_jll: 0.3.0+0\n", | |
| "- CUDA_Runtime_jll: 0.19.2+0\n", | |
| "\n", | |
| "Toolchain:\n", | |
| "- Julia: 1.11.7\n", | |
| "- LLVM: 16.0.6\n", | |
| "\n", | |
| "1 device:\n", | |
| " 0: NVIDIA GB10 (sm_121, 101.059 GiB / 119.697 GiB available)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "CUDA.versioninfo()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "13306bcf", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "getdata (generic function with 1 method)" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "function getdata(args, device)\n", | |
| " ENV[\"DATADEPS_ALWAYS_ACCEPT\"] = \"true\"\n", | |
| "\n", | |
| " # Loading Dataset\n", | |
| " train_data = MLDatasets.MNIST(split=:train)\n", | |
| " test_data = MLDatasets.MNIST(split=:test)\n", | |
| " \n", | |
| " # 前処理をまとめた関数\n", | |
| " function preprocess(data)\n", | |
| " x = Float32.(data.features)\n", | |
| " x = Flux.flatten(x)\n", | |
| " y = onehotbatch(data.targets, 0:9)\n", | |
| " return (x, y)\n", | |
| " end\n", | |
| " \n", | |
| " xtrain, ytrain = preprocess(train_data)\n", | |
| " xtest, ytest = preprocess(test_data)\n", | |
| "\n", | |
| " # Create DataLoaders\n", | |
| " train_loader = DataLoader((xtrain, ytrain) |> device, \n", | |
| " batchsize=args.batchsize, \n", | |
| " shuffle=true)\n", | |
| " test_loader = DataLoader((xtest, ytest) |> device, \n", | |
| " batchsize=args.batchsize)\n", | |
| "\n", | |
| " return train_loader, test_loader\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "58b5c513", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "build_model (generic function with 1 method)" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "function build_model(; imgsize=(28, 28, 1), nclasses=10)\n", | |
| " input_size = prod(imgsize)\n", | |
| " \n", | |
| " return Chain(\n", | |
| " Dense(input_size => 32, relu),\n", | |
| " Dense(32 => nclasses)\n", | |
| " )\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "be6873e8", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "loss_and_accuracy (generic function with 1 method)" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "function loss_and_accuracy(data_loader, model, device)\n", | |
| " acc = 0\n", | |
| " ls = 0.0f0\n", | |
| " num = 0\n", | |
| " for (x, y) in data_loader\n", | |
| " x, y = device(x), device(y)\n", | |
| " ŷ = model(x)\n", | |
| " ls += logitcrossentropy(ŷ, y, agg=sum)\n", | |
| " acc += sum(onecold(ŷ) .== onecold(y))\n", | |
| " num += size(x)[end]\n", | |
| " end\n", | |
| " return ls / num, acc / num\n", | |
| "end\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "fdba656b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Args" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "@kwdef mutable struct Args\n", | |
| " η::Float64 = 3e-4 # learning rate\n", | |
| " batchsize::Int = 256 # batch size\n", | |
| " epochs::Int = 10 # number of epochs\n", | |
| " use_cuda::Bool = true # use gpu (if cuda available)\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "73a68083", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "train (generic function with 1 method)" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "function train(; kws...)\n", | |
| " args = Args(; kws...)\n", | |
| "\n", | |
| " # Device setup\n", | |
| " device = if CUDA.functional() && args.use_cuda\n", | |
| " @info \"Training on CUDA GPU\"\n", | |
| " CUDA.allowscalar(false)\n", | |
| " gpu\n", | |
| " else\n", | |
| " @info \"Training on CPU\"\n", | |
| " cpu\n", | |
| " end\n", | |
| "\n", | |
| " # Data\n", | |
| " train_loader, test_loader = getdata(args, device)\n", | |
| "\n", | |
| " # Model & Optimizer\n", | |
| " model = build_model() |> device\n", | |
| " opt_state = Optimisers.setup(Adam(args.η), model)\n", | |
| " \n", | |
| " # Training loop\n", | |
| " @showprogress for epoch in 1:args.epochs\n", | |
| " for (x, y) in train_loader\n", | |
| " x, y = device(x), device(y)\n", | |
| " \n", | |
| " grads = Flux.gradient(model) do m\n", | |
| " logitcrossentropy(m(x), y)\n", | |
| " end\n", | |
| " \n", | |
| " opt_state, model = Optimisers.update!(opt_state, model, grads[1])\n", | |
| " end\n", | |
| " \n", | |
| " # Evaluation\n", | |
| " train_loss, train_acc = loss_and_accuracy(train_loader, model, device)\n", | |
| " test_loss, test_acc = loss_and_accuracy(test_loader, model, device)\n", | |
| " \n", | |
| " @info \"Epoch $epoch\" train_loss train_acc test_loss test_acc\n", | |
| " end\n", | |
| " \n", | |
| " return model\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "4c2d78da", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[36m\u001b[1m[ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mTraining on CUDA GPU\n", | |
| "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 1\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.55677825f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.8614333333333334\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.53808856f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.8693\n", | |
| "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 2\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.38993272f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.8958166666666667\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.37647548f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.8987\n", | |
| "\u001b[32mProgress: 20%|████████▎ | ETA: 0:01:39\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 3\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.33460784f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.90815\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.32494542f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9103\n", | |
| "\u001b[32mProgress: 30%|████████████▎ | ETA: 0:00:59\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 4\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.30106047f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.9171\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.29319176f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9185\n", | |
| "\u001b[32mProgress: 40%|████████████████▍ | ETA: 0:00:39\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 5\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.279726f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.92155\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.27430114f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9238\n", | |
| "\u001b[32mProgress: 50%|████████████████████▌ | ETA: 0:00:26\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 6\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.26288173f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.92675\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.25974548f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9269\n", | |
| "\u001b[32mProgress: 60%|████████████████████████▋ | ETA: 0:00:18\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 7\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.2496861f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.9304166666666667\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.2493859f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9296\n", | |
| "\u001b[32mProgress: 70%|████████████████████████████▊ | ETA: 0:00:12\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 8\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.23734426f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.9341\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.23815677f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9314\n", | |
| "\u001b[32mProgress: 80%|████████████████████████████████▊ | ETA: 0:00:07\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 9\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.2268096f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.9369666666666666\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.22869739f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9344\n", | |
| "\u001b[32mProgress: 90%|████████████████████████████████████▉ | ETA: 0:00:03\u001b[39m\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mEpoch 10\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_loss = 0.21937917f0\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m train_acc = 0.93865\n", | |
| "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m test_loss = 0.22186221f0\n", | |
| "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m test_acc = 0.9354\n", | |
| "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:27\u001b[39m\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 30.506029 seconds (90.89 M allocations: 4.707 GiB, 2.40% gc time, 22 lock conflicts, 79.89% compilation time: 1% of which was recompilation)\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Chain(\n", | |
| " Dense(784 => 32, relu), \u001b[90m# 25_120 parameters\u001b[39m\n", | |
| " Dense(32 => 10), \u001b[90m# 330 parameters\u001b[39m\n", | |
| ") \u001b[90m # Total: 4 arrays, \u001b[39m25_450 parameters, 664 bytes." | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Run training \n", | |
| "@time model = train()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Julia 1.11", | |
| "language": "julia", | |
| "name": "julia-1.11" | |
| }, | |
| "language_info": { | |
| "file_extension": ".jl", | |
| "mimetype": "application/julia", | |
| "name": "julia", | |
| "version": "1.11.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment