Created
January 27, 2022 09:48
-
-
Save carstenbauer/9a2f7707350902cb969d82f60c0f1cba to your computer and use it in GitHub Desktop.
Attempt to measure core-to-core latency in Julia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using ThreadPinning | |
| using UnicodePlots | |
| using Base.Threads: @threads, nthreads | |
| # copied from ThreadPools.jl | |
"""
    @tspawnat thrdid expr

Wrap `expr` in a `Task`, assign that task to Julia thread `thrdid` (1-based),
schedule it, and evaluate to the task. When used inside an `@sync` block the
task is also registered with that block.

Throws an `AssertionError` if `thrdid` is outside `1:Threads.nthreads()`.

Adapted from ThreadPools.jl.
"""
macro tspawnat(thrdid, expr)
    # NOTE: this call mutates `expr` (it lifts `\$`-interpolated values into
    # `let` bindings), so it has to run before `expr` is wrapped into a closure.
    lifted = Base._lift_one_interp!(expr)
    closure = esc(:(() -> ($expr)))
    syncvar = esc(Base.sync_varname)
    target = esc(thrdid)
    return quote
        ($target < 1 || $target > Threads.nthreads()) &&
            throw(AssertionError("@tspawnat thread assignment ($($target)) must be between 1 and Threads.nthreads() (1:$(Threads.nthreads()))"))
        let $(lifted...)
            local t = Task($closure)
            t.sticky = false
            # The runtime call takes a 0-based thread id.
            ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, $target - 1)
            # Only register with an enclosing `@sync` when its variable is in scope.
            $(Expr(:islocal, syncvar)) && put!($syncvar, t)
            schedule(t)
            t
        end
    end
end
# Integer type used to encode the handshake state shared by the two threads.
const State = Int

# Values the shared state can take during a ping-pong benchmark.
const Preparing = State(0)  # initial value of a fresh `Sync`
const Ready = State(1)      # helper task is set up and waiting
const Ping = State(2)       # written by the measuring thread
const Pong = State(3)       # written by the helper task in reply to `Ping`
const Finish = State(4)     # tells the helper task to leave its loop

# Synchronisation object: a single atomic state word, `Preparing` by default.
Base.@kwdef struct Sync
    state::Threads.Atomic{State} = Threads.Atomic{State}(Preparing)
end
"""
    state(S::Sync)

Atomically load and return the current state value stored in `S`.
"""
function state(S::Sync)
    return getindex(S.state)
end
"""
    wait_until(S::Sync, expected_state::State)

Busy-wait (without yielding) until the state of `S` equals `expected_state`.
Returns `nothing`.
"""
function wait_until(S::Sync, expected_state::State)
    while true
        state(S) == expected_state && break
    end
    return nothing
end
"""
    set(S::Sync, state::State)

Atomically store `state` into `S`. Returns `nothing`.
"""
function set(S::Sync, state::State)
    setindex!(S.state, state)
    return nothing
end
"""
    wait_as_long_as(S::Sync, wait_state::State)

Busy-wait (without yielding) while the state of `S` equals `wait_state`, then
return the first state value observed that differs from it.
"""
function wait_as_long_as(S::Sync, wait_state::State)
    while true
        observed = state(S)
        observed == wait_state || return observed
    end
end
"""
    _run_latency_bench(cpu1, cpu2; nsamples = 100, mode = :min) -> Float64

Measure the ping-pong round-trip time (in nanoseconds, as reported by
`time_ns`) between the calling thread, pinned to `cpu1`, and a helper task
spawned on Julia thread 2, pinned to `cpu2`. The two sides communicate through
a shared atomic state (`Sync`).

# Keywords
- `nsamples`: number of round trips to time.
- `mode`: `:min`/`:minimum` returns the smallest sample, `:avg` the mean.

# Returns
The reduced round-trip time as a `Float64`; `0.0` when `cpu1 == cpu2`.

# Throws
- `ErrorException` if fewer than two Julia threads are available.
- `ArgumentError` if `mode` is not one of `:avg`, `:min`, `:minimum`.

NOTE(review): the helper is always placed on Julia thread 2, so this presumably
has to be called from a task running on thread 1 — confirm with callers.
"""
function _run_latency_bench(cpu1::Integer, cpu2::Integer; nsamples::Integer = 100, mode::Symbol = :min)
    cpu1 == cpu2 && return zero(Float64)

    # Fail fast: `@error` would only log and carry on.
    nthreads() >= 2 || error("Need at least two Julia threads.")
    # Validate and allocate *before* spawning the helper, so that a bad argument
    # cannot leave the helper task spinning forever waiting for `Finish`.
    mode in (:avg, :min, :minimum) || throw(ArgumentError("Unknown mode $mode."))
    Δts = zeros(typeof(time_ns()), nsamples)

    S = Sync()
    pinthread(cpu1)
    second_thread = @tspawnat 2 begin
        pinthread(cpu2)
        set(S, Ready)
        peer_state = wait_as_long_as(S, Ready)
        # Answer every Ping with a Pong until the main side signals Finish.
        while peer_state != Finish
            if peer_state == Ping
                set(S, Pong)
                peer_state = wait_as_long_as(S, Pong)
            end
        end
    end

    try
        wait_until(S, Ready)
        @inbounds for i in 1:nsamples
            t = time_ns()
            set(S, Ping)
            wait_until(S, Pong)
            Δts[i] = time_ns() - t
        end
    finally
        # Always release the helper, even on interrupt or error.
        set(S, Finish)
    end
    fetch(second_thread)

    return mode == :avg ? sum(Float64, Δts) / nsamples : Float64(minimum(Δts))
end
"""
    bench_core2core_latency(cpuids = 0:Sys.CPU_THREADS-1; nbench = 5, kwargs...)

Run `_run_latency_bench` for every ordered pair of CPU IDs in `cpuids` and
return a `length(cpuids) × length(cpuids)` matrix whose entry `[i, j]` is the
result for the `i`-th and `j`-th ID, averaged over `nbench` repetitions.
Remaining keyword arguments are forwarded to `_run_latency_bench`.

The thread affinity reported by `getcpuids()` before the run is restored with
`pinthreads` afterwards, also when a benchmark throws.

# Throws
- `ArgumentError` if any CPU ID is negative or `>= Sys.CPU_THREADS`.
"""
function bench_core2core_latency(cpuids = 0:Sys.CPU_THREADS-1; nbench = 5, kwargs...)
    # Valid IDs are 0-based, so `Sys.CPU_THREADS` itself is already out of range.
    for c in cpuids
        if c < 0 || c >= Sys.CPU_THREADS
            throw(ArgumentError("CPU IDs must all be non-negative and < Sys.CPU_THREADS (got $c)."))
        end
    end

    # Back up the current thread affinity.
    pinning_before = getcpuids()

    ncpuids = length(cpuids)
    latencies = zeros(ncpuids, ncpuids)
    try
        for _ in 1:nbench
            # `enumerate` yields 1-based positions that always match `latencies`,
            # whatever the index style of `cpuids`.
            for (j, cpu2) in enumerate(cpuids)
                for (i, cpu1) in enumerate(cpuids)
                    latencies[i, j] += _run_latency_bench(cpu1, cpu2; kwargs...)
                end
            end
        end
    finally
        # Restore the previous thread affinity, even if a benchmark failed.
        pinthreads(pinning_before)
    end

    latencies ./= nbench
    return latencies
end
"""
    core2core_latency(args...; kwargs...)

Forward all arguments to `bench_core2core_latency` and return a `heatmap` of
the resulting latency matrix, with both axes offset by `-1`.
"""
function core2core_latency(args...; kwargs...)
    latency_matrix = bench_core2core_latency(args...; kwargs...)
    return heatmap(latency_matrix; xoffset = -1, yoffset = -1)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This file is machine-generated - editing it directly is not advised | |
| julia_version = "1.7.1" | |
| manifest_format = "2.0" | |
| [[deps.ArgTools]] | |
| uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" | |
| [[deps.Artifacts]] | |
| uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" | |
| [[deps.Base64]] | |
| uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" | |
| [[deps.ChainRulesCore]] | |
| deps = ["Compat", "LinearAlgebra", "SparseArrays"] | |
| git-tree-sha1 = "54fc4400de6e5c3e27be6047da2ef6ba355511f8" | |
| uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" | |
| version = "1.11.6" | |
| [[deps.ChangesOfVariables]] | |
| deps = ["ChainRulesCore", "LinearAlgebra", "Test"] | |
| git-tree-sha1 = "bf98fa45a0a4cee295de98d4c1462be26345b9a1" | |
| uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" | |
| version = "0.1.2" | |
| [[deps.Compat]] | |
| deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] | |
| git-tree-sha1 = "44c37b4636bc54afac5c574d2d02b625349d6582" | |
| uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" | |
| version = "3.41.0" | |
| [[deps.CompilerSupportLibraries_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" | |
| [[deps.Contour]] | |
| deps = ["StaticArrays"] | |
| git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" | |
| uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" | |
| version = "0.5.7" | |
| [[deps.Crayons]] | |
| git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" | |
| uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" | |
| version = "4.1.1" | |
| [[deps.DataAPI]] | |
| git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" | |
| uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" | |
| version = "1.9.0" | |
| [[deps.DataStructures]] | |
| deps = ["Compat", "InteractiveUtils", "OrderedCollections"] | |
| git-tree-sha1 = "3daef5523dd2e769dad2365274f760ff5f282c7d" | |
| uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" | |
| version = "0.18.11" | |
| [[deps.Dates]] | |
| deps = ["Printf"] | |
| uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" | |
| [[deps.DelimitedFiles]] | |
| deps = ["Mmap"] | |
| uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" | |
| [[deps.Distributed]] | |
| deps = ["Random", "Serialization", "Sockets"] | |
| uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" | |
| [[deps.DocStringExtensions]] | |
| deps = ["LibGit2"] | |
| git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" | |
| uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" | |
| version = "0.8.6" | |
| [[deps.Downloads]] | |
| deps = ["ArgTools", "LibCURL", "NetworkOptions"] | |
| uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" | |
| [[deps.InteractiveUtils]] | |
| deps = ["Markdown"] | |
| uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" | |
| [[deps.InverseFunctions]] | |
| deps = ["Test"] | |
| git-tree-sha1 = "a7254c0acd8e62f1ac75ad24d5db43f5f19f3c65" | |
| uuid = "3587e190-3f89-42d0-90ee-14403ec27112" | |
| version = "0.1.2" | |
| [[deps.IrrationalConstants]] | |
| git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" | |
| uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" | |
| version = "0.1.1" | |
| [[deps.LibCURL]] | |
| deps = ["LibCURL_jll", "MozillaCACerts_jll"] | |
| uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" | |
| [[deps.LibCURL_jll]] | |
| deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] | |
| uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" | |
| [[deps.LibGit2]] | |
| deps = ["Base64", "NetworkOptions", "Printf", "SHA"] | |
| uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" | |
| [[deps.LibSSH2_jll]] | |
| deps = ["Artifacts", "Libdl", "MbedTLS_jll"] | |
| uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" | |
| [[deps.Libdl]] | |
| uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" | |
| [[deps.LinearAlgebra]] | |
| deps = ["Libdl", "libblastrampoline_jll"] | |
| uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | |
| [[deps.LogExpFunctions]] | |
| deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] | |
| git-tree-sha1 = "e5718a00af0ab9756305a0392832c8952c7426c1" | |
| uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" | |
| version = "0.3.6" | |
| [[deps.Logging]] | |
| uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" | |
| [[deps.Markdown]] | |
| deps = ["Base64"] | |
| uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" | |
| [[deps.MbedTLS_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" | |
| [[deps.Missings]] | |
| deps = ["DataAPI"] | |
| git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" | |
| uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" | |
| version = "1.0.2" | |
| [[deps.Mmap]] | |
| uuid = "a63ad114-7e13-5084-954f-fe012c677804" | |
| [[deps.MozillaCACerts_jll]] | |
| uuid = "14a3606d-f60d-562e-9121-12d972cd8159" | |
| [[deps.NetworkOptions]] | |
| uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" | |
| [[deps.OpenBLAS_jll]] | |
| deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] | |
| uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" | |
| [[deps.OrderedCollections]] | |
| git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" | |
| uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" | |
| version = "1.4.1" | |
| [[deps.Pkg]] | |
| deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] | |
| uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" | |
| [[deps.Printf]] | |
| deps = ["Unicode"] | |
| uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" | |
| [[deps.REPL]] | |
| deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] | |
| uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" | |
| [[deps.Random]] | |
| deps = ["SHA", "Serialization"] | |
| uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | |
| [[deps.Requires]] | |
| deps = ["UUIDs"] | |
| git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" | |
| uuid = "ae029012-a4dd-5104-9daa-d747884805df" | |
| version = "1.3.0" | |
| [[deps.SHA]] | |
| uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" | |
| [[deps.Serialization]] | |
| uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" | |
| [[deps.SharedArrays]] | |
| deps = ["Distributed", "Mmap", "Random", "Serialization"] | |
| uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" | |
| [[deps.Sockets]] | |
| uuid = "6462fe0b-24de-5631-8697-dd941f90decc" | |
| [[deps.SortingAlgorithms]] | |
| deps = ["DataStructures"] | |
| git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508" | |
| uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" | |
| version = "1.0.1" | |
| [[deps.SparseArrays]] | |
| deps = ["LinearAlgebra", "Random"] | |
| uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" | |
| [[deps.StaticArrays]] | |
| deps = ["LinearAlgebra", "Random", "Statistics"] | |
| git-tree-sha1 = "2884859916598f974858ff01df7dfc6c708dd895" | |
| uuid = "90137ffa-7385-5640-81b9-e52037218182" | |
| version = "1.3.3" | |
| [[deps.Statistics]] | |
| deps = ["LinearAlgebra", "SparseArrays"] | |
| uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" | |
| [[deps.StatsAPI]] | |
| git-tree-sha1 = "d88665adc9bcf45903013af0982e2fd05ae3d0a6" | |
| uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" | |
| version = "1.2.0" | |
| [[deps.StatsBase]] | |
| deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] | |
| git-tree-sha1 = "51383f2d367eb3b444c961d485c565e4c0cf4ba0" | |
| uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" | |
| version = "0.33.14" | |
| [[deps.TOML]] | |
| deps = ["Dates"] | |
| uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" | |
| [[deps.Tar]] | |
| deps = ["ArgTools", "SHA"] | |
| uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" | |
| [[deps.Test]] | |
| deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] | |
| uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | |
| [[deps.ThreadPinning]] | |
| deps = ["Libdl", "LinearAlgebra", "Random", "Requires"] | |
| git-tree-sha1 = "f999f26862999bf23935097c2570fd4192db80d6" | |
| uuid = "811555cd-349b-4f26-b7bc-1f208b848042" | |
| version = "0.3.0" | |
| [[deps.UUIDs]] | |
| deps = ["Random", "SHA"] | |
| uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" | |
| [[deps.Unicode]] | |
| uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" | |
| [[deps.UnicodePlots]] | |
| deps = ["Contour", "Crayons", "Dates", "SparseArrays", "StatsBase"] | |
| git-tree-sha1 = "62595983da672758a96f89e07f7fd3735f16c18c" | |
| uuid = "b8865327-cd53-5732-bb35-84acbb429228" | |
| version = "2.7.0" | |
| [[deps.Zlib_jll]] | |
| deps = ["Libdl"] | |
| uuid = "83775a58-1f1d-513f-b197-d71354ab007a" | |
| [[deps.libblastrampoline_jll]] | |
| deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] | |
| uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" | |
| [[deps.nghttp2_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" | |
| [[deps.p7zip_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [deps] | |
| ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" | |
| UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" |
Author
Author
On an empty node of Noctua 1 I get
julia> bench_core2core_latency()
40×40 Matrix{Float64}:
0.0 214.5 214.2 218.6 221.4 213.2 219.8 210.2 222.5 221.8 … 273.9 265.6 255.7 266.8 268.5 272.7 269.1 263.0 271.9 269.4
211.5 0.0 218.4 221.4 221.3 215.1 224.4 219.1 222.8 223.8 267.0 262.5 272.5 269.2 267.5 266.5 269.2 265.6 268.0 270.8
222.5 218.5 0.0 216.9 222.4 219.7 222.3 216.4 219.8 224.6 257.6 260.9 264.0 265.2 262.1 260.0 266.5 262.4 277.4 268.6
221.4 222.5 215.8 0.0 221.7 215.0 217.4 218.0 221.3 219.1 255.7 259.4 266.1 267.7 255.4 261.2 258.7 266.5 275.3 268.4
225.8 223.1 221.1 223.0 0.0 221.4 220.6 219.0 224.6 222.2 268.3 264.5 271.5 262.2 258.5 267.8 259.7 263.9 267.7 273.1
229.3 218.8 220.5 216.1 223.6 0.0 220.7 218.1 221.7 224.3 … 273.9 277.9 261.7 269.6 267.8 269.0 269.5 267.4 261.2 263.7
213.4 221.3 219.3 216.8 221.4 223.9 0.0 217.3 221.2 220.7 270.0 268.2 256.4 260.8 274.7 264.0 257.2 266.5 266.8 262.0
223.2 229.4 214.3 221.7 221.4 221.9 218.7 0.0 222.4 215.0 269.4 253.7 267.7 262.7 270.6 275.4 258.6 272.6 264.8 262.7
222.9 222.0 219.3 221.4 222.5 221.3 222.3 224.6 0.0 218.5 267.8 265.7 265.5 265.8 260.1 271.4 266.2 263.0 262.8 261.8
224.1 223.6 227.1 223.9 225.6 225.7 221.5 223.0 222.7 0.0 264.2 269.5 258.5 274.0 257.0 264.5 261.6 257.9 266.0 262.9
⋮ ⋮ ⋱ ⋮ ⋮
260.6 265.1 264.8 263.2 256.1 267.1 257.7 268.3 268.9 270.4 … 0.0 222.5 211.3 225.1 222.9 228.3 226.7 221.0 224.9 225.8
266.8 271.9 269.8 260.8 256.2 262.0 262.8 268.5 265.5 258.3 233.0 0.0 217.2 220.8 224.6 228.2 221.3 220.6 221.4 223.5
260.7 266.7 259.2 258.0 265.0 259.8 264.4 267.9 264.6 256.1 227.1 216.3 0.0 220.5 221.8 225.1 219.1 222.1 216.9 226.5
266.8 258.6 258.2 263.5 265.1 268.3 261.1 268.6 266.0 266.7 223.8 221.5 214.8 0.0 226.8 225.0 219.4 227.4 221.0 231.6
264.3 262.1 262.5 265.3 263.1 265.6 259.5 270.8 265.3 272.0 223.5 222.2 221.8 218.6 0.0 226.0 220.4 225.5 227.2 220.6
260.2 261.4 263.4 274.9 267.1 269.5 265.3 264.8 260.7 275.5 … 218.0 212.7 220.4 226.7 225.3 0.0 233.3 223.4 222.4 227.5
268.5 270.3 263.4 267.8 267.5 273.7 267.2 260.4 256.6 264.1 216.5 219.6 218.8 219.9 226.4 231.0 0.0 220.1 226.2 224.0
269.0 264.0 268.5 267.6 261.9 261.3 265.4 264.7 263.8 258.9 223.6 223.0 213.2 220.2 217.8 227.5 221.9 0.0 216.2 223.9
265.2 256.5 261.4 266.6 264.4 259.1 269.3 268.4 264.2 264.8 221.0 222.5 218.1 229.7 219.5 223.0 223.0 222.3 0.0 213.5
269.1 262.6 261.2 264.8 267.3 267.0 265.8 262.1 260.2 269.0 222.3 224.2 232.1 236.4 229.3 223.8 227.8 220.1 228.0 0.0
Author
Running c2clat on the same system I find the following. Note that I multiplied the resulting latencies by two (since in the Julia variant we are measuring a full roundtrip between cores instead of just one way).
julia> M
40×40 Matrix{Float64}:
0.0 202.0 200.0 192.0 206.0 202.0 202.0 196.0 190.0 192.0 … 486.0 494.0 484.0 486.0 492.0 508.0 488.0 492.0 494.0 498.0
202.0 0.0 200.0 188.0 204.0 200.0 194.0 190.0 188.0 190.0 490.0 492.0 484.0 482.0 498.0 500.0 488.0 490.0 492.0 492.0
200.0 200.0 0.0 204.0 198.0 202.0 202.0 204.0 196.0 214.0 496.0 498.0 490.0 488.0 502.0 512.0 496.0 494.0 496.0 502.0
192.0 188.0 204.0 0.0 216.0 192.0 186.0 204.0 198.0 218.0 490.0 490.0 480.0 484.0 496.0 490.0 484.0 486.0 496.0 492.0
206.0 204.0 198.0 216.0 0.0 212.0 206.0 196.0 220.0 204.0 502.0 504.0 496.0 494.0 508.0 510.0 502.0 502.0 504.0 506.0
202.0 200.0 202.0 192.0 212.0 0.0 200.0 198.0 204.0 190.0 … 498.0 496.0 488.0 492.0 500.0 506.0 496.0 498.0 496.0 500.0
202.0 194.0 202.0 186.0 206.0 200.0 0.0 188.0 192.0 196.0 506.0 506.0 496.0 496.0 504.0 518.0 506.0 502.0 502.0 502.0
196.0 190.0 204.0 204.0 196.0 198.0 188.0 0.0 214.0 200.0 494.0 490.0 486.0 482.0 498.0 496.0 492.0 492.0 496.0 496.0
190.0 188.0 196.0 198.0 220.0 204.0 192.0 214.0 0.0 222.0 496.0 498.0 486.0 492.0 504.0 500.0 496.0 494.0 502.0 500.0
192.0 190.0 214.0 218.0 204.0 190.0 196.0 200.0 222.0 0.0 500.0 498.0 492.0 492.0 506.0 500.0 496.0 496.0 498.0 502.0
⋮ ⋮ ⋱ ⋮ ⋮
486.0 490.0 496.0 490.0 502.0 498.0 506.0 494.0 496.0 500.0 … 0.0 190.0 186.0 186.0 194.0 198.0 190.0 186.0 192.0 212.0
494.0 492.0 498.0 490.0 504.0 496.0 506.0 490.0 498.0 498.0 190.0 0.0 200.0 186.0 210.0 198.0 188.0 186.0 192.0 196.0
484.0 484.0 490.0 480.0 496.0 488.0 496.0 486.0 486.0 492.0 186.0 200.0 0.0 204.0 198.0 200.0 188.0 186.0 196.0 214.0
486.0 482.0 488.0 484.0 494.0 492.0 496.0 482.0 492.0 492.0 186.0 186.0 204.0 0.0 214.0 186.0 186.0 204.0 196.0 200.0
492.0 498.0 502.0 496.0 508.0 500.0 504.0 498.0 504.0 506.0 194.0 210.0 198.0 214.0 0.0 208.0 196.0 212.0 220.0 224.0
508.0 500.0 512.0 490.0 510.0 506.0 518.0 496.0 500.0 500.0 … 198.0 198.0 200.0 186.0 208.0 0.0 198.0 186.0 190.0 208.0
488.0 488.0 496.0 484.0 502.0 496.0 506.0 492.0 496.0 496.0 190.0 188.0 188.0 186.0 196.0 198.0 0.0 202.0 210.0 198.0
492.0 490.0 494.0 486.0 502.0 498.0 502.0 492.0 494.0 496.0 186.0 186.0 186.0 204.0 212.0 186.0 202.0 0.0 210.0 200.0
494.0 492.0 496.0 496.0 504.0 496.0 502.0 496.0 502.0 498.0 192.0 192.0 196.0 196.0 220.0 190.0 210.0 210.0 0.0 222.0
498.0 492.0 502.0 492.0 506.0 500.0 502.0 496.0 500.0 502.0 212.0 196.0 214.0 200.0 224.0 208.0 198.0 200.0 222.0 0.0

The differences between intra- and intersocket latencies are more pronounced here. Intrasocket appears to be only slightly faster, but intersocket turns out to be much slower compared to the Julia findings. Still, I take it the results are in the same "ballpark".
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment


Based on https://github.com/ajakubek/core-latency/blob/master/main.cpp (and https://github.com/rigtorp/c2clat/blob/master/c2clat.cpp).