January 26, 2026 14:45 · January 26, 2026 14:43 · January 22, 2026 23:50 · January 22, 2026 21:35 · December 18, 2025 15:25 · December 16, 2025 20:55
 Hello,

 1. The preexisting potential undefined behavior in FP conversions.

 There is an edge case in the specification potentially allowing for value-dependent undefined behavior (henceforth UB) in floating-point conversions:

 7.3.10 [conv.double] #2 says:

    If the source value can be exactly represented in the destination type, the result of the conversion is that exact representation. If the source value is between two adjacent destination values, the result of the conversion is an implementation-defined choice of either of those values. Otherwise, the behavior is undefined.
 diff --git a/tmp/log-good.mlir b/tmp/log-bad.mlir
 index 0408554..f00f377 100644
 --- a/tmp/log-good.mlir
 +++ b/tmp/log-bad.mlir
 @@ -400,7 +400,7 @@ module {
     %11 = iree_tensor_ext.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [48, 768, 16, 1], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<48x768x16x1xf16>> -> tensor<48x768x16x1xf16>
     %12 = tensor.empty() : tensor<4x48x16x16xf32>
     %13 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%12 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32>
 -    %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 4, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%13 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32>
 +    %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0

 ---------------------------- live log sessionstart -----------------------------
 INFO     conftest:conftest.py:38 Pytest quality test session is starting
 ============================= test session starts ==============================
 platform linux -- Python 3.12.3, pytest-8.0.0, pluggy-1.6.0
 rootdir: /home/ossci/iree-test-suites/sharktank_models
 configfile: pytest.ini
 plugins: anyio-4.11.0, xdist-3.5.0, timeout-2.4.0, subtests-0.15.0, metadata-3.1.1, cov-7.0.0, asyncio-0.23.8, html-4.1.1, retry-1.7.0, reportlog-1.0.0, check-2.6.0
 timeout: 600.0s
 timeout method: signal
 FAILED: tests/e2e/math/math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb /tmp/xx/iree-build/tests/e2e/math/math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb 
 cd /tmp/xx/iree-build/tests/e2e/math && /tmp/xx/iree-build/tools/iree-compile --output-format=vm-bytecode --mlir-print-op-on-diagnostic=false --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=generic /tmp/xx/iree-build/tests/e2e/math/math_ops_llvm-cpu.mlir -o math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb --iree-hal-executable-object-search-path=\"/tmp/xx/iree-build\" --iree-llvmcpu-embedded-linker-path=\"/tmp/xx/iree-build/llvm-project/bin/lld\" --iree-llvmcpu-wasm-linker-path=\"/tmp/xx/iree-build/llvm-project/bin/lld\"
 lld: error: undefined symbol: fma
 >>> referenced by math_ops_llvm-cpu.mlir:29
 >>>               /tmp/math_ops_llvm_cpu_linked-cb60c2.o:(_test_acos_f16_dispatch_0_elementwise_9_f16)
 >>> referenced by math_ops_llvm-cpu.mlir:29
 >>>               /tmp/math_ops_llvm_cpu_linked-cb60c2.o:(_test_acos_f16_dispatch_0_elemen
 diff --git a/compiler/src/iree/compiler/API/BUILD.bazel b/compiler/src/iree/compiler/API/BUILD.bazel
 index 543a088fba..43ff10395d 100644
 --- a/compiler/src/iree/compiler/API/BUILD.bazel
 +++ b/compiler/src/iree/compiler/API/BUILD.bazel
 @@ -38,6 +38,7 @@ iree_compiler_cc_library(
         "//compiler/src/iree/compiler/API/Internal:IREEReduceToolEntryPoint",
         "//compiler/src/iree/compiler/API/Internal:LLDToolEntryPoint",
         "//llvm-external-projects/iree-dialects:CAPI",
 +        "@llvm-project//mlir:CAPIAMDGPU",
         "@llvm-project//mlir:CAPIDebug",
 ➜  iree-build ninja iree-opt && tools/iree-opt '--pass-pipeline=builtin.module(func.func(iree-codegen-materialize-device-encoding))'  /tmp/xx/iree/compiler/plugins/target/ROCM/test/materialize_encoding_ukernel_gfx942.mlir
 [0/2] Re-checking globbed directories...
 [4/4] Linking CXX executable tools/iree-opt
 =================================================================
 ==118841==ERROR: AddressSanitizer: heap-use-after-free on address 0x7cd8cc303aec at pc 0x7ff8e297eb6d bp 0x7bf8c725b8f0 sp 0x7bf8c725b8e8
 READ of size 4 at 0x7cd8cc303aec thread T5
    #0 0x7ff8e297eb6c in mlir::Operation::getPropertiesStorageSize() const /tmp/xx/iree/third_party/llvm-project/mlir/include/mlir/IR/Operation.h:897:18
    #1 0x7ff8e297eb6c in mlir::Operation::getAttr(llvm::StringRef) /tmp/xx/iree/third_party/llvm-project/mlir/include/mlir/IR/Operation.h:542:9
    #2 0x7ff8e297eb6c in mlir::iree_compiler::IREE::ROCM::TensorUKernelProviderAttr::getDataLayoutForUKernel(mlir::Attribute, mlir::DictionaryAttr) const /tmp/xx/iree/compil
 (gdb) thread apply all bt

 Thread 7 (Thread 0x7ffff520a6c0 (LWP 1109500) "llvm-worker-5"):
 #0  0x00007ffff7aafd71 in __futex_abstimed_wait_common64 (private=32767, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x555561966fc4) at ./nptl/futex-internal.c:57
 #1  __futex_abstimed_wait_common (cancel=true, private=32767, abstime=0x0, clockid=0, expected=0, futex_word=0x555561966fc4) at ./nptl/futex-internal.c:87
 #2  __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x555561966fc4, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139
 #3  0x00007ffff7ab27ed in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x555561966f70, cond=0x555561966f98) at ./nptl/pthread_cond_wait.c:503
 #4  ___pthread_cond_wait (cond=0x555561966f98, mutex=0x555561966f70) at ./nptl/pthread_cond_wait.c:627
 #5  0x00005555613da316 in llvm::StdThreadPool::processTasks(llvm::ThreadPoolTaskGroup*) ()
 #6  0x00005555613da777 in voi
 (gdb) thread apply all bt

 Thread 7 (Thread 0x7fffc37fe6c0 (LWP 1644017) "llvm-worker-5"):
 #0  0x00007fffd2825d71 in __futex_abstimed_wait_common64 (private=32767, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x555555f75a20) at ./nptl/futex-internal.c:57
 #1  __futex_abstimed_wait_common (cancel=true, private=32767, abstime=0x0, clockid=0, expected=0, futex_word=0x555555f75a20) at ./nptl/futex-internal.c:87
 #2  __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x555555f75a20, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139
 #3  0x00007fffd28287ed in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x555555f759d0, cond=0x555555f759f8) at ./nptl/pthread_cond_wait.c:503
 #4  ___pthread_cond_wait (cond=0x555555f759f8, mutex=0x555555f759d0) at ./nptl/pthread_cond_wait.c:627
 #5  0x00007fffe35b15cb in std::condition_variable::wait<llvm::StdThreadPool::processTasks(llvm::ThreadPoolTaskGroup*)::$
 diff --git a/runtime/src/iree/vm/native_module_packing.h b/runtime/src/iree/vm/native_module_packing.h
 index 705b68528a..be0ee5047e 100644
 --- a/runtime/src/iree/vm/native_module_packing.h
 +++ b/runtime/src/iree/vm/native_module_packing.h
 @@ -8,6 +8,7 @@
 #define IREE_VM_MODULE_ABI_PACKING_H_
 
 #include <memory>
 +#include <numeric>
 #include <tuple>
	Hello,

	1. The preexisting potential undefined behavior in FP conversions.

	There is an edge case in the specification potentially allowing for value-dependent undefined behavior (henceforth UB) in floating-point conversions:

	7.3.10 [conv.double] #2 says:

	If the source value can be exactly represented in the destination type, the result of the conversion is that exact representation. If the source value is between two adjacent destination values, the result of the conversion is an implementation-defined choice of either of those values. Otherwise, the behavior is undefined.
	diff --git a/tmp/log-good.mlir b/tmp/log-bad.mlir
	index 0408554..f00f377 100644
	--- a/tmp/log-good.mlir
	+++ b/tmp/log-bad.mlir
	@@ -400,7 +400,7 @@ module {
	%11 = iree_tensor_ext.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [48, 768, 16, 1], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<48x768x16x1xf16>> -> tensor<48x768x16x1xf16>
	%12 = tensor.empty() : tensor<4x48x16x16xf32>
	%13 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%12 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32>
	- %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 4, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%13 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32>
	+ %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0

	---------------------------- live log sessionstart -----------------------------
	INFO conftest:conftest.py:38 Pytest quality test session is starting
	============================= test session starts ==============================
	platform linux -- Python 3.12.3, pytest-8.0.0, pluggy-1.6.0
	rootdir: /home/ossci/iree-test-suites/sharktank_models
	configfile: pytest.ini
	plugins: anyio-4.11.0, xdist-3.5.0, timeout-2.4.0, subtests-0.15.0, metadata-3.1.1, cov-7.0.0, asyncio-0.23.8, html-4.1.1, retry-1.7.0, reportlog-1.0.0, check-2.6.0
	timeout: 600.0s
	timeout method: signal
	FAILED: tests/e2e/math/math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb /tmp/xx/iree-build/tests/e2e/math/math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb
	cd /tmp/xx/iree-build/tests/e2e/math && /tmp/xx/iree-build/tools/iree-compile --output-format=vm-bytecode --mlir-print-op-on-diagnostic=false --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=generic /tmp/xx/iree-build/tests/e2e/math/math_ops_llvm-cpu.mlir -o math_ops_llvm-cpu_math_ops_llvm-cpu.mlir_module.vmfb --iree-hal-executable-object-search-path=\"/tmp/xx/iree-build\" --iree-llvmcpu-embedded-linker-path=\"/tmp/xx/iree-build/llvm-project/bin/lld\" --iree-llvmcpu-wasm-linker-path=\"/tmp/xx/iree-build/llvm-project/bin/lld\"
	lld: error: undefined symbol: fma
	>>> referenced by math_ops_llvm-cpu.mlir:29
	>>> /tmp/math_ops_llvm_cpu_linked-cb60c2.o:(_test_acos_f16_dispatch_0_elementwise_9_f16)
	>>> referenced by math_ops_llvm-cpu.mlir:29
	>>> /tmp/math_ops_llvm_cpu_linked-cb60c2.o:(_test_acos_f16_dispatch_0_elemen
	diff --git a/compiler/src/iree/compiler/API/BUILD.bazel b/compiler/src/iree/compiler/API/BUILD.bazel
	index 543a088fba..43ff10395d 100644
	--- a/compiler/src/iree/compiler/API/BUILD.bazel
	+++ b/compiler/src/iree/compiler/API/BUILD.bazel
	@@ -38,6 +38,7 @@ iree_compiler_cc_library(
	"//compiler/src/iree/compiler/API/Internal:IREEReduceToolEntryPoint",
	"//compiler/src/iree/compiler/API/Internal:LLDToolEntryPoint",
	"//llvm-external-projects/iree-dialects:CAPI",
	+ "@llvm-project//mlir:CAPIAMDGPU",
	"@llvm-project//mlir:CAPIDebug",
	➜ iree-build ninja iree-opt && tools/iree-opt '--pass-pipeline=builtin.module(func.func(iree-codegen-materialize-device-encoding))' /tmp/xx/iree/compiler/plugins/target/ROCM/test/materialize_encoding_ukernel_gfx942.mlir
	[0/2] Re-checking globbed directories...
	[4/4] Linking CXX executable tools/iree-opt
	=================================================================
	==118841==ERROR: AddressSanitizer: heap-use-after-free on address 0x7cd8cc303aec at pc 0x7ff8e297eb6d bp 0x7bf8c725b8f0 sp 0x7bf8c725b8e8
	READ of size 4 at 0x7cd8cc303aec thread T5
	#0 0x7ff8e297eb6c in mlir::Operation::getPropertiesStorageSize() const /tmp/xx/iree/third_party/llvm-project/mlir/include/mlir/IR/Operation.h:897:18
	#1 0x7ff8e297eb6c in mlir::Operation::getAttr(llvm::StringRef) /tmp/xx/iree/third_party/llvm-project/mlir/include/mlir/IR/Operation.h:542:9
	#2 0x7ff8e297eb6c in mlir::iree_compiler::IREE::ROCM::TensorUKernelProviderAttr::getDataLayoutForUKernel(mlir::Attribute, mlir::DictionaryAttr) const /tmp/xx/iree/compil
	(gdb) thread apply all bt

	Thread 7 (Thread 0x7ffff520a6c0 (LWP 1109500) "llvm-worker-5"):
	#0 0x00007ffff7aafd71 in __futex_abstimed_wait_common64 (private=32767, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x555561966fc4) at ./nptl/futex-internal.c:57
	#1 __futex_abstimed_wait_common (cancel=true, private=32767, abstime=0x0, clockid=0, expected=0, futex_word=0x555561966fc4) at ./nptl/futex-internal.c:87
	#2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x555561966fc4, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139
	#3 0x00007ffff7ab27ed in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x555561966f70, cond=0x555561966f98) at ./nptl/pthread_cond_wait.c:503
	#4 ___pthread_cond_wait (cond=0x555561966f98, mutex=0x555561966f70) at ./nptl/pthread_cond_wait.c:627
	#5 0x00005555613da316 in llvm::StdThreadPool::processTasks(llvm::ThreadPoolTaskGroup*) ()
	#6 0x00005555613da777 in voi
	(gdb) thread apply all bt

	Thread 7 (Thread 0x7fffc37fe6c0 (LWP 1644017) "llvm-worker-5"):
	#0 0x00007fffd2825d71 in __futex_abstimed_wait_common64 (private=32767, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x555555f75a20) at ./nptl/futex-internal.c:57
	#1 __futex_abstimed_wait_common (cancel=true, private=32767, abstime=0x0, clockid=0, expected=0, futex_word=0x555555f75a20) at ./nptl/futex-internal.c:87
	#2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x555555f75a20, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139
	#3 0x00007fffd28287ed in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x555555f759d0, cond=0x555555f759f8) at ./nptl/pthread_cond_wait.c:503
	#4 ___pthread_cond_wait (cond=0x555555f759f8, mutex=0x555555f759d0) at ./nptl/pthread_cond_wait.c:627
	#5 0x00007fffe35b15cb in std::condition_variable::wait<llvm::StdThreadPool::processTasks(llvm::ThreadPoolTaskGroup*)::$
	diff --git a/runtime/src/iree/vm/native_module_packing.h b/runtime/src/iree/vm/native_module_packing.h
	index 705b68528a..be0ee5047e 100644
	--- a/runtime/src/iree/vm/native_module_packing.h
	+++ b/runtime/src/iree/vm/native_module_packing.h
	@@ -8,6 +8,7 @@
	#define IREE_VM_MODULE_ABI_PACKING_H_

	#include <memory>
	+#include <numeric>
	#include <tuple>