Created
January 22, 2026 23:50
-
-
Save bjacob/6e6259f24ac05011ba9603c29d8ee18b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/tmp/log-good.mlir b/tmp/log-bad.mlir | |
| index 0408554..f00f377 100644 | |
| --- a/tmp/log-good.mlir | |
| +++ b/tmp/log-bad.mlir | |
| @@ -400,7 +400,7 @@ module { | |
| %11 = iree_tensor_ext.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [48, 768, 16, 1], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<48x768x16x1xf16>> -> tensor<48x768x16x1xf16> | |
| %12 = tensor.empty() : tensor<4x48x16x16xf32> | |
| %13 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%12 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| - %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 4, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%13 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| + %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%13 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| %15 = tensor.empty() : tensor<64x768xf32> | |
| %unpack = linalg.unpack %14 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1]>} : tensor<4x48x16x16xf32> -> tensor<64x768xf32> | |
| iree_tensor_ext.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [64, 768], strides = [1, 1] : tensor<64x768xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<64x768xf32>> | |
| @@ -667,7 +667,7 @@ module { | |
| %19 = iree_tensor_ext.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [4, 48, 16, 16], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x48x16x16xf16>> -> tensor<4x48x16x16xf16> | |
| %20 = tensor.empty() : tensor<4x48x16x16xf32> | |
| %21 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%20 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| - %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 4, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%21 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| + %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x768x16x1xf16>, tensor<48x768x16x1xf16>) outs(%21 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| %23 = tensor.empty() : tensor<4x48x16x16xf16> | |
| %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19, %22, %18 : tensor<4x48x16x16xf16>, tensor<4x48x16x16xf32>, tensor<48x16xf16>) outs(%23 : tensor<4x48x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f16, %in_0: f32, %in_1: f16, %out: f16): | |
| @@ -773,7 +773,7 @@ module { | |
| %15 = iree_tensor_ext.dispatch.tensor.load %11, offsets = [0, 0], sizes = [192, 16], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<192x16xf16>> -> tensor<192x16xf16> | |
| %16 = tensor.empty() : tensor<4x192x16x16xf32> | |
| %17 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst_1 : f32) outs(%16 : tensor<4x192x16x16xf32>) -> tensor<4x192x16x16xf32> | |
| - %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 4, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%13, %14 : tensor<4x768x16x1xf16>, tensor<192x768x16x1xf16>) outs(%17 : tensor<4x192x16x16xf32>) -> tensor<4x192x16x16xf32> | |
| + %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 192, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%13, %14 : tensor<4x768x16x1xf16>, tensor<192x768x16x1xf16>) outs(%17 : tensor<4x192x16x16xf32>) -> tensor<4x192x16x16xf32> | |
| %19 = tensor.empty() : tensor<4x192x16x16xf16> | |
| %20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %15 : tensor<4x192x16x16xf32>, tensor<192x16xf16>) outs(%19 : tensor<4x192x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f32, %in_2: f16, %out: f16): | |
| @@ -830,7 +830,7 @@ module { | |
| %19 = iree_tensor_ext.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [4, 48, 16, 16], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x48x16x16xf16>> -> tensor<4x48x16x16xf16> | |
| %20 = tensor.empty() : tensor<4x48x16x16xf32> | |
| %21 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%20 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| - %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [1, 1, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x3072x16x1xf16>, tensor<48x3072x16x1xf16>) outs(%21 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| + %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x3072x16x1xf16>, tensor<48x3072x16x1xf16>) outs(%21 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| %23 = tensor.empty() : tensor<4x48x16x16xf16> | |
| %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19, %22, %18 : tensor<4x48x16x16xf16>, tensor<4x48x16x16xf32>, tensor<48x16xf16>) outs(%23 : tensor<4x48x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f16, %in_0: f32, %in_1: f16, %out: f16): | |
| @@ -875,7 +875,7 @@ module { | |
| %15 = iree_tensor_ext.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [4, 48, 16, 16], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x48x16x16xf16>> -> tensor<4x48x16x16xf16> | |
| %16 = tensor.empty() : tensor<4x48x16x16xf32> | |
| %17 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%16 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| - %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [1, 1, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%12, %13 : tensor<4x3072x16x1xf16>, tensor<48x3072x16x1xf16>) outs(%17 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| + %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 48, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%12, %13 : tensor<4x3072x16x1xf16>, tensor<48x3072x16x1xf16>) outs(%17 : tensor<4x48x16x16xf32>) -> tensor<4x48x16x16xf32> | |
| %19 = tensor.empty() : tensor<4x48x16x16xf16> | |
| %20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%15, %18, %14 : tensor<4x48x16x16xf16>, tensor<4x48x16x16xf32>, tensor<48x16xf16>) outs(%19 : tensor<4x48x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f16, %in_0: f32, %in_1: f16, %out: f16): | |
| @@ -1040,7 +1040,7 @@ module { | |
| %11 = iree_tensor_ext.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [80, 1280, 16, 1], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<80x1280x16x1xf16>> -> tensor<80x1280x16x1xf16> | |
| %12 = tensor.empty() : tensor<4x80x16x16xf32> | |
| %13 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%12 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| - %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [2, 2, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x1280x16x1xf16>, tensor<80x1280x16x1xf16>) outs(%13 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| + %14 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 80, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%10, %11 : tensor<4x1280x16x1xf16>, tensor<80x1280x16x1xf16>) outs(%13 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| %15 = tensor.empty() : tensor<64x1280xf32> | |
| %unpack = linalg.unpack %14 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1]>} : tensor<4x80x16x16xf32> -> tensor<64x1280xf32> | |
| iree_tensor_ext.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : tensor<64x1280xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<64x1280xf32>> | |
| @@ -1307,7 +1307,7 @@ module { | |
| %19 = iree_tensor_ext.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [4, 80, 16, 16], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x80x16x16xf16>> -> tensor<4x80x16x16xf16> | |
| %20 = tensor.empty() : tensor<4x80x16x16xf32> | |
| %21 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%20 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| - %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [2, 2, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x1280x16x1xf16>, tensor<80x1280x16x1xf16>) outs(%21 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| + %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 80, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x1280x16x1xf16>, tensor<80x1280x16x1xf16>) outs(%21 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| %23 = tensor.empty() : tensor<4x80x16x16xf16> | |
| %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19, %22, %18 : tensor<4x80x16x16xf16>, tensor<4x80x16x16xf32>, tensor<80x16xf16>) outs(%23 : tensor<4x80x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f16, %in_0: f32, %in_1: f16, %out: f16): | |
| @@ -1414,7 +1414,7 @@ module { | |
| %15 = iree_tensor_ext.dispatch.tensor.load %11, offsets = [0, 0], sizes = [320, 16], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<320x16xf16>> -> tensor<320x16xf16> | |
| %16 = tensor.empty() : tensor<4x320x16x16xf32> | |
| %17 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst_2 : f32) outs(%16 : tensor<4x320x16x16xf32>) -> tensor<4x320x16x16xf32> | |
| - %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [2, 2, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%13, %14 : tensor<4x1280x16x1xf16>, tensor<320x1280x16x1xf16>) outs(%17 : tensor<4x320x16x16xf32>) -> tensor<4x320x16x16xf32> | |
| + %18 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 80, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%13, %14 : tensor<4x1280x16x1xf16>, tensor<320x1280x16x1xf16>) outs(%17 : tensor<4x320x16x16xf32>) -> tensor<4x320x16x16xf32> | |
| %19 = tensor.empty() : tensor<4x320x16x16xf16> | |
| %20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %15 : tensor<4x320x16x16xf32>, tensor<320x16xf16>) outs(%19 : tensor<4x320x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f32, %in_3: f16, %out: f16): | |
| @@ -1470,7 +1470,7 @@ module { | |
| %19 = iree_tensor_ext.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [4, 80, 16, 16], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x80x16x16xf16>> -> tensor<4x80x16x16xf16> | |
| %20 = tensor.empty() : tensor<4x80x16x16xf32> | |
| %21 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} ins(%cst : f32) outs(%20 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| - %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [1, 1, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x5120x16x1xf16>, tensor<80x5120x16x1xf16>) outs(%21 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| + %22 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [4, 20, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 16, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%16, %17 : tensor<4x5120x16x1xf16>, tensor<80x5120x16x1xf16>) outs(%21 : tensor<4x80x16x16xf32>) -> tensor<4x80x16x16xf32> | |
| %23 = tensor.empty() : tensor<4x80x16x16xf16> | |
| %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%19, %22, %18 : tensor<4x80x16x16xf16>, tensor<4x80x16x16xf32>, tensor<80x16xf16>) outs(%23 : tensor<4x80x16x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 16, 16]>} { | |
| ^bb0(%in: f16, %in_0: f32, %in_1: f16, %out: f16): | |
| @@ -1604,7 +1604,7 @@ module { | |
| %9 = iree_tensor_ext.dispatch.tensor.load %6, offsets = [0, 0, 0, 0], sizes = [80, 1280, 16, 1], strides = [1, 1, 1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<80x1280x16x1xf16>> -> tensor<80x1280x16x1xf16> | |
| %10 = tensor.empty() : tensor<1x80x1x16xf32> | |
| %11 = linalg.fill {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 1, 16]>} ins(%cst : f32) outs(%10 : tensor<1x80x1x16xf32>) -> tensor<1x80x1x16xf32> | |
| - %12 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [1, 2, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 1, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%8, %9 : tensor<1x1280x1x1xf16>, tensor<80x1280x16x1xf16>) outs(%11 : tensor<1x80x1x16xf32>) -> tensor<1x80x1x16xf32> | |
| + %12 = linalg.mmt4d {lowering_config = #iree_cpu.lowering_config<distribution = [1, 80, 0, 0, 0, 0], vector_common_parallel = [1, 1, 0, 1, 16, 0], vector_reduction = [0, 0, 1, 0, 0, 1]>} ins(%8, %9 : tensor<1x1280x1x1xf16>, tensor<80x1280x16x1xf16>) outs(%11 : tensor<1x80x1x16xf32>) -> tensor<1x80x1x16xf32> | |
| %13 = tensor.empty() : tensor<1x80x1x16xf16> | |
| %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%12 : tensor<1x80x1x16xf32>) outs(%13 : tensor<1x80x1x16xf16>) attrs = {lowering_config = #iree_cpu.lowering_config<vector_common_parallel = [1, 1, 1, 16]>} { | |
| ^bb0(%in: f32, %out: f16): |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment