The comprehensive bufferization does not support LinalgExt ops
hanhanW opened this issue · comments
I'm trying to switch to upstream bufferization for all CPU pipelines. The last remaining piece is to implement bufferization for the other LinalgExt ops, e.g. linalg_ext.fft, linalg_ext.reverse, etc.
To repro:
Replace the bufferization in addCPUDefaultPassPipeline
with
passManager.addNestedPass<func::FuncOp>(createConvertToDestinationPassingStylePass());
passManager.addPass(createCanonicalizerPass());
BufferizationOptions::AllocationFn allocationFn = cpuComprehensiveBufferizeAllocationFn;
BufferizationOptions::DeallocationFn deallocationFn = cpuComprehensiveBufferizeDeallocationFn;
BufferizationOptions::MemCpyFn memcpyFn = cpuComprehensiveBufferizeCopyFn;
addIREEComprehensiveBufferizePasses(passManager, allocationFn, deallocationFn, memcpyFn);
Run iree-translate -iree-mlir-to-vm-bytecode-module --iree-hal-target-backends=dylib-llvm-aot --iree-llvm-link-embedded=true iree/test/e2e/linalg_ext_ops/reverse.mlir -o /tmp/a.vmfb
You'll see errors:
iree/test/e2e/linalg_ext_ops/reverse.mlir:24:8: error: op was not bufferized
%0 = iree_linalg_ext.reverse
The above commit adds the missing bufferizations, but the test case still fails — this time with a dominance error, which looks unrelated to the bufferization of these two ops.
"scf.for"(%12, %1, %13) ({
^bb0(%arg0: index):
%14 = "affine.apply"(%8) {map = affine_map<()[s0] -> (s0 * 64)>} : (index) -> index
%15 = "affine.apply"(%9) {map = affine_map<()[s0] -> (s0 * 64)>} : (index) -> index
"scf.for"(%14, %2, %15) ({
^bb0(%arg1: index):
%16 = "memref.subview"(%4, %arg0, %arg1) {operand_segment_sizes = dense<[1, 2, 0, 0]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [2, 3], static_strides = [1, 1]} : (memref<2x3xf32>, index, index) -> memref<2x3xf32, affine_map<(d0, d1)[s0] -> (d0 * 3 + s0 + d1)>>
%17 = "bufferization.to_tensor"(%16) : (memref<2x3xf32, affine_map<(d0, d1)[s0] -> (d0 * 3 + s0 + d1)>>) -> tensor<2x3xf32>
%18 = "memref.subview"(%6, %22, %arg1, %1, %2, %0, %0) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (memref<2x3xf32>, index, index, index, index, index, index) -> memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
%19 = "bufferization.to_tensor"(%18) : (memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) -> tensor<?x?xf32>
%20 = "linalg.init_tensor"() {static_sizes = [2, 3]} : () -> tensor<2x3xf32>
"iree_linalg_ext.reverse"(%16, %18) {dimensions = dense<0> : tensor<1xi64>, lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64]]>, operand_segment_sizes = dense<1> : vector<2xi32>} : (memref<2x3xf32, affine_map<(d0, d1)[s0] -> (d0 * 3 + s0 + d1)>>, memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) -> ()
%21 = "bufferization.to_tensor"(%18) : (memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) -> tensor<?x?xf32>
%22 = "affine.apply"(%arg0) {map = affine_map<()[s0] -> (-s0)>} : (index) -> index
%23 = "memref.subview"(%6, %22, %arg1, %1, %2, %0, %0) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (memref<2x3xf32>, index, index, index, index, index, index) -> memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
"linalg.generic"(%18, %23) ({
^bb0(%arg2: f32, %arg3: f32):
"linalg.yield"(%arg2) : (f32) -> ()
}) {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"], operand_segment_sizes = dense<1> : vector<2xi32>} : (memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>, memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
The problematic op is %18, which uses %22 — but %22 is defined later in the block, so the use does not dominate its definition.
The test case still fails due to a dominance error.
It fails because the convert-to-destination-passing-style pass does not work correctly here. I'll work on fixing it.