iree-org / iree

A retargetable MLIR-based machine learning compiler and runtime toolkit.

Home Page:http://iree.dev/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

[CUDA] [CODEGEN] CUDA backend implementation of TopK produces wrong output.

LLITCHEV opened this issue · comments

What happened?

Running the following test produces duplicated elements in the output:

```mlir
  %input_values = util.unfoldable_constant dense<[[
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 123.45, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 8.9, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.3, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 4.28, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 7.8, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]]> : tensor<1x256xf32>
  %out_values_empty = tensor.empty() : tensor<1x40xf32>
  %out_indices_empty = tensor.empty() : tensor<1x40xi32>
  %neg_inf = arith.constant 0xFF800000 : f32
  %c0 = arith.constant 0 : i32
  %out_values = linalg.fill ins(%neg_inf : f32) outs(%out_values_empty : tensor<1x40xf32>) -> tensor<1x40xf32>
  %out_indices = linalg.fill ins(%c0 : i32) outs(%out_indices_empty : tensor<1x40xi32>) -> tensor<1x40xi32>
  %0:2 = iree_linalg_ext.topk
        dimension(1)
        ins(%input_values: tensor<1x256xf32>)
        outs(%out_values, %out_indices : tensor<1x40xf32>, tensor<1x40xi32>) {
        ^bb0(%arg0 : f32, %arg1 : f32):
         %0 = arith.cmpf ogt, %arg0, %arg1 : f32
         iree_linalg_ext.yield %0 : i1
        } -> tensor<1x40xf32>, tensor<1x40xi32>
 check.expect_almost_eq_const(
     %0#0,
     dense<[[123.45, 8.9, 7.8, 4.28, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]]> : tensor<1x40xf32>
 ) : tensor<1x40xf32>

  check.expect_eq_const(
      %0#1,
      dense<[[135, 169, 247, 233, 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63, 67, 71, 75, 79, 83, 87, 91, 95, 99, 103, 107, 111, 115, 119, 123, 127, 131, 139, 143, 147]]> : tensor<1x40xi32>
  ) : tensor<1x40xi32>
 return
}
```

The output is:

```
[ RUN      ] module.vector_call_topk_1x256
/work/runtime/src/iree/modules/check/module.cc:372: Failure
Failed
Expected equality of these values. Contents does not match.
  lhs:
    1x40xi32=[135 135 169 169 247 247 233 233 3 3 7 7 11 11 15 15 19 19 23 23 27 27 31 31 35 35 39 39 43 43 47 47 51 51 55 55 59 59 63 63]
  rhs:
    1x40xi32=[135 169 247 233 3 7 11 15 19 23 27 31 35 39 43 47 51 55 59 63 67 71 75 79 83 87 91 95 99 103 107 111 115 119 123 127 131 139 143 147]

/work/runtime/src/iree/modules/check/module.cc:456: Failure
Failed
Expected near equality of these values. Contents does not match.
  lhs:
    1x40xf32=[123.45 123.45 8.9 8.9 7.8 7.8 4.28 4.28 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
  rhs:
    1x40xf32=[123.45 8.9 7.8 4.28 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]

[  FAILED  ] module.vector_call_topk_1x256, where GetParam() = 2 (599 ms)
```


### Steps to reproduce your issue

I get this error when running the newly added tests in CI on the (WIP) PR https://github.com/iree-org/iree/pull/17045

### What component(s) does this issue relate to?

_No response_

### Version information

Latest source.

### Additional context

_No response_