make_sync generates syncwarp when syncthreads is needed

Question

make_sync generates syncwarp when syncthreads is needed

Blealtan opened this issue 2 years ago · comments

import ir

# Reproduction script for the issue reported above (make_sync emitting
# syncwarp where syncthreads is needed). It builds a small AST that copies a
# 64x64x64x5 float64 input `u` into an output `e` through a 5-element
# temporary, then auto-schedules it for GPU and prints the results.
#
# `u` is the input tensor and `e` the output tensor; both are declared with
# the "gpu/global" memory type string.
with ir.VarDef("u", (64, 64, 64, 5, ), "float64", "input", "gpu/global") as u: 
    with ir.VarDef("e", (64, 64, 64, 5, ), "float64", "output", "gpu/global") as e: 
        # Three nested loops, each over [0, 64), cover the first three axes.
        with ir.For("i", 0, 64) as i:
            with ir.For("j", 0, 64) as j:
                with ir.For("k", 0, 64) as k:
                    # Temporary of 5 elements declared with "cache" access,
                    # defined inside the innermost loop. Note the IR name is
                    # "materialize_tmp" while the Python handle is
                    # `materialize__tmp` (double underscore).
                    with ir.VarDef("materialize_tmp", (5, ), "float64", "cache", "gpu/global") as materialize__tmp: 
                        # Copy the 5 values of u[i, j, k, :] into the
                        # temporary, one explicit store per element.
                        materialize__tmp[0] = u[i, j, k, 0]
                        materialize__tmp[1] = u[i, j, k, 1]
                        materialize__tmp[2] = u[i, j, k, 2]
                        materialize__tmp[3] = u[i, j, k, 3]
                        materialize__tmp[4] = u[i, j, k, 4]
                        # Write the temporary back out to e[i, j, k, :] with
                        # an IR-level loop over [0, 5).
                        with ir.For("i1", 0, 5) as i1:
                            e[i, j, k, i1] = materialize__tmp[i1]


# Wrap the AST built above in a Schedule and let the library pick a schedule
# for the GPU target.
s = ir.Schedule(ir.pop_ast())
s.auto_schedule(ir.GPU())
# Print the AST after auto-scheduling, then the result of lowering that AST
# for GPU.
# NOTE(review): presumably the second print is where the incorrect
# synchronization from the issue title shows up — confirm against the output.
print(s.ast())
print(ir.lower(s.ast(), ir.GPU()))