make_sync generates syncwarp when syncthreads is needed
Blealtan opened this issue · comments
Blealtan commented
import ir
# Reproduction script: build a small AST with the `ir` builder, auto-schedule
# it for the GPU target, then print the scheduled AST and the lowered result.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation. It is chosen so that every name is used inside the `with`
# block that binds it, and so that `ir.pop_ast()` runs only after all builder
# contexts have closed -- confirm against the original report.

# `u` is declared as the "input" tensor and `e` as the "output" tensor; both
# have shape (64, 64, 64, 5), dtype "float64" and memory type "gpu/global".
with ir.VarDef("u", (64, 64, 64, 5), "float64", "input", "gpu/global") as u:
    with ir.VarDef("e", (64, 64, 64, 5), "float64", "output", "gpu/global") as e:
        with ir.For("i", 0, 64) as i:
            with ir.For("j", 0, 64) as j:
                with ir.For("k", 0, 64) as k:
                    # 5-element "cache" temporary defined inside the innermost
                    # spatial loop. The IR-level name is "materialize_tmp"
                    # (one underscore) while the Python handle is
                    # `materialize__tmp` (two); both are kept as reported.
                    with ir.VarDef(
                        "materialize_tmp",
                        (5,),
                        "float64",
                        "cache",
                        "gpu/global",
                    ) as materialize__tmp:
                        # The five stores are kept as separate statements
                        # (not a loop) so the generated AST matches the
                        # reported one exactly.
                        materialize__tmp[0] = u[i, j, k, 0]
                        materialize__tmp[1] = u[i, j, k, 1]
                        materialize__tmp[2] = u[i, j, k, 2]
                        materialize__tmp[3] = u[i, j, k, 3]
                        materialize__tmp[4] = u[i, j, k, 4]
                        # Copy the temporary back out through an IR-level loop.
                        with ir.For("i1", 0, 5) as i1:
                            e[i, j, k, i1] = materialize__tmp[i1]

# Take the AST built above and hand it to the scheduler.
s = ir.Schedule(ir.pop_ast())
s.auto_schedule(ir.GPU())
# Print the scheduled AST, then the result of lowering it for the GPU target.
# Presumably the synchronization described in the issue title appears in the
# lowered output -- TODO confirm.
print(s.ast())
print(ir.lower(s.ast(), ir.GPU()))