Reduction in Taichi is slower than NumPy
Frenchfries135 opened this issue · comments
Frenchfries135 commented
import taichi as ti
import numpy as np
import time
# Initialise Taichi with the CUDA backend (the debug flag is left disabled).
ti.init(arch=ti.cuda)  # ,debug=True)

# Number of elements to reduce: 2**18.
n = 1 << 18

# Input data: n samples from np.random.randn, converted to float32.
a = np.random.randn(n).astype(np.float32)

# Taichi f32 field of n elements, filled with a copy of `a`.
b = ti.field(dtype=ti.f32, shape=n)
b.from_numpy(a)
# Taichi kernel: add up every element of the module-level field `b` and
# return the total as an f32.
@ti.kernel
def sum1() -> ti.f32:
    total = 0.0
    # NOTE(review): the benchmark labels this path "atomic_add", so this
    # augmented assignment is presumably lowered to an atomic add by
    # Taichi -- confirm against the Taichi documentation.
    for idx in b:
        total += b[idx]
    return total
# Benchmark: time 1000 NumPy reductions of `a` against 1000 calls of the
# Taichi kernel `sum1`, then print both elapsed times in seconds.

# Call the kernel once before timing so that its first-call cost
# (compilation, per the original "#compile" note) is excluded.
sum1()

# time.perf_counter() is used instead of time.time(): it is monotonic and
# has the highest available resolution, so short intervals are not skewed
# by system clock adjustments or coarse tick granularity.
t1 = time.perf_counter()
for _ in range(1000):
    a.sum()
t2 = time.perf_counter()
for _ in range(1000):
    sum1()
# NOTE(review): the pasted source lost its indentation; ti.sync() is placed
# after the loop (one sync before reading the clock) -- confirm this
# matches the original script.
ti.sync()
t3 = time.perf_counter()

print('time numpy:', t2 - t1, 'time ti atomic_add:', t3 - t2)
# Keep the console window open until the user presses Enter.
input('enter to continue')
Result on an RTX 3060 (GPU) and a Ryzen 7 5800H (CPU):
time numpy: 0.07301616668701172 time ti atomic_add: 0.3090794086456299