Reduce copied code while keeping performance?
shawnsmithdev opened this issue · comments
The below is a prototype implementation of radix sort for []uint64. It's about 3x slower than the existing implementation. I need to do some profiling on this. If it is even possible to reduce this performance hit to a tolerable amount, it might be worth refactoring the whole library in this style.
package internal
// radixee describes one sortable sequence for SortRadixer: its element
// count plus a mask factory. mask(digit) returns a function that extracts
// the digit-th byte-sized key from the element at a given index.
type radixee struct {
// len is the number of elements in the underlying slice.
len int
// mask(digit)(i) yields the byte key of element i for the given digit.
mask func(int) func(int) byte
}
// radixer bundles everything SortRadixer needs: the sequence to sort (x),
// a scratch buffer at least as long (y), the number of byte-wide digits
// per element, and a copier that moves single elements between the two
// underlying slices.
type radixer struct {
// x is the sequence to sort.
x radixee // to sort
// y is the scratch buffer; SortRadixer panics if it is shorter than x.
y radixee // buffer
// digits is the number of byte-wide passes to run (8 for uint64).
digits int
// copier(dst, src, flip) copies one element between the two slices;
// flip selects the direction (see uint64Copier).
copier func(int, int, bool)
}
// SortRadixer sorts r.x using a least-significant-digit radix sort with
// r.y as scratch space. It panics ("buffer too small") when the buffer is
// shorter than the input and returns immediately for inputs of fewer than
// two elements.
//
// Each pass counts the byte keys for one digit, turns the counts into
// prefix-sum offsets, and stably scatters every element into the other
// buffer via r.copier, after which the roles of r.x and r.y swap. A pass
// in which every element shares the same key is skipped, since a stable
// scatter would merely reproduce the current order; a final identity copy
// then guarantees the result ends up in the same buffer the unskipped
// algorithm would leave it in (the caller's x when r.digits is even).
func SortRadixer(r radixer) {
	if r.x.len > r.y.len {
		panic("buffer too small")
	}
	if r.x.len < 2 {
		return
	}
	flip := false // false: the data currently lives in the caller's x
	for digit := 0; digit < r.digits; digit++ {
		var counts [256]int
		mask := r.x.mask(digit)
		for i := 0; i < r.x.len; i++ {
			counts[mask(i)]++
		}
		// Skip the pass when this digit cannot change the order:
		// every element carries the same key, so a stable scatter
		// would be an order-preserving no-op.
		if counts[mask(0)] == r.x.len {
			continue
		}
		// offset[k] = destination index of the first element with key k.
		var offset [256]int
		for i := 1; i < len(offset); i++ {
			offset[i] = offset[i-1] + counts[i-1]
		}
		for i := 0; i < r.x.len; i++ {
			key := mask(i)
			r.copier(offset[key], i, flip)
			offset[key]++
		}
		r.x, r.y = r.y, r.x
		flip = !flip
	}
	// Skipped passes can leave the data in the opposite buffer from the
	// one the all-passes algorithm uses; one straight copy restores the
	// original contract. When no pass was skipped this is a no-op.
	if flip != (r.digits%2 == 1) {
		for i := 0; i < r.x.len; i++ {
			r.copier(i, i, flip)
		}
	}
}
// uint64Copier returns an element-copy callback over the slice pair x, y,
// suitable for radixer.copier. With flip false the callback moves x[src]
// into y[dst]; with flip true it moves y[src] into x[dst].
func uint64Copier(x, y []uint64) func(int, int, bool) {
	return func(dst, src int, flip bool) {
		from, to := x, y
		if flip {
			from, to = y, x
		}
		to[dst] = from[src]
	}
}
// uint64Masker returns a digit-extraction factory over x, suitable for
// radixee.mask. Calling the result with a digit index (0 = least
// significant byte) yields a function mapping an element index i to the
// value of that byte within x[i].
func uint64Masker(x []uint64) func(int) func(int) byte {
	return func(digit int) func(int) byte {
		shift := uint(digit) * 8
		return func(i int) byte {
			// byte() truncation keeps exactly the shifted-down byte,
			// equivalent to masking with 0xFF<<shift before shifting.
			return byte(x[i] >> shift)
		}
	}
}
func SortUint64V2(x, buffer []uint64) {
y := buffer[:len(x)]
SortRadixer(radixer{
x: radixee{len: len(x), mask: uint64Masker(x)},
y: radixee{len: len(y), mask: uint64Masker(y)},
digits: 8,
copier: uint64Copier(x, y),
})
}
Just going to wait and see what happens with generics...