klauspost / reedsolomon

Reed-Solomon Erasure Coding in Go

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Huge memory cost

1a1a11a opened this issue · comments

I am benchmarking the performance of this library, and I noticed that it uses a huge amount of memory. The benchmark program is the following.
In the code, n is the number of data chunks plus parity chunks, and k is the number of data chunks. I used n=4 and k=3, and 64GB of memory is not enough to run through the test.

// encode fills a buffer of chunkSize*k random bytes, splits it into shards,
// and computes the parity shards. n (total shard count) is unused here but
// kept for signature symmetry with loseData and decode.
//
// The original version discarded the errors from Split, Encode, and Verify;
// they are now checked explicitly, matching the existing log.Fatal style.
func encode(coder reedsolomon.Encoder, n, k, chunkSize int) (shards [][]byte) {
	var data = make([]byte, chunkSize*k)
	rand.Read(data)

	var err error
	shards, err = coder.Split(data)
	if err != nil {
		log.Fatal(err)
	}
	if err = coder.Encode(shards); err != nil {
		log.Fatal(err)
	}
	ok, err := coder.Verify(shards)
	if err != nil {
		log.Fatal(err)
	}
	if !ok {
		log.Fatal("error")
	}
	return shards
}

// loseData simulates shard loss by setting exactly n-k randomly chosen,
// distinct shards to nil so that Reconstruct has work to do.
//
// Bug fix: the original drew a fresh random index in each branch — it
// inspected shards[rand.Intn(n)] but nilled a *different* rand.Intn(n).
// It could therefore nil an already-nil slot without decrementing i,
// finishing with fewer than n-k shards actually lost. Drawing one index
// and inspecting/nilling that same index guarantees exactly n-k losses.
// (Assumes at least n-k shards are non-nil on entry, as in the original.)
func loseData(n, k int, shards [][]byte) {
	for i := 0; i < n-k; i++ {
		idx := rand.Intn(n)
		if shards[idx] == nil {
			i-- // this shard is already lost; retry with another index
		} else {
			shards[idx] = nil
		}
	}
}

// decode reconstructs the missing (nil) shards in place and returns the
// same slice. n and k are unused but kept for signature symmetry with
// encode and loseData.
//
// The original discarded the Reconstruct error; it is now handled with
// log.Fatal, consistent with encode's error handling.
func decode(coder reedsolomon.Encoder, n, k int, shards [][]byte) (fixedShards [][]byte) {
	if err := coder.Reconstruct(shards); err != nil {
		log.Fatal(err)
	}
	return shards
}


// Benchmark measures Reconstruct throughput (MB/s) for chunk sizes from 16
// bytes up to 2^30, accumulating per-chunk-size results into thrptMap under
// mtx. It signals wg when finished.
func Benchmark(n, k int, thrptMap map[int]float64, mtx *sync.Mutex, wg *sync.WaitGroup) {
	defer wg.Done() // method call dereferences automatically; (*wg).Done() was redundant
	const runtimeSecs = 2
	rand.Seed(time.Now().UnixNano())

	fmt.Println("WithMaxGoroutines(1) WithCauchyMatrix()")
	// reedsolomon.New takes (dataShards, parityShards): k data, n-k parity.
	// The original call New(n-k, k) built a 1-data/3-parity coder for
	// n=4, k=3, so the single data shard was chunkSize*k bytes — a major
	// contributor to the reported memory blow-up. Also check New's error
	// instead of discarding it.
	coder, err := reedsolomon.New(k, n-k, reedsolomon.WithMaxGoroutines(1), reedsolomon.WithCauchyMatrix())
	if err != nil {
		log.Fatal(err)
	}

	for chunkSize := 16; chunkSize < int(math.Pow(2, 30)); chunkSize *= 8 {
		encodedData := encode(coder, n, k, chunkSize)

		startTs := time.Now()
		turnAroundBytes := 0

		for time.Since(startTs).Seconds() < runtimeSecs {
			loseData(n, k, encodedData)
			decode(coder, n, k, encodedData)
			turnAroundBytes += chunkSize * k
		}
		// MB processed divided by elapsed seconds. Use float seconds: the
		// original's Nanoseconds()/1000000000 integer division truncated
		// the elapsed time and distorted the reported throughput.
		thrpt := float64(turnAroundBytes) / 1e6 / time.Since(startTs).Seconds()

		mtx.Lock()
		// Accumulate across goroutines; += on a missing key starts from 0.
		thrptMap[chunkSize] += thrpt
		mtx.Unlock()
	}
}

// BenchmarkParallel runs nThreads copies of Benchmark concurrently, waits
// for them all, and prints the summed throughput per chunk size in sorted
// order.
func BenchmarkParallel(n, k, nThreads int) {
	fmt.Println(n, k, nThreads)
	thrptMap := make(map[int]float64)
	mtx := &sync.Mutex{}
	var wg sync.WaitGroup

	for i := 0; i < nThreads; i++ {
		wg.Add(1)
		// Pass n and k through: the original hard-coded Benchmark(4, 3, ...)
		// and silently ignored this function's own n and k parameters.
		go Benchmark(n, k, thrptMap, mtx, &wg)
	}
	wg.Wait()

	// Map iteration order is random; sort the chunk sizes for stable output.
	keys := make([]int, 0, len(thrptMap))
	for cs := range thrptMap {
		keys = append(keys, cs)
	}
	sort.Ints(keys)

	for _, cs := range keys {
		fmt.Printf("%d \t %.4f\n", cs, thrptMap[cs])
	}
}