jxskiss / base62

Compact and high-performance implementation of the base62 algorithm for Golang.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Does not agree with Saltpack / GMP

coolaj86 opened this issue · comments

Reference Strings

For reference, here's the output of this library (jxskiss) compared to saltpack, which also seems to be compatible with the GMP and GnuPG Base62 implementations. The outputs differ even when using the same alphabet:

enc := base62.NewEncoding("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
Raw     : Hello, 世界 (13 bytes)
Base64  : SGVsbG8sIOS4lueVjA (18 chars)
jxskiss : CMvvMYBvWmoRinMP81 (18 chars)
saltpack: 1wJfrzvdbuFbL65vcS (18 chars)

Raw     : Hello World (11 bytes)
Base64  : SGVsbG8gV29ybGQ (15 chars)
jxskiss : ancSlT58ln6RbX4 (15 chars)
saltpack: 73XpUgyMwkGr29M (15 chars)

Raw     : [0] (1 bytes)
Base64  : AA (2 chars)
jxskiss : 00 (2 chars)
saltpack: 00 (2 chars)

Raw     : [0 0 0 0 0 0 0 0 0 0 0 0] (12 bytes)
Base64  : AAAAAAAAAAAAAAAA (16 chars)
jxskiss : 0000000000000000 (16 chars)
saltpack: 00000000000000000 (17 chars)

Raw     : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] (24 bytes)
Base64  : AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA (32 chars)
jxskiss : 00000000000000000000000000000000 (32 chars)
saltpack: 000000000000000000000000000000000 (33 chars)

Raw     : [0 0 0 0 255 255 255 255] (8 bytes)
Base64  : AAAAAP____8 (11 chars)
jxskiss : VVVVVV300000 (12 chars)
saltpack: 000004gfFC3 (11 chars)

Raw     : [255 255 255 255 0 0 0 0] (8 bytes)
Base64  : _____wAAAAA (11 chars)
jxskiss : 00000yVVVVV7 (12 chars)
saltpack: LygHZwPV2MC (11 chars)

As you can see, it's not just a matter of the output being reversed, but the character sequences are entirely different.

Test Output Reference

package main

import (
	"encoding/base64"
	"fmt"

	"github.com/jxskiss/base62"
	"github.com/keybase/saltpack/encoding/basex"
)

// main prints, for each sample input, the raw bytes followed by their
// base64 (raw URL), jxskiss/base62, and saltpack basex Base62 encodings,
// each with its length, so the outputs can be compared side by side.
func main() {
	// Custom alphabet: digits, then uppercase, then lowercase letters.
	enc := base62.NewEncoding("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

	samples := [][]byte{
		[]byte("Hello, 世界"),
		[]byte("Hello World"),
		{0},
		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 255, 255, 255, 255},
		{255, 255, 255, 255, 0, 0, 0, 0},
	}

	for _, src := range samples {
		b62 := enc.Encode(src)
		b64 := base64.RawURLEncoding.EncodeToString(src)
		sp62 := basex.Base62StdEncoding.EncodeToString(src)

		// Samples that start with a zero byte, or whose second byte is 255,
		// are printed as a byte list; everything else is printed as text.
		// The length guards keep empty and one-byte samples from indexing
		// out of range.
		asBytes := len(src) == 0 || src[0] == 0x0 || (len(src) > 1 && src[1] == 255)
		if asBytes {
			fmt.Printf("Raw     : %v (%d bytes)\n", src, len(src))
		} else {
			fmt.Printf("Raw     : %v (%d bytes)\n", string(src), len(src))
		}
		fmt.Printf("Base64  : %s (%d chars)\n", b64, len(b64))
		fmt.Printf("jxskiss : %s (%d chars)\n", b62, len(b62))
		fmt.Printf("saltpack: %s (%d chars)\n", sp62, len(sp62))
		fmt.Println()
	}
}

This implementation is not string-compatible with typical big-int based implementations. As a Base62 encoding and decoding scheme, it is correct: it encodes arbitrary bytes to a string using 62 characters, then correctly decodes the string back to the original bytes. Correctness is tested with a large amount of random bytes; see the test file https://github.com/jxskiss/base62/blob/master/base62_test.go.

This implementation is much more performant than typical big-int based implementations. The reversed byte order is an implementation detail, and it is the main reason we can get this performance.

The following test code gives this:

(anaconda3-2018.12) ➜  temp git:(master) ✗ go test -count=1 ./testbase62
ok      temp/testbase62 0.011s

(anaconda3-2018.12) ➜  temp git:(master) ✗ go test -run=none -bench=. ./testbase62
goos: darwin
goarch: amd64
pkg: temp/testbase62
cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
Benchmark_jxskiss_Base62_1K-12             95632             11263 ns/op
Benchmark_saltpack_Base62_1K-12            13999             87196 ns/op
Benchmark_jxskiss_Base62_4K-12             25191             44248 ns/op
Benchmark_saltpack_Base62_4K-12             3537            347388 ns/op
Benchmark_jxskiss_Base62_1M-12               100          11347393 ns/op
Benchmark_saltpack_Base62_1M-12               13          88337949 ns/op
PASS
ok      temp/testbase62 8.553s
package testbase62

import (
	"bytes"
	"crypto/rand"
	"testing"

	"github.com/jxskiss/base62"
	"github.com/keybase/saltpack/encoding/basex"
)

// Shared inputs for the test and the benchmarks; init fills them in.
var (
	bytes1K []byte
	bytes4K []byte
	bytes1M []byte
)

// init allocates the 1 KiB, 4 KiB and 1 MiB inputs and fills each with
// bytes from crypto/rand. It panics on the first read that fails, and the
// panic message carries the underlying error.
func init() {
	bytes1K = make([]byte, 1024)
	bytes4K = make([]byte, 4096)
	bytes1M = make([]byte, 1024*1024)

	for _, buf := range [][]byte{bytes1K, bytes4K, bytes1M} {
		if _, err := rand.Read(buf); err != nil {
			// Include the cause; without random input the test and
			// benchmarks have nothing meaningful to run on.
			panic("rand.Read got error: " + err.Error())
		}
	}
}

// TestCorrectness round-trips bytes4K through jxskiss/base62 and through
// saltpack's basex Base62 encoding, and fails if either one returns a
// decode error or does not reproduce the original bytes.
func TestCorrectness(t *testing.T) {
	// jxskiss/base62 round trip.
	b1 := base62.Encode(bytes4K)
	b2, err := base62.Decode(b1)
	if err != nil {
		t.Fatalf("base62 error: %v", err)
	}
	if !bytes.Equal(bytes4K, b2) {
		t.Fatal("base62 not equal")
	}

	// saltpack basex round trip; the caller sizes both buffers up front.
	n3 := basex.Base62StdEncoding.EncodedLen(len(bytes4K))
	b3 := make([]byte, n3)
	basex.Base62StdEncoding.Encode(b3, bytes4K)
	n4 := basex.Base62StdEncoding.DecodedLen(len(b3))
	b4 := make([]byte, n4)
	nDec, err := basex.Base62StdEncoding.Decode(b4, b3)
	if err != nil {
		t.Fatalf("basex error: %v", err)
	}
	// Keep only the bytes Decode reported as written.
	b4 = b4[:nDec]
	if !bytes.Equal(bytes4K, b4) {
		t.Fatal("basex not equal")
	}
}

// Benchmark_jxskiss_Base62_1K times base62.Encode on the 1024-byte input,
// discarding each result.
func Benchmark_jxskiss_Base62_1K(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		_ = base62.Encode(bytes1K)
	}
}

// Benchmark_saltpack_Base62_1K times saltpack's basex Base62 encoding of the
// 1024-byte input. The destination buffer is allocated inside the loop, so
// each iteration includes that allocation.
func Benchmark_saltpack_Base62_1K(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		dst := make([]byte, basex.Base62StdEncoding.EncodedLen(len(bytes1K)))
		basex.Base62StdEncoding.Encode(dst, bytes1K)
	}
}

// Benchmark_jxskiss_Base62_4K times base62.Encode on the 4096-byte input,
// discarding each result.
func Benchmark_jxskiss_Base62_4K(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		_ = base62.Encode(bytes4K)
	}
}

// Benchmark_saltpack_Base62_4K times saltpack's basex Base62 encoding of the
// 4096-byte input. The destination buffer is allocated inside the loop, so
// each iteration includes that allocation.
func Benchmark_saltpack_Base62_4K(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		dst := make([]byte, basex.Base62StdEncoding.EncodedLen(len(bytes4K)))
		basex.Base62StdEncoding.Encode(dst, bytes4K)
	}
}

// Benchmark_jxskiss_Base62_1M times base62.Encode on the 1024*1024-byte
// input, discarding each result.
func Benchmark_jxskiss_Base62_1M(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		_ = base62.Encode(bytes1M)
	}
}

// Benchmark_saltpack_Base62_1M times saltpack's basex Base62 encoding of the
// 1024*1024-byte input. The destination buffer is allocated inside the loop,
// so each iteration includes that allocation.
func Benchmark_saltpack_Base62_1M(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		dst := make([]byte, basex.Base62StdEncoding.EncodedLen(len(bytes1M)))
		basex.Base62StdEncoding.Encode(dst, bytes1M)
	}
}

I hope to find some time later to add the implementation details to the README.