google / zoekt

Fast trigram based code search

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

zoekt cannot handle '[\s\S]a'

ijt opened this issue · comments

[ ~/src/github.com/sourcegraph/sourcegraph ] zoekt '[\s\S]a'
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00000.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 55 [running]:
runtime/debug.Stack(0xc000724240, 0xc005ee5fc0, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724240, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc000216190)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc005b8fa08, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b800, 0x14b1940, 0xc000216150, 0xc0065ed990, 0x8, 0x178b8b8, 0xc000214100)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b800, 0x14b1920, 0xc000214100, 0x14b1920, 0xc000214100, 0xc0065edec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003b800, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724240, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/a_v15.00000.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 58 [running]:
runtime/debug.Stack(0xc000724200, 0xc0011900c0, 0x32)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724200, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc00004cca0)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc0010c6c48, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc000062800, 0x14b1940, 0xc00004cc70, 0xc000049990, 0x8, 0x178b8b8, 0xc0007242c0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc000062800, 0x14b1920, 0xc0007242c0, 0x14b1920, 0xc0007242c0, 0xc000049ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc000062800, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724200, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00001.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 57 [running]:
runtime/debug.Stack(0xc000724260, 0xc0065fa080, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724260, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc006600040)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc006606020, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003bc00, 0x14b1940, 0xc006600010, 0x2000, 0x8, 0x1881248, 0xc00000e120)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003bc00, 0x14b1920, 0xc00000e120, 0x14b1920, 0xc00000e120, 0xc0065f9ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003bc00, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724260, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00002.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 56 [running]:
runtime/debug.Stack(0xc000724220, 0xc00118c1c0, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724220, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc00008a1f0)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc001198020, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b400, 0x14b1940, 0xc00008a1c0, 0xc0065f1990, 0x8, 0x178b8b8, 0xc0000940c0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b400, 0x14b1920, 0xc0000940c0, 0x14b1920, 0xc0000940c0, 0xc0065f1ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003b400, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724220, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0

Adding this test to parse_test.go might be useful:

// Test_regexpQuery verifies that passing a pattern through regexpQuery and
// printing the result with String() yields text that regexp.Compile still
// accepts.
func Test_regexpQuery(t *testing.T) {
	// Patterns that the standard regexp package is expected to accept as-is.
	validPatterns := []string{
		`[\s\S]b`,
	}

	t.Run("does not make valid regexps not compile", func(t *testing.T) {
		for i := range validPatterns {
			src := validPatterns[i]
			t.Run(src, func(t *testing.T) {
				// Precondition: the input itself has to compile, otherwise a
				// later failure would say nothing about regexpQuery.
				if _, err := regexp.Compile(src); err != nil {
					t.Fatal(err)
				}

				q, err := regexpQuery(src, false, false)
				if err != nil {
					t.Fatal(err)
				}

				// The printed form of the query must still be a valid regexp.
				if _, err := regexp.Compile(q.String()); err != nil {
					t.Errorf("compiling stringified version of regexpQuery() output: %v", err)
				}
			})
		}
	})
}

I think it's actually a bug in the regexp library.

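`\s` matches whitespace and `\S` matches non-whitespace, so together they cover every character.
`[\s\S]` should therefore not translate to the invalid '[^]' but to '.' (strictly `(?s:.)`, since `[\s\S]` also matches newlines).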

You're right!

package main

import (
	"regexp/syntax"
	"testing"
)

// TestRegexpParse checks that the String() form of a parsed regexp is itself
// a pattern the parser accepts.
//
// The pattern `[\s\S]b` is parsed with Perl flags and its printed form is fed
// back into syntax.Parse. String() is not required to reproduce the source
// text byte for byte, so the printed form is not compared with pat; the test
// fails only when the printed form cannot be parsed again (for example
// `[^]b`, which is rejected with "missing closing ]").
func TestRegexpParse(t *testing.T) {
	const pat = `[\s\S]b`
	flags := syntax.PerlX

	rx, err := syntax.Parse(pat, flags)
	if err != nil {
		t.Fatal(err)
	}

	printed := rx.String()
	if _, err := syntax.Parse(printed, flags); err != nil {
		// Report the printed form next to the pattern it was derived from.
		t.Error("syntax.Parse() = ", printed, ", want ", pat)
	}
}
[ ~/src/github.com/ijt/rx ] go test ./...
--- FAIL: TestRegexpParse (0.00s)
    main_test.go:16: syntax.Parse() =  [^]b , want  [\s\S]b
FAIL
FAIL	github.com/ijt/rx	0.006s

I'll file it upstream.

Oh, looks like you already filed it: golang/go#31807.

This is now fixed upstream in Go.