alecthomas / participle

A parser library for Go

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Issue with a nested structure

gwd opened this issue · comments

First, thank you so much for this library -- made my first grammar on Friday and I think the setup really helped make things straightforward.

I'm using participle v2.10.0, and trying to write a parser for strings like the following:

	// Sample inputs to parse: single versions as well as "<from> - <to>" ranges.
	tests := []string{
		"Xen 4.18.x",
		"Linux 4.7.x",
		"QEMU 4.7-RC series",
		"Linux 4.7.x - Linux 4.9.x",
		"xen-unstable",
		"Xen 4.18.x - xen-unstable",
		"QEMU 4.7-RC series - QEMU 4.10.x",
	}

i.e., the input is either a single version or a range of two versions separated by " - "; each version is either a fixed string (such as xen-unstable) or <project> <version number>.

I wrote the following participle structures:

// ProjectVersion is the "<project> <version number>" form of a version.
// Its grammar is a Project token, an uncaptured Space token, and then either
// a VersionString or an RCVersionString token, which is captured into Version.
type ProjectVersion struct {
	Project string `parser:"@Project"`
	Version string `parser:"Space @(VersionString | RCVersionString)"`
}

// VersionSingle is one version: either a TipVersion token or, as the second
// alternative, a nested ProjectVersion.
type VersionSingle struct {
	TipVersion     *string         `parser:"@TipVersion"`
	ProjectVersion *ProjectVersion `parser:"| @@"`
}

// VersionRange is a pair of versions: a VersionSingle (From), a VersionDash
// token, and a second VersionSingle (To).
type VersionRange struct {
	From VersionSingle `parser:"@@"`
	To   VersionSingle `parser:"VersionDash @@"`
}

// Version is the top-level grammar. The VersionRange alternative is listed
// first and the VersionSingle alternative second.
type Version struct {
	Range  *VersionRange  `parser:"@@"`
	Single *VersionSingle `parser:"| @@"`
}

var rulesCommon []lexer.SimpleRule{	
	{"Project", `Xen|Linux|QEMU|xapi`},
	{"TipVersion", `xen-unstable`},
	{"RCVersionString", `\d+\.\d+-RC series`},
	{"VersionString", `\d+\.\d+\.x`},
	{"VersionDash", " - "},
	{"Space", ` `},
}

Unfortunately, I get errors like the following:

Parsing Xen 4.18.x: 1:11: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing Linux 4.7.x: 1:12: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing QEMU 4.7-RC series: 1:19: unexpected token "<EOF>" (expected <versiondash> VersionSingle)

In other words, it's somehow getting stuck on parsing something as a VersionRange, and not backing out and parsing it simply as a VersionSingle.

But this only happens if both Version and VersionSingle have at least two ways to be interpreted. If I replace the ProjectVersion struct with a single lexer token whose regexp matches the same string, it works (in the test below, the Project lexer token is replaced by a ProjectVersion token with the appropriate regexp).

// With the range, without the "project version"
//
// VersionSingleNoProjectVersion is one version: either a TipVersion token or
// a single ProjectVersion token (captured as a string rather than parsed into
// a nested struct).
type VersionSingleNoProjectVersion struct {
	TipVersion     *string `parser:"@TipVersion"`
	ProjectVersion *string `parser:"| @ProjectVersion"`
}

// VersionRangeNoProjectVersion is a VersionSingleNoProjectVersion (From), a
// VersionDash token, and a second VersionSingleNoProjectVersion (To).
type VersionRangeNoProjectVersion struct {
	From VersionSingleNoProjectVersion `parser:"@@"`
	To   VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}

// VersionNoProjectVersion is the top-level grammar of this variant: the range
// alternative is listed first and the single-version alternative second.
type VersionNoProjectVersion struct {
	Range  *VersionRangeNoProjectVersion  `parser:"@@"`
	Single *VersionSingleNoProjectVersion `parser:"| @@"`
}

The problem for single items also goes away if I get rid of the VersionRange; but then of course you can't parse ranges:

// With "project version", without range
//
// VersionNoRange is a top-level grammar that accepts exactly one
// VersionSingle and has no range alternative.
type VersionNoRange struct {
	Single *VersionSingle `parser:"@@"`
}

Any idea what's going on?

For completeness, here's a complete testing function you can use to trigger the issue:

package participletest_test

import (
	"testing"

	"github.com/alecthomas/participle/v2"
	"github.com/alecthomas/participle/v2/lexer"
)

// What I'd like:
//
// ProjectVersion is the "<project> <version number>" form of a version.
// Its grammar is a Project token, an uncaptured Space token, and then either
// a VersionString or an RCVersionString token, which is captured into Version.
type ProjectVersion struct {
	Project string `parser:"@Project"`
	Version string `parser:"Space @(VersionString | RCVersionString)"`
}

// VersionSingle is one version: either a TipVersion token or, as the second
// alternative, a nested ProjectVersion.
type VersionSingle struct {
	TipVersion     *string         `parser:"@TipVersion"`
	ProjectVersion *ProjectVersion `parser:"| @@"`
}

// VersionRange is a pair of versions: a VersionSingle (From), a VersionDash
// token, and a second VersionSingle (To).
type VersionRange struct {
	From VersionSingle `parser:"@@"`
	To   VersionSingle `parser:"VersionDash @@"`
}

// Version is the top-level grammar. The VersionRange alternative is listed
// first and the VersionSingle alternative second.
type Version struct {
	Range  *VersionRange  `parser:"@@"`
	Single *VersionSingle `parser:"| @@"`
}

// With "project version", without range
//
// VersionNoRange is a top-level grammar that accepts exactly one
// VersionSingle and has no range alternative.
type VersionNoRange struct {
	Single *VersionSingle `parser:"@@"`
}

// With the range, without the "project version"
//
// VersionSingleNoProjectVersion is one version: either a TipVersion token or
// a single ProjectVersion token (captured as a string rather than parsed into
// a nested struct).
type VersionSingleNoProjectVersion struct {
	TipVersion     *string `parser:"@TipVersion"`
	ProjectVersion *string `parser:"| @ProjectVersion"`
}

// VersionRangeNoProjectVersion is a VersionSingleNoProjectVersion (From), a
// VersionDash token, and a second VersionSingleNoProjectVersion (To).
type VersionRangeNoProjectVersion struct {
	From VersionSingleNoProjectVersion `parser:"@@"`
	To   VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}

// VersionNoProjectVersion is the top-level grammar of this variant: the range
// alternative is listed first and the single-version alternative second.
type VersionNoProjectVersion struct {
	Range  *VersionRangeNoProjectVersion  `parser:"@@"`
	Single *VersionSingleNoProjectVersion `parser:"| @@"`
}

// rulesCommon holds the lexer rules shared by every lexer built in
// TestVersion, which appends either a Project or a ProjectVersion rule to it.
// NOTE(review): rule order is preserved on purpose; VersionDash (" - ") is
// listed before Space (" ") -- confirm how the lexer resolves overlaps.
var rulesCommon = []lexer.SimpleRule{
	{Name: "TipVersion", Pattern: `xen-unstable`},
	{Name: "RCVersionString", Pattern: `\d+\.\d+-RC series`},
	{Name: "VersionString", Pattern: `\d+\.\d+\.x`},
	{Name: "VersionDash", Pattern: " - "},
	{Name: "Space", Pattern: ` `},
}

func TestVersion(t *testing.T) {
	simpletests := []string{
		"Xen 4.18.x",
		"Linux 4.7.x",
		"QEMU 4.7-RC series",
		"xen-unstable",
	}

	rangetests := []string{
		"Linux 4.7.x - Linux 4.9.x",
		"Xen 4.18.x - xen-unstable",
		"QEMU 4.7-RC series - QEMU 4.10.x",
	}

	lexProject := lexer.MustSimple(append(rulesCommon, lexer.SimpleRule{"Project", `Xen|Linux|QEMU|xapi`}))

	lexProjectVersion := lexer.MustSimple(append(rulesCommon,
		lexer.SimpleRule{"ProjectVersion", `(Xen|Linux|QEMU|xapi) (\d+\.\d+\.x|\d+\.\d+-RC series)`}))

	pVersion := participle.MustBuild[Version](participle.Lexer(lexProject))

	t.Log("Testing pVersion with simple and range")
	for _, in := range append(simpletests, rangetests...) {
		out, err := pVersion.ParseString("", in)
		if err != nil {
			t.Errorf("ERROR: Parsing %v: %v", in, err)
		} else {
			t.Logf("Parsing %v resulted in %v", in, out)
		}
	}

	pVersionNoRange := participle.MustBuild[VersionNoRange](participle.Lexer(lexProject))

	t.Log("Testing pVersionNoRange with simple only")
	for _, in := range simpletests {
		out, err := pVersionNoRange.ParseString("", in)
		if err != nil {
			t.Errorf("ERROR: Parsing %v: %v", in, err)
		} else {
			t.Logf("Parsing %v resulted in %v", in, out)
		}
	}

	pVersionNoProjcetVersion := participle.MustBuild[VersionNoProjectVersion](participle.Lexer(lexProjectVersion))

	t.Log("Testing pVersionNoProjectVersion with simple and range")
	for _, in := range append(simpletests, rangetests...) {
		out, err := pVersionNoProjcetVersion.ParseString("", in)
		if err != nil {
			t.Errorf("ERROR: Parsing %v: %v", in, err)
		} else {
			t.Logf("Parsing %v resulted in %v", in, out)
		}
	}
}

BTW, the following works for all tests:

// VersionSlice parses one VersionSingle followed by zero or more
// "VersionDash VersionSingle" pairs, appending every VersionSingle to Range.
// A lone version therefore yields one element and a range yields two; the
// grammar as written does not cap the number of elements.
type VersionSlice struct {
	Range []VersionSingle `parser:"@@ ( VersionDash @@ )*"`
}

...
	// Build a parser for the VersionSlice grammar and run it over every
	// single-version and range input.
	pVersionSlice := participle.MustBuild[VersionSlice](participle.Lexer(lexProject))

	t.Log("Testing pVersionSlice with simple and range")
	for _, input := range append(simpletests, rangetests...) {
		parsed, err := pVersionSlice.ParseString("", input)
		if err != nil {
			t.Errorf("ERROR: Parsing %v: %v", input, err)
			continue
		}
		t.Logf("Parsing %v resulted in %v", input, parsed)
	}
...

So I think I have a work-around for now; but if the first version is supposed to work, it would be good to track down what's going on.