Skip to content

Commit eacde77

Browse files
committed
encoding/toml: check parser errors and reject duplicate keys
And teach the tests about inputs which we expect to fail. `want` is now `wantCUE` for clarity; `wantErr` uses `qt.ErrorMatches`. Updates #68. Signed-off-by: Daniel Martí <[email protected]> Change-Id: I2796a76faed01ed62c78145e257c8022efeb5ec3 Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1194707 Reviewed-by: Roger Peppe <[email protected]> TryBot-Result: CUEcueckoo <[email protected]>
1 parent abd5778 commit eacde77

File tree

2 files changed

+70
-41
lines changed

2 files changed

+70
-41
lines changed

encoding/toml/decode.go

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
toml "github.com/pelletier/go-toml/v2/unstable"
2626

2727
"cuelang.org/go/cue/ast"
28+
"cuelang.org/go/cue/literal"
2829
"cuelang.org/go/cue/token"
2930
)
3031

@@ -34,7 +35,7 @@ import (
3435
func NewDecoder(r io.Reader) *Decoder {
3536
// Note that we don't consume the reader here,
3637
// as there's no need, and we can't return an error either.
37-
return &Decoder{r: r}
38+
return &Decoder{r: r, seenKeys: make(map[string]bool)}
3839
}
3940

4041
// Decoder implements the decoding state.
@@ -47,6 +48,11 @@ type Decoder struct {
4748
decoded bool // whether [Decoder.Decoded] has been called already
4849
parser toml.Parser
4950

51+
// seenKeys tracks which dot-separated rooted keys we have already decoded,
52+
// as duplicate keys in TOML are not allowed.
53+
// The string elements in between the dots may be quoted to avoid ambiguity.
54+
seenKeys map[string]bool
55+
5056
currentFields []*ast.Field
5157
}
5258

@@ -77,6 +83,9 @@ func (d *Decoder) Decode() (ast.Node, error) {
7783
return nil, err
7884
}
7985
}
86+
if err := d.parser.Error(); err != nil {
87+
return nil, err
88+
}
8089
for _, field := range d.currentFields {
8190
file.Decls = append(file.Decls, field)
8291
}
@@ -110,26 +119,36 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
110119
// }
111120
case toml.KeyValue:
112121
keys := tnode.Key()
113-
topField := &ast.Field{
122+
curName := string(keys.Node().Data)
123+
curField := &ast.Field{
114124
Label: &ast.Ident{
115125
NamePos: token.NoPos.WithRel(token.Newline),
116-
Name: string(keys.Node().Data),
126+
Name: curName,
117127
},
118128
}
119-
ast.SetRelPos(topField.Label, token.Newline)
129+
130+
topField := curField
131+
rootKey := quoteLabelIfNeeded(curName)
132+
120133
keys.Next() // TODO(mvdan): for some reason the first Next call doesn't count?
121-
curField := topField
122134
for keys.Next() {
135+
nextName := string(keys.Node().Data)
123136
nextField := &ast.Field{
124137
Label: &ast.Ident{
125138
NamePos: token.NoPos.WithRel(token.Blank),
126-
Name: string(keys.Node().Data),
139+
Name: nextName,
127140
},
128141
}
129-
ast.SetRelPos(nextField.Label, token.Blank)
142+
130143
curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
131144
curField = nextField
145+
// TODO(mvdan): use an append-like API once we have benchmarks
146+
rootKey += "." + quoteLabelIfNeeded(nextName)
147+
}
148+
if d.seenKeys[rootKey] {
149+
return fmt.Errorf("duplicate key: %s", rootKey)
132150
}
151+
d.seenKeys[rootKey] = true
133152
value, err := d.decodeExpr(tnode.Value())
134153
if err != nil {
135154
return err
@@ -144,6 +163,13 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
144163
return nil
145164
}
146165

166+
// quoteLabelIfNeeded returns name unchanged when ast.IsValidIdent accepts it,
// and otherwise returns name quoted via literal.Label.Quote.
// nextRootNode uses it for each element of a dot-separated root key,
// so that an element which itself contains a dot (such as "foo.com")
// cannot be confused with the separator between elements.
func quoteLabelIfNeeded(name string) string {
167+
if ast.IsValidIdent(name) {
168+
return name
169+
}
170+
return literal.Label.Quote(name)
171+
}
172+
147173
// decodeExpr decodes a single TOML value node into a CUE expression.
148174
func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
149175
// TODO(mvdan): we currently assume that TOML basic literals (string, int, float)

encoding/toml/decode_test.go

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,25 +35,32 @@ func TestDecoder(t *testing.T) {
3535
// The whitespace doesn't affect the input TOML, and we run cue/format on the "wantCUE" source,
3636
// so the added newlines and tabs don't change the test behavior.
3737
tests := []struct {
38-
name string
39-
input string
40-
want string
38+
name string
39+
input string
40+
wantCUE string
41+
wantErr string
4142
}{{
42-
name: "Empty",
43-
input: "",
44-
want: "",
43+
name: "Empty",
44+
input: "",
45+
wantCUE: "",
4546
}, {
4647
name: "LoneComment",
4748
input: `
4849
# Just a comment
4950
`,
50-
want: "",
51+
wantCUE: "",
52+
}, {
53+
name: "RootKeyMissing",
54+
input: `
55+
= "no key name"
56+
`,
57+
wantErr: "invalid character at start of key: =",
5158
}, {
5259
name: "RootKeysOne",
5360
input: `
5461
key = "value"
5562
`,
56-
want: `
63+
wantCUE: `
5764
key: "value"
5865
`,
5966
}, {
@@ -63,7 +70,7 @@ func TestDecoder(t *testing.T) {
6370
key2 = "value2"
6471
key3 = "value3"
6572
`,
66-
want: `
73+
wantCUE: `
6774
key1: "value1"
6875
key2: "value2"
6976
key3: "value3"
@@ -75,7 +82,7 @@ func TestDecoder(t *testing.T) {
7582
b1.b2 = "B"
7683
c1.c2.c3 = "C"
7784
`,
78-
want: `
85+
wantCUE: `
7986
a1: "A"
8087
b1: b2: "B"
8188
c1: c2: c3: "C"
@@ -87,7 +94,7 @@ func TestDecoder(t *testing.T) {
8794
a_b = "underscores"
8895
123 = "numbers"
8996
`,
90-
want: `
97+
wantCUE: `
9198
"a-b": "dashes"
9299
a_b: "underscores"
93100
"123": "numbers"
@@ -99,7 +106,7 @@ func TestDecoder(t *testing.T) {
99106
"foo bar" = "quoted space"
100107
'foo "bar"' = "nested quotes"
101108
`,
102-
want: `
109+
wantCUE: `
103110
"1.2.3": "quoted dots"
104111
"foo bar": "quoted space"
105112
"foo \"bar\"": "nested quotes"
@@ -109,28 +116,23 @@ func TestDecoder(t *testing.T) {
109116
input: `
110117
site."foo.com".title = "foo bar"
111118
`,
112-
want: `
119+
wantCUE: `
113120
site: "foo.com": title: "foo bar"
114121
`,
115122
}, {
116-
// TODO(mvdan): the TOML spec says that defining a key multiple times is invalid,
117-
// we should error even though this can be OK in CUE as long as the values unify.
118123
name: "RootKeysDuplicate",
119124
input: `
120125
foo = "same value"
121126
foo = "same value"
122127
`,
123-
want: `
124-
foo: "same value"
125-
foo: "same value"
126-
`,
128+
wantErr: `duplicate key: foo`,
127129
}, {
128130
name: "BasicStrings",
129131
input: `
130132
escapes = "foo \"bar\" \n\t\\ baz"
131133
unicode = "foo \u00E9"
132134
`,
133-
want: `
135+
wantCUE: `
134136
escapes: "foo \"bar\" \n\t\\ baz"
135137
unicode: "foo é"
136138
`,
@@ -153,7 +155,7 @@ line one \
153155
line two.\
154156
"""
155157
`,
156-
want: `
158+
wantCUE: `
157159
nested: " can contain \"\" quotes "
158160
four: "\"four\""
159161
double: "line one\nline two"
@@ -169,7 +171,7 @@ line two.\
169171
quoted = 'Tom "Dubs" Preston-Werner'
170172
regex = '<\i\c*\s*>'
171173
`,
172-
want: `
174+
wantCUE: `
173175
winpath: "C:\\Users\\nodejs\\templates"
174176
winpath2: "\\\\ServerX\\admin$\\system32\\"
175177
quoted: "Tom \"Dubs\" Preston-Werner"
@@ -194,7 +196,7 @@ line one \
194196
line two.\
195197
'''
196198
`,
197-
want: `
199+
wantCUE: `
198200
nested: " can contain '' quotes "
199201
four: "'four'"
200202
double: "line one\nline two"
@@ -213,7 +215,7 @@ line two.\
213215
octal = 0o755
214216
binary = 0b11010110
215217
`,
216-
want: `
218+
wantCUE: `
217219
zero: 0
218220
positive: 123
219221
plus: +40
@@ -234,7 +236,7 @@ line two.\
234236
exponent_minus = -2E-4
235237
exponent_dot = 6.789e-30
236238
`,
237-
want: `
239+
wantCUE: `
238240
pi: 3.1415
239241
plus: +1.23
240242
minus: -4.56
@@ -249,7 +251,7 @@ line two.\
249251
positive = true
250252
negative = false
251253
`,
252-
want: `
254+
wantCUE: `
253255
positive: true
254256
negative: false
255257
`,
@@ -263,7 +265,7 @@ line two.\
263265
strings = [ "all", 'strings', """are the same""", '''type''' ]
264266
mixed_numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
265267
`,
266-
want: `
268+
wantCUE: `
267269
integers: [1, 2, 3]
268270
colors: ["red", "yellow", "green"]
269271
nested_ints: [[1, 2], [3, 4, 5]]
@@ -280,13 +282,20 @@ line two.\
280282
dec := toml.NewDecoder(strings.NewReader(test.input))
281283

282284
node, err := dec.Decode()
285+
if test.wantErr != "" {
286+
qt.Assert(t, qt.ErrorMatches(err, test.wantErr))
287+
qt.Assert(t, qt.IsNil(node))
288+
// We don't continue, so we can't expect any decoded CUE.
289+
qt.Assert(t, qt.Equals(test.wantCUE, ""))
290+
return
291+
}
283292
qt.Assert(t, qt.IsNil(err))
284293

285294
node2, err := dec.Decode()
286295
qt.Assert(t, qt.IsNil(node2))
287296
qt.Assert(t, qt.Equals(err, io.EOF))
288297

289-
wantFormatted, err := format.Source([]byte(test.want))
298+
wantFormatted, err := format.Source([]byte(test.wantCUE))
290299
qt.Assert(t, qt.IsNil(err))
291300

292301
formatted, err := format.Node(node)
@@ -301,12 +310,6 @@ line two.\
301310
qt.Assert(t, qt.IsNil(val.Err()))
302311
qt.Assert(t, qt.IsNil(val.Validate()))
303312

304-
// See the TODO above; go-toml rejects duplicate keys per the spec,
305-
// but our decoder does not yet.
306-
if test.name == "RootKeysDuplicate" {
307-
return
308-
}
309-
310313
// Validate that the decoded CUE value is equivalent
311314
// to the Go value that a direct TOML unmarshal produces.
312315
// We use JSON equality as some details such as which integer types are used

0 commit comments

Comments
 (0)