Skip to content

Commit add7dce

Browse files
committed
internal/encoding/yaml: encode YAML anchors as CUE definitions
This commit adds support for encoding YAML documents such as:

    a: &a 3
    b: *a

as this CUE document:

    #a: 3
    a: #a
    b: #a

Fixes #3818

Signed-off-by: Omri Steiner <omri@steiners.co.il>
1 parent ee299f0 commit add7dce

File tree

3 files changed

+183
-41
lines changed

3 files changed

+183
-41
lines changed

internal/encoding/yaml/decode.go

Lines changed: 120 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ type decoder struct {
6464

6565
// forceNewline ensures that the next position will be on a new line.
6666
forceNewline bool
67+
68+
// anchorFields contains the anchors that are gathered as we walk the YAML nodes.
69+
// These are only added to the AST once the whole document has been processed.
70+
anchorFields []ast.Field
71+
// anchorNames maps anchored YAML nodes to the CUE identifier names chosen for them.
72+
anchorNames map[*yaml.Node]string
73+
// anchorTakenNames keeps track of anchor names that have been taken.
74+
// It is used to ensure unique anchor names.
75+
anchorTakenNames map[string]struct{}
6776
}
6877

6978
// TODO(mvdan): this can be io.Reader really, except that token.Pos is offset-based,
@@ -83,9 +92,11 @@ func NewDecoder(filename string, b []byte) *decoder {
8392
tokFile := token.NewFile(filename, 0, len(b)+1)
8493
tokFile.SetLinesForContent(b)
8594
return &decoder{
86-
tokFile: tokFile,
87-
tokLines: append(tokFile.Lines(), len(b)),
88-
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
95+
tokFile: tokFile,
96+
tokLines: append(tokFile.Lines(), len(b)),
97+
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
98+
anchorNames: make(map[*yaml.Node]string),
99+
anchorTakenNames: make(map[string]struct{}),
89100
}
90101
}
91102

@@ -176,24 +187,35 @@ func Unmarshal(filename string, data []byte) (ast.Expr, error) {
176187
return n, nil
177188
}
178189

179-
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
180-
d.addHeadCommentsToPending(yn)
181-
var expr ast.Expr
182-
var err error
190+
func (d *decoder) extractNoAnchor(yn *yaml.Node) (ast.Expr, error) {
183191
switch yn.Kind {
184192
case yaml.DocumentNode:
185-
expr, err = d.document(yn)
193+
return d.document(yn)
186194
case yaml.SequenceNode:
187-
expr, err = d.sequence(yn)
195+
return d.sequence(yn)
188196
case yaml.MappingNode:
189-
expr, err = d.mapping(yn)
197+
return d.mapping(yn)
190198
case yaml.ScalarNode:
191-
expr, err = d.scalar(yn)
199+
return d.scalar(yn)
192200
case yaml.AliasNode:
193-
expr, err = d.alias(yn)
201+
return d.referenceAlias(yn)
194202
default:
195203
return nil, d.posErrorf(yn, "unknown yaml node kind: %d", yn.Kind)
196204
}
205+
}
206+
207+
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
208+
d.addHeadCommentsToPending(yn)
209+
210+
var expr ast.Expr
211+
var err error
212+
213+
if yn.Anchor == "" {
214+
expr, err = d.extractNoAnchor(yn)
215+
} else {
216+
expr, err = d.anchor(yn)
217+
}
218+
197219
if err != nil {
198220
return nil, err
199221
}
@@ -324,7 +346,39 @@ func (d *decoder) document(yn *yaml.Node) (ast.Expr, error) {
324346
if n := len(yn.Content); n != 1 {
325347
return nil, d.posErrorf(yn, "yaml document nodes are meant to have one content node but have %d", n)
326348
}
327-
return d.extract(yn.Content[0])
349+
350+
expr, err := d.extract(yn.Content[0])
351+
if err != nil {
352+
return nil, err
353+
}
354+
355+
return d.addAnchorNodes(expr)
356+
}
357+
358+
// addAnchorNodes prepends anchor nodes at the top of the document.
359+
func (d *decoder) addAnchorNodes(expr ast.Expr) (ast.Expr, error) {
360+
elements := []ast.Decl{}
361+
362+
for _, field := range d.anchorFields {
363+
elements = append(elements, &field)
364+
}
365+
366+
switch x := expr.(type) {
367+
case *ast.StructLit:
368+
x.Elts = append(elements, x.Elts...)
369+
case *ast.ListLit:
370+
if len(elements) > 0 {
371+
expr = &ast.StructLit{
372+
Elts: append(elements, x),
373+
}
374+
}
375+
default:
376+
// The document root is neither a map nor a seq, so no anchor fields are added here.
377+
// TODO: a root scalar can still carry an anchor (e.g. `&a 1`); assert that `anchorFields` is empty, or emit those fields.
378+
break
379+
}
380+
381+
return expr, nil
328382
}
329383

330384
func (d *decoder) sequence(yn *yaml.Node) (ast.Expr, error) {
@@ -458,7 +512,7 @@ func (d *decoder) label(yn *yaml.Node) (ast.Label, error) {
458512
if yn.Alias.Kind != yaml.ScalarNode {
459513
return nil, d.posErrorf(yn, "invalid map key: %v", yn.Alias.ShortTag())
460514
}
461-
expr, err = d.alias(yn)
515+
expr, err = d.inlineAlias(yn)
462516
value = yn.Alias.Value
463517
default:
464518
return nil, d.posErrorf(yn, "invalid map key: %v", yn.ShortTag())
@@ -639,7 +693,10 @@ func (d *decoder) makeNum(yn *yaml.Node, val string, kind token.Token) (expr ast
639693
return expr
640694
}
641695

642-
func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
696+
// inlineAlias expands an alias node in place, returning the expanded node.
697+
// Sometimes we have to resort to this, for example when the alias
698+
// is inside a map key, since CUE does not support structs as map keys.
699+
func (d *decoder) inlineAlias(yn *yaml.Node) (ast.Expr, error) {
643700
if d.extractingAliases[yn] {
644701
// TODO this could actually be allowed in some circumstances.
645702
return nil, d.posErrorf(yn, "anchor %q value contains itself", yn.Value)
@@ -649,11 +706,58 @@ func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
649706
}
650707
d.extractingAliases[yn] = true
651708
var node ast.Expr
652-
node, err := d.extract(yn.Alias)
709+
node, err := d.extractNoAnchor(yn.Alias)
653710
delete(d.extractingAliases, yn)
654711
return node, err
655712
}
656713

714+
// referenceAlias replaces an alias with a reference to the identifier of its anchor.
715+
func (d *decoder) referenceAlias(yn *yaml.Node) (ast.Expr, error) {
716+
anchor, ok := d.anchorNames[yn.Alias]
717+
if !ok {
718+
return nil, d.posErrorf(yn, "anchor %q not found", yn.Alias.Anchor)
719+
}
720+
721+
return &ast.Ident{
722+
NamePos: d.pos(yn),
723+
Name: anchor,
724+
}, nil
725+
}
726+
727+
func (d *decoder) anchor(yn *yaml.Node) (ast.Expr, error) {
728+
var anchorIdent string
729+
730+
// Pick a non-conflicting anchor name.
731+
for i := 1; ; i++ {
732+
if i == 1 {
733+
anchorIdent = "#" + yn.Anchor
734+
} else {
735+
anchorIdent = "#" + yn.Anchor + "_" + strconv.Itoa(i)
736+
}
737+
if _, ok := d.anchorTakenNames[anchorIdent]; !ok {
738+
d.anchorTakenNames[anchorIdent] = struct{}{}
739+
break
740+
}
741+
}
742+
d.anchorNames[yn] = anchorIdent
743+
744+
// Process the node itself, but don't put it into the AST just yet;
745+
// store it so it can be emitted later as the definition field for this anchor.
746+
expr, err := d.extractNoAnchor(yn)
747+
if err != nil {
748+
return nil, err
749+
}
750+
d.anchorFields = append(d.anchorFields, ast.Field{
751+
Label: &ast.Ident{Name: anchorIdent},
752+
Value: expr,
753+
})
754+
755+
return &ast.Ident{
756+
NamePos: d.pos(yn),
757+
Name: anchorIdent,
758+
}, nil
759+
}
760+
657761
func labelStr(l ast.Label) string {
658762
switch l := l.(type) {
659763
case *ast.Ident:

internal/encoding/yaml/decode_test.go

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -474,26 +474,57 @@ Null: 1
474474
// Anchors and aliases.
475475
{
476476
"a: &x 1\nb: &y 2\nc: *x\nd: *y\n",
477-
`a: 1
478-
b: 2
479-
c: 1
480-
d: 2`,
477+
`#x: 1
478+
#y: 2
479+
a: #x
480+
b: #y
481+
c: #x
482+
d: #y`,
481483
}, {
482484
"a: &a {c: 1}\nb: *a",
483-
`a: {c: 1}
484-
b: {
485-
c: 1
486-
}`,
485+
`#a: {c: 1}
486+
a: #a
487+
b: #a`,
487488
}, {
488489
"a: &a [1, 2]\nb: *a",
489-
"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
490+
"#a: [1, 2]\na: #a\nb: #a",
490491
},
491492
{
492493
`a: &a "b"
493494
*a : "c"`,
494-
`a: "b"
495-
b: "c"`,
495+
`#a: "b"
496+
a: #a
497+
b: "c"`,
496498
},
499+
// Test nested anchors
500+
{
501+
`foo: &a
502+
bar: &b
503+
baz: 1
504+
a: *a
505+
b: *b
506+
`,
507+
`#b: {
508+
baz: 1
509+
}
510+
#a: {
511+
bar: #b
512+
}
513+
foo: #a
514+
a: #a
515+
b: #b`,
516+
},
517+
{
518+
`a:
519+
- &b c`,
520+
`#b: "c"
521+
a: [#b]`,
522+
},
523+
// Recursive anchor - make sure we don't infinitely recurse on such input.
524+
{"a: &a\n b: *a\n", `#a: {
525+
b: #a
526+
}
527+
a: #a`},
497528

498529
{
499530
"foo: ''",
@@ -778,10 +809,12 @@ a:
778809
// yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes
779810
{
780811
"First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n",
781-
`"First occurrence": "Foo"
782-
"Second occurrence": "Foo"
783-
"Override anchor": "Bar"
784-
"Reuse anchor": "Bar"`,
812+
`#anchor: "Foo"
813+
#anchor_2: "Bar"
814+
"First occurrence": #anchor
815+
"Second occurrence": #anchor
816+
"Override anchor": #anchor_2
817+
"Reuse anchor": #anchor_2`,
785818
},
786819
}
787820

@@ -926,7 +959,6 @@ var unmarshalErrorTests = []struct {
926959
{"v:\n- [A,", "test.yaml:2: did not find expected node content"},
927960
{"a:\n- b: *,", "test.yaml:2: did not find expected alphabetic or numeric character"},
928961
{"a: *b\n", "test.yaml: unknown anchor 'b' referenced"},
929-
{"a: &a\n b: *a\n", `test.yaml:2: anchor "a" value contains itself`},
930962
{"a: &a { b: c }\n*a : foo", "test.yaml:2: invalid map key: !!map"},
931963
{"a: &a [b]\n*a : foo", "test.yaml:2: invalid map key: !!seq"},
932964
{"value: -", "test.yaml: block sequence entries are not allowed in this context"},

internal/encoding/yaml/testdata/merge.out

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
#CENTER: {
2+
x: 1, y: 2
3+
}
4+
#LEFT: {
5+
x: 0, y: 2
6+
}
7+
#BIG: {
8+
r: 10
9+
}
10+
#SMALL: {
11+
r: 1
12+
}
13+
114
// From http://yaml.org/type/merge.html
215
// Test
316
anchors: {
4-
list: [{
5-
x: 1, y: 2
6-
}, {
7-
x: 0, y: 2
8-
}, {
9-
r: 10
10-
}, {
11-
r: 1
12-
}]
17+
list: [#CENTER, #LEFT, #BIG, #SMALL,
18+
]
1319
}
1420

1521
// All the following maps are equal:

0 commit comments

Comments
 (0)