Skip to content

Commit f554f5c

Browse files
committed
Ensure that Parquet with complex types transforms to valid GeoParquet
1 parent 6e08eca commit f554f5c

File tree

6 files changed

+362
-12
lines changed

6 files changed

+362
-12
lines changed

internal/geoparquet/geoparquet.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ func getMetadata(fileReader *file.Reader, convertOptions *ConvertOptions) *Metad
3232
primaryColumn = convertOptions.InputPrimaryColumn
3333
}
3434
metadata = &Metadata{
35+
Version: Version,
3536
PrimaryColumn: primaryColumn,
3637
Columns: map[string]*GeometryColumn{
3738
primaryColumn: getDefaultGeometryColumn(),

internal/pqutil/transform.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,6 @@ func TransformByColumn(config *TransformConfig) error {
175175
if err != nil {
176176
return err
177177
}
178-
if transformed.DataType() != outputField.Type {
179-
return fmt.Errorf("transform generated an unexpected type, got %s, expected %s", transformed.DataType().Name(), outputField.Type.Name())
180-
}
181178
arr = transformed
182179
}
183180
colWriter, colWriterErr := pqarrow.NewArrowColumnWriter(arr, 0, int64(arr.Len()), outputManifest, rowGroupWriter, fieldNum)

internal/validator/rules.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -432,13 +432,13 @@ func GeometryUngrouped() Rule {
432432
title: "geometry columns must not be grouped",
433433
validate: func(info *FileInfo) error {
434434
metadata := info.Metadata
435-
sc := info.File.MetaData().Schema
435+
root := info.File.MetaData().Schema.Root()
436436
for name := range metadata.Columns {
437-
index := sc.ColumnIndexByName(name)
437+
index := root.FieldIndexByName(name)
438438
if index < 0 {
439439
return fatal("missing geometry column %q", name)
440440
}
441-
_, ok := sc.Root().Field(index).(*schema.PrimitiveNode)
441+
_, ok := root.Field(index).(*schema.PrimitiveNode)
442442
if !ok {
443443
return fmt.Errorf("column %q must not be a group", name)
444444
}
@@ -454,14 +454,14 @@ func GeometryDataType() Rule {
454454
title: "geometry columns must be stored using the BYTE_ARRAY parquet type",
455455
validate: func(info *FileInfo) error {
456456
metadata := info.Metadata
457-
sc := info.File.MetaData().Schema
457+
root := info.File.MetaData().Schema.Root()
458458
for name := range metadata.Columns {
459-
index := sc.ColumnIndexByName(name)
459+
index := root.FieldIndexByName(name)
460460
if index < 0 {
461461
return fatal("missing geometry column %q", name)
462462
}
463463

464-
field, ok := sc.Root().Field(index).(*schema.PrimitiveNode)
464+
field, ok := root.Field(index).(*schema.PrimitiveNode)
465465
if !ok {
466466
return fatal("expected primitive column for %q", name)
467467
}
@@ -480,14 +480,14 @@ func GeometryRepetition() Rule {
480480
title: "geometry columns must be required or optional, not repeated",
481481
validate: func(info *FileInfo) error {
482482
metadata := info.Metadata
483-
sc := info.File.MetaData().Schema
483+
root := info.File.MetaData().Schema.Root()
484484
for name := range metadata.Columns {
485-
index := sc.ColumnIndexByName(name)
485+
index := root.FieldIndexByName(name)
486486
if index < 0 {
487487
return fatal("missing geometry column %q", name)
488488
}
489489

490-
repetitionType := sc.Root().Field(index).RepetitionType()
490+
repetitionType := root.Field(index).RepetitionType()
491491
if repetitionType == parquet.Repetitions.Repeated {
492492
return fmt.Errorf("column %q must not be repeated", name)
493493
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"checks": [
3+
{
4+
"title": "file must include a \"geo\" metadata key",
5+
"run": true,
6+
"passed": true
7+
},
8+
{
9+
"title": "metadata must be a JSON object",
10+
"run": true,
11+
"passed": true
12+
},
13+
{
14+
"title": "metadata must include a \"version\" string",
15+
"run": true,
16+
"passed": true
17+
},
18+
{
19+
"title": "metadata must include a \"primary_column\" string",
20+
"run": true,
21+
"passed": true
22+
},
23+
{
24+
"title": "metadata must include a \"columns\" object",
25+
"run": true,
26+
"passed": true
27+
},
28+
{
29+
"title": "column metadata must include the \"primary_column\" name",
30+
"run": true,
31+
"passed": true
32+
},
33+
{
34+
"title": "column metadata must include a valid \"encoding\" string",
35+
"run": true,
36+
"passed": true
37+
},
38+
{
39+
"title": "column metadata must include a \"geometry_types\" list",
40+
"run": true,
41+
"passed": true
42+
},
43+
{
44+
"title": "optional \"crs\" must be null or a PROJJSON object",
45+
"run": true,
46+
"passed": true
47+
},
48+
{
49+
"title": "optional \"orientation\" must be a valid string",
50+
"run": true,
51+
"passed": true
52+
},
53+
{
54+
"title": "optional \"edges\" must be a valid string",
55+
"run": true,
56+
"passed": true
57+
},
58+
{
59+
"title": "optional \"bbox\" must be an array of 4 or 6 numbers",
60+
"run": true,
61+
"passed": true
62+
},
63+
{
64+
"title": "optional \"epoch\" must be a number",
65+
"run": true,
66+
"passed": true
67+
},
68+
{
69+
"title": "geometry columns must not be grouped",
70+
"run": true,
71+
"passed": true
72+
},
73+
{
74+
"title": "geometry columns must be stored using the BYTE_ARRAY parquet type",
75+
"run": true,
76+
"passed": true
77+
},
78+
{
79+
"title": "geometry columns must be required or optional, not repeated",
80+
"run": true,
81+
"passed": true
82+
},
83+
{
84+
"title": "all geometry values match the \"encoding\" metadata",
85+
"run": true,
86+
"passed": true
87+
},
88+
{
89+
"title": "all geometry types must be included in the \"geometry_types\" metadata (if not empty)",
90+
"run": true,
91+
"passed": true
92+
},
93+
{
94+
"title": "all polygon geometries must follow the \"orientation\" metadata (if present)",
95+
"run": true,
96+
"passed": true
97+
},
98+
{
99+
"title": "all geometries must fall within the \"bbox\" metadata (if present)",
100+
"run": true,
101+
"passed": true
102+
}
103+
],
104+
"metadataOnly": false
105+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"metadata": {
3+
"version": "1.0.0",
4+
"primary_column": "geometry",
5+
"columns": {
6+
"geometry": {
7+
"encoding": "WKB",
8+
"geometry_types": [
9+
"Point"
10+
],
11+
"orientation": "counterclockwise",
12+
"edges": "planar",
13+
"bbox": [
14+
0,
15+
0,
16+
0,
17+
0
18+
],
19+
"epoch": 2021.47,
20+
"crs": {
21+
"$schema": "https://proj.org/schemas/v0.5/projjson.schema.json",
22+
"type": "GeographicCRS",
23+
"name": "WGS 84 longitude-latitude",
24+
"datum": {
25+
"type": "GeodeticReferenceFrame",
26+
"name": "World Geodetic System 1984",
27+
"ellipsoid": {
28+
"name": "WGS 84",
29+
"semi_major_axis": 6378137,
30+
"inverse_flattening": 298.257223563
31+
}
32+
},
33+
"coordinate_system": {
34+
"subtype": "ellipsoidal",
35+
"axis": [
36+
{
37+
"name": "Geodetic longitude",
38+
"abbreviation": "Lon",
39+
"direction": "east",
40+
"unit": "degree"
41+
},
42+
{
43+
"name": "Geodetic latitude",
44+
"abbreviation": "Lat",
45+
"direction": "north",
46+
"unit": "degree"
47+
}
48+
]
49+
},
50+
"id": {
51+
"authority": "OGC",
52+
"code": "CRS84"
53+
}
54+
}
55+
}
56+
}
57+
},
58+
"data": {
59+
"type": "FeatureCollection",
60+
"features": [
61+
{
62+
"type": "Feature",
63+
"properties": {
64+
"numbers": [2, 4, 6, 8],
65+
"strings": ["chicken", "soup"],
66+
"object": {
67+
"name": "Bob"
68+
},
69+
"names": {
70+
"common": [
71+
{"value": "Hello", "language": "en"}
72+
]
73+
}
74+
},
75+
"geometry": {
76+
"type": "Point",
77+
"coordinates": [
78+
0,
79+
0
80+
]
81+
}
82+
}
83+
]
84+
}
85+
}

0 commit comments

Comments
 (0)