Skip to content

Commit 449433e

Browse files
authored
Merge pull request #98 from planetlabs/urls
Accept URLs for describe, validate, and convert input
2 parents 82fd0ab + d99478e commit 449433e

File tree

10 files changed

+485
-65
lines changed

10 files changed

+485
-65
lines changed

cmd/gpq/command/command.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
package command
22

33
import (
4+
"bytes"
45
"fmt"
56
"io"
7+
"os"
8+
"strings"
9+
10+
"github.com/planetlabs/gpq/internal/storage"
611
)
712

813
var CLI struct {
@@ -12,12 +17,6 @@ var CLI struct {
1217
Version VersionCmd `cmd:"" help:"Print the version of this program."`
1318
}
1419

15-
type ReaderAtSeeker interface {
16-
io.Reader
17-
io.ReaderAt
18-
io.Seeker
19-
}
20-
2120
// CommandError wraps an underlying error reported by a command; the wrapped
// value is exposed through Unwrap.
type CommandError struct {
2221
err error
2322
}
@@ -33,3 +32,19 @@ func (e *CommandError) Error() string {
3332
// Unwrap returns the error wrapped by this CommandError, which lets
// errors.Is and errors.As see through it.
func (e *CommandError) Unwrap() error {
3433
return e.err
3534
}
35+
36+
func readerFromInput(input string) (storage.ReaderAtSeeker, error) {
37+
if input == "" {
38+
data, err := io.ReadAll(os.Stdin)
39+
if err != nil {
40+
return nil, fmt.Errorf("trouble reading from stdin: %w", err)
41+
}
42+
return bytes.NewReader(data), nil
43+
}
44+
45+
if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") {
46+
return storage.NewHttpReader(input)
47+
}
48+
49+
return os.Open(input)
50+
}

cmd/gpq/command/command_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package command_test
22

33
import (
44
"io"
5+
"net/http"
6+
"net/http/httptest"
57
"os"
68
"testing"
79

@@ -14,6 +16,7 @@ type Suite struct {
1416
mockStdin *os.File
1517
originalStdout *os.File
1618
mockStdout *os.File
19+
server *httptest.Server
1720
}
1821

1922
func (s *Suite) SetupTest() {
@@ -28,6 +31,9 @@ func (s *Suite) SetupTest() {
2831
s.originalStdout = os.Stdout
2932
s.mockStdout = stdout
3033
os.Stdout = stdout
34+
35+
handler := http.FileServer(http.Dir("../../../internal"))
36+
s.server = httptest.NewServer(handler)
3137
}
3238

3339
func (s *Suite) writeStdin(data []byte) {
@@ -58,6 +64,8 @@ func (s *Suite) TearDownTest() {
5864

5965
_ = s.mockStdout.Close()
6066
s.NoError(os.Remove(s.mockStdout.Name()))
67+
68+
s.server.Close()
6169
}
6270

6371
func TestSuite(t *testing.T) {

cmd/gpq/command/convert.go

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
package command
1616

1717
import (
18-
"bytes"
19-
"io"
18+
"net/url"
2019
"os"
2120
"strings"
2221

@@ -25,7 +24,7 @@ import (
2524
)
2625

2726
type ConvertCmd struct {
28-
Input string `arg:"" optional:"" name:"input" help:"Input file. If not provided, input is read from stdin." type:"path"`
27+
Input string `arg:"" optional:"" name:"input" help:"Input file path or URL. If not provided, input is read from stdin."`
2928
From string `help:"Input file format. Possible values: ${enum}." enum:"auto, geojson, geoparquet, parquet" default:"auto"`
3029
Output string `arg:"" optional:"" name:"output" help:"Output file. If not provided, output is written to stdout." type:"path"`
3130
To string `help:"Output file format. Possible values: ${enum}." enum:"auto, geojson, geoparquet" default:"auto"`
@@ -64,14 +63,17 @@ func parseFormatType(format string) FormatType {
6463
return ft
6564
}
6665

67-
func getFormatType(filename string) FormatType {
68-
if strings.HasSuffix(filename, ".json") || strings.HasSuffix(filename, ".geojson") {
66+
func getFormatType(resource string) FormatType {
67+
if u, err := url.Parse(resource); err == nil {
68+
resource = u.Path
69+
}
70+
if strings.HasSuffix(resource, ".json") || strings.HasSuffix(resource, ".geojson") {
6971
return GeoJSONType
7072
}
71-
if strings.HasSuffix(filename, ".gpq") || strings.HasSuffix(filename, ".geoparquet") {
73+
if strings.HasSuffix(resource, ".gpq") || strings.HasSuffix(resource, ".geoparquet") {
7274
return GeoParquetType
7375
}
74-
if strings.HasSuffix(filename, ".pq") || strings.HasSuffix(filename, ".parquet") {
76+
if strings.HasSuffix(resource, ".pq") || strings.HasSuffix(resource, ".parquet") {
7577
return ParquetType
7678
}
7779
return UnknownType
@@ -116,20 +118,9 @@ func (c *ConvertCmd) Run() error {
116118
return NewCommandError("could not determine input format for %s", inputSource)
117119
}
118120

119-
var input ReaderAtSeeker
120-
if inputSource == "" {
121-
data, err := io.ReadAll(os.Stdin)
122-
if err != nil {
123-
return NewCommandError("trouble reading from stdin: %w", err)
124-
}
125-
input = bytes.NewReader(data)
126-
} else {
127-
i, readErr := os.Open(inputSource)
128-
if readErr != nil {
129-
return NewCommandError("failed to read from %q: %w", inputSource, readErr)
130-
}
131-
defer i.Close()
132-
input = i
121+
input, inputErr := readerFromInput(inputSource)
122+
if inputErr != nil {
123+
return NewCommandError("trouble getting a reader from %q: %w", c.Input, inputErr)
133124
}
134125

135126
var output *os.File

cmd/gpq/command/convert_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,17 @@ func (s *Suite) TestConvertUnknownStdinToGeoParquetStdout() {
120120

121121
s.ErrorContains(cmd.Run(), "when reading from stdin, the --from option must be provided")
122122
}
123+
124+
func (s *Suite) TestConvertGeoParquetUrlToGeoJSONStdout() {
125+
cmd := &command.ConvertCmd{
126+
Input: s.server.URL + "/testdata/cases/example-v1.0.0.parquet",
127+
To: "geojson",
128+
}
129+
130+
s.Require().NoError(cmd.Run())
131+
data := s.readStdout()
132+
133+
collection := &geo.FeatureCollection{}
134+
s.Require().NoError(json.Unmarshal(data, collection))
135+
s.Len(collection.Features, 5)
136+
}

cmd/gpq/command/describe.go

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
package command
1616

1717
import (
18-
"bytes"
1918
"encoding/json"
2019
"errors"
2120
"fmt"
22-
"io"
2321
"os"
2422
"strconv"
2523
"strings"
@@ -34,7 +32,7 @@ import (
3432
)
3533

3634
type DescribeCmd struct {
37-
Input string `arg:"" optional:"" name:"input" help:"Path to a GeoParquet file. If not provided, input is read from stdin." type:"existingfile"`
35+
Input string `arg:"" optional:"" name:"input" help:"Path or URL for a GeoParquet file. If not provided, input is read from stdin."`
3836
Format string `help:"Report format. Possible values: ${enum}." enum:"text, json" default:"text"`
3937
MetadataOnly bool `help:"Print the unformatted geo metadata only (other arguments will be ignored)."`
4038
Unpretty bool `help:"No newlines or indentation in the JSON output."`
@@ -53,20 +51,9 @@ const (
5351
)
5452

5553
func (c *DescribeCmd) Run() error {
56-
var input ReaderAtSeeker
57-
if c.Input == "" {
58-
data, err := io.ReadAll(os.Stdin)
59-
if err != nil {
60-
return NewCommandError("trouble reading from stdin: %w", err)
61-
}
62-
input = bytes.NewReader(data)
63-
} else {
64-
i, readErr := os.Open(c.Input)
65-
if readErr != nil {
66-
return NewCommandError("failed to read from %q: %w", c.Input, readErr)
67-
}
68-
defer i.Close()
69-
input = i
54+
input, inputErr := readerFromInput(c.Input)
55+
if inputErr != nil {
56+
return NewCommandError("trouble getting a reader from %q: %w", c.Input, inputErr)
7057
}
7158

7259
fileReader, fileErr := file.NewParquetReader(input)

cmd/gpq/command/describe_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,28 @@ func (s *Suite) TestDescribeMissingMetadata() {
172172
s.Require().Len(info.Issues, 1)
173173
s.Contains(info.Issues[0], "Not a valid GeoParquet file (missing the \"geo\" metadata key).")
174174
}
175+
176+
func (s *Suite) TestDescribeFromUrl() {
177+
cmd := &command.DescribeCmd{
178+
Format: "json",
179+
Input: s.server.URL + "/testdata/cases/example-v1.0.0.parquet",
180+
}
181+
182+
s.Require().NoError(cmd.Run())
183+
184+
output := s.readStdout()
185+
info := &command.DescribeInfo{}
186+
err := json.Unmarshal(output, info)
187+
s.Require().NoError(err)
188+
189+
s.Equal(int64(5), info.NumRows)
190+
s.Equal(int64(1), info.NumRowGroups)
191+
s.Require().Len(info.Schema.Fields, 6)
192+
193+
s.Equal("geometry", info.Schema.Fields[0].Name)
194+
s.Equal("binary", info.Schema.Fields[0].Type)
195+
s.Equal("gzip", info.Schema.Fields[0].Compression)
196+
s.True(info.Schema.Fields[0].Optional)
197+
198+
s.Len(info.Issues, 0)
199+
}

cmd/gpq/command/validate.go

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
package command
1616

1717
import (
18-
"bytes"
1918
"context"
2019
"encoding/json"
2120
"fmt"
22-
"io"
2321
"os"
2422
"strings"
2523

@@ -29,34 +27,22 @@ import (
2927
)
3028

3129
type ValidateCmd struct {
32-
Input string `arg:"" optional:"" name:"input" help:"Path to a GeoParquet file. If not provided, input is read from stdin." type:"existingfile"`
30+
Input string `arg:"" optional:"" name:"input" help:"Path or URL for a GeoParquet file. If not provided, input is read from stdin."`
3331
MetadataOnly bool `help:"Only run rules that apply to file metadata and schema (no data will be scanned)."`
3432
Unpretty bool `help:"No colors in text output, no newlines and indentation in JSON output."`
3533
Format string `help:"Report format. Possible values: ${enum}." enum:"text, json" default:"text"`
3634
}
3735

3836
func (c *ValidateCmd) Run(ctx *kong.Context) error {
37+
input, inputErr := readerFromInput(c.Input)
38+
if inputErr != nil {
39+
return NewCommandError("trouble getting a reader from %q: %w", c.Input, inputErr)
40+
}
41+
3942
inputName := c.Input
40-
var input ReaderAtSeeker
41-
if c.Input == "" {
42-
if !hasStdin() {
43-
return NewCommandError("input argument must be provided if there is no stdin data")
44-
}
45-
data, err := io.ReadAll(os.Stdin)
46-
if err != nil {
47-
return NewCommandError("trouble reading from stdin: %w", err)
48-
}
49-
input = bytes.NewReader(data)
43+
if inputName == "" {
5044
inputName = "<stdin>"
51-
} else {
52-
i, readErr := os.Open(c.Input)
53-
if readErr != nil {
54-
return NewCommandError("failed to read from %q: %w", c.Input, readErr)
55-
}
56-
defer i.Close()
57-
input = i
5845
}
59-
6046
v := validator.New(c.MetadataOnly)
6147
report, err := v.Validate(context.Background(), input, inputName)
6248
if err != nil {

0 commit comments

Comments
 (0)