Skip to content

Commit f9016af

Browse files
apricotejooola
andauthored
fix(resize): recover from part-way failed resizes (#623)
If a previous resize attempt failed after calling `POST /v1/volumes/{id}/resize` (rate limit, network connectivity, ...), the volume might already have the target size. The PVC (in Kubernetes) would still have the old size in its status, so the `external-resizer` makes another attempt at the resize. In the Hetzner Cloud API, a resize must always be larger than the current size, so the new resize failed with: volume size is too small (invalid_input) This is now fixed, as we compare the size of the Hetzner Cloud volume to the desired size and only proceed if an actual resize is needed. This was a violation of the CSI Spec[0], which stated for `ControllerExpandVolume`: > This operation MUST be idempotent. If a volume corresponding to the specified volume ID is already larger than or equal to the target capacity of the expansion request, the plugin SHOULD reply 0 OK. [0] https://github.com/container-storage-interface/spec/blob/release-1.9/spec.md#controllerexpandvolume --------- Co-authored-by: Jonas Lammler <jonas.lammler@hetzner-cloud.de>
1 parent 487fd6a commit f9016af

File tree

8 files changed

+197
-46
lines changed

8 files changed

+197
-46
lines changed

api/volume.go

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -345,47 +345,40 @@ func (s *VolumeService) Detach(ctx context.Context, volume *csi.Volume, server *
345345
}
346346

347347
func (s *VolumeService) Resize(ctx context.Context, volume *csi.Volume, size int) error {
348-
level.Info(s.logger).Log(
349-
"msg", "resize volume",
348+
logger := log.With(s.logger,
350349
"volume-id", volume.ID,
351350
"requested-size", size,
352351
)
353352

353+
level.Info(logger).Log(
354+
"msg", "resize volume",
355+
)
356+
354357
hcloudVolume, _, err := s.client.Volume.GetByID(ctx, volume.ID)
355358
if err != nil {
356-
level.Info(s.logger).Log(
357-
"msg", "failed to get volume",
358-
"volume-id", volume.ID,
359-
"err", err,
360-
)
359+
level.Info(logger).Log("msg", "failed to get volume", "err", err)
361360
return err
362361
}
363362
if hcloudVolume == nil {
364-
level.Info(s.logger).Log(
365-
"msg", "volume to resize not found",
366-
"volume-id", volume.ID,
367-
)
363+
level.Info(logger).Log("msg", "volume to resize not found")
368364
return volumes.ErrVolumeNotFound
369365
}
370366

367+
logger = log.With(logger, "current-size", hcloudVolume.Size)
368+
369+
if hcloudVolume.Size >= size {
370+
level.Info(logger).Log("msg", "volume size is already larger or equal than the requested size")
371+
return volumes.ErrVolumeSizeAlreadyReached
372+
}
373+
371374
action, _, err := s.client.Volume.Resize(ctx, hcloudVolume, size)
372375
if err != nil {
373-
level.Info(s.logger).Log(
374-
"msg", "failed to resize volume",
375-
"volume-id", volume.ID,
376-
"size", size,
377-
"err", err,
378-
)
376+
level.Info(logger).Log("msg", "failed to resize volume", "err", err)
379377
return err
380378
}
381379

382-
if err := s.client.Action.WaitFor(ctx, action); err != nil {
383-
level.Info(s.logger).Log(
384-
"msg", "failed to resize volume",
385-
"volume-id", volume.ID,
386-
"size", size,
387-
"err", err,
388-
)
380+
if err = s.client.Action.WaitFor(ctx, action); err != nil {
381+
level.Info(logger).Log("msg", "failed to resize volume", "err", err)
389382
return err
390383
}
391384
return nil

api/volume_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,94 @@
11
package api
22

33
import (
4+
"context"
5+
"net/http/httptest"
6+
"testing"
7+
"time"
8+
9+
"github.com/go-kit/log"
10+
"github.com/stretchr/testify/assert"
11+
12+
"github.com/hetznercloud/csi-driver/csi"
13+
"github.com/hetznercloud/csi-driver/internal/mocked"
414
"github.com/hetznercloud/csi-driver/volumes"
15+
"github.com/hetznercloud/hcloud-go/v2/hcloud"
16+
"github.com/hetznercloud/hcloud-go/v2/hcloud/schema"
517
)
618

719
var _ volumes.Service = (*VolumeService)(nil)
20+
21+
func makeTestVolumeService(t *testing.T, requests []mocked.Request) (*VolumeService, func()) {
22+
t.Helper()
23+
24+
testServer := httptest.NewServer(mocked.Handler(t, requests))
25+
26+
testClient := hcloud.NewClient(
27+
hcloud.WithEndpoint(testServer.URL),
28+
hcloud.WithBackoffFunc(func(_ int) time.Duration { return 0 }),
29+
hcloud.WithPollBackoffFunc(func(_ int) time.Duration { return 0 }),
30+
)
31+
32+
volumeService := NewVolumeService(log.NewNopLogger(), testClient)
33+
34+
return volumeService, testServer.Close
35+
}
36+
37+
func TestResize(t *testing.T) {
38+
t.Run("ErrVolumeSizeAlreadyReached", func(t *testing.T) {
39+
t.Run("happy with larger volume size", func(t *testing.T) {
40+
volumeService, cleanup := makeTestVolumeService(t, []mocked.Request{
41+
{
42+
Method: "GET", Path: "/volumes/1",
43+
Status: 200,
44+
JSON: schema.VolumeGetResponse{
45+
Volume: schema.Volume{ID: 1, Name: "pvc-123", Size: 10},
46+
},
47+
},
48+
{
49+
Method: "POST", Path: "/volumes/1/actions/resize",
50+
Status: 201,
51+
JSON: schema.VolumeActionResizeVolumeResponse{
52+
Action: schema.Action{ID: 3, Status: "success"},
53+
},
54+
},
55+
})
56+
defer cleanup()
57+
58+
err := volumeService.Resize(context.Background(), &csi.Volume{ID: 1}, 15)
59+
assert.NoError(t, err)
60+
})
61+
62+
t.Run("with equal volume size", func(t *testing.T) {
63+
volumeService, cleanup := makeTestVolumeService(t, []mocked.Request{
64+
{
65+
Method: "GET", Path: "/volumes/1",
66+
Status: 200,
67+
JSON: schema.VolumeGetResponse{
68+
Volume: schema.Volume{ID: 1, Name: "pvc-123", Size: 15},
69+
},
70+
},
71+
})
72+
defer cleanup()
73+
74+
err := volumeService.Resize(context.Background(), &csi.Volume{ID: 1}, 15)
75+
assert.Equal(t, volumes.ErrVolumeSizeAlreadyReached, err)
76+
})
77+
78+
t.Run("with smaller volume size", func(t *testing.T) {
79+
volumeService, cleanup := makeTestVolumeService(t, []mocked.Request{
80+
{
81+
Method: "GET", Path: "/volumes/1",
82+
Status: 200,
83+
JSON: schema.VolumeGetResponse{
84+
Volume: schema.Volume{ID: 1, Name: "pvc-123", Size: 15},
85+
},
86+
},
87+
})
88+
defer cleanup()
89+
90+
err := volumeService.Resize(context.Background(), &csi.Volume{ID: 1}, 10)
91+
assert.Equal(t, volumes.ErrVolumeSizeAlreadyReached, err)
92+
})
93+
})
94+
}

go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111
github.com/hetznercloud/hcloud-go/v2 v2.8.0
1212
github.com/kubernetes-csi/csi-test/v5 v5.2.0
1313
github.com/prometheus/client_golang v1.19.1
14+
github.com/stretchr/testify v1.9.0
1415
golang.org/x/sys v0.21.0
1516
google.golang.org/grpc v1.64.0
1617
k8s.io/mount-utils v0.29.4
@@ -20,6 +21,7 @@ require (
2021
require (
2122
github.com/beorn7/perks v1.0.1 // indirect
2223
github.com/cespare/xxhash/v2 v2.2.0 // indirect
24+
github.com/davecgh/go-spew v1.1.1 // indirect
2325
github.com/go-logfmt/logfmt v0.5.1 // indirect
2426
github.com/go-logr/logr v1.3.0 // indirect
2527
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
@@ -29,6 +31,7 @@ require (
2931
github.com/moby/sys/mountinfo v0.6.2 // indirect
3032
github.com/onsi/ginkgo/v2 v2.13.1 // indirect
3133
github.com/onsi/gomega v1.30.0 // indirect
34+
github.com/pmezard/go-difflib v1.0.0 // indirect
3235
github.com/prometheus/client_model v0.5.0 // indirect
3336
github.com/prometheus/common v0.48.0 // indirect
3437
github.com/prometheus/procfs v0.12.0 // indirect

go.sum

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDa
5050
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8=
5151
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
5252
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
53-
github.com/hetznercloud/hcloud-go/v2 v2.7.2 h1:UlE7n1GQZacCfyjv9tDVUN7HZfOXErPIfM/M039u9A0=
54-
github.com/hetznercloud/hcloud-go/v2 v2.7.2/go.mod h1:49tIV+pXRJTUC7fbFZ03s45LKqSQdOPP5y91eOnJo/k=
5553
github.com/hetznercloud/hcloud-go/v2 v2.8.0 h1:vfbfL/JfV8dIZUX7ANHWEbKNqgFWsETqvt/EctvoFJ0=
5654
github.com/hetznercloud/hcloud-go/v2 v2.8.0/go.mod h1:jvpP3qAWMIZ3WQwQLYa97ia6t98iPCgsJNwRts+Jnrk=
5755
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
@@ -77,8 +75,6 @@ github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFSt
7775
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
7876
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
7977
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
80-
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
81-
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
8278
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
8379
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
8480
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@@ -140,10 +136,6 @@ golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7w
140136
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
141137
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
142138
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
143-
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
144-
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
145-
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
146-
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
147139
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
148140
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
149141
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -170,17 +162,13 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
170162
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
171163
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
172164
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
173-
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY=
174-
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY=
175165
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 h1:NnYq6UN9ReLM9/Y01KWNOWyI5xQ9kbIms5GGJVwS/Yc=
176166
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY=
177167
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
178168
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
179169
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
180170
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
181171
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
182-
google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM=
183-
google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA=
184172
google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY=
185173
google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg=
186174
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
@@ -203,7 +191,5 @@ k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0=
203191
k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo=
204192
k8s.io/mount-utils v0.29.4 h1:tW/URea4gtXlaVW7VObr52NQhS+z3SXTg1GUaFZjRL4=
205193
k8s.io/mount-utils v0.29.4/go.mod h1:SHUMR9n3b6tLgEmlyT36cL6fV6Sjwa5CJhc0guCXvb0=
206-
k8s.io/utils v0.0.0-20240423183400-0849a56e8f22 h1:ao5hUqGhsqdm+bYbjH/pRkCs0unBGe9UyDahzs9zQzQ=
207-
k8s.io/utils v0.0.0-20240423183400-0849a56e8f22/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
208194
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
209195
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=

internal/mocked/mocked.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package mocked
2+
3+
import (
4+
"encoding/json"
5+
"net/http"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
type Request struct {
13+
Method string
14+
Path string
15+
PathRegexp string
16+
17+
Status int
18+
Body string
19+
JSON any
20+
}
21+
22+
func Handler(t *testing.T, requests []Request) http.HandlerFunc {
23+
t.Helper()
24+
25+
index := 0
26+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
27+
if index >= len(requests) {
28+
t.Fatalf("received unknown request %d", index)
29+
}
30+
31+
response := requests[index]
32+
assert.Equal(t, response.Method, r.Method)
33+
if response.PathRegexp != "" {
34+
require.Regexp(t, response.Path, r.RequestURI, "request %d", index)
35+
} else {
36+
require.Equal(t, response.Path, r.RequestURI, "request %d", index)
37+
}
38+
39+
w.WriteHeader(response.Status)
40+
w.Header().Set("Content-Type", "application/json")
41+
if response.Body != "" {
42+
_, err := w.Write([]byte(response.Body))
43+
if err != nil {
44+
t.Fatal(err)
45+
}
46+
} else if response.JSON != nil {
47+
if err := json.NewEncoder(w).Encode(response.JSON); err != nil {
48+
t.Fatal(err)
49+
}
50+
}
51+
52+
index++
53+
})
54+
}

volumes/idempotency.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,5 +156,15 @@ func (s *IdempotentService) Detach(ctx context.Context, volume *csi.Volume, serv
156156
}
157157

158158
func (s *IdempotentService) Resize(ctx context.Context, volume *csi.Volume, size int) error {
159-
return s.volumeService.Resize(ctx, volume, size)
159+
switch err := s.volumeService.Resize(ctx, volume, size); err {
160+
case ErrVolumeSizeAlreadyReached:
161+
// If a previous rescale attempt failed (rate limit, network connectivity, ...), the volume might already have the target size.
162+
// In the Hetzner Cloud API, a resize must always be larger than the current size, so this
163+
// would manifest as a "volume size is too small (invalid_input)" error.
164+
return nil
165+
case nil:
166+
return nil
167+
default:
168+
return err
169+
}
160170
}

volumes/idempotency_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,20 @@ func TestIdempotentServiceDetachAttachedToDifferentServer(t *testing.T) {
371371
t.Fatal(err)
372372
}
373373
}
374+
375+
func TestIdempotentServiceExpand(t *testing.T) {
376+
t.Run("ErrVolumeSizeAlreadyReached", func(t *testing.T) {
377+
volumeService := &mock.VolumeService{
378+
ResizeFunc: func(ctx context.Context, volume *csi.Volume, size int) error {
379+
return volumes.ErrVolumeSizeAlreadyReached
380+
},
381+
}
382+
383+
service := volumes.NewIdempotentService(log.NewNopLogger(), volumeService)
384+
385+
err := service.Resize(context.Background(), &csi.Volume{}, 10)
386+
if err != nil {
387+
t.Fatal(err)
388+
}
389+
})
390+
}

volumes/service.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ import (
88
)
99

1010
var (
11-
ErrVolumeNotFound = errors.New("volume not found")
12-
ErrVolumeAlreadyExists = errors.New("volume does already exist")
13-
ErrServerNotFound = errors.New("server not found")
14-
ErrAttached = errors.New("volume is attached")
15-
ErrNotAttached = errors.New("volume is not attached")
16-
ErrAttachLimitReached = errors.New("max number of attachments per server reached")
17-
ErrLockedServer = errors.New("server is locked")
11+
ErrVolumeNotFound = errors.New("volume not found")
12+
ErrVolumeAlreadyExists = errors.New("volume does already exist")
13+
ErrServerNotFound = errors.New("server not found")
14+
ErrAttached = errors.New("volume is attached")
15+
ErrNotAttached = errors.New("volume is not attached")
16+
ErrAttachLimitReached = errors.New("max number of attachments per server reached")
17+
ErrLockedServer = errors.New("server is locked")
18+
ErrVolumeSizeAlreadyReached = errors.New("volume size is already larger or equal than the requested size")
1819
)
1920

2021
type Service interface {

0 commit comments

Comments
 (0)