Skip to content

Commit 9d30779

Browse files
authored
Merge pull request #1460 from flux-iac/feat/exponential-retry-on-failure
Added exponential backoff on reconciliation failure
2 parents 3bbe9d6 + 69475bd commit 9d30779

File tree

6 files changed

+232
-4
lines changed

6 files changed

+232
-4
lines changed

api/v1alpha2/terraform_types.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ package v1alpha2
1919
import (
2020
"bytes"
2121
"fmt"
22+
"math"
2223
"net"
2324
"strings"
2425
"time"
2526
"unicode/utf8"
2627

27-
"github.com/flux-iac/tofu-controller/api/planid"
2828
"github.com/fluxcd/pkg/apis/meta"
2929
sourcev1 "github.com/fluxcd/source-controller/api/v1"
3030
corev1 "k8s.io/api/core/v1"
@@ -33,6 +33,8 @@ import (
3333
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3434
"k8s.io/apimachinery/pkg/runtime"
3535
"k8s.io/apimachinery/pkg/runtime/serializer"
36+
37+
"github.com/flux-iac/tofu-controller/api/planid"
3638
)
3739

3840
const (
@@ -145,6 +147,21 @@ type TerraformSpec struct {
145147
// +optional
146148
RetryInterval *metav1.Duration `json:"retryInterval,omitempty"`
147149

150+
// The strategy to use when retrying a previously failed reconciliation.
151+
// The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
152+
// The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
153+
// maximum requeue duration of MaxRetryInterval.
154+
// +kubebuilder:validation:Enum=StaticInterval;ExponentialBackoff
155+
// +kubebuilder:default:string=StaticInterval
156+
// +optional
157+
RetryStrategy RetryStrategyEnum `json:"retryStrategy,omitempty"`
158+
159+
// The maximum requeue duration after a previously failed reconciliation.
160+
// Only applicable when RetryStrategy is set to ExponentialBackoff.
161+
// The default value is 24 hours when not specified.
162+
// +optional
163+
MaxRetryInterval *metav1.Duration `json:"maxRetryInterval,omitempty"`
164+
148165
// Path to the directory containing Terraform (.tf) files.
149166
// Defaults to 'None', which translates to the root path of the SourceRef.
150167
// +optional
@@ -521,6 +538,13 @@ const (
521538
ForceUnlockEnumNo ForceUnlockEnum = "no"
522539
)
523540

541+
// RetryStrategyEnum names the strategy used to pick the requeue delay after a
// failed reconciliation. It is the type of TerraformSpec.RetryStrategy.
type RetryStrategyEnum string
542+
543+
const (
544+
// StaticInterval retries at the RetryInterval value; it is the default strategy.
StaticInterval RetryStrategyEnum = "StaticInterval"
545+
// ExponentialBackoff retries after 2^reconciliationFailures * RetryInterval,
// capped at MaxRetryInterval.
ExponentialBackoff RetryStrategyEnum = "ExponentialBackoff"
546+
)
547+
524548
const (
525549
TerraformKind = "Terraform"
526550
TerraformFinalizer = "finalizers.tf.contrib.fluxcd.io"
@@ -892,12 +916,24 @@ func (in Terraform) GetDependsOn() []meta.NamespacedObjectReference {
892916

893917
// GetRetryInterval returns the retry interval
894918
func (in Terraform) GetRetryInterval() time.Duration {
919+
retryInterval := 15 * time.Second
895920
if in.Spec.RetryInterval != nil {
896-
return in.Spec.RetryInterval.Duration
921+
retryInterval = in.Spec.RetryInterval.Duration
922+
}
923+
924+
if in.Spec.RetryStrategy == ExponentialBackoff {
925+
retryInterval *= time.Duration(math.Pow(2, float64(in.Status.ReconciliationFailures)))
926+
maxRetryInterval := 24 * time.Hour
927+
if in.Spec.MaxRetryInterval != nil {
928+
maxRetryInterval = in.Spec.MaxRetryInterval.Duration
929+
}
930+
931+
if retryInterval > maxRetryInterval {
932+
return maxRetryInterval
933+
}
897934
}
898935

899-
// The default retry interval is 15 seconds.
900-
return 15 * time.Second
936+
return retryInterval
901937
}
902938

903939
// GetStatusConditions returns a pointer to the Status.Conditions slice.
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package v1alpha2
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
. "github.com/onsi/gomega"
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
)
10+
11+
func TestGetRetryInterval(t *testing.T) {
12+
g := NewGomegaWithT(t)
13+
14+
tests := []struct {
15+
name string
16+
terraform Terraform
17+
expectedRetryInterval time.Duration
18+
}{
19+
{
20+
name: "default retry interval",
21+
terraform: Terraform{
22+
Spec: TerraformSpec{},
23+
},
24+
expectedRetryInterval: 15 * time.Second,
25+
},
26+
{
27+
name: "custom retry interval",
28+
terraform: Terraform{
29+
Spec: TerraformSpec{
30+
RetryInterval: &metav1.Duration{Duration: 30 * time.Second},
31+
},
32+
},
33+
expectedRetryInterval: 30 * time.Second,
34+
},
35+
{
36+
name: "exponential backoff with default retry interval",
37+
terraform: Terraform{
38+
Spec: TerraformSpec{
39+
RetryStrategy: ExponentialBackoff,
40+
},
41+
Status: TerraformStatus{
42+
ReconciliationFailures: 2,
43+
},
44+
},
45+
expectedRetryInterval: 60 * time.Second,
46+
},
47+
{
48+
name: "exponential backoff",
49+
terraform: Terraform{
50+
Spec: TerraformSpec{
51+
RetryStrategy: ExponentialBackoff,
52+
RetryInterval: &metav1.Duration{Duration: 60 * time.Second},
53+
},
54+
Status: TerraformStatus{
55+
ReconciliationFailures: 4,
56+
},
57+
},
58+
expectedRetryInterval: 960 * time.Second,
59+
},
60+
{
61+
name: "exponential backoff with max retry interval",
62+
terraform: Terraform{
63+
Spec: TerraformSpec{
64+
RetryStrategy: ExponentialBackoff,
65+
RetryInterval: &metav1.Duration{Duration: 60 * time.Second},
66+
MaxRetryInterval: &metav1.Duration{Duration: 45 * time.Second},
67+
},
68+
Status: TerraformStatus{
69+
ReconciliationFailures: 4,
70+
},
71+
},
72+
expectedRetryInterval: 45 * time.Second,
73+
},
74+
}
75+
76+
for _, tt := range tests {
77+
t.Run(tt.name, func(t *testing.T) {
78+
g.Expect(tt.terraform.GetRetryInterval()).To(Equal(tt.expectedRetryInterval))
79+
})
80+
}
81+
}

api/v1alpha2/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/tofu-controller/crds/crds.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,6 +5536,12 @@ spec:
55365536
interval:
55375537
description: The interval at which to reconcile the Terraform.
55385538
type: string
5539+
maxRetryInterval:
5540+
description: |-
5541+
The maximum requeue duration after a previously failed reconciliation.
5542+
Only applicable when RetryStrategy is set to ExponentialBackoff.
5543+
The default value is 24 hours when not specified.
5544+
type: string
55395545
parallelism:
55405546
default: 0
55415547
description: Parallelism limits the number of concurrent operations
@@ -5587,6 +5593,17 @@ spec:
55875593
The interval at which to retry a previously failed reconciliation.
55885594
The default value is 15 seconds when not specified.
55895595
type: string
5596+
retryStrategy:
5597+
default: StaticInterval
5598+
description: |-
5599+
The strategy to use when retrying a previously failed reconciliation.
5600+
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
5601+
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
5602+
maximum requeue duration of MaxRetryInterval.
5603+
enum:
5604+
- StaticInterval
5605+
- ExponentialBackoff
5606+
type: string
55905607
runnerPodTemplate:
55915608
properties:
55925609
metadata:

config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,6 +5536,12 @@ spec:
55365536
interval:
55375537
description: The interval at which to reconcile the Terraform.
55385538
type: string
5539+
maxRetryInterval:
5540+
description: |-
5541+
The maximum requeue duration after a previously failed reconciliation.
5542+
Only applicable when RetryStrategy is set to ExponentialBackoff.
5543+
The default value is 24 hours when not specified.
5544+
type: string
55395545
parallelism:
55405546
default: 0
55415547
description: Parallelism limits the number of concurrent operations
@@ -5587,6 +5593,17 @@ spec:
55875593
The interval at which to retry a previously failed reconciliation.
55885594
The default value is 15 seconds when not specified.
55895595
type: string
5596+
retryStrategy:
5597+
default: StaticInterval
5598+
description: |-
5599+
The strategy to use when retrying a previously failed reconciliation.
5600+
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
5601+
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
5602+
maximum requeue duration of MaxRetryInterval.
5603+
enum:
5604+
- StaticInterval
5605+
- ExponentialBackoff
5606+
type: string
55905607
runnerPodTemplate:
55915608
properties:
55925609
metadata:

docs/References/terraform.md

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,12 @@ string
794794
</table>
795795
</div>
796796
</div>
797+
<h3 id="infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">RetryStrategyEnum
798+
(<code>string</code> alias)</h3>
799+
<p>
800+
(<em>Appears on:</em>
801+
<a href="#infra.contrib.fluxcd.io/v1alpha2.TerraformSpec">TerraformSpec</a>)
802+
</p>
797803
<h3 id="infra.contrib.fluxcd.io/v1alpha2.RunnerPodMetadata">RunnerPodMetadata
798804
</h3>
799805
<p>
@@ -1568,6 +1574,39 @@ The default value is 15 when not specified.</p>
15681574
</tr>
15691575
<tr>
15701576
<td>
1577+
<code>retryStrategy</code><br>
1578+
<em>
1579+
<a href="#infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">
1580+
RetryStrategyEnum
1581+
</a>
1582+
</em>
1583+
</td>
1584+
<td>
1585+
<em>(Optional)</em>
1586+
<p>The strategy to use when retrying a previously failed reconciliation.
1587+
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
1588+
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
1589+
maximum requeue duration of MaxRetryInterval.</p>
1590+
</td>
1591+
</tr>
1592+
<tr>
1593+
<td>
1594+
<code>maxRetryInterval</code><br>
1595+
<em>
1596+
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
1597+
Kubernetes meta/v1.Duration
1598+
</a>
1599+
</em>
1600+
</td>
1601+
<td>
1602+
<em>(Optional)</em>
1603+
<p>The maximum requeue duration after a previously failed reconciliation.
1604+
Only applicable when RetryStrategy is set to ExponentialBackoff.
1605+
The default value is 24 hours when not specified.</p>
1606+
</td>
1607+
</tr>
1608+
<tr>
1609+
<td>
15711610
<code>path</code><br>
15721611
<em>
15731612
string
@@ -2131,6 +2170,39 @@ The default value is 15 when not specified.</p>
21312170
</tr>
21322171
<tr>
21332172
<td>
2173+
<code>retryStrategy</code><br>
2174+
<em>
2175+
<a href="#infra.contrib.fluxcd.io/v1alpha2.RetryStrategyEnum">
2176+
RetryStrategyEnum
2177+
</a>
2178+
</em>
2179+
</td>
2180+
<td>
2181+
<em>(Optional)</em>
2182+
<p>The strategy to use when retrying a previously failed reconciliation.
2183+
The default strategy is StaticInterval and the retry interval is based on the RetryInterval value.
2184+
The ExponentialBackoff strategy uses the formula: 2^reconciliationFailures * RetryInterval with a
2185+
maximum requeue duration of MaxRetryInterval.</p>
2186+
</td>
2187+
</tr>
2188+
<tr>
2189+
<td>
2190+
<code>maxRetryInterval</code><br>
2191+
<em>
2192+
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
2193+
Kubernetes meta/v1.Duration
2194+
</a>
2195+
</em>
2196+
</td>
2197+
<td>
2198+
<em>(Optional)</em>
2199+
<p>The maximum requeue duration after a previously failed reconciliation.
2200+
Only applicable when RetryStrategy is set to ExponentialBackoff.
2201+
The default value is 24 hours when not specified.</p>
2202+
</td>
2203+
</tr>
2204+
<tr>
2205+
<td>
21342206
<code>path</code><br>
21352207
<em>
21362208
string

0 commit comments

Comments
 (0)