Skip to content

Commit 9ef5607

Browse files
committed
backup-restore - Enable functional testing
This change: - updates the cli/restore tool to make it delete the Deployments and StatefulSets Resources that will require a respawn from the sf-operator reconcile. - adds a high-level functional test to simulate the backup/restore process. - removes previous in-depth backup and restore role in order to only keep the high-level test. - updates the CLI documentation Change-Id: I385c83c959f8bc06097b1415f7711097889be79e
1 parent 9701ddb commit 9ef5607

File tree

23 files changed

+273
-387
lines changed

23 files changed

+273
-387
lines changed

CHANGELOG.md

Lines changed: 0 additions & 69 deletions
This file was deleted.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
doc/reference/CHANGELOG.md

cli/cmd/restore.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ import (
3232

3333
"github.com/spf13/cobra"
3434

35+
appsv1 "k8s.io/api/apps/v1"
3536
corev1 "k8s.io/api/core/v1"
37+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3638
"k8s.io/client-go/kubernetes"
3739
ctrl "sigs.k8s.io/controller-runtime"
3840
)
@@ -162,6 +164,33 @@ func restoreZuul(ns string, backupDir string, kubeClientSet *kubernetes.Clientse
162164

163165
}
164166

167+
func clearComponents(ns string, kubeContext string) {
168+
ctrl.Log.Info("Removing components requiring a complete restart ...")
169+
170+
env := cliutils.ENV{
171+
Cli: cliutils.CreateKubernetesClientOrDie(kubeContext),
172+
Ctx: context.TODO(),
173+
Ns: ns,
174+
}
175+
176+
for _, stsName := range []string{"zuul-scheduler", "zuul-executor", "zuul-merger", "nodepool-builder", "zookeeper"} {
177+
cliutils.DeleteOrDie(&env, &appsv1.StatefulSet{
178+
ObjectMeta: metav1.ObjectMeta{
179+
Name: stsName,
180+
Namespace: env.Ns,
181+
},
182+
})
183+
}
184+
for _, depName := range []string{"zuul-web", "nodepool-launcher"} {
185+
cliutils.DeleteOrDie(&env, &appsv1.Deployment{
186+
ObjectMeta: metav1.ObjectMeta{
187+
Name: depName,
188+
Namespace: env.Ns,
189+
},
190+
})
191+
}
192+
}
193+
165194
func restoreCmd(kmd *cobra.Command, args []string) {
166195

167196
// NOTE: The solution for restoring the DB and Zuul requires the kubectl binary to be installed and .kube/config to be configured
@@ -191,6 +220,7 @@ func restoreCmd(kmd *cobra.Command, args []string) {
191220
restoreZuul(ns, backupDir, kubeClientSet, kubeContext)
192221
restoreSecret(ns, backupDir, kubeContext)
193222
restoreDB(ns, backupDir, kubeClientSet, kubeContext)
223+
clearComponents(ns, kubeContext)
194224

195225
}
196226

controllers/config.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ func (r *SFController) InstallTooling() {
166166

167167
func (r *SFController) SetupConfigJob() bool {
168168

169+
// Get the resource version of the keystore password
170+
zkp := apiv1.Secret{}
171+
r.GetM("zuul-keystore-password", &zkp)
172+
169173
// This ensures we trigger the base secret creation job when the settings change
170174
extraSettingsChecksum := "ns"
171175
if r.cr.Spec.ConfigRepositoryLocation.ClusterAPIURL != "" || r.cr.Spec.ConfigRepositoryLocation.LogserverHost != "" {
@@ -179,7 +183,7 @@ func (r *SFController) SetupConfigJob() bool {
179183
cmName = "zs-internal-tenant-reconfigure"
180184
zsInternalTenantReconfigure apiv1.ConfigMap
181185
configHash = utils.Checksum([]byte(r.MkPreInitScript()))
182-
internalTenantSecretsVersion = "1" + "-" + extraSettingsChecksum
186+
internalTenantSecretsVersion = "1" + "-" + zkp.ResourceVersion + "-" + extraSettingsChecksum
183187
needReconfigureTenant = false
184188
needCMUpdate = false
185189
)

doc/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ The current project status is: **Alpha - NOT PRODUCTION READY**
4545
* Level 2 - Seamless upgrades - **2/2**
4646
- Operator: ✅
4747
- Operands: ✅
48-
* Level 3 - Full Lifecycle - **1/5**
48+
* Level 3 - Full Lifecycle - **3/5**
4949
- SF 3.8.x migration ❌
50-
- Backup: ❌
51-
- Restore: ❌
50+
- Backup: ✅
51+
- Restore: ✅
5252
- Rolling deployments: ❌
5353
- Reconfiguration: ✅
5454
* Level 4 - Deep Insights - **1/3**

doc/deployment/backup-restore.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Backup and Restore
2+
3+
The sf-operator CLI provides commands to perform a backup and a restore of a deployment managed by the sf-operator.
4+
5+
The [backup command](../reference/cli/index.md#backup) can be run periodically to perform a backup of a Software Factory deployment.
6+
The command should be coupled with a proper backup system to safely store the backed-up data.
7+
8+
Restoring a backup must be done via the [restore command](../reference/cli/index.md#restore) and by following a specific process:
9+
10+
1. Deploy a Software Factory CR resource without `ConfigLocation` and Zuul connections definition.
11+
2. Restore the backup with the `restore` command
12+
3. Apply your final Software Factory CR
13+
14+
## The backup archive
15+
16+
The archive contains:
17+
18+
- Some k8s Secret resources (like the Zuul Keystore Secret and Zuul SSH private key Secret)
19+
- The Zuul SQL database content (history of builds)
20+
- The Zuul projects' private keys (the keys stored into Zookeeper and used to encrypt/decrypt in-repo Zuul Secrets)

doc/deployment/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,6 @@ and managing a Software Factory Custom Resource through SF-Operator.
2020
1. [Add corporate CA certificates into the CA trust chain](./corporate-certificates.md)
2121
1. [Monitoring](./monitoring.md)
2222
1. [Logging](./logging.md)
23+
1. [Backup and Restore](./backup-restore.md)
2324
1. [Deleting a deployment](./delete.md)
2425
1. [Custom Resource Definitions reference](./crds.md)

doc/reference/CHANGELOG.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

doc/reference/CHANGELOG.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
## [in development]
6+
7+
### Added
8+
9+
- CLI: restore command and documentation.
10+
- Dev CLI - Add command "go run ./main.go dev getImagesSecurityIssues" to ease getting a small report of HIGH
11+
and CRITICAL Security issues reported by quay.io on container images used by the sf-operator.
12+
13+
### Changed
14+
15+
- Zookeeper version bumped to 3.8.4
16+
- The Operator handles only one Route resource as a 'gateway' pod dispatches incoming connections.
17+
18+
### Deprecated
19+
### Removed
20+
21+
- The LogsServer CRD and controller. As there is no identified need for a proper CRD and Controller.
22+
23+
### Fixed
24+
### Security
25+
26+
- UBI9/Zookeeper image rebuild to address reported security issues
27+
28+
## [v0.0.27] - 2024-03-27
29+
30+
🐰🔔 Easter release 🐰🔔
31+
32+
### Added
33+
34+
- "Debug" toggle for fluent bit sidecars
35+
- A support for running zuul-executor component external to the cluster (see ADR#014).
36+
- The standalone deployment mode exits 1 when the reconcile is not possible after 300 seconds
37+
- A bundled YAML file containing information about container images used by the operator `controllers/libs/base/static/images.yaml`
38+
39+
### Changed
40+
41+
- zookeeper: update liveness and readiness probes to only check SSL access and remove superfluous Service resource called
42+
zookeeper-headless.
43+
- nodepool: update version to 10.0.0
44+
- zuul: update version to 10.0.0
45+
- CLI: simplified `SF backup` options to streamline the backup process.
46+
47+
### Deprecated
48+
### Removed
49+
### Fixed
50+
51+
- nodepool-builder: fixed the log path configuration when using the fluent bit log forwarder, resulting in much less file access errors appearing in fluent bit logs.
52+
53+
### Security
54+
55+
## [v0.0.26] - 2024-03-08
56+
57+
### Added
58+
59+
- CLI: Add the `SF backup` subcommand. This subcommand dumps a Software Factory's most important data for safekeeping.
60+
61+
### Changed
62+
### Deprecated
63+
### Removed
64+
### Fixed
65+
### Security
66+
67+
## [alpha] - not released
68+
69+
- Initial alpha version. Please consult the commit log for detailed information.
70+
- From now on all changes will be referenced into this changelog.

doc/reference/cli/index.md

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ We provide a command to perform various actions related to the management of Sof
44
deployments, beyond what can be defined in a custom resource manifest.
55

66

7-
1. [Installing the CLI](#installing-the-cli)
8-
1. [Global Flags](#global-flags)
9-
1. [Configuration File](#configuration-file)
10-
1. [Subcommands](#subcommands)
11-
1. [Dev](#dev)
7+
- [Installing the CLI](#installing-the-cli)
8+
- [Global Flags](#global-flags)
9+
- [Configuration File](#configuration-file)
10+
- [Subcommands](#subcommands)
11+
- [Dev](#dev)
1212
- [cloneAsAdmin](#cloneasadmin)
1313
- [create demo-env](#create-demo-env)
1414
- [create gerrit](#create-gerrit)
@@ -17,20 +17,20 @@ deployments, beyond what can be defined in a custom resource manifest.
1717
- [run-tests](#run-tests)
1818
- [wipe gerrit](#wipe-gerrit)
1919
- [getImagesSecurityIssues](#getimagessecurityissues)
20-
1. [Init](#init)
21-
1. [Nodepool](#nodepool)
20+
- [Init](#init)
21+
- [Nodepool](#nodepool)
2222
- [configure providers-secrets](#configure-providers-secrets)
2323
- [create openshiftpods-namespace](#create-openshiftpods-namespace)
2424
- [get builder-ssh-key](#get-builder-ssh-key)
2525
- [get providers-secrets](#get-providers-secrets)
26-
1. [Operator](#operator)
27-
1. [SF](#sf)
28-
1. [backup](#backup)
29-
1. [bootstrap-tenant](#bootstrap-tenant)
30-
1. [configure TLS](#configure-tls)
31-
1. [restore](#restore)
32-
1. [wipe](#wipe)
33-
1. [Zuul](#zuul)
26+
- [Operator](#operator)
27+
- [SF](#sf)
28+
- [backup](#backup)
29+
- [bootstrap-tenant](#bootstrap-tenant)
30+
- [configure TLS](#configure-tls)
31+
- [restore](#restore)
32+
- [wipe](#wipe)
33+
- [Zuul](#zuul)
3434
- [create auth-token](#create-auth-token)
3535
- [create client-config](#create-client-config)
3636

@@ -418,14 +418,9 @@ The following subcommands can be used to manage a Software Factory deployment an
418418

419419
#### backup
420420

421-
The `backup` subcommand lets you dump a Software Factory's most important files for safekeeping,
422-
most important files, such as:
421+
The `backup` subcommand lets you dump a Software Factory's most important files for safekeeping.
423422

424-
- MariaDB Zuul database copy
425-
- secrets backup
426-
- Zuul project private keys
427-
428-
To create a backup located in `/tmp/backup` directory of all important objects, run the following command:
423+
To create a backup located in `/tmp/backup` directory, run the following command:
429424

430425
```sh
431426
sf-operator SF backup --namespace sf --backup_dir /tmp/backup
@@ -437,6 +432,14 @@ Flags:
437432
|----------|------|-------|----|----|
438433
| --backup_dir | string | The path to the backup directory | no | - |
439434

435+
The backup is composed of:
436+
437+
- some relevant `Secrets` located in the deployment's namespace
438+
- the Zuul's SQL database
439+
- the Zuul's project's keys as exported by [zuul-admin export-keys](https://zuul-ci.org/docs/zuul/latest/client.html#export-keys)
440+
441+
The backup directory content could be compressed and stored safely in a backup system.
442+
440443
#### bootstrap-tenant
441444

442445
Initialize a Zuul tenant's config repository with boilerplate code that defines standard pipelines:
@@ -480,13 +483,7 @@ Flags:
480483
!!! warning
481484
The command requires the `kubectl` binary to be installed on the system
482485

483-
The `restore` subcommand lets you restore:
484-
485-
- Secrets
486-
- MariaDB Zuul database
487-
- Zuul project private keys
488-
489-
that has been done by the `backup` command.
486+
The `restore` subcommand lets you restore a backup created with the `backup` command.
490487

491488
For example:
492489

@@ -500,6 +497,7 @@ Available flags:
500497
|----------|------|-------|----|----|
501498
| --backup_dir | string | The path to the backup directory to restore | yes | - |
502499

500+
503501
#### wipe
504502

505503
The `wipe` subcommand can be used to remove all Software Factory instances in the provided namespace,

playbooks/files/sf-minimal.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: sf.softwarefactory-project.io/v1
2+
kind: SoftwareFactory
3+
metadata:
4+
name: my-sf
5+
spec:
6+
fqdn: "sfop.me"

roles/health-check/backup/defaults/main.yaml

Lines changed: 0 additions & 3 deletions
This file was deleted.

roles/health-check/backup/tasks/check_dir.yaml

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)