Skip to content

Fixed race condition in additional volumes reconciliation #2337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions api/v1beta2/ibmvpcmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ type IBMVPCMachineStatus struct {
// LoadBalancerPoolMembers is the status of IBM Cloud VPC Load Balancer Backend Pools the machine is a member.
// +optional
LoadBalancerPoolMembers []VPCLoadBalancerBackendPoolMember `json:"loadBalancerPoolMembers,omitempty"`

// AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
// +optional
AdditionalVolumeIDs []string `json:"additionalVolumeIDs,omitempty"`
}

// +kubebuilder:object:root=true
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 36 additions & 7 deletions cloud/scope/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -1168,12 +1168,33 @@ func (m *MachineScope) GetVolumeAttachments() ([]vpcv1.VolumeAttachment, error)
return result.VolumeAttachments, nil
}

// CreateAndAttachVolume creates a new Volume and attaches it to the instance.
func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume) error {
// GetVolumeState returns the volume's state.
func (m *MachineScope) GetVolumeState(volumeID string) (string, error) {
options := vpcv1.GetVolumeOptions{
ID: &volumeID,
}
result, _, err := m.IBMVPCClient.GetVolume(&options)
if err != nil {
return "", fmt.Errorf("could not fetch volume status: %w", err)
}
return *result.Status, err
}

// CreateVolume creates a new Volume and attaches it to the instance.
func (m *MachineScope) CreateVolume(vpcVolume *infrav1beta2.VPCVolume) (string, error) {
volumeOptions := vpcv1.CreateVolumeOptions{}
var resourceGroupID string
if m.IBMVPCCluster.Status.ResourceGroup != nil {
resourceGroupID = m.IBMVPCCluster.Status.ResourceGroup.ID
} else {
resourceGroupID = m.IBMVPCCluster.Spec.ResourceGroup
}
// TODO: EncryptionKeyCRN is not supported for now, the field is omitted from the manifest
if vpcVolume.Profile != "custom" {
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
ID: &resourceGroupID,
},
Profile: &vpcv1.VolumeProfileIdentityByName{
Name: &vpcVolume.Profile,
},
Expand All @@ -1184,6 +1205,9 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)
}
} else {
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
ID: &resourceGroupID,
},
Iops: &vpcVolume.Iops,
Profile: &vpcv1.VolumeProfileIdentityByName{
Name: &vpcVolume.Profile,
Expand All @@ -1197,19 +1221,24 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)

volumeResult, _, err := m.IBMVPCClient.CreateVolume(&volumeOptions)
if err != nil {
return fmt.Errorf("error while creating volume: %w", err)
return "", fmt.Errorf("error while creating volume: %w", err)
}

return *volumeResult.ID, nil
}

// AttachVolume attaches the given volume to the instance.
func (m *MachineScope) AttachVolume(deleteOnInstanceDelete bool, volumeID, volumeName string) error {
attachmentOptions := vpcv1.CreateInstanceVolumeAttachmentOptions{
InstanceID: &m.IBMVPCMachine.Status.InstanceID,
Volume: &vpcv1.VolumeAttachmentPrototypeVolume{
ID: volumeResult.ID,
ID: &volumeID,
},
DeleteVolumeOnInstanceDelete: &vpcVolume.DeleteVolumeOnInstanceDelete,
Name: &vpcVolume.Name,
DeleteVolumeOnInstanceDelete: &deleteOnInstanceDelete,
Name: &volumeName,
}

_, _, err = m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
_, _, err := m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
if err != nil {
err = fmt.Errorf("error while attaching volume to instance: %w", err)
}
Expand Down
114 changes: 91 additions & 23 deletions cloud/scope/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ import (
. "github.com/onsi/gomega"
)

var (
volumeName = "foo-volume"
volumeID = "foo-volume-id"
)

func newVPCMachine(clusterName, machineName string) *infrav1beta2.IBMVPCMachine {
return &infrav1beta2.IBMVPCMachine{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -1109,7 +1114,6 @@ func TestGetVolumeAttachments(t *testing.T) {
},
}
volumeAttachmentName := "foo-volume-attachment"
volumeName := "foo-volume"

testVolumeAttachments := vpcv1.VolumeAttachmentCollection{
VolumeAttachments: []vpcv1.VolumeAttachment{{
Expand Down Expand Up @@ -1145,14 +1149,57 @@ func TestGetVolumeAttachments(t *testing.T) {
})
}

func TestCreateAndAttachVolume(t *testing.T) {
func TestGetVolumeState(t *testing.T) {
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
t.Helper()
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
}

volumeName := "foo-volume"
volumeID := "foo-volume-id"
volumeStatus := vpcv1.VolumeStatusPendingConst

vpcMachine := infrav1beta2.IBMVPCMachine{
Status: infrav1beta2.IBMVPCMachineStatus{
InstanceID: "foo-instance-id",
},
}

vpcVolume := vpcv1.Volume{
Name: &volumeName,
ID: &volumeID,
Status: &volumeStatus,
}
volumeFetchError := errors.New("error while fetching volume")

t.Run("Return correct volume state", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(&vpcVolume, nil, nil)
state, err := scope.GetVolumeState(volumeID)
g.Expect(err).To(BeNil())
g.Expect(state).To(Equal(volumeStatus))
})

t.Run("Return error when GetVolumeState returns error", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(nil, nil, volumeFetchError)
state, err := scope.GetVolumeState(volumeID)
g.Expect(state).To(BeZero())
g.Expect(errors.Is(err, volumeFetchError)).To(BeTrue())
})
}

func TestCreateVolume(t *testing.T) {
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
t.Helper()
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
}

vpcMachine := infrav1beta2.IBMVPCMachine{
Status: infrav1beta2.IBMVPCMachineStatus{
Expand All @@ -1161,54 +1208,75 @@ func TestCreateAndAttachVolume(t *testing.T) {
}

infraVolume := infrav1beta2.VPCVolume{
Name: volumeName,
Name: volumeName,
Profile: "custom",
Iops: 100,
SizeGiB: 50,
}
pendingState := vpcv1.VolumeStatusPendingConst

vpcVolume := vpcv1.Volume{
Name: &volumeName,
ID: &volumeID,
Name: &volumeName,
ID: &volumeID,
Status: &pendingState,
}

volumeCreationError := errors.New("error while creating volume")

volumeAttachmentError := errors.New("error while attaching volume")

t.Run("Volume creation and attachment is successful", func(t *testing.T) {
t.Run("Volume creation is successful", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)

err := scope.CreateAndAttachVolume(&infraVolume)
id, err := scope.CreateVolume(&infraVolume)
g.Expect(err).Should(Succeed())
g.Expect(id).Should(Equal(volumeID))
})

t.Run("Volume Creation Fails", func(t *testing.T) {
t.Run("Volume creation fails", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(nil, nil, volumeCreationError)

err := scope.CreateAndAttachVolume(&infraVolume)
id, err := scope.CreateVolume(&infraVolume)
g.Expect(err).ShouldNot(Succeed())
g.Expect(errors.Is(err, volumeCreationError)).To(BeTrue())
g.Expect(id).To(BeZero())
})
}

func TestAttachVolume(t *testing.T) {
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
t.Helper()
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
}

t.Run("Volume Attachment Fails", func(t *testing.T) {
deleteOnInstanceDelete := true
vpcMachine := infrav1beta2.IBMVPCMachine{
Status: infrav1beta2.IBMVPCMachineStatus{
InstanceID: "foo-instance-id",
},
}
volumeAttachmentError := errors.New("error while attaching volume")
t.Run("Volume attachment is successful", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
g.Expect(err).Should(Succeed())
})
t.Run("Volume attachment fails", func(t *testing.T) {
g := NewWithT(t)
mockController, mockVPC := setup(t)
t.Cleanup(mockController.Finish)
scope := setupMachineScope(clusterName, machineName, mockVPC)
scope.IBMVPCMachine.Status = vpcMachine.Status
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, volumeAttachmentError)

err := scope.CreateAndAttachVolume(&infraVolume)
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
g.Expect(err).ShouldNot(Succeed())
g.Expect(errors.Is(err, volumeAttachmentError)).To(BeTrue())
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,11 @@ spec:
status:
description: IBMVPCMachineStatus defines the observed state of IBMVPCMachine.
properties:
additionalVolumeIDs:
description: AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
items:
type: string
type: array
addresses:
description: Addresses contains the IBM Cloud instance associated
addresses.
Expand Down
49 changes: 37 additions & 12 deletions controllers/ibmvpcmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,16 @@ func (r *IBMVPCMachineReconciler) reconcileNormal(machineScope *scope.MachineSco
}

// Handle Additional Volumes
err = r.reconcileAdditionalVolumes(machineScope)
var result ctrl.Result
result, err = r.reconcileAdditionalVolumes(machineScope)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error reconciling additional volumes: %w", err)
}

// With a running machine and all Load Balancer Pool Members reconciled, mark machine as ready.
machineScope.SetReady()
conditions.MarkTrue(machineScope.IBMVPCMachine, infrav1beta2.InstanceReadyCondition)
return ctrl.Result{}, nil
return result, nil
}

func (r *IBMVPCMachineReconciler) getOrCreate(scope *scope.MachineScope) (*vpcv1.Instance, error) {
Expand Down Expand Up @@ -298,14 +299,18 @@ func (r *IBMVPCMachineReconciler) reconcileDelete(scope *scope.MachineScope) (_
return ctrl.Result{}, nil
}

func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) error {
func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) (ctrl.Result, error) {
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs == nil {
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs = make([]string, len(machineScope.IBMVPCMachine.Spec.AdditionalVolumes))
}
machineVolumes := machineScope.IBMVPCMachine.Spec.AdditionalVolumes
result := ctrl.Result{}
if len(machineVolumes) == 0 {
return nil
return result, nil
}
volumeAttachmentList, err := machineScope.GetVolumeAttachments()
if err != nil {
return err
return result, err
}
volumeAttachmentNames := sets.New[string]()
for i := range volumeAttachmentList {
Expand All @@ -315,15 +320,35 @@ func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope
// Read through the list, checking if volume exists and create volume if it does not
for v := range machineVolumes {
if volumeAttachmentNames.Has(machineVolumes[v].Name) {
// volume attachment has been created so volume will eventually be attached
// volume attachment has been created so volume is already attached
continue
}

err = machineScope.CreateAndAttachVolume(machineVolumes[v])
if err != nil {
errList = append(errList, err)
continue
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] != "" {
// volume was already created, fetch volume status and attach if possible
state, err := machineScope.GetVolumeState(machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v])
if err != nil {
errList = append(errList, err)
}
switch state {
case vpcv1.VolumeStatusPendingConst, vpcv1.VolumeStatusUpdatingConst:
result = ctrl.Result{RequeueAfter: 10 * time.Second}
case vpcv1.VolumeStatusFailedConst, vpcv1.VolumeStatusUnusableConst:
errList = append(errList, fmt.Errorf("volume in unexpected state: %s", state))
case vpcv1.VolumeStatusAvailableConst:
err = machineScope.AttachVolume(machineVolumes[v].DeleteVolumeOnInstanceDelete, machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v], machineVolumes[v].Name)
if err != nil {
errList = append(errList, err)
}
}
} else {
// volume does not exist, create it and requeue so that it becomes available
volumeID, err := machineScope.CreateVolume(machineVolumes[v])
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] = volumeID
if err != nil {
errList = append(errList, err)
}
result = ctrl.Result{RequeueAfter: 10 * time.Second}
}
}
return errors.Join(errList...)
return result, errors.Join(errList...)
}
Loading