Skip to content

Downtimes: mark their histories as cancelled when removed from conf file #913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions pkg/icingadb/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/icinga/icinga-go-library/types"
"github.com/icinga/icingadb/pkg/common"
v1 "github.com/icinga/icingadb/pkg/icingadb/v1"
"github.com/icinga/icingadb/pkg/icingadb/v1/history"
"github.com/icinga/icingadb/pkg/icingaredis"
"github.com/icinga/icingadb/pkg/icingaredis/telemetry"
"github.com/pkg/errors"
Expand Down Expand Up @@ -162,9 +163,32 @@ func (s Sync) ApplyDelta(ctx context.Context, delta *Delta) error {
// Delete
if len(delta.Delete) > 0 {
s.logger.Infof("Deleting %d items of type %s", len(delta.Delete), strcase.Delimited(types.Name(delta.Subject.Entity()), ' '))
g.Go(func() error {
return s.db.Delete(ctx, delta.Subject.Entity(), delta.Delete.IDs(), database.OnSuccessIncrement[any](stat))
})

ids := delta.Delete.IDs()
if _, ok := delta.Subject.Entity().(*v1.Downtime); ok {
// Those downtimes are probably removed from the configuration files, i.e. Icinga 2 won't send
// the corresponding downtime end/removed events for them. So try to mark them as cancelled manually,
// so that they don't show up as if they were still active in the UI.
//
// The reason why we don't perform this in a separate goroutine like the other ones is that we don't
// want to delete the downtimes until we've successfully updated the downtime history records. So,
// if we fail to update the downtime history records, for whatever reason, forward the error to the
// config sync group and abort the config sync.
if err := history.SyncDowntimeHistoryEndEvent(ctx, s.db, ids); err != nil {
errCh := make(chan error, 1)
errCh <- err
close(errCh)
com.ErrgroupReceive(g, errCh)
}
}

// Start the deletion process only if we haven't aborted the config sync
// due to an error in the downtime history update above.
if ctx.Err() == nil {
g.Go(func() error {
return s.db.Delete(ctx, delta.Subject.Entity(), ids, database.OnSuccessIncrement[any](stat))
})
}
}

return g.Wait()
Expand Down
44 changes: 44 additions & 0 deletions pkg/icingadb/v1/history/downtime.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package history

import (
	"context"
	"database/sql/driver"
	"fmt"
	"github.com/icinga/icinga-go-library/database"
	"github.com/icinga/icinga-go-library/types"
	"github.com/icinga/icingadb/pkg/contracts"
	"golang.org/x/sync/errgroup"
	"time"
)

type DowntimeHistoryEntity struct {
Expand Down Expand Up @@ -147,6 +150,47 @@ func (et SlaDowntimeEndTime) Value() (driver.Value, error) {
}
}

// SyncDowntimeHistoryEndEvent updates the downtime history records with the given downtime IDs to mark them as cancelled.
//
// This function is used to mark downtimes as cancelled when the downtime configuration is removed from
// the configuration files. In such cases, Icinga 2 won't send the corresponding downtime end/removed events,
// so we need to mark the downtimes as cancelled manually.
//
// Every element of downtimeIds must be a types.Binary. If an element has any other type, an error is
// returned before any statement is executed. An empty downtimeIds is a no-op. All records updated by
// a single call share the same cancel time.
func SyncDowntimeHistoryEndEvent(ctx context.Context, db *database.DB, downtimeIds []any) error {
	if len(downtimeIds) == 0 {
		return nil
	}

	// Take the timestamp once, so that all downtimes cancelled by this call carry the same cancel time.
	cancelTime := types.UnixMilli(time.Now())

	// Transform the downtime IDs into DowntimeHistory entities with the necessary fields. The channel is
	// buffered to hold all of them, so filling it here never blocks and doesn't need its own goroutine.
	downtimes := make(chan database.Entity, len(downtimeIds))
	for i, id := range downtimeIds {
		// Use a checked assertion: a failing unchecked one would panic and take the whole process down.
		downtimeID, ok := id.(types.Binary)
		if !ok {
			return fmt.Errorf("downtime ID at index %d has unexpected type %T, want types.Binary", i, id)
		}

		downtimes <- &DowntimeHistory{
			DowntimeHistoryEntity: DowntimeHistoryEntity{DowntimeId: downtimeID},
			DowntimeHistoryUpserter: DowntimeHistoryUpserter{
				CancelledBy:      types.MakeString("Downtime Config Removed"),
				HasBeenCancelled: types.Bool{Bool: true, Valid: true},
				CancelTime:       cancelTime,
			},
		}
	}
	// Signal the consumer below that no more entities will follow.
	close(downtimes)

	g, ctx := errgroup.WithContext(ctx)
	g.Go(func() error {
		// Each downtime removed in this manner should never have been cancelled before, so we
		// don't need extra where clauses in the update statement other than the downtime ID.
		stmt := `UPDATE downtime_history SET cancel_time = :cancel_time, has_been_cancelled = :has_been_cancelled, cancelled_by = :cancelled_by WHERE downtime_id = :downtime_id`

		return db.NamedBulkExecTx(ctx, stmt, db.Options.MaxRowsPerTransaction, db.GetSemaphoreForTable("downtime_history"), downtimes)
	})

	// TODO: Fake the downtime end event in the regular history and SLA downtime history tables.

	return g.Wait()
}

// Assert interface compliance.
var (
_ database.Entity = (*DowntimeHistoryEntity)(nil)
Expand Down
Loading