fix(suites): Load persisted triggered alerts for suite endpoints on start (#1347)

This commit is contained in:
TwiN
2025-10-20 13:31:58 -04:00
committed by GitHub
parent 91931e48b4
commit 70d7d0c54c
3 changed files with 29 additions and 3 deletions

27
main.go
View File

@@ -183,6 +183,33 @@ func initializeStorage(cfg *config.Config) {
}
}
}
// Load persisted triggered alerts for suite endpoints.
// For every endpoint of every configured suite, this first asks the store to drop triggered alerts
// that do not match the checksum of any currently enabled alert, then restores the in-memory
// triggered state of the alerts that the store still reports as triggered.
for _, suite := range cfg.Suites {
for _, ep := range suite.Endpoints {
// Collect the checksums of the alerts that are currently enabled on this endpoint
var checksums []string
for _, alert := range ep.Alerts {
if alert.IsEnabled() {
checksums = append(checksums, alert.Checksum())
}
}
// Only the enabled alerts' checksums are passed to the store; per the log message below, anything
// deleted here is a triggered alert whose configuration was changed or removed
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
if numberOfTriggeredAlertsDeleted > 0 {
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for suite endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
}
// NOTE(review): this loop visits every alert in ep.Alerts, not only the enabled ones collected above — confirm that is intended
for _, alert := range ep.Alerts {
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
if err != nil {
// Log and move on to the next alert so a single lookup failure does not prevent the others from loading
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for suite endpoint with key=%s: %s", ep.Key(), err.Error())
continue
}
if exists {
// Restore the alert's triggered flag and resolve key from the persisted values
alert.Triggered, alert.ResolveKey = true, resolveKey
// The success counter comes from the store, while the failure counter is set to this alert's failure threshold
// NOTE(review): presumably so the endpoint is treated as having already reached the threshold — confirm
// NOTE(review): if several alerts on the same endpoint are triggered, the last one processed determines these counters
ep.NumberOfSuccessesInARow, ep.NumberOfFailuresInARow = numberOfSuccessesInARow, alert.FailureThreshold
// Counted towards the total reported by the Infof call that follows this loop
numberOfPersistedTriggeredAlertsLoaded++
}
}
}
}
if numberOfPersistedTriggeredAlertsLoaded > 0 {
logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
}

View File

@@ -64,7 +64,6 @@ func executeEndpoint(ep *endpoint.Endpoint, cfg *config.Config, extraLabels []st
}
}
if !cfg.Maintenance.IsUnderMaintenance() && !inEndpointMaintenanceWindow {
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
HandleAlerting(ep, result, cfg.Alerting)
} else {
logr.Debug("[watchdog.executeEndpoint] Not handling alerting because currently in the maintenance window")

View File

@@ -50,6 +50,8 @@ func executeSuite(s *suite.Suite, cfg *config.Config, extraLabels []string) {
if cfg.Metrics {
metrics.PublishMetricsForSuite(s, result, extraLabels)
}
// Store result
UpdateSuiteStatus(s, result)
// Handle alerting for suite endpoints
for i, ep := range s.Endpoints {
if i < len(result.EndpointResults) {
@@ -72,8 +74,6 @@ func executeSuite(s *suite.Suite, cfg *config.Config, extraLabels []string) {
}
}
logr.Infof("[watchdog.executeSuite] Completed suite=%s; success=%v; errors=%d; duration=%v; endpoints_executed=%d/%d", s.Name, result.Success, len(result.Errors), result.Duration, len(result.EndpointResults), len(s.Endpoints))
// Store result in database
UpdateSuiteStatus(s, result)
}
// UpdateSuiteStatus persists the suite result in the database