diff --git a/README.md b/README.md
index 71af2b0f..9343e101 100644
--- a/README.md
+++ b/README.md
@@ -553,16 +553,17 @@ individual endpoints with configurable descriptions and thresholds.
Alerts are configured at the endpoint level like so:
-| Parameter | Description | Default |
-|:-----------------------------|:-------------------------------------------------------------------------------|:--------------|
-| `alerts` | List of all alerts for a given endpoint. | `[]` |
-| `alerts[].type` | Type of alert. <br />See table below for all valid types. | Required `""` |
-| `alerts[].enabled` | Whether to enable the alert. | `true` |
-| `alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert. | `3` |
-| `alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved. | `2` |
-| `alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved. | `false` |
-| `alerts[].description` | Description of the alert. Will be included in the alert sent. | `""` |
-| `alerts[].provider-override` | Alerting provider configuration override for the given alert type | `{}` |
+| Parameter | Description | Default |
+|:-------------------------------------|:-------------------------------------------------------------------------------|:--------------|
+| `alerts` | List of all alerts for a given endpoint. | `[]` |
+| `alerts[].type` | Type of alert. <br />See table below for all valid types. | Required `""` |
+| `alerts[].enabled` | Whether to enable the alert. | `true` |
+| `alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert. | `3` |
+| `alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved. | `2` |
+| `alerts[].minimum-reminder-interval` | Minimum interval between reminders while the alert remains triggered (e.g. `30m`). Reminders are disabled if unset or `0`. | `0` |
+| `alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved. | `false` |
+| `alerts[].description` | Description of the alert. Will be included in the alert sent. | `""` |
+| `alerts[].provider-override` | Alerting provider configuration override for the given alert type. | `{}` |
Here's an example of what an alert configuration might look like at the endpoint level:
```yaml
diff --git a/alerting/alert/alert.go b/alerting/alert/alert.go
index 52eb5f5d..ebd34c19 100644
--- a/alerting/alert/alert.go
+++ b/alerting/alert/alert.go
@@ -6,6 +6,7 @@ import (
"errors"
"strconv"
"strings"
+ "time"
"github.com/TwiN/logr"
"gopkg.in/yaml.v3"
@@ -35,6 +36,9 @@ type Alert struct {
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
SuccessThreshold int `yaml:"success-threshold"`
+ // MinimumReminderInterval is the minimum time that must elapse between reminders for a triggered alert; reminders are disabled when it is 0
+ MinimumReminderInterval time.Duration `yaml:"minimum-reminder-interval,omitempty"`
+
// Description of the alert. Will be included in the alert sent.
//
// This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value
diff --git a/alerting/provider/ilert/ilert_test.go b/alerting/provider/ilert/ilert_test.go
index 3f18d60e..f5040e5e 100644
--- a/alerting/provider/ilert/ilert_test.go
+++ b/alerting/provider/ilert/ilert_test.go
@@ -174,21 +174,21 @@ func TestAlertProvider_BuildRequestBody(t *testing.T) {
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}},
Alert: alert.Alert{Description: &firstDescription, SuccessThreshold: 3, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
Resolved: false,
- ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":3,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
+ ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":3,"MinimumReminderInterval":0,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
},
{
Name: "resolved",
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}},
Alert: alert.Alert{Description: &firstDescription, SuccessThreshold: 4, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
Resolved: true,
- ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":4,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"resolved","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":true},{"condition":"[STATUS] == 200","success":true}],"url":""}`,
+ ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":4,"MinimumReminderInterval":0,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"resolved","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":true},{"condition":"[STATUS] == 200","success":true}],"url":""}`,
},
{
Name: "group-override",
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}, Overrides: []Override{{Group: "g", Config: Config{IntegrationKey: "different-integration-key"}}}},
Alert: alert.Alert{Description: &secondDescription, SuccessThreshold: 5, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
Resolved: false,
- ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":5,"Description":"description-2","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-2","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
+ ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":5,"MinimumReminderInterval":0,"Description":"description-2","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-2","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
},
}
diff --git a/config/endpoint/endpoint.go b/config/endpoint/endpoint.go
index fd1bc305..5153a45b 100644
--- a/config/endpoint/endpoint.go
+++ b/config/endpoint/endpoint.go
@@ -131,6 +131,9 @@ type Endpoint struct {
// NumberOfSuccessesInARow is the number of successful evaluations in a row
NumberOfSuccessesInARow int `yaml:"-"`
+
+ // LastReminderSent is the time at which an alert notification (initial alert or reminder) was last sent successfully for this endpoint.
+ LastReminderSent time.Time `yaml:"-"`
}
// IsEnabled returns whether the endpoint is enabled or not
diff --git a/watchdog/alerting.go b/watchdog/alerting.go
index 866ed667..4e09477d 100644
--- a/watchdog/alerting.go
+++ b/watchdog/alerting.go
@@ -2,7 +2,9 @@ package watchdog
import (
"errors"
+ "log"
"os"
+ "time"
"github.com/TwiN/gatus/v5/alerting"
"github.com/TwiN/gatus/v5/config/endpoint"
@@ -30,14 +32,24 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
if !endpointAlert.IsEnabled() || endpointAlert.FailureThreshold > ep.NumberOfFailuresInARow {
continue
}
- if endpointAlert.Triggered {
- logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
+ // Determine if an initial alert should be sent
+ sendInitialAlert := !endpointAlert.Triggered
+ // Determine if a reminder should be sent
+ sendReminder := endpointAlert.Triggered && endpointAlert.MinimumReminderInterval > 0 && time.Since(ep.LastReminderSent) >= endpointAlert.MinimumReminderInterval
+ // If neither initial alert nor reminder needs to be sent, skip to the next alert
+ if !sendInitialAlert && !sendReminder {
+ logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' is not due for triggering or reminding, skipping", ep.Name, endpointAlert.GetDescription())
continue
}
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
if alertProvider != nil {
logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
var err error
+ alertType := "reminder"
+ if sendInitialAlert {
+ alertType = "initial"
+ }
+ log.Printf("[watchdog.handleAlertsToTrigger] Sending %s %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", alertType, endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
err = errors.New("error")
@@ -48,7 +60,11 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
if err != nil {
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
- endpointAlert.Triggered = true
+ // Mark initial alert as triggered and update last reminder time
+ if sendInitialAlert {
+ endpointAlert.Triggered = true
+ }
+ ep.LastReminderSent = time.Now()
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
}
diff --git a/watchdog/alerting_test.go b/watchdog/alerting_test.go
index 2dffd909..21bd7434 100644
--- a/watchdog/alerting_test.go
+++ b/watchdog/alerting_test.go
@@ -3,6 +3,7 @@ package watchdog
import (
"os"
"testing"
+ "time"
"github.com/TwiN/gatus/v5/alerting"
"github.com/TwiN/gatus/v5/alerting/alert"
@@ -517,6 +518,48 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
verify(t, ep, 0, 2, false, "")
}
+func TestHandleAlertingWithMinimumReminderInterval(t *testing.T) {
+ _ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
+ defer os.Clearenv()
+
+ cfg := &config.Config{
+ Alerting: &alerting.Config{
+ Custom: &custom.AlertProvider{
+ DefaultConfig: custom.Config{
+ URL: "https://twin.sh/health",
+ Method: "GET",
+ },
+ },
+ },
+ }
+ enabled := true
+ ep := &endpoint.Endpoint{
+ URL: "https://example.com",
+ Alerts: []*alert.Alert{
+ {
+ Type: alert.TypeCustom,
+ Enabled: &enabled,
+ FailureThreshold: 2,
+ SuccessThreshold: 3,
+ SendOnResolved: &enabled,
+ Triggered: false,
+ MinimumReminderInterval: 1 * time.Second,
+ },
+ },
+ }
+
+ verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
+ HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+ verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
+ HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+ verify(t, ep, 2, 0, true, "The alert should've triggered")
+ HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+ verify(t, ep, 3, 0, true, "The alert should still be triggered")
+ HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+ verify(t, ep, 4, 0, true, "The alert should still be triggered")
+ HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
+}
+
func verify(t *testing.T, ep *endpoint.Endpoint, expectedNumberOfFailuresInARow, expectedNumberOfSuccessInARow int, expectedTriggered bool, expectedTriggeredReason string) {
if ep.NumberOfFailuresInARow != expectedNumberOfFailuresInARow {
t.Errorf("endpoint.NumberOfFailuresInARow should've been %d, got %d", expectedNumberOfFailuresInARow, ep.NumberOfFailuresInARow)