Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
daf67dc1e6 | ||
|
|
3ebed01b4c | ||
|
|
a2f5516b06 | ||
|
|
a68e7e39bd | ||
|
|
f9d7320a2a | ||
|
|
c374649019 | ||
|
|
f6e938746f | ||
|
|
2c6fede468 | ||
|
|
9205cb2890 | ||
|
|
6a9cbb1728 | ||
|
|
4667fdbc15 |
3
.github/workflows/labeler.yml
vendored
3
.github/workflows/labeler.yml
vendored
@@ -26,6 +26,8 @@ jobs:
|
||||
gh issue edit "$NUMBER" --add-label "feature"
|
||||
elif [[ $TITLE == "fix"* ]]; then
|
||||
gh issue edit "$NUMBER" --add-label "bug"
|
||||
elif [[ $TITLE == "docs"* ]]; then
|
||||
gh issue edit "$NUMBER" --add-label "documentation"
|
||||
fi
|
||||
if [[ $TITLE == *"alerting"* || $TITLE == *"provider"* || $TITLE == *"alert"* ]]; then
|
||||
gh issue edit "$NUMBER" --add-label "area/alerting"
|
||||
@@ -39,4 +41,3 @@ jobs:
|
||||
if [[ $TITLE == *"metric"* || $TITLE == *"prometheus"* ]]; then
|
||||
gh issue edit "$NUMBER" --add-label "area/metrics"
|
||||
fi
|
||||
|
||||
|
||||
2
.github/workflows/publish-latest.yml
vendored
2
.github/workflows/publish-latest.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
publish-latest:
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ (github.event.workflow_run.conclusion == 'success') && (github.event.workflow_run.head_repository.full_name == github.repository) }}
|
||||
timeout-minutes: 120
|
||||
timeout-minutes: 240
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
|
||||
50
README.md
50
README.md
@@ -84,6 +84,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
||||
- [OIDC](#oidc)
|
||||
- [TLS Encryption](#tls-encryption)
|
||||
- [Metrics](#metrics)
|
||||
- [Custom Labels](#custom-labels)
|
||||
- [Connectivity](#connectivity)
|
||||
- [Remote instances (EXPERIMENTAL)](#remote-instances-experimental)
|
||||
- [Deployment](#deployment)
|
||||
@@ -123,6 +124,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
||||
- [Response time (chart)](#response-time-chart)
|
||||
- [How to change the color thresholds of the response time badge](#how-to-change-the-color-thresholds-of-the-response-time-badge)
|
||||
- [API](#api)
|
||||
- [Interacting with the API programmatically](#interacting-with-the-api-programmatically)
|
||||
- [Raw Data](#raw-data)
|
||||
- [Uptime](#uptime-1)
|
||||
- [Response Time](#response-time-1)
|
||||
@@ -293,7 +295,7 @@ You may use the following placeholders in the body (`endpoints[].body`):
|
||||
- `[ENDPOINT_GROUP]` (resolved from `endpoints[].group`)
|
||||
- `[ENDPOINT_URL]` (resolved from `endpoints[].url`)
|
||||
- `[LOCAL_ADDRESS]` (resolves to the local IP and port like `192.0.2.1:25` or `[2001:db8::1]:80`)
|
||||
- `[RANDOM_STRING_N]` (resolves to a random string of numbers and letters of length N)
|
||||
- `[RANDOM_STRING_N]` (resolves to a random string of numbers and letters of length N (max: 8192))
|
||||
|
||||
|
||||
### External Endpoints
|
||||
@@ -552,16 +554,17 @@ individual endpoints with configurable descriptions and thresholds.
|
||||
|
||||
Alerts are configured at the endpoint level like so:
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------|:-------------------------------------------------------------------------------|:--------------|
|
||||
| `alerts` | List of all alerts for a given endpoint. | `[]` |
|
||||
| `alerts[].type` | Type of alert. <br />See table below for all valid types. | Required `""` |
|
||||
| `alerts[].enabled` | Whether to enable the alert. | `true` |
|
||||
| `alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert. | `3` |
|
||||
| `alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved. | `2` |
|
||||
| `alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved. | `false` |
|
||||
| `alerts[].description` | Description of the alert. Will be included in the alert sent. | `""` |
|
||||
| `alerts[].provider-override` | Alerting provider configuration override for the given alert type | `{}` |
|
||||
| Parameter | Description | Default |
|
||||
|:-------------------------------------|:-------------------------------------------------------------------------------|:--------------|
|
||||
| `alerts` | List of all alerts for a given endpoint. | `[]` |
|
||||
| `alerts[].type` | Type of alert. <br />See table below for all valid types. | Required `""` |
|
||||
| `alerts[].enabled` | Whether to enable the alert. | `true` |
|
||||
| `alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert. | `3` |
|
||||
| `alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved. | `2` |
|
||||
| `alerts[].minimum-reminder-interval` | Configuration for setting an interval between reminders. (e.g. 30m, 24h) | `""` |
|
||||
| `alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved. | `false` |
|
||||
| `alerts[].description` | Description of the alert. Will be included in the alert sent. | `""` |
|
||||
| `alerts[].provider-override` | Alerting provider configuration override for the given alert type | `{}` |
|
||||
|
||||
Here's an example of what an alert configuration might look like at the endpoint level:
|
||||
```yaml
|
||||
@@ -1949,6 +1952,23 @@ endpoint on the same port your application is configured to run on (`web.port`).
|
||||
|
||||
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.
|
||||
|
||||
#### Custom Labels
|
||||
|
||||
Added a Labels field to the Config and Endpoint structs to support key-value pairs for metrics. Updated the Prometheus metrics initialization to include dynamic labels from the configuration. See the example below:
|
||||
|
||||
```yaml
|
||||
endpoints:
|
||||
- name: front-end
|
||||
group: core
|
||||
url: "https://twin.sh/health"
|
||||
interval: 5m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 150"
|
||||
labels:
|
||||
environment: staging
|
||||
```
|
||||
|
||||
### Connectivity
|
||||
| Parameter | Description | Default |
|
||||
@@ -2183,7 +2203,7 @@ This works for SCTP based application.
|
||||
|
||||
|
||||
### Monitoring a WebSocket endpoint
|
||||
By prefixing `endpoints[].url` with `ws://` or `wss://`, you can monitor WebSocket endpoints:
|
||||
By prefixing `endpoints[].url` with `ws://` or `wss://`, you can monitor WebSocket endpoints at a very basic level:
|
||||
```yaml
|
||||
endpoints:
|
||||
- name: example
|
||||
@@ -2641,6 +2661,11 @@ Gzip compression will be used if the `Accept-Encoding` HTTP header contains `gzi
|
||||
The API will return a JSON payload with the `Content-Type` response header set to `application/json`.
|
||||
No such header is required to query the API.
|
||||
|
||||
|
||||
#### Interacting with the API programmatically
|
||||
See [TwiN/gatus-sdk](https://github.com/TwiN/gatus-sdk)
|
||||
|
||||
|
||||
#### Raw Data
|
||||
Gatus exposes the raw data for one of your monitored endpoints.
|
||||
This allows you to track and aggregate data in your own applications for monitored endpoints. For instance if you want to track uptime for a period longer than 7 days.
|
||||
@@ -2673,6 +2698,7 @@ For instance, if you want the raw response time data for the last 24 hours from
|
||||
https://example.com/api/v1/endpoints/core_frontend/response-times/24h
|
||||
```
|
||||
|
||||
|
||||
### Installing as binary
|
||||
You can download Gatus as a binary using the following command:
|
||||
```
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/logr"
|
||||
"gopkg.in/yaml.v3"
|
||||
@@ -35,6 +36,9 @@ type Alert struct {
|
||||
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
|
||||
SuccessThreshold int `yaml:"success-threshold"`
|
||||
|
||||
// MinimumReminderInterval is the interval between reminders
|
||||
MinimumReminderInterval time.Duration `yaml:"minimum-reminder-interval,omitempty"`
|
||||
|
||||
// Description of the alert. Will be included in the alert sent.
|
||||
//
|
||||
// This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value
|
||||
|
||||
@@ -174,21 +174,21 @@ func TestAlertProvider_BuildRequestBody(t *testing.T) {
|
||||
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}},
|
||||
Alert: alert.Alert{Description: &firstDescription, SuccessThreshold: 3, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
|
||||
Resolved: false,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":3,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":3,"MinimumReminderInterval":0,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
|
||||
},
|
||||
{
|
||||
Name: "resolved",
|
||||
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}},
|
||||
Alert: alert.Alert{Description: &firstDescription, SuccessThreshold: 4, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
|
||||
Resolved: true,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":4,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"resolved","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":true},{"condition":"[STATUS] == 200","success":true}],"url":""}`,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":4,"MinimumReminderInterval":0,"Description":"description-1","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"resolved","title":"endpoint-name","details":"description-1","condition_results":[{"condition":"[CONNECTED] == true","success":true},{"condition":"[STATUS] == 200","success":true}],"url":""}`,
|
||||
},
|
||||
{
|
||||
Name: "group-override",
|
||||
Provider: AlertProvider{DefaultConfig: Config{IntegrationKey: "some-integration-key"}, Overrides: []Override{{Group: "g", Config: Config{IntegrationKey: "different-integration-key"}}}},
|
||||
Alert: alert.Alert{Description: &secondDescription, SuccessThreshold: 5, FailureThreshold: 3, ResolveKey: "123", Type: "ilert", SendOnResolved: &sendOnResolved},
|
||||
Resolved: false,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":5,"Description":"description-2","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-2","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
|
||||
ExpectedBody: `{"alert":{"Type":"ilert","Enabled":null,"FailureThreshold":3,"SuccessThreshold":5,"MinimumReminderInterval":0,"Description":"description-2","SendOnResolved":true,"ProviderOverride":null,"ResolveKey":"123","Triggered":false},"name":"endpoint-name","group":"","status":"firing","title":"endpoint-name","details":"description-2","condition_results":[{"condition":"[CONNECTED] == true","success":false},{"condition":"[STATUS] == 200","success":false}],"url":""}`,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
)
|
||||
|
||||
func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
|
||||
extraLabels := cfg.GetUniqueExtraMetricLabels()
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Check if the success query parameter is present
|
||||
success, exists := c.Queries()["success"]
|
||||
@@ -74,7 +75,7 @@ func CreateExternalEndpointResult(cfg *config.Config) fiber.Handler {
|
||||
externalEndpoint.NumberOfFailuresInARow = convertedEndpoint.NumberOfFailuresInARow
|
||||
}
|
||||
if cfg.Metrics {
|
||||
metrics.PublishMetricsForEndpoint(convertedEndpoint, result)
|
||||
metrics.PublishMetricsForEndpoint(convertedEndpoint, result, extraLabels)
|
||||
}
|
||||
// Return the result
|
||||
return c.Status(200).SendString("")
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"github.com/TwiN/gatus/v5/security"
|
||||
"github.com/TwiN/gatus/v5/storage"
|
||||
"github.com/TwiN/logr"
|
||||
"github.com/gofiber/fiber/v2/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -103,6 +102,25 @@ type Config struct {
|
||||
lastFileModTime time.Time // last modification time
|
||||
}
|
||||
|
||||
// GetUniqueExtraMetricLabels returns a slice of unique metric labels from all enabled endpoints
|
||||
// in the configuration. It iterates through each endpoint, checks if it is enabled,
|
||||
// and then collects unique labels from the endpoint's labels map.
|
||||
func (config *Config) GetUniqueExtraMetricLabels() []string {
|
||||
labels := make([]string, 0)
|
||||
for _, ep := range config.Endpoints {
|
||||
if !ep.IsEnabled() {
|
||||
continue
|
||||
}
|
||||
for label := range ep.ExtraLabels {
|
||||
if contains(labels, label) {
|
||||
continue
|
||||
}
|
||||
labels = append(labels, label)
|
||||
}
|
||||
}
|
||||
return labels
|
||||
}
|
||||
|
||||
func (config *Config) GetEndpointByKey(key string) *endpoint.Endpoint {
|
||||
for i := 0; i < len(config.Endpoints); i++ {
|
||||
ep := config.Endpoints[i]
|
||||
@@ -444,7 +462,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
// Validate the endpoint alert's overrides, if applicable
|
||||
if len(endpointAlert.ProviderOverride) > 0 {
|
||||
if err = alertProvider.ValidateOverrides(ep.Group, endpointAlert); err != nil {
|
||||
log.Warnf("[config.validateAlertingConfig] endpoint with key=%s has invalid overrides for provider=%s: %s", ep.Key(), alertType, err.Error())
|
||||
logr.Warnf("[config.validateAlertingConfig] endpoint with key=%s has invalid overrides for provider=%s: %s", ep.Key(), alertType, err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -458,7 +476,7 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
||||
// Validate the endpoint alert's overrides, if applicable
|
||||
if len(endpointAlert.ProviderOverride) > 0 {
|
||||
if err = alertProvider.ValidateOverrides(ee.Group, endpointAlert); err != nil {
|
||||
log.Warnf("[config.validateAlertingConfig] endpoint with key=%s has invalid overrides for provider=%s: %s", ee.Key(), alertType, err.Error())
|
||||
logr.Warnf("[config.validateAlertingConfig] endpoint with key=%s has invalid overrides for provider=%s: %s", ee.Key(), alertType, err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,7 +124,7 @@ endpoints:
|
||||
name: "dir-with-two-config-files",
|
||||
configPath: dir,
|
||||
pathAndFiles: map[string]string{
|
||||
"config.yaml": `endpoints:
|
||||
"config.yaml": `endpoints:
|
||||
- name: one
|
||||
url: https://example.com
|
||||
conditions:
|
||||
@@ -135,7 +135,7 @@ endpoints:
|
||||
url: https://example.org
|
||||
conditions:
|
||||
- "len([BODY]) > 0"`,
|
||||
"config.yml": `endpoints:
|
||||
"config.yml": `endpoints:
|
||||
- name: three
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
@@ -237,7 +237,7 @@ endpoints:
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
for path, content := range scenario.pathAndFiles {
|
||||
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0644); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0o644); err != nil {
|
||||
t.Fatalf("[%s] failed to write file: %v", scenario.name, err)
|
||||
}
|
||||
}
|
||||
@@ -282,7 +282,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
`), 0644)
|
||||
`), 0o644)
|
||||
|
||||
t.Run("config-file-as-config-path", func(t *testing.T) {
|
||||
config, err := LoadConfiguration(configFilePath)
|
||||
@@ -298,7 +298,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
|
||||
- name: website
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`), 0644); err != nil {
|
||||
- "[STATUS] == 200"`), 0o644); err != nil {
|
||||
t.Fatalf("failed to overwrite config file: %v", err)
|
||||
}
|
||||
if !config.HasLoadedConfigurationBeenModified() {
|
||||
@@ -315,7 +315,7 @@ func TestConfig_HasLoadedConfigurationBeenModified(t *testing.T) {
|
||||
}
|
||||
time.Sleep(time.Second) // Because the file mod time only has second precision, we have to wait for a second
|
||||
// Update the config file
|
||||
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0644); err != nil {
|
||||
if err = os.WriteFile(filepath.Join(dir, "metrics.yaml"), []byte(`metrics: true`), 0o644); err != nil {
|
||||
t.Fatalf("failed to overwrite config file: %v", err)
|
||||
}
|
||||
if !config.HasLoadedConfigurationBeenModified() {
|
||||
@@ -713,7 +713,7 @@ func TestParseAndValidateBadConfigBytes(t *testing.T) {
|
||||
_, err := parseAndValidateConfigBytes([]byte(`
|
||||
badconfig:
|
||||
- asdsa: w0w
|
||||
usadasdrl: asdxzczxc
|
||||
usadasdrl: asdxzczxc
|
||||
asdas:
|
||||
- soup
|
||||
`))
|
||||
@@ -1943,3 +1943,114 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfig_GetUniqueExtraMetricLabels(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config *Config
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "no-endpoints",
|
||||
config: &Config{
|
||||
Endpoints: []*endpoint.Endpoint{},
|
||||
},
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "single-endpoint-no-labels",
|
||||
config: &Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "endpoint1",
|
||||
URL: "https://example.com",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "single-endpoint-with-labels",
|
||||
config: &Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "endpoint1",
|
||||
URL: "https://example.com",
|
||||
Enabled: toPtr(true),
|
||||
ExtraLabels: map[string]string{
|
||||
"env": "production",
|
||||
"team": "backend",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: []string{"env", "team"},
|
||||
},
|
||||
{
|
||||
name: "multiple-endpoints-with-labels",
|
||||
config: &Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "endpoint1",
|
||||
URL: "https://example.com",
|
||||
Enabled: toPtr(true),
|
||||
ExtraLabels: map[string]string{
|
||||
"env": "production",
|
||||
"team": "backend",
|
||||
"module": "auth",
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "endpoint2",
|
||||
URL: "https://example.org",
|
||||
Enabled: toPtr(true),
|
||||
ExtraLabels: map[string]string{
|
||||
"env": "staging",
|
||||
"team": "frontend",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: []string{"env", "team", "module"},
|
||||
},
|
||||
{
|
||||
name: "multiple-endpoints-with-some-disabled",
|
||||
config: &Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "endpoint1",
|
||||
URL: "https://example.com",
|
||||
Enabled: toPtr(true),
|
||||
ExtraLabels: map[string]string{
|
||||
"env": "production",
|
||||
"team": "backend",
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "endpoint2",
|
||||
URL: "https://example.org",
|
||||
Enabled: toPtr(false),
|
||||
ExtraLabels: map[string]string{
|
||||
"module": "auth",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: []string{"env", "team"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
labels := tt.config.GetUniqueExtraMetricLabels()
|
||||
if len(labels) != len(tt.expected) {
|
||||
t.Errorf("expected %d labels, got %d", len(tt.expected), len(labels))
|
||||
}
|
||||
for _, label := range tt.expected {
|
||||
if !contains(labels, label) {
|
||||
t.Errorf("expected label %s to be present", label)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +99,9 @@ type Endpoint struct {
|
||||
// Headers of the request
|
||||
Headers map[string]string `yaml:"headers,omitempty"`
|
||||
|
||||
// ExtraLabels are key-value pairs that can be used to metric the endpoint
|
||||
ExtraLabels map[string]string `yaml:"extra-labels,omitempty"`
|
||||
|
||||
// Interval is the duration to wait between every status check
|
||||
Interval time.Duration `yaml:"interval,omitempty"`
|
||||
|
||||
@@ -128,6 +131,9 @@ type Endpoint struct {
|
||||
|
||||
// NumberOfSuccessesInARow is the number of successful evaluations in a row
|
||||
NumberOfSuccessesInARow int `yaml:"-"`
|
||||
|
||||
// LastReminderSent is the time at which the last reminder was sent for this endpoint.
|
||||
LastReminderSent time.Time `yaml:"-"`
|
||||
}
|
||||
|
||||
// IsEnabled returns whether the endpoint is enabled or not
|
||||
@@ -338,6 +344,9 @@ func (e *Endpoint) getParsedBody() string {
|
||||
if err == nil {
|
||||
body = randRegex.ReplaceAllStringFunc(body, func(match string) string {
|
||||
n, _ := strconv.Atoi(match[15 : len(match)-1])
|
||||
if n > 8192 {
|
||||
n = 8192 // Limit the length of the random string to 8192 bytes to avoid excessive memory usage
|
||||
}
|
||||
const availableCharacterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
b := make([]byte, n)
|
||||
for i := range b {
|
||||
@@ -417,8 +426,7 @@ func (e *Endpoint) call(result *Result) {
|
||||
} else if endpointType == TypeSSH {
|
||||
// If there's no username/password specified, attempt to validate just the SSH banner
|
||||
if len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0 {
|
||||
result.Connected, result.HTTPStatus, err =
|
||||
client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
|
||||
result.Connected, result.HTTPStatus, err = client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
|
||||
if err != nil {
|
||||
result.AddError(err.Error())
|
||||
return
|
||||
|
||||
16
config/util.go
Normal file
16
config/util.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package config
|
||||
|
||||
// toPtr returns a pointer to the given value
|
||||
func toPtr[T any](value T) *T {
|
||||
return &value
|
||||
}
|
||||
|
||||
// contains checks if a key exists in the slice
|
||||
func contains[T comparable](slice []T, key T) bool {
|
||||
for _, item := range slice {
|
||||
if item == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
8
go.mod
8
go.mod
@@ -10,7 +10,7 @@ require (
|
||||
github.com/TwiN/health v1.6.0
|
||||
github.com/TwiN/logr v0.3.1
|
||||
github.com/TwiN/whois v1.1.11
|
||||
github.com/aws/aws-sdk-go v1.55.7
|
||||
github.com/aws/aws-sdk-go v1.55.8
|
||||
github.com/coreos/go-oidc/v3 v3.14.1
|
||||
github.com/gofiber/fiber/v2 v2.52.8
|
||||
github.com/google/go-github/v48 v48.2.0
|
||||
@@ -28,7 +28,7 @@ require (
|
||||
google.golang.org/api v0.242.0
|
||||
gopkg.in/mail.v2 v2.3.1
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
modernc.org/sqlite v1.38.0
|
||||
modernc.org/sqlite v1.38.2
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -71,7 +71,7 @@ require (
|
||||
go.opentelemetry.io/otel v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.36.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
||||
golang.org/x/image v0.18.0 // indirect
|
||||
golang.org/x/mod v0.25.0 // indirect
|
||||
golang.org/x/sync v0.16.0 // indirect
|
||||
@@ -82,7 +82,7 @@ require (
|
||||
google.golang.org/grpc v1.73.0 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
|
||||
modernc.org/libc v1.65.10 // indirect
|
||||
modernc.org/libc v1.66.3 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
26
go.sum
26
go.sum
@@ -22,8 +22,8 @@ github.com/TwiN/whois v1.1.11 h1:lYiYgPRSQ3kH8sQfgHcBY/uNSGGvWPRikEjn+LJZ9+Q=
|
||||
github.com/TwiN/whois v1.1.11/go.mod h1:TjipCMpJRAJYKmtz/rXQBU6UGxMh6bk8SHazu7OMnQE=
|
||||
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/aws/aws-sdk-go v1.55.7 h1:UJrkFq7es5CShfBwlWAC8DA077vp8PyVbQd3lqLiztE=
|
||||
github.com/aws/aws-sdk-go v1.55.7/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
|
||||
github.com/aws/aws-sdk-go v1.55.8 h1:JRmEUbU52aJQZ2AjX4q4Wu7t4uZjOu71uyNmaWlUkJQ=
|
||||
github.com/aws/aws-sdk-go v1.55.8/go.mod h1:ZkViS9AqA6otK+JBBNH2++sx1sgxrPKcSzPPvQkUtXk=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
@@ -159,8 +159,8 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
|
||||
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
|
||||
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM=
|
||||
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||
golang.org/x/image v0.18.0 h1:jGzIakQa/ZXI1I0Fxvaa9W7yP25TqT6cHIHn+6CqvSQ=
|
||||
golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
@@ -259,16 +259,18 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
modernc.org/cc/v4 v4.26.1 h1:+X5NtzVBn0KgsBCBe+xkDC7twLb/jNVj9FPgiwSQO3s=
|
||||
modernc.org/cc/v4 v4.26.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||
modernc.org/cc/v4 v4.26.2 h1:991HMkLjJzYBIfha6ECZdjrIYz2/1ayr+FL8GN+CNzM=
|
||||
modernc.org/cc/v4 v4.26.2/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||
modernc.org/ccgo/v4 v4.28.0 h1:rjznn6WWehKq7dG4JtLRKxb52Ecv8OUGah8+Z/SfpNU=
|
||||
modernc.org/ccgo/v4 v4.28.0/go.mod h1:JygV3+9AV6SmPhDasu4JgquwU81XAKLd3OKTUDNOiKE=
|
||||
modernc.org/fileutil v1.3.3 h1:3qaU+7f7xxTUmvU1pJTZiDLAIoJVdUSSauJNHg9yXoA=
|
||||
modernc.org/fileutil v1.3.3/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
|
||||
modernc.org/fileutil v1.3.8 h1:qtzNm7ED75pd1C7WgAGcK4edm4fvhtBsEiI/0NQ54YM=
|
||||
modernc.org/fileutil v1.3.8/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
|
||||
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
|
||||
modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
|
||||
modernc.org/libc v1.65.10 h1:ZwEk8+jhW7qBjHIT+wd0d9VjitRyQef9BnzlzGwMODc=
|
||||
modernc.org/libc v1.65.10/go.mod h1:StFvYpx7i/mXtBAfVOjaU0PWZOvIRoZSgXhrwXzr8Po=
|
||||
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
||||
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
||||
modernc.org/libc v1.66.3 h1:cfCbjTUcdsKyyZZfEUKfoHcP3S0Wkvz3jgSzByEWVCQ=
|
||||
modernc.org/libc v1.66.3/go.mod h1:XD9zO8kt59cANKvHPXpx7yS2ELPheAey0vjIuZOhOU8=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||
@@ -277,8 +279,8 @@ modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
||||
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||
modernc.org/sqlite v1.38.0 h1:+4OrfPQ8pxHKuWG4md1JpR/EYAh3Md7TdejuuzE7EUI=
|
||||
modernc.org/sqlite v1.38.0/go.mod h1:1Bj+yES4SVvBZ4cBOpVZ6QgesMCKpJZDq0nxYzOpmNE=
|
||||
modernc.org/sqlite v1.38.2 h1:Aclu7+tgjgcQVShZqim41Bbw9Cho0y/7WzYptXqkEek=
|
||||
modernc.org/sqlite v1.38.2/go.mod h1:cPTJYSlgg3Sfg046yBShXENNtPrWrDX8bsbAQBzgQ5E=
|
||||
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
|
||||
3
main.go
3
main.go
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
"github.com/TwiN/gatus/v5/controller"
|
||||
"github.com/TwiN/gatus/v5/metrics"
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
"github.com/TwiN/gatus/v5/watchdog"
|
||||
"github.com/TwiN/logr"
|
||||
@@ -49,6 +50,7 @@ func main() {
|
||||
|
||||
func start(cfg *config.Config) {
|
||||
go controller.Handle(cfg)
|
||||
metrics.InitializePrometheusMetrics(cfg, nil)
|
||||
watchdog.Monitor(cfg)
|
||||
go listenToConfigurationFileChanges(cfg)
|
||||
}
|
||||
@@ -56,6 +58,7 @@ func start(cfg *config.Config) {
|
||||
func stop(cfg *config.Config) {
|
||||
watchdog.Shutdown(cfg)
|
||||
controller.Shutdown()
|
||||
metrics.UnregisterPrometheusMetrics()
|
||||
}
|
||||
|
||||
func save() {
|
||||
|
||||
@@ -3,82 +3,146 @@ package metrics
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
)
|
||||
|
||||
const namespace = "gatus" // The prefix of the metrics
|
||||
|
||||
var (
|
||||
initializedMetrics bool // Whether the metrics have been initialized
|
||||
|
||||
resultTotal *prometheus.CounterVec
|
||||
resultDurationSeconds *prometheus.GaugeVec
|
||||
resultConnectedTotal *prometheus.CounterVec
|
||||
resultCodeTotal *prometheus.CounterVec
|
||||
resultCertificateExpirationSeconds *prometheus.GaugeVec
|
||||
resultEndpointSuccess *prometheus.GaugeVec
|
||||
|
||||
// Track if metrics have been initialized to prevent duplicate registration
|
||||
metricsInitialized bool
|
||||
currentRegisterer prometheus.Registerer
|
||||
)
|
||||
|
||||
func initializePrometheusMetrics() {
|
||||
resultTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
// UnregisterPrometheusMetrics unregisters all previously registered metrics
|
||||
func UnregisterPrometheusMetrics() {
|
||||
if !metricsInitialized || currentRegisterer == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Unregister all metrics if they exist
|
||||
if resultTotal != nil {
|
||||
currentRegisterer.Unregister(resultTotal)
|
||||
}
|
||||
if resultDurationSeconds != nil {
|
||||
currentRegisterer.Unregister(resultDurationSeconds)
|
||||
}
|
||||
if resultConnectedTotal != nil {
|
||||
currentRegisterer.Unregister(resultConnectedTotal)
|
||||
}
|
||||
if resultCodeTotal != nil {
|
||||
currentRegisterer.Unregister(resultCodeTotal)
|
||||
}
|
||||
if resultCertificateExpirationSeconds != nil {
|
||||
currentRegisterer.Unregister(resultCertificateExpirationSeconds)
|
||||
}
|
||||
if resultEndpointSuccess != nil {
|
||||
currentRegisterer.Unregister(resultEndpointSuccess)
|
||||
}
|
||||
|
||||
metricsInitialized = false
|
||||
currentRegisterer = nil
|
||||
}
|
||||
|
||||
func InitializePrometheusMetrics(cfg *config.Config, reg prometheus.Registerer) {
|
||||
// If metrics are already initialized, unregister them first
|
||||
if metricsInitialized {
|
||||
UnregisterPrometheusMetrics()
|
||||
}
|
||||
|
||||
if reg == nil {
|
||||
reg = prometheus.DefaultRegisterer
|
||||
}
|
||||
|
||||
// Store the registerer for later unregistration
|
||||
currentRegisterer = reg
|
||||
|
||||
extraLabels := cfg.GetUniqueExtraMetricLabels()
|
||||
resultTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_total",
|
||||
Help: "Number of results per endpoint",
|
||||
}, []string{"key", "group", "name", "type", "success"})
|
||||
resultDurationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
}, append([]string{"key", "group", "name", "type", "success"}, extraLabels...))
|
||||
reg.MustRegister(resultTotal)
|
||||
|
||||
resultDurationSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_duration_seconds",
|
||||
Help: "Duration of the request in seconds",
|
||||
}, []string{"key", "group", "name", "type"})
|
||||
resultConnectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultDurationSeconds)
|
||||
|
||||
resultConnectedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_connected_total",
|
||||
Help: "Total number of results in which a connection was successfully established",
|
||||
}, []string{"key", "group", "name", "type"})
|
||||
resultCodeTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultConnectedTotal)
|
||||
|
||||
resultCodeTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_code_total",
|
||||
Help: "Total number of results by code",
|
||||
}, []string{"key", "group", "name", "type", "code"})
|
||||
resultCertificateExpirationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
}, append([]string{"key", "group", "name", "type", "code"}, extraLabels...))
|
||||
reg.MustRegister(resultCodeTotal)
|
||||
|
||||
resultCertificateExpirationSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_certificate_expiration_seconds",
|
||||
Help: "Number of seconds until the certificate expires",
|
||||
}, []string{"key", "group", "name", "type"})
|
||||
resultEndpointSuccess = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultCertificateExpirationSeconds)
|
||||
|
||||
resultEndpointSuccess = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_endpoint_success",
|
||||
Help: "Displays whether or not the endpoint was a success",
|
||||
}, []string{"key", "group", "name", "type"})
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultEndpointSuccess)
|
||||
|
||||
// Mark as initialized
|
||||
metricsInitialized = true
|
||||
}
|
||||
|
||||
// PublishMetricsForEndpoint publishes metrics for the given endpoint and its result.
|
||||
// These metrics will be exposed at /metrics if the metrics are enabled
|
||||
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result) {
|
||||
if !initializedMetrics {
|
||||
initializePrometheusMetrics()
|
||||
initializedMetrics = true
|
||||
func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result, extraLabels []string) {
|
||||
labelValues := []string{}
|
||||
for _, label := range extraLabels {
|
||||
if value, ok := ep.ExtraLabels[label]; ok {
|
||||
labelValues = append(labelValues, value)
|
||||
} else {
|
||||
labelValues = append(labelValues, "")
|
||||
}
|
||||
}
|
||||
|
||||
endpointType := ep.Type()
|
||||
resultTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)).Inc()
|
||||
resultDurationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.Duration.Seconds())
|
||||
resultTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.FormatBool(result.Success)}, labelValues...)...).Inc()
|
||||
resultDurationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.Duration.Seconds())
|
||||
if result.Connected {
|
||||
resultConnectedTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Inc()
|
||||
resultConnectedTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Inc()
|
||||
}
|
||||
if result.DNSRCode != "" {
|
||||
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode).Inc()
|
||||
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), result.DNSRCode}, labelValues...)...).Inc()
|
||||
}
|
||||
if result.HTTPStatus != 0 {
|
||||
resultCodeTotal.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)).Inc()
|
||||
resultCodeTotal.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType), strconv.Itoa(result.HTTPStatus)}, labelValues...)...).Inc()
|
||||
}
|
||||
if result.CertificateExpiration != 0 {
|
||||
resultCertificateExpirationSeconds.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(result.CertificateExpiration.Seconds())
|
||||
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
|
||||
}
|
||||
if result.Success {
|
||||
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(1)
|
||||
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
|
||||
} else {
|
||||
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(0)
|
||||
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,13 +5,110 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/config"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint/dns"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
)
|
||||
|
||||
// TestInitializePrometheusMetrics tests metrics initialization with extraLabels.
|
||||
// Note: Because of the global Prometheus registry, this test can only safely verify one label set per process.
|
||||
// If the function is called with a different set of labels for the same metric, a panic will occur.
|
||||
func TestInitializePrometheusMetrics(t *testing.T) {
|
||||
cfgWithExtras := &config.Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "TestEP",
|
||||
Group: "G",
|
||||
URL: "http://x/",
|
||||
ExtraLabels: map[string]string{
|
||||
"foo": "foo-val",
|
||||
"hello": "world-val",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
reg := prometheus.NewRegistry()
|
||||
InitializePrometheusMetrics(cfgWithExtras, reg)
|
||||
// Metrics variables should be non-nil
|
||||
if resultTotal == nil {
|
||||
t.Error("resultTotal metric not initialized")
|
||||
}
|
||||
if resultDurationSeconds == nil {
|
||||
t.Error("resultDurationSeconds metric not initialized")
|
||||
}
|
||||
if resultConnectedTotal == nil {
|
||||
t.Error("resultConnectedTotal metric not initialized")
|
||||
}
|
||||
if resultCodeTotal == nil {
|
||||
t.Error("resultCodeTotal metric not initialized")
|
||||
}
|
||||
if resultCertificateExpirationSeconds == nil {
|
||||
t.Error("resultCertificateExpirationSeconds metric not initialized")
|
||||
}
|
||||
if resultEndpointSuccess == nil {
|
||||
t.Error("resultEndpointSuccess metric not initialized")
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Errorf("resultTotal.WithLabelValues panicked: %v", r)
|
||||
}
|
||||
}()
|
||||
_ = resultTotal.WithLabelValues("k", "g", "n", "ty", "true", "fval", "hval")
|
||||
}
|
||||
|
||||
// TestPublishMetricsForEndpoint_withExtraLabels ensures extraLabels are included in the exported metrics.
|
||||
func TestPublishMetricsForEndpoint_withExtraLabels(t *testing.T) {
|
||||
// Only test one label set per process due to Prometheus registry limits.
|
||||
reg := prometheus.NewRegistry()
|
||||
InitializePrometheusMetrics(&config.Config{
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
Name: "ep-extra",
|
||||
URL: "https://sample.com",
|
||||
ExtraLabels: map[string]string{
|
||||
"foo": "my-foo",
|
||||
"bar": "my-bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
}, reg)
|
||||
|
||||
ep := &endpoint.Endpoint{
|
||||
Name: "ep-extra",
|
||||
Group: "g1",
|
||||
URL: "https://sample.com",
|
||||
ExtraLabels: map[string]string{
|
||||
"foo": "my-foo",
|
||||
"bar": "my-bar",
|
||||
},
|
||||
}
|
||||
result := &endpoint.Result{
|
||||
HTTPStatus: 200,
|
||||
Connected: true,
|
||||
Duration: 2340 * time.Millisecond,
|
||||
Success: true,
|
||||
}
|
||||
// Order of extraLabels as per GetUniqueExtraMetricLabels is ["foo", "bar"]
|
||||
PublishMetricsForEndpoint(ep, result, []string{"foo", "bar"})
|
||||
|
||||
expected := `
|
||||
# HELP gatus_results_total Number of results per endpoint
|
||||
# TYPE gatus_results_total counter
|
||||
gatus_results_total{bar="my-bar",foo="my-foo",group="g1",key="g1_ep-extra",name="ep-extra",success="true",type="HTTP"} 1
|
||||
`
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expected), "gatus_results_total")
|
||||
if err != nil {
|
||||
t.Error("metrics export does not include extraLabels as expected:", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPublishMetricsForEndpoint(t *testing.T) {
|
||||
reg := prometheus.NewRegistry()
|
||||
InitializePrometheusMetrics(&config.Config{}, reg)
|
||||
|
||||
httpEndpoint := &endpoint.Endpoint{Name: "http-ep-name", Group: "http-ep-group", URL: "https://example.org"}
|
||||
PublishMetricsForEndpoint(httpEndpoint, &endpoint.Result{
|
||||
HTTPStatus: 200,
|
||||
@@ -23,8 +120,8 @@ func TestPublishMetricsForEndpoint(t *testing.T) {
|
||||
},
|
||||
Success: true,
|
||||
CertificateExpiration: 49 * time.Hour,
|
||||
})
|
||||
err := testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
|
||||
}, []string{})
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(`
|
||||
# HELP gatus_results_code_total Total number of results by code
|
||||
# TYPE gatus_results_code_total counter
|
||||
gatus_results_code_total{code="200",group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 1
|
||||
@@ -57,8 +154,8 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
|
||||
},
|
||||
Success: false,
|
||||
CertificateExpiration: 47 * time.Hour,
|
||||
})
|
||||
err = testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
|
||||
}, []string{})
|
||||
err = testutil.GatherAndCompare(reg, bytes.NewBufferString(`
|
||||
# HELP gatus_results_code_total Total number of results by code
|
||||
# TYPE gatus_results_code_total counter
|
||||
gatus_results_code_total{code="200",group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 2
|
||||
@@ -82,10 +179,12 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
|
||||
if err != nil {
|
||||
t.Errorf("Expected no errors but got: %v", err)
|
||||
}
|
||||
dnsEndpoint := &endpoint.Endpoint{Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
|
||||
QueryType: "A",
|
||||
QueryName: "example.com.",
|
||||
}}
|
||||
dnsEndpoint := &endpoint.Endpoint{
|
||||
Name: "dns-ep-name", Group: "dns-ep-group", URL: "8.8.8.8", DNSConfig: &dns.Config{
|
||||
QueryType: "A",
|
||||
QueryName: "example.com.",
|
||||
},
|
||||
}
|
||||
PublishMetricsForEndpoint(dnsEndpoint, &endpoint.Result{
|
||||
DNSRCode: "NOERROR",
|
||||
Connected: true,
|
||||
@@ -94,8 +193,8 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
|
||||
{Condition: "[DNS_RCODE] == NOERROR", Success: true},
|
||||
},
|
||||
Success: true,
|
||||
})
|
||||
err = testutil.GatherAndCompare(prometheus.Gatherers{prometheus.DefaultGatherer}, bytes.NewBufferString(`
|
||||
}, []string{})
|
||||
err = testutil.GatherAndCompare(reg, bytes.NewBufferString(`
|
||||
# HELP gatus_results_code_total Total number of results by code
|
||||
# TYPE gatus_results_code_total counter
|
||||
gatus_results_code_total{code="200",group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 2
|
||||
|
||||
@@ -2,7 +2,9 @@ package watchdog
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
@@ -30,14 +32,24 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
||||
if !endpointAlert.IsEnabled() || endpointAlert.FailureThreshold > ep.NumberOfFailuresInARow {
|
||||
continue
|
||||
}
|
||||
if endpointAlert.Triggered {
|
||||
logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
|
||||
// Determine if an initial alert should be sent
|
||||
sendInitialAlert := !endpointAlert.Triggered
|
||||
// Determine if a reminder should be sent
|
||||
sendReminder := endpointAlert.Triggered && endpointAlert.MinimumReminderInterval > 0 && time.Since(ep.LastReminderSent) >= endpointAlert.MinimumReminderInterval
|
||||
// If neither initial alert nor reminder needs to be sent, skip to the next alert
|
||||
if !sendInitialAlert && !sendReminder {
|
||||
logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' is not due for triggering or reminding, skipping", ep.Name, endpointAlert.GetDescription())
|
||||
continue
|
||||
}
|
||||
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
|
||||
if alertProvider != nil {
|
||||
logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
|
||||
var err error
|
||||
alertType := "reminder"
|
||||
if sendInitialAlert {
|
||||
alertType = "initial"
|
||||
}
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Sending %s %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", alertType, endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
|
||||
if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
|
||||
if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
|
||||
err = errors.New("error")
|
||||
@@ -48,7 +60,11 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
||||
if err != nil {
|
||||
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
endpointAlert.Triggered = true
|
||||
// Mark initial alert as triggered and update last reminder time
|
||||
if sendInitialAlert {
|
||||
endpointAlert.Triggered = true
|
||||
}
|
||||
ep.LastReminderSent = time.Now()
|
||||
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package watchdog
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
@@ -517,6 +518,48 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
|
||||
verify(t, ep, 0, 2, false, "")
|
||||
}
|
||||
|
||||
func TestHandleAlertingWithMinimumReminderInterval(t *testing.T) {
|
||||
_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
|
||||
defer os.Clearenv()
|
||||
|
||||
cfg := &config.Config{
|
||||
Alerting: &alerting.Config{
|
||||
Custom: &custom.AlertProvider{
|
||||
DefaultConfig: custom.Config{
|
||||
URL: "https://twin.sh/health",
|
||||
Method: "GET",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
enabled := true
|
||||
ep := &endpoint.Endpoint{
|
||||
URL: "https://example.com",
|
||||
Alerts: []*alert.Alert{
|
||||
{
|
||||
Type: alert.TypeCustom,
|
||||
Enabled: &enabled,
|
||||
FailureThreshold: 2,
|
||||
SuccessThreshold: 3,
|
||||
SendOnResolved: &enabled,
|
||||
Triggered: false,
|
||||
MinimumReminderInterval: 1 * time.Second,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 2, 0, true, "The alert should've triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 3, 0, true, "The alert should still be triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
|
||||
verify(t, ep, 4, 0, true, "The alert should still be triggered")
|
||||
HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
|
||||
}
|
||||
|
||||
func verify(t *testing.T, ep *endpoint.Endpoint, expectedNumberOfFailuresInARow, expectedNumberOfSuccessInARow int, expectedTriggered bool, expectedTriggeredReason string) {
|
||||
if ep.NumberOfFailuresInARow != expectedNumberOfFailuresInARow {
|
||||
t.Errorf("endpoint.NumberOfFailuresInARow should've been %d, got %d", expectedNumberOfFailuresInARow, ep.NumberOfFailuresInARow)
|
||||
|
||||
@@ -27,11 +27,12 @@ var (
|
||||
// Monitor loops over each endpoint and starts a goroutine to monitor each endpoint separately
|
||||
func Monitor(cfg *config.Config) {
|
||||
ctx, cancelFunc = context.WithCancel(context.Background())
|
||||
extraLabels := cfg.GetUniqueExtraMetricLabels()
|
||||
for _, endpoint := range cfg.Endpoints {
|
||||
if endpoint.IsEnabled() {
|
||||
// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration
|
||||
time.Sleep(777 * time.Millisecond)
|
||||
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
|
||||
go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, extraLabels, ctx)
|
||||
}
|
||||
}
|
||||
for _, externalEndpoint := range cfg.ExternalEndpoints {
|
||||
@@ -39,15 +40,15 @@ func Monitor(cfg *config.Config) {
|
||||
// If the external endpoint does not use heartbeat, then it does not need to be monitored periodically, because
|
||||
// alerting is checked every time an external endpoint is pushed to Gatus, unlike normal endpoints.
|
||||
if externalEndpoint.IsEnabled() && externalEndpoint.Heartbeat.Interval > 0 {
|
||||
go monitorExternalEndpointHeartbeat(externalEndpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
|
||||
go monitorExternalEndpointHeartbeat(externalEndpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx, extraLabels)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// monitor a single endpoint in a loop
|
||||
func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
|
||||
func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, extraLabels []string, ctx context.Context) {
|
||||
// Run it immediately on start
|
||||
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
|
||||
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, extraLabels)
|
||||
// Loop for the next executions
|
||||
ticker := time.NewTicker(ep.Interval)
|
||||
defer ticker.Stop()
|
||||
@@ -57,7 +58,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
|
||||
return
|
||||
case <-ticker.C:
|
||||
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
|
||||
execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, extraLabels)
|
||||
}
|
||||
}
|
||||
// Just in case somebody wandered all the way to here and wonders, "what about ExternalEndpoints?"
|
||||
@@ -65,7 +66,7 @@ func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
// periodically like they are for normal endpoints.
|
||||
}
|
||||
|
||||
func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
|
||||
func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, extraLabels []string) {
|
||||
if !disableMonitoringLock {
|
||||
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
|
||||
// could cause performance issues and return inaccurate results
|
||||
@@ -80,7 +81,7 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
|
||||
result := ep.EvaluateHealth()
|
||||
if enabledMetrics {
|
||||
metrics.PublishMetricsForEndpoint(ep, result)
|
||||
metrics.PublishMetricsForEndpoint(ep, result, extraLabels)
|
||||
}
|
||||
UpdateEndpointStatuses(ep, result)
|
||||
if logr.GetThreshold() == logr.LevelDebug && !result.Success {
|
||||
@@ -104,7 +105,7 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key())
|
||||
}
|
||||
|
||||
func monitorExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
|
||||
func monitorExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context, extraLabels []string) {
|
||||
ticker := time.NewTicker(ee.Heartbeat.Interval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
@@ -113,12 +114,12 @@ func monitorExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingCon
|
||||
logr.Warnf("[watchdog.monitorExternalEndpointHeartbeat] Canceling current execution of group=%s; endpoint=%s; key=%s", ee.Group, ee.Name, ee.Key())
|
||||
return
|
||||
case <-ticker.C:
|
||||
executeExternalEndpointHeartbeat(ee, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
|
||||
executeExternalEndpointHeartbeat(ee, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, extraLabels)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func executeExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
|
||||
func executeExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, extraLabels []string) {
|
||||
if !disableMonitoringLock {
|
||||
// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
|
||||
// could cause performance issues and return inaccurate results
|
||||
@@ -152,7 +153,7 @@ func executeExternalEndpointHeartbeat(ee *endpoint.ExternalEndpoint, alertingCon
|
||||
Errors: []string{"heartbeat: no update received within " + ee.Heartbeat.Interval.String()},
|
||||
}
|
||||
if enabledMetrics {
|
||||
metrics.PublishMetricsForEndpoint(convertedEndpoint, result)
|
||||
metrics.PublishMetricsForEndpoint(convertedEndpoint, result, extraLabels)
|
||||
}
|
||||
UpdateEndpointStatuses(convertedEndpoint, result)
|
||||
logr.Infof("[watchdog.monitorExternalEndpointHeartbeat] Checked heartbeat for group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ee.Group, ee.Name, ee.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
|
||||
|
||||
Reference in New Issue
Block a user