Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2f8a3d2a02 | ||
|
|
9495b7389e | ||
|
|
c8bdecbde8 | ||
|
|
394602bc47 | ||
|
|
15813d4297 | ||
|
|
d24c66cf96 | ||
|
|
70d7d0c54c |
@@ -16,4 +16,10 @@ endpoints:
|
||||
url: https://api.github.com/healthz
|
||||
interval: 5m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[STATUS] == 200"
|
||||
|
||||
- name: check-domain-expiration
|
||||
url: "https://example.org/"
|
||||
interval: 1h
|
||||
conditions:
|
||||
- "[DOMAIN_EXPIRATION] > 720h"
|
||||
@@ -1,4 +1,3 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
gatus:
|
||||
container_name: gatus
|
||||
@@ -13,7 +12,7 @@ services:
|
||||
|
||||
prometheus:
|
||||
container_name: prometheus
|
||||
image: prom/prometheus:v2.14.0
|
||||
image: prom/prometheus:v3.5.0
|
||||
restart: always
|
||||
command: --config.file=/etc/prometheus/prometheus.yml
|
||||
ports:
|
||||
@@ -25,7 +24,7 @@ services:
|
||||
|
||||
grafana:
|
||||
container_name: grafana
|
||||
image: grafana/grafana:6.4.4
|
||||
image: grafana/grafana:12.1.0
|
||||
restart: always
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: secret
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 566,
|
||||
"id": 41,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
@@ -39,7 +39,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
@@ -79,7 +80,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -122,7 +123,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red"
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
@@ -162,7 +164,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -193,7 +195,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -225,7 +228,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -292,7 +295,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -321,7 +324,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "SSL certificate expiration times for all services",
|
||||
"description": "Domain expiration times for all domains",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -339,7 +342,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red"
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
@@ -395,7 +399,137 @@
|
||||
"showHeader": true,
|
||||
"sortBy": []
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "gatus_results_domain_expiration_seconds",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "__auto",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Domain Expiration",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Value": false,
|
||||
"__name__": true,
|
||||
"app_kubernetes_io_instance": true,
|
||||
"app_kubernetes_io_managed_by": true,
|
||||
"app_kubernetes_io_name": true,
|
||||
"app_kubernetes_io_service": true,
|
||||
"helm_sh_chart": true,
|
||||
"instance": true,
|
||||
"job": true,
|
||||
"key": true,
|
||||
"type": true
|
||||
},
|
||||
"includeByName": {},
|
||||
"indexByName": {
|
||||
"Value": 2,
|
||||
"group": 0,
|
||||
"name": 1
|
||||
},
|
||||
"renameByName": {
|
||||
"Value": "Time Until Expiry",
|
||||
"group": "Group",
|
||||
"name": "Service"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "SSL certificate expiration times for all services",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 172800
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 604800
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "dtdurations"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Time Until Expiry"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.cellOptions",
|
||||
"value": {
|
||||
"applyToRow": false,
|
||||
"type": "color-background"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "s"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": []
|
||||
},
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -444,6 +578,113 @@
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "Current status distribution across all services",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(gatus_results_endpoint_success)",
|
||||
"legendFormat": "Services UP",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(1 - gatus_results_endpoint_success)",
|
||||
"legendFormat": "Services DOWN",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Service Status Distribution",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@@ -483,7 +724,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red"
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
@@ -525,7 +767,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
"y": 16
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
@@ -546,7 +788,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -689,7 +931,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -703,9 +946,9 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
"y": 24
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
@@ -721,7 +964,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -735,112 +978,6 @@
|
||||
],
|
||||
"title": "Response Times",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "Current status distribution across all services",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(gatus_results_endpoint_success)",
|
||||
"legendFormat": "Services UP",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(1 - gatus_results_endpoint_success)",
|
||||
"legendFormat": "Services DOWN",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Service Status Distribution",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
@@ -855,8 +992,8 @@
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "myprom",
|
||||
"value": "PA04845DA3A4B088E"
|
||||
"text": "prometheus",
|
||||
"value": "cedv077q7bbwgd"
|
||||
},
|
||||
"description": "Select your Prometheus datasource",
|
||||
"includeAll": false,
|
||||
@@ -877,6 +1014,6 @@
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Gatus - Service Monitoring Dashboard",
|
||||
"uid": "gatus-monitoring2",
|
||||
"version": 10
|
||||
}
|
||||
"uid": "4ea25b6f-2edc-416c-8282-a1164f95537a",
|
||||
"version": 1
|
||||
}
|
||||
108
README.md
108
README.md
@@ -50,6 +50,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
||||
- [Conditions](#conditions)
|
||||
- [Placeholders](#placeholders)
|
||||
- [Functions](#functions)
|
||||
- [Announcements](#announcements)
|
||||
- [Storage](#storage)
|
||||
- [Client configuration](#client-configuration)
|
||||
- [Tunneling](#tunneling)
|
||||
@@ -95,7 +96,6 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga
|
||||
- [Configuring Zulip alerts](#configuring-zulip-alerts)
|
||||
- [Configuring custom alerts](#configuring-custom-alerts)
|
||||
- [Setting a default alert](#setting-a-default-alert)
|
||||
- [Announcements](#announcements)
|
||||
- [Maintenance](#maintenance)
|
||||
- [Security](#security)
|
||||
- [Basic Authentication](#basic-authentication)
|
||||
@@ -309,6 +309,7 @@ You can then configure alerts to be triggered when an endpoint is unhealthy once
|
||||
| `endpoints[].ui.hide-hostname` | Whether to hide the hostname from the results. | `false` |
|
||||
| `endpoints[].ui.hide-port` | Whether to hide the port from the results. | `false` |
|
||||
| `endpoints[].ui.hide-url` | Whether to hide the URL from the results. Useful if the URL contains a token. | `false` |
|
||||
| `endpoints[].ui.hide-errors` | Whether to hide errors from the results. | `false` |
|
||||
| `endpoints[].ui.dont-resolve-failed-conditions` | Whether to skip resolving failed conditions for the UI. | `false` |
|
||||
| `endpoints[].ui.badge.response-time` | List of response time thresholds. Each time a threshold is reached, the badge has a different color. | `[50, 200, 300, 500, 750]` |
|
||||
| `endpoints[].extra-labels` | Extra labels to add to the metrics. Useful for grouping endpoints together. | `{}` |
|
||||
@@ -366,7 +367,7 @@ or send an HTTP request:
|
||||
POST /api/v1/endpoints/{key}/external?success={success}&error={error}&duration={duration}
|
||||
```
|
||||
Where:
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
- Using the example configuration above, the key would be `core_ext-ep-test`.
|
||||
- `{success}` is a boolean (`true` or `false`) value indicating whether the health check was successful or not.
|
||||
- `{error}` (optional): a string describing the reason for a failed health check. If `{success}` is false, this should contain the error message; if the check is successful, this will be ignored.
|
||||
@@ -1300,7 +1301,7 @@ endpoints:
|
||||
```
|
||||
|
||||
|
||||
#### Configuring ilert alerts
|
||||
#### Configuring Ilert alerts
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------------|:-------------------------------------------------------------------------------------------|:--------|
|
||||
| `alerting.ilert` | Configuration for alerts of type `ilert` | `{}` |
|
||||
@@ -1852,6 +1853,40 @@ endpoints:
|
||||
```
|
||||
|
||||
|
||||
#### Configuring SendGrid alerts
|
||||
|
||||
> ⚠️ **WARNING**: This alerting provider has not been tested yet. If you've tested it and confirmed that it works, please remove this warning and create a pull request, or comment on [#1223](https://github.com/TwiN/gatus/discussions/1223) with whether the provider works as intended. Thank you for your cooperation.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|:--------------------------------------|:-------------------------------------------------------------------------------------------|:--------------|
|
||||
| `alerting.sendgrid` | Configuration for alerts of type `sendgrid` | `{}` |
|
||||
| `alerting.sendgrid.api-key` | SendGrid API key | Required `""` |
|
||||
| `alerting.sendgrid.from` | Email address to send from | Required `""` |
|
||||
| `alerting.sendgrid.to` | Email address(es) to send alerts to (comma-separated for multiple recipients) | Required `""` |
|
||||
| `alerting.sendgrid.default-alert` | Default alert configuration. <br />See [Setting a default alert](#setting-a-default-alert) | N/A |
|
||||
| `alerting.sendgrid.overrides` | List of overrides that may be prioritized over the default configuration | `[]` |
|
||||
| `alerting.sendgrid.overrides[].group` | Endpoint group for which the configuration will be overridden by this configuration | `""` |
|
||||
| `alerting.sendgrid.overrides[].*` | See `alerting.sendgrid.*` parameters | `{}` |
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
sendgrid:
|
||||
api-key: "SG.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
from: "alerts@example.com"
|
||||
to: "admin@example.com,ops@example.com"
|
||||
|
||||
endpoints:
|
||||
- name: website
|
||||
url: "https://twin.sh/health"
|
||||
interval: 5m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
alerts:
|
||||
- type: sendgrid
|
||||
send-on-resolved: true
|
||||
```
|
||||
|
||||
|
||||
#### Configuring Signal alerts
|
||||
|
||||
> ⚠️ **WARNING**: This alerting provider has not been tested yet. If you've tested it and confirmed that it works, please remove this warning and create a pull request, or comment on [#1223](https://github.com/TwiN/gatus/discussions/1223) with whether the provider works as intended. Thank you for your cooperation.
|
||||
@@ -1918,40 +1953,6 @@ endpoints:
|
||||
```
|
||||
|
||||
|
||||
#### Configuring SendGrid alerts
|
||||
|
||||
> ⚠️ **WARNING**: This alerting provider has not been tested yet. If you've tested it and confirmed that it works, please remove this warning and create a pull request, or comment on [#1223](https://github.com/TwiN/gatus/discussions/1223) with whether the provider works as intended. Thank you for your cooperation.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|:--------------------------------------|:-------------------------------------------------------------------------------------------|:--------------|
|
||||
| `alerting.sendgrid` | Configuration for alerts of type `sendgrid` | `{}` |
|
||||
| `alerting.sendgrid.api-key` | SendGrid API key | Required `""` |
|
||||
| `alerting.sendgrid.from` | Email address to send from | Required `""` |
|
||||
| `alerting.sendgrid.to` | Email address(es) to send alerts to (comma-separated for multiple recipients) | Required `""` |
|
||||
| `alerting.sendgrid.default-alert` | Default alert configuration. <br />See [Setting a default alert](#setting-a-default-alert) | N/A |
|
||||
| `alerting.sendgrid.overrides` | List of overrides that may be prioritized over the default configuration | `[]` |
|
||||
| `alerting.sendgrid.overrides[].group` | Endpoint group for which the configuration will be overridden by this configuration | `""` |
|
||||
| `alerting.sendgrid.overrides[].*` | See `alerting.sendgrid.*` parameters | `{}` |
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
sendgrid:
|
||||
api-key: "SG.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
from: "alerts@example.com"
|
||||
to: "admin@example.com,ops@example.com"
|
||||
|
||||
endpoints:
|
||||
- name: website
|
||||
url: "https://twin.sh/health"
|
||||
interval: 5m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
alerts:
|
||||
- type: sendgrid
|
||||
send-on-resolved: true
|
||||
```
|
||||
|
||||
|
||||
#### Configuring Slack alerts
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------------|:-------------------------------------------------------------------------------------------|:------------------------------------|
|
||||
@@ -2298,14 +2299,14 @@ endpoints:
|
||||
|
||||
> ⚠️ **WARNING**: This alerting provider has not been tested yet. If you've tested it and confirmed that it works, please remove this warning and create a pull request, or comment on [#1223](https://github.com/TwiN/gatus/discussions/1223) with whether the provider works as intended. Thank you for your cooperation.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|:-------------------------------|:-------------------------------------------------------------------------------------------|:--------------|
|
||||
| `alerting.webex` | Configuration for alerts of type `webex` | `{}` |
|
||||
| `alerting.webex.webhook-url` | Webex Teams webhook URL | Required `""` |
|
||||
| `alerting.webex.default-alert` | Default alert configuration. <br />See [Setting a default alert](#setting-a-default-alert) | N/A |
|
||||
| `alerting.webex.overrides` | List of overrides that may be prioritized over the default configuration | `[]` |
|
||||
| `alerting.webex.overrides[].group` | Endpoint group for which the configuration will be overridden by this configuration | `""` |
|
||||
| `alerting.webex.overrides[].*` | See `alerting.webex.*` parameters | `{}` |
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------------|:-------------------------------------------------------------------------------------------|:--------------|
|
||||
| `alerting.webex` | Configuration for alerts of type `webex` | `{}` |
|
||||
| `alerting.webex.webhook-url` | Webex Teams webhook URL | Required `""` |
|
||||
| `alerting.webex.default-alert` | Default alert configuration. <br />See [Setting a default alert](#setting-a-default-alert) | N/A |
|
||||
| `alerting.webex.overrides` | List of overrides that may be prioritized over the default configuration | `[]` |
|
||||
| `alerting.webex.overrides[].group` | Endpoint group for which the configuration will be overridden by this configuration | `""` |
|
||||
| `alerting.webex.overrides[].*` | See `alerting.webex.*` parameters | `{}` |
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
@@ -2357,7 +2358,7 @@ endpoints:
|
||||
#### Configuring Zulip alerts
|
||||
| Parameter | Description | Default |
|
||||
|:-----------------------------------|:------------------------------------------------------------------------------------|:--------------|
|
||||
| `alerting.zulip` | Configuration for alerts of type `zulip` | `{}` |
|
||||
| `alerting.zulip` | Configuration for alerts of type `zulip` | `{}` |
|
||||
| `alerting.zulip.bot-email` | Bot Email | Required `""` |
|
||||
| `alerting.zulip.bot-api-key` | Bot API key | Required `""` |
|
||||
| `alerting.zulip.domain` | Full organization domain (e.g.: yourZulipDomain.zulipchat.com) | Required `""` |
|
||||
@@ -2675,6 +2676,7 @@ endpoint on the same port your application is configured to run on (`web.port`).
|
||||
| gatus_results_connected_total | counter | Total number of results in which a connection was successfully established | key, group, name, type | All |
|
||||
| gatus_results_duration_seconds | gauge | Duration of the request in seconds | key, group, name, type | All |
|
||||
| gatus_results_certificate_expiration_seconds | gauge | Number of seconds until the certificate expires | key, group, name, type | HTTP, STARTTLS |
|
||||
| gatus_results_domain_expiration_seconds | gauge | Number of seconds until the domain expires | key, group, name, type | HTTP, STARTTLS |
|
||||
| gatus_results_endpoint_success | gauge | Displays whether or not the endpoint was a success (0 failure, 1 success) | key, group, name, type | All |
|
||||
|
||||
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.
|
||||
@@ -3297,7 +3299,7 @@ The path to generate a badge is the following:
|
||||
```
|
||||
Where:
|
||||
- `{duration}` is `30d`, `7d`, `24h` or `1h`
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
For instance, if you want the uptime during the last 24 hours from the endpoint `frontend` in the group `core`,
|
||||
the URL would look like this:
|
||||
@@ -3323,7 +3325,7 @@ The path to generate a badge is the following:
|
||||
/api/v1/endpoints/{key}/health/badge.svg
|
||||
```
|
||||
Where:
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
For instance, if you want the current status of the endpoint `frontend` in the group `core`,
|
||||
the URL would look like this:
|
||||
@@ -3340,7 +3342,7 @@ The path to generate a badge is the following:
|
||||
/api/v1/endpoints/{key}/health/badge.shields
|
||||
```
|
||||
Where:
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
For instance, if you want the current status of the endpoint `frontend` in the group `core`,
|
||||
the URL would look like this:
|
||||
@@ -3363,7 +3365,7 @@ The endpoint to generate a badge is the following:
|
||||
```
|
||||
Where:
|
||||
- `{duration}` is `30d`, `7d`, `24h` or `1h`
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
#### Response time (chart)
|
||||

|
||||
@@ -3376,7 +3378,7 @@ The endpoint to generate a response time chart is the following:
|
||||
```
|
||||
Where:
|
||||
- `{duration}` is `30d`, `7d`, or `24h`
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
##### How to change the color thresholds of the response time badge
|
||||
To change the response time badges' threshold, a corresponding configuration can be added to an endpoint.
|
||||
@@ -3434,7 +3436,7 @@ The path to get raw uptime data for an endpoint is:
|
||||
```
|
||||
Where:
|
||||
- `{duration}` is `30d`, `7d`, `24h` or `1h`
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
For instance, if you want the raw uptime data for the last 24 hours from the endpoint `frontend` in the group `core`, the URL would look like this:
|
||||
```
|
||||
@@ -3448,7 +3450,7 @@ The path to get raw response time data for an endpoint is:
|
||||
```
|
||||
Where:
|
||||
- `{duration}` is `30d`, `7d`, `24h` or `1h`
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `(`, `)`, `+` and `&` replaced by `-`.
|
||||
- `{key}` has the pattern `<GROUP_NAME>_<ENDPOINT_NAME>` in which both variables have ` `, `/`, `_`, `,`, `.`, `#`, `+` and `&` replaced by `-`.
|
||||
|
||||
For instance, if you want the raw response time data for the last 24 hours from the endpoint `frontend` in the group `core`, the URL would look like this:
|
||||
```
|
||||
|
||||
@@ -131,8 +131,8 @@ func TestPing(t *testing.T) {
|
||||
|
||||
func TestCanPerformStartTLS(t *testing.T) {
|
||||
type args struct {
|
||||
address string
|
||||
insecure bool
|
||||
address string
|
||||
insecure bool
|
||||
dnsresolver string
|
||||
}
|
||||
tests := []struct {
|
||||
@@ -168,7 +168,7 @@ func TestCanPerformStartTLS(t *testing.T) {
|
||||
{
|
||||
name: "dns resolver",
|
||||
args: args{
|
||||
address: "smtp.gmail.com:587",
|
||||
address: "smtp.gmail.com:587",
|
||||
dnsresolver: "tcp://1.1.1.1:53",
|
||||
},
|
||||
wantConnected: true,
|
||||
@@ -340,7 +340,7 @@ func TestQueryWebSocket(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTlsRenegotiation(t *testing.T) {
|
||||
tests := []struct {
|
||||
scenarios := []struct {
|
||||
name string
|
||||
cfg TLSConfig
|
||||
expectedConfig tls.RenegotiationSupport
|
||||
@@ -371,12 +371,12 @@ func TestTlsRenegotiation(t *testing.T) {
|
||||
expectedConfig: tls.RenegotiateNever,
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
tls := &tls.Config{}
|
||||
tlsConfig := configureTLS(tls, test.cfg)
|
||||
if tlsConfig.Renegotiation != test.expectedConfig {
|
||||
t.Errorf("expected tls renegotiation to be %v, but got %v", test.expectedConfig, tls.Renegotiation)
|
||||
tlsConfig := configureTLS(tls, scenario.cfg)
|
||||
if tlsConfig.Renegotiation != scenario.expectedConfig {
|
||||
t.Errorf("expected tls renegotiation to be %v, but got %v", scenario.expectedConfig, tls.Renegotiation)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -513,14 +513,11 @@ func TestQueryDNS(t *testing.T) {
|
||||
|
||||
func TestCheckSSHBanner(t *testing.T) {
|
||||
cfg := &Config{Timeout: 3}
|
||||
|
||||
t.Run("no-auth-ssh", func(t *testing.T) {
|
||||
connected, status, err := CheckSSHBanner("tty.sdf.org", cfg)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Expected: error != nil, got: %v ", err)
|
||||
}
|
||||
|
||||
if connected == false {
|
||||
t.Errorf("Expected: connected == true, got: %v", connected)
|
||||
}
|
||||
@@ -528,14 +525,11 @@ func TestCheckSSHBanner(t *testing.T) {
|
||||
t.Errorf("Expected: 0, got: %v", status)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid-address", func(t *testing.T) {
|
||||
connected, status, err := CheckSSHBanner("idontplaytheodds.com", cfg)
|
||||
|
||||
if err == nil {
|
||||
t.Errorf("Expected: error, got: %v ", err)
|
||||
}
|
||||
|
||||
if connected != false {
|
||||
t.Errorf("Expected: connected == false, got: %v", connected)
|
||||
}
|
||||
@@ -543,5 +537,4 @@ func TestCheckSSHBanner(t *testing.T) {
|
||||
t.Errorf("Expected: 1, got: %v", status)
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
@@ -353,6 +353,9 @@ func (e *Endpoint) EvaluateHealthWithContext(context *gontext.Gontext) *Result {
|
||||
}
|
||||
result.port = ""
|
||||
}
|
||||
if processedEndpoint.UIConfig.HideErrors {
|
||||
result.Errors = nil
|
||||
}
|
||||
if processedEndpoint.UIConfig.HideConditions {
|
||||
result.ConditionResults = nil
|
||||
}
|
||||
@@ -498,7 +501,7 @@ func (e *Endpoint) call(result *Result) {
|
||||
result.Duration = time.Since(startTime)
|
||||
} else if endpointType == TypeSSH {
|
||||
// If there's no username/password specified, attempt to validate just the SSH banner
|
||||
if len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0 {
|
||||
if e.SSHConfig == nil || (len(e.SSHConfig.Username) == 0 && len(e.SSHConfig.Password) == 0) {
|
||||
result.Connected, result.HTTPStatus, err = client.CheckSSHBanner(strings.TrimPrefix(e.URL, "ssh://"), e.ClientConfig)
|
||||
if err != nil {
|
||||
result.AddError(err.Error())
|
||||
|
||||
@@ -1448,3 +1448,168 @@ func TestEndpoint_preprocessWithContext(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEndpoint_HideUIFeatures(t *testing.T) {
|
||||
defer client.InjectHTTPClient(nil)
|
||||
tests := []struct {
|
||||
name string
|
||||
endpoint Endpoint
|
||||
mockResponse test.MockRoundTripper
|
||||
checkHostname bool
|
||||
expectHostname string
|
||||
checkErrors bool
|
||||
expectErrors bool
|
||||
checkConditions bool
|
||||
expectConditions bool
|
||||
checkErrorContent string
|
||||
}{
|
||||
{
|
||||
name: "hide-conditions",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[STATUS] == 200", "[BODY].status == UP"},
|
||||
UIConfig: &ui.Config{HideConditions: true},
|
||||
},
|
||||
mockResponse: test.MockRoundTripper(func(r *http.Request) *http.Response {
|
||||
return &http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(bytes.NewBufferString(`{"status": "UP"}`))}
|
||||
}),
|
||||
checkConditions: true,
|
||||
expectConditions: false,
|
||||
},
|
||||
{
|
||||
name: "hide-hostname",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[STATUS] == 200"},
|
||||
UIConfig: &ui.Config{HideHostname: true},
|
||||
},
|
||||
mockResponse: test.MockRoundTripper(func(r *http.Request) *http.Response {
|
||||
return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}
|
||||
}),
|
||||
checkHostname: true,
|
||||
expectHostname: "",
|
||||
},
|
||||
{
|
||||
name: "hide-url-in-errors",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[CONNECTED] == true"},
|
||||
UIConfig: &ui.Config{HideURL: true},
|
||||
ClientConfig: &client.Config{Timeout: time.Millisecond},
|
||||
},
|
||||
mockResponse: nil,
|
||||
checkErrors: true,
|
||||
expectErrors: true,
|
||||
checkErrorContent: "<redacted>",
|
||||
},
|
||||
{
|
||||
name: "hide-port-in-errors",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com:9999/health",
|
||||
Conditions: []Condition{"[CONNECTED] == true"},
|
||||
UIConfig: &ui.Config{HidePort: true},
|
||||
ClientConfig: &client.Config{Timeout: time.Millisecond},
|
||||
},
|
||||
mockResponse: nil,
|
||||
checkErrors: true,
|
||||
expectErrors: true,
|
||||
checkErrorContent: "<redacted>",
|
||||
},
|
||||
{
|
||||
name: "hide-errors",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[CONNECTED] == true"},
|
||||
UIConfig: &ui.Config{HideErrors: true},
|
||||
ClientConfig: &client.Config{Timeout: time.Millisecond},
|
||||
},
|
||||
mockResponse: nil,
|
||||
checkErrors: true,
|
||||
expectErrors: false,
|
||||
},
|
||||
{
|
||||
name: "dont-resolve-failed-conditions",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[STATUS] == 200"},
|
||||
UIConfig: &ui.Config{DontResolveFailedConditions: true},
|
||||
},
|
||||
mockResponse: test.MockRoundTripper(func(r *http.Request) *http.Response {
|
||||
return &http.Response{StatusCode: http.StatusBadGateway, Body: http.NoBody}
|
||||
}),
|
||||
checkConditions: true,
|
||||
expectConditions: true,
|
||||
},
|
||||
{
|
||||
name: "multiple-hide-features",
|
||||
endpoint: Endpoint{
|
||||
Name: "test-endpoint",
|
||||
URL: "https://example.com/health",
|
||||
Conditions: []Condition{"[STATUS] == 200"},
|
||||
UIConfig: &ui.Config{HideConditions: true, HideHostname: true, HideErrors: true},
|
||||
},
|
||||
mockResponse: test.MockRoundTripper(func(r *http.Request) *http.Response {
|
||||
return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}
|
||||
}),
|
||||
checkConditions: true,
|
||||
expectConditions: false,
|
||||
checkHostname: true,
|
||||
expectHostname: "",
|
||||
checkErrors: true,
|
||||
expectErrors: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.mockResponse != nil {
|
||||
mockClient := &http.Client{Transport: tt.mockResponse}
|
||||
if tt.endpoint.ClientConfig != nil && tt.endpoint.ClientConfig.Timeout > 0 {
|
||||
mockClient.Timeout = tt.endpoint.ClientConfig.Timeout
|
||||
}
|
||||
client.InjectHTTPClient(mockClient)
|
||||
} else {
|
||||
client.InjectHTTPClient(nil)
|
||||
}
|
||||
err := tt.endpoint.ValidateAndSetDefaults()
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateAndSetDefaults failed: %v", err)
|
||||
}
|
||||
result := tt.endpoint.EvaluateHealth()
|
||||
if tt.checkHostname {
|
||||
if result.Hostname != tt.expectHostname {
|
||||
t.Errorf("Expected hostname '%s', got '%s'", tt.expectHostname, result.Hostname)
|
||||
}
|
||||
}
|
||||
if tt.checkErrors {
|
||||
hasErrors := len(result.Errors) > 0
|
||||
if hasErrors != tt.expectErrors {
|
||||
t.Errorf("Expected errors=%v, got errors=%v (actual errors: %v)", tt.expectErrors, hasErrors, result.Errors)
|
||||
}
|
||||
if tt.checkErrorContent != "" && len(result.Errors) > 0 {
|
||||
found := false
|
||||
for _, err := range result.Errors {
|
||||
if strings.Contains(err, tt.checkErrorContent) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Expected error to contain '%s', but got: %v", tt.checkErrorContent, result.Errors)
|
||||
}
|
||||
}
|
||||
}
|
||||
if tt.checkConditions {
|
||||
hasConditions := result.ConditionResults != nil && len(result.ConditionResults) > 0
|
||||
if hasConditions != tt.expectConditions {
|
||||
t.Errorf("Expected conditions=%v, got conditions=%v (actual: %v)", tt.expectConditions, hasConditions, result.ConditionResults)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@ type Config struct {
|
||||
// HidePort whether to hide the port in the Result
|
||||
HidePort bool `yaml:"hide-port"`
|
||||
|
||||
// HideErrors whether to hide the errors in the Result
|
||||
HideErrors bool `yaml:"hide-errors"`
|
||||
|
||||
// DontResolveFailedConditions whether to skip resolving failed conditions in the Result for display in the UI
|
||||
DontResolveFailedConditions bool `yaml:"dont-resolve-failed-conditions"`
|
||||
|
||||
@@ -58,6 +61,7 @@ func GetDefaultConfig() *Config {
|
||||
HideHostname: false,
|
||||
HideURL: false,
|
||||
HidePort: false,
|
||||
HideErrors: false,
|
||||
DontResolveFailedConditions: false,
|
||||
HideConditions: false,
|
||||
Badge: &Badge{
|
||||
|
||||
@@ -15,8 +15,6 @@ func sanitize(s string) string {
|
||||
s = strings.ReplaceAll(s, ",", "-")
|
||||
s = strings.ReplaceAll(s, " ", "-")
|
||||
s = strings.ReplaceAll(s, "#", "-")
|
||||
s = strings.ReplaceAll(s, "(", "-")
|
||||
s = strings.ReplaceAll(s, ")", "-")
|
||||
s = strings.ReplaceAll(s, "+", "-")
|
||||
s = strings.ReplaceAll(s, "&", "-")
|
||||
return s
|
||||
|
||||
@@ -32,12 +32,12 @@ func TestConvertGroupAndNameToKey(t *testing.T) {
|
||||
{
|
||||
GroupName: "API (v1)",
|
||||
Name: "endpoint",
|
||||
ExpectedOutput: "api--v1-_endpoint",
|
||||
ExpectedOutput: "api-(v1)_endpoint",
|
||||
},
|
||||
{
|
||||
GroupName: "website (admin)",
|
||||
Name: "test",
|
||||
ExpectedOutput: "website--admin-_test",
|
||||
ExpectedOutput: "website-(admin)_test",
|
||||
},
|
||||
{
|
||||
GroupName: "search",
|
||||
|
||||
27
main.go
27
main.go
@@ -183,6 +183,33 @@ func initializeStorage(cfg *config.Config) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Load persisted triggered alerts for suite endpoints
|
||||
for _, suite := range cfg.Suites {
|
||||
for _, ep := range suite.Endpoints {
|
||||
var checksums []string
|
||||
for _, alert := range ep.Alerts {
|
||||
if alert.IsEnabled() {
|
||||
checksums = append(checksums, alert.Checksum())
|
||||
}
|
||||
}
|
||||
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
|
||||
if numberOfTriggeredAlertsDeleted > 0 {
|
||||
logr.Debugf("[main.initializeStorage] Deleted %d triggered alerts for suite endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
|
||||
}
|
||||
for _, alert := range ep.Alerts {
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
|
||||
if err != nil {
|
||||
logr.Errorf("[main.initializeStorage] Failed to get triggered alert for suite endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
continue
|
||||
}
|
||||
if exists {
|
||||
alert.Triggered, alert.ResolveKey = true, resolveKey
|
||||
ep.NumberOfSuccessesInARow, ep.NumberOfFailuresInARow = numberOfSuccessesInARow, alert.FailureThreshold
|
||||
numberOfPersistedTriggeredAlertsLoaded++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if numberOfPersistedTriggeredAlertsLoaded > 0 {
|
||||
logr.Infof("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ var (
|
||||
resultConnectedTotal *prometheus.CounterVec
|
||||
resultCodeTotal *prometheus.CounterVec
|
||||
resultCertificateExpirationSeconds *prometheus.GaugeVec
|
||||
resultDomainExpirationSeconds *prometheus.GaugeVec
|
||||
resultEndpointSuccess *prometheus.GaugeVec
|
||||
|
||||
// Suite metrics
|
||||
@@ -51,6 +52,9 @@ func UnregisterPrometheusMetrics() {
|
||||
if resultCertificateExpirationSeconds != nil {
|
||||
currentRegisterer.Unregister(resultCertificateExpirationSeconds)
|
||||
}
|
||||
if resultDomainExpirationSeconds != nil {
|
||||
currentRegisterer.Unregister(resultDomainExpirationSeconds)
|
||||
}
|
||||
if resultEndpointSuccess != nil {
|
||||
currentRegisterer.Unregister(resultEndpointSuccess)
|
||||
}
|
||||
@@ -119,6 +123,13 @@ func InitializePrometheusMetrics(cfg *config.Config, reg prometheus.Registerer)
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultCertificateExpirationSeconds)
|
||||
|
||||
resultDomainExpirationSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_domain_expiration_seconds",
|
||||
Help: "Number of seconds until the domain expires",
|
||||
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
|
||||
reg.MustRegister(resultDomainExpirationSeconds)
|
||||
|
||||
resultEndpointSuccess = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "results_endpoint_success",
|
||||
@@ -178,6 +189,9 @@ func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result, e
|
||||
if result.CertificateExpiration != 0 {
|
||||
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
|
||||
}
|
||||
if result.DomainExpiration != 0 {
|
||||
resultDomainExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.DomainExpiration.Seconds())
|
||||
}
|
||||
if result.Success {
|
||||
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
|
||||
} else {
|
||||
|
||||
@@ -48,6 +48,9 @@ func TestInitializePrometheusMetrics(t *testing.T) {
|
||||
if resultCertificateExpirationSeconds == nil {
|
||||
t.Error("resultCertificateExpirationSeconds metric not initialized")
|
||||
}
|
||||
if resultDomainExpirationSeconds == nil {
|
||||
t.Error("resultDomainExpirationSeconds metric not initialized")
|
||||
}
|
||||
if resultEndpointSuccess == nil {
|
||||
t.Error("resultEndpointSuccess metric not initialized")
|
||||
}
|
||||
@@ -120,9 +123,11 @@ func TestPublishMetricsForEndpoint(t *testing.T) {
|
||||
ConditionResults: []*endpoint.ConditionResult{
|
||||
{Condition: "[STATUS] == 200", Success: true},
|
||||
{Condition: "[CERTIFICATE_EXPIRATION] > 48h", Success: true},
|
||||
{Condition: "[DOMAIN_EXPIRATION] > 24h", Success: true},
|
||||
},
|
||||
Success: true,
|
||||
CertificateExpiration: 49 * time.Hour,
|
||||
DomainExpiration: 25 * time.Hour,
|
||||
}, []string{})
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(`
|
||||
# HELP gatus_results_code_total Total number of results by code
|
||||
@@ -140,6 +145,9 @@ gatus_results_total{group="http-ep-group",key="http-ep-group_http-ep-name",name=
|
||||
# HELP gatus_results_certificate_expiration_seconds Number of seconds until the certificate expires
|
||||
# TYPE gatus_results_certificate_expiration_seconds gauge
|
||||
gatus_results_certificate_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 176400
|
||||
# HELP gatus_results_domain_expiration_seconds Number of seconds until the domain expires
|
||||
# TYPE gatus_results_domain_expiration_seconds gauge
|
||||
gatus_results_domain_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 90000
|
||||
# HELP gatus_results_endpoint_success Displays whether or not the endpoint was a success
|
||||
# TYPE gatus_results_endpoint_success gauge
|
||||
gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 1
|
||||
@@ -154,9 +162,11 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
|
||||
ConditionResults: []*endpoint.ConditionResult{
|
||||
{Condition: "[STATUS] == 200", Success: true},
|
||||
{Condition: "[CERTIFICATE_EXPIRATION] > 47h", Success: false},
|
||||
{Condition: "[DOMAIN_EXPIRATION] > 24h", Success: true},
|
||||
},
|
||||
Success: false,
|
||||
CertificateExpiration: 47 * time.Hour,
|
||||
DomainExpiration: 24 * time.Hour,
|
||||
}, []string{})
|
||||
err = testutil.GatherAndCompare(reg, bytes.NewBufferString(`
|
||||
# HELP gatus_results_code_total Total number of results by code
|
||||
@@ -175,6 +185,9 @@ gatus_results_total{group="http-ep-group",key="http-ep-group_http-ep-name",name=
|
||||
# HELP gatus_results_certificate_expiration_seconds Number of seconds until the certificate expires
|
||||
# TYPE gatus_results_certificate_expiration_seconds gauge
|
||||
gatus_results_certificate_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 169200
|
||||
# HELP gatus_results_domain_expiration_seconds Number of seconds until the domain expires
|
||||
# TYPE gatus_results_domain_expiration_seconds gauge
|
||||
gatus_results_domain_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 86400
|
||||
# HELP gatus_results_endpoint_success Displays whether or not the endpoint was a success
|
||||
# TYPE gatus_results_endpoint_success gauge
|
||||
gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 0
|
||||
|
||||
@@ -64,7 +64,6 @@ func executeEndpoint(ep *endpoint.Endpoint, cfg *config.Config, extraLabels []st
|
||||
}
|
||||
}
|
||||
if !cfg.Maintenance.IsUnderMaintenance() && !inEndpointMaintenanceWindow {
|
||||
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
|
||||
HandleAlerting(ep, result, cfg.Alerting)
|
||||
} else {
|
||||
logr.Debug("[watchdog.executeEndpoint] Not handling alerting because currently in the maintenance window")
|
||||
|
||||
@@ -50,6 +50,8 @@ func executeSuite(s *suite.Suite, cfg *config.Config, extraLabels []string) {
|
||||
if cfg.Metrics {
|
||||
metrics.PublishMetricsForSuite(s, result, extraLabels)
|
||||
}
|
||||
// Store result
|
||||
UpdateSuiteStatus(s, result)
|
||||
// Handle alerting for suite endpoints
|
||||
for i, ep := range s.Endpoints {
|
||||
if i < len(result.EndpointResults) {
|
||||
@@ -72,8 +74,6 @@ func executeSuite(s *suite.Suite, cfg *config.Config, extraLabels []string) {
|
||||
}
|
||||
}
|
||||
logr.Infof("[watchdog.executeSuite] Completed suite=%s; success=%v; errors=%d; duration=%v; endpoints_executed=%d/%d", s.Name, result.Success, len(result.Errors), result.Duration, len(result.EndpointResults), len(s.Endpoints))
|
||||
// Store result in database
|
||||
UpdateSuiteStatus(s, result)
|
||||
}
|
||||
|
||||
// UpdateSuiteStatus persists the suite result in the database
|
||||
|
||||
@@ -197,13 +197,12 @@ const buttons = computed(() => {
|
||||
const fetchConfig = async () => {
|
||||
try {
|
||||
const response = await fetch(`${SERVER_URL}/api/v1/config`, { credentials: 'include' })
|
||||
retrievedConfig.value = true
|
||||
|
||||
if (response.status === 200) {
|
||||
const data = await response.json()
|
||||
config.value = data
|
||||
announcements.value = data.announcements || []
|
||||
}
|
||||
retrievedConfig.value = true
|
||||
} catch (error) {
|
||||
console.error('Failed to fetch config:', error)
|
||||
retrievedConfig.value = true
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user