feat(metrics): Add metrics for domain expiration (#1244)

* Add metrics for domain expiration

* Update grafana and prometheus versions and extend grafana dashboard with Domain expiration

* feat(deps) update whois version

---------

Co-authored-by: TwiN <twin@linux.com>
This commit is contained in:
Stefan Balea
2025-10-25 19:45:15 +03:00
committed by GitHub
parent 9495b7389e
commit 2f8a3d2a02
6 changed files with 303 additions and 133 deletions

View File

@@ -16,4 +16,10 @@ endpoints:
url: https://api.github.com/healthz
interval: 5m
conditions:
- "[STATUS] == 200"
- "[STATUS] == 200"
- name: check-domain-expiration
url: "https://example.org/"
interval: 1h
conditions:
- "[DOMAIN_EXPIRATION] > 720h"

View File

@@ -1,4 +1,3 @@
version: "3.9"
services:
gatus:
container_name: gatus
@@ -13,7 +12,7 @@ services:
prometheus:
container_name: prometheus
image: prom/prometheus:v2.14.0
image: prom/prometheus:v3.5.0
restart: always
command: --config.file=/etc/prometheus/prometheus.yml
ports:
@@ -25,7 +24,7 @@ services:
grafana:
container_name: grafana
image: grafana/grafana:6.4.4
image: grafana/grafana:12.1.0
restart: always
environment:
GF_SECURITY_ADMIN_PASSWORD: secret

View File

@@ -19,7 +19,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 566,
"id": 41,
"links": [],
"panels": [
{
@@ -39,7 +39,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": 0
},
{
"color": "yellow",
@@ -79,7 +80,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -122,7 +123,8 @@
"mode": "absolute",
"steps": [
{
"color": "red"
"color": "red",
"value": 0
},
{
"color": "yellow",
@@ -162,7 +164,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -193,7 +195,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": 0
}
]
},
@@ -225,7 +228,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -292,7 +295,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -321,7 +324,7 @@
"type": "prometheus",
"uid": "$datasource"
},
"description": "SSL certificate expiration times for all services",
"description": "Domain expiration times for all domains",
"fieldConfig": {
"defaults": {
"color": {
@@ -339,7 +342,8 @@
"mode": "absolute",
"steps": [
{
"color": "red"
"color": "red",
"value": 0
},
{
"color": "#EAB839",
@@ -395,7 +399,137 @@
"showHeader": true,
"sortBy": []
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"editorMode": "code",
"expr": "gatus_results_domain_expiration_seconds",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"refId": "A"
}
],
"title": "Domain Expiration",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Value": false,
"__name__": true,
"app_kubernetes_io_instance": true,
"app_kubernetes_io_managed_by": true,
"app_kubernetes_io_name": true,
"app_kubernetes_io_service": true,
"helm_sh_chart": true,
"instance": true,
"job": true,
"key": true,
"type": true
},
"includeByName": {},
"indexByName": {
"Value": 2,
"group": 0,
"name": 1
},
"renameByName": {
"Value": "Time Until Expiry",
"group": "Group",
"name": "Service"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"description": "SSL certificate expiration times for all services",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "#EAB839",
"value": 172800
},
{
"color": "green",
"value": 604800
}
]
},
"unit": "dtdurations"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time Until Expiry"
},
"properties": [
{
"id": "custom.cellOptions",
"value": {
"applyToRow": false,
"type": "color-background"
}
},
{
"id": "unit",
"value": "s"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 11,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": []
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -444,6 +578,113 @@
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"description": "Current status distribution across all services",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(gatus_results_endpoint_success)",
"legendFormat": "Services UP",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(1 - gatus_results_endpoint_success)",
"legendFormat": "Services DOWN",
"refId": "B"
}
],
"title": "Service Status Distribution",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
@@ -483,7 +724,8 @@
"mode": "absolute",
"steps": [
{
"color": "red"
"color": "red",
"value": 0
},
{
"color": "green",
@@ -525,7 +767,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 8
"y": 16
},
"id": 2,
"options": {
@@ -546,7 +788,7 @@
}
]
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -689,7 +931,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": 0
},
{
"color": "red",
@@ -703,9 +946,9 @@
},
"gridPos": {
"h": 8,
"w": 12,
"w": 24,
"x": 0,
"y": 16
"y": 24
},
"id": 4,
"options": {
@@ -721,7 +964,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.2",
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
@@ -735,112 +978,6 @@
],
"title": "Response Times",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"description": "Current status distribution across all services",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(gatus_results_endpoint_success)",
"legendFormat": "Services UP",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(1 - gatus_results_endpoint_success)",
"legendFormat": "Services DOWN",
"refId": "B"
}
],
"title": "Service Status Distribution",
"type": "timeseries"
}
],
"preload": false,
@@ -855,8 +992,8 @@
"list": [
{
"current": {
"text": "myprom",
"value": "PA04845DA3A4B088E"
"text": "prometheus",
"value": "cedv077q7bbwgd"
},
"description": "Select your Prometheus datasource",
"includeAll": false,
@@ -877,6 +1014,6 @@
"timepicker": {},
"timezone": "",
"title": "Gatus - Service Monitoring Dashboard",
"uid": "gatus-monitoring2",
"version": 10
}
"uid": "4ea25b6f-2edc-416c-8282-a1164f95537a",
"version": 1
}

View File

@@ -2676,6 +2676,7 @@ endpoint on the same port your application is configured to run on (`web.port`).
| gatus_results_connected_total | counter | Total number of results in which a connection was successfully established | key, group, name, type | All |
| gatus_results_duration_seconds | gauge | Duration of the request in seconds | key, group, name, type | All |
| gatus_results_certificate_expiration_seconds | gauge | Number of seconds until the certificate expires | key, group, name, type | HTTP, STARTTLS |
| gatus_results_domain_expiration_seconds | gauge | Number of seconds until the domain expires | key, group, name, type | HTTP, STARTTLS |
| gatus_results_endpoint_success | gauge | Displays whether or not the endpoint was a success (0 failure, 1 success) | key, group, name, type | All |
See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example.

View File

@@ -17,6 +17,7 @@ var (
resultConnectedTotal *prometheus.CounterVec
resultCodeTotal *prometheus.CounterVec
resultCertificateExpirationSeconds *prometheus.GaugeVec
resultDomainExpirationSeconds *prometheus.GaugeVec
resultEndpointSuccess *prometheus.GaugeVec
// Suite metrics
@@ -51,6 +52,9 @@ func UnregisterPrometheusMetrics() {
if resultCertificateExpirationSeconds != nil {
currentRegisterer.Unregister(resultCertificateExpirationSeconds)
}
if resultDomainExpirationSeconds != nil {
currentRegisterer.Unregister(resultDomainExpirationSeconds)
}
if resultEndpointSuccess != nil {
currentRegisterer.Unregister(resultEndpointSuccess)
}
@@ -119,6 +123,13 @@ func InitializePrometheusMetrics(cfg *config.Config, reg prometheus.Registerer)
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
reg.MustRegister(resultCertificateExpirationSeconds)
resultDomainExpirationSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_domain_expiration_seconds",
Help: "Number of seconds until the domain expires",
}, append([]string{"key", "group", "name", "type"}, extraLabels...))
reg.MustRegister(resultDomainExpirationSeconds)
resultEndpointSuccess = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_endpoint_success",
@@ -178,6 +189,9 @@ func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result, e
if result.CertificateExpiration != 0 {
resultCertificateExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.CertificateExpiration.Seconds())
}
if result.DomainExpiration != 0 {
resultDomainExpirationSeconds.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(result.DomainExpiration.Seconds())
}
if result.Success {
resultEndpointSuccess.WithLabelValues(append([]string{ep.Key(), ep.Group, ep.Name, string(endpointType)}, labelValues...)...).Set(1)
} else {

View File

@@ -48,6 +48,9 @@ func TestInitializePrometheusMetrics(t *testing.T) {
if resultCertificateExpirationSeconds == nil {
t.Error("resultCertificateExpirationSeconds metric not initialized")
}
if resultDomainExpirationSeconds == nil {
t.Error("resultDomainExpirationSeconds metric not initialized")
}
if resultEndpointSuccess == nil {
t.Error("resultEndpointSuccess metric not initialized")
}
@@ -120,9 +123,11 @@ func TestPublishMetricsForEndpoint(t *testing.T) {
ConditionResults: []*endpoint.ConditionResult{
{Condition: "[STATUS] == 200", Success: true},
{Condition: "[CERTIFICATE_EXPIRATION] > 48h", Success: true},
{Condition: "[DOMAIN_EXPIRATION] > 24h", Success: true},
},
Success: true,
CertificateExpiration: 49 * time.Hour,
DomainExpiration: 25 * time.Hour,
}, []string{})
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(`
# HELP gatus_results_code_total Total number of results by code
@@ -140,6 +145,9 @@ gatus_results_total{group="http-ep-group",key="http-ep-group_http-ep-name",name=
# HELP gatus_results_certificate_expiration_seconds Number of seconds until the certificate expires
# TYPE gatus_results_certificate_expiration_seconds gauge
gatus_results_certificate_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 176400
# HELP gatus_results_domain_expiration_seconds Number of seconds until the domain expires
# TYPE gatus_results_domain_expiration_seconds gauge
gatus_results_domain_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 90000
# HELP gatus_results_endpoint_success Displays whether or not the endpoint was a success
# TYPE gatus_results_endpoint_success gauge
gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 1
@@ -154,9 +162,11 @@ gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-
ConditionResults: []*endpoint.ConditionResult{
{Condition: "[STATUS] == 200", Success: true},
{Condition: "[CERTIFICATE_EXPIRATION] > 47h", Success: false},
{Condition: "[DOMAIN_EXPIRATION] > 24h", Success: true},
},
Success: false,
CertificateExpiration: 47 * time.Hour,
DomainExpiration: 24 * time.Hour,
}, []string{})
err = testutil.GatherAndCompare(reg, bytes.NewBufferString(`
# HELP gatus_results_code_total Total number of results by code
@@ -175,6 +185,9 @@ gatus_results_total{group="http-ep-group",key="http-ep-group_http-ep-name",name=
# HELP gatus_results_certificate_expiration_seconds Number of seconds until the certificate expires
# TYPE gatus_results_certificate_expiration_seconds gauge
gatus_results_certificate_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 169200
# HELP gatus_results_domain_expiration_seconds Number of seconds until the domain expires
# TYPE gatus_results_domain_expiration_seconds gauge
gatus_results_domain_expiration_seconds{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 86400
# HELP gatus_results_endpoint_success Displays whether or not the endpoint was a success
# TYPE gatus_results_endpoint_success gauge
gatus_results_endpoint_success{group="http-ep-group",key="http-ep-group_http-ep-name",name="http-ep-name",type="HTTP"} 0