Compare commits

...

13 Commits

Author SHA1 Message Date
TwinProduction
8ec256edbf Implement has() function to determine if an element at a JSONPath exists 2021-03-10 21:49:13 -05:00
TwinProduction
a48ec41bca Add test for invalid path 2021-03-09 19:39:22 -05:00
TwinProduction
541e0264ab Don't export, persist or retain result body after evaluation 2021-03-08 21:30:11 -05:00
TwinProduction
f945e4b8a2 #93: Gracefully handle breaking change to uptime maps by renaming variables 2021-03-06 15:19:35 -05:00
TwinProduction
076b92a2b4 Minor update 2021-03-05 20:33:06 -05:00
TwinProduction
02e9f74a04 Move alerting configuration documentation under Alerting 2021-03-05 20:25:20 -05:00
TwinProduction
b37dd5e819 Minor update 2021-03-05 00:50:24 -05:00
TwinProduction
1775f80ffe Back to alpine/1.16 (the change in reflected memory usage was due to 1.16's MADV_FREE change after all) 2021-03-05 00:49:58 -05:00
TwinProduction
3187db1e9a Switch gocache to FIFO instead of LRU 2021-03-05 00:40:11 -05:00
TwinProduction
932eab00a0 Test using Docker image with Go 1.15 instead of alpine, which has 1.16 2021-03-05 00:38:40 -05:00
TwinProduction
c842ac2343 Fix memory issue caused by previous shallow copy 2021-03-05 00:19:21 -05:00
TwinProduction
6320237326 Significantly improve uptime calculation 2021-03-04 23:00:30 -05:00
TwinProduction
8fe9d013b5 Close #48: Implement Discord alerting providers 2021-03-04 21:26:17 -05:00
28 changed files with 1030 additions and 855 deletions

112
README.md
View File

@@ -20,12 +20,13 @@ core applications: https://status.twinnation.org/
- [Features](#features)
- [Usage](#usage)
- [Configuration](#configuration)
- [Configuration](#configuration)
- [Conditions](#conditions)
- [Placeholders](#placeholders)
- [Functions](#functions)
- [Alerting](#alerting)
- [Configuring Slack alerts](#configuring-slack-alerts)
- [Configuring Discord alerts](#configuring-discord-alerts)
- [Configuring PagerDuty alerts](#configuring-pagerduty-alerts)
- [Configuring Twilio alerts](#configuring-twilio-alerts)
- [Configuring Mattermost alerts](#configuring-mattermost-alerts)
@@ -84,7 +85,7 @@ services:
- "[RESPONSE_TIME] < 300" # Response time must be under 300ms
- name: example
url: "https://example.org/"
interval: 30s
interval: 5m
conditions:
- "[STATUS] == 200"
```
@@ -93,10 +94,10 @@ This example would look like this:
![Simple example](.github/assets/example.png)
Note that you can also add environment variables in the configuration file (i.e. `$DOMAIN`, `${DOMAIN}`)
Note that you can also add environment variables in the configuration file (e.g. `$DOMAIN`, `${DOMAIN}`)
### Configuration
## Configuration
| Parameter | Description | Default |
|:---------------------------------------- |:----------------------------------------------------------------------------- |:-------------- |
@@ -110,43 +111,21 @@ Note that you can also add environment variables in the configuration file (i.e.
| `services[].url` | URL to send the request to | Required `""` |
| `services[].method` | Request method | `GET` |
| `services[].insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
| `services[].conditions` | Conditions used to determine the health of the service. See [Conditions](#conditions) | `[]` |
| `services[].interval` | Duration to wait between every status check | `60s` |
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| `services[].dns` | Configuration for a service of type DNS. See [Monitoring using DNS queries](#monitoring-using-dns-queries) | `""` |
| `services[].dns` | Configuration for a service of type DNS. See [Monitoring a service using DNS queries](#monitoring-a-service-using-dns-queries). | `""` |
| `services[].dns.query-type` | Query type for DNS service | `""` |
| `services[].dns.query-name` | Query name for DNS service | `""` |
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `pagerduty`, `twilio`, `mattermost`, `messagebird`, `custom` | Required `""` |
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `discord`, `pagerduty`, `twilio`, `mattermost`, `messagebird`, `custom` | Required `""` |
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
| `services[].alerts[].failure-threshold` | Number of failures in a row needed before triggering the alert | `3` |
| `services[].alerts[].success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved | `2` |
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved | `false` |
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
| `alerting` | Configuration for alerting | `{}` |
| `alerting.slack` | Configuration for alerts of type `slack` | `{}` |
| `alerting.slack.webhook-url` | Slack Webhook URL | Required `""` |
| `alerting.pagerduty` | Configuration for alerts of type `pagerduty` | `{}` |
| `alerting.pagerduty.integration-key` | PagerDuty Events API v2 integration key. | Required `""` |
| `alerting.twilio` | Settings for alerts of type `twilio` | `{}` |
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
| `alerting.twilio.token` | Twilio auth token | Required `""` |
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
| `alerting.twilio.to` | Number to send twilio alerts to | Required `""` |
| `alerting.mattermost` | Configuration for alerts of type `mattermost` | `{}` |
| `alerting.mattermost.webhook-url` | Mattermost Webhook URL | Required `""` |
| `alerting.mattermost.insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
| `alerting.messagebird` | Settings for alerts of type `messagebird` | `{}` |
| `alerting.messagebird.access-key` | Messagebird access key | Required `""` |
| `alerting.messagebird.originator` | The sender of the message | Required `""` |
| `alerting.messagebird.recipients` | The recipients of the message | Required `""` |
| `alerting.custom` | Configuration for custom actions on failure or alerts | `{}` |
| `alerting.custom.url` | Custom alerting request url | Required `""` |
| `alerting.custom.method` | Request method | `GET` |
| `alerting.custom.insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
| `alerting.custom.body` | Custom alerting request body. | `""` |
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
| `alerting` | Configuration for alerting. See [Alerting](#alerting) | `{}` |
| `security` | Security configuration | `{}` |
| `security.basic` | Basic authentication security configuration | `{}` |
| `security.basic.username` | Username for Basic authentication | Required `""` |
@@ -156,7 +135,8 @@ Note that you can also add environment variables in the configuration file (i.e.
| `web.address` | Address to listen on | `0.0.0.0` |
| `web.port` | Port to listen on | `8080` |
For Kubernetes configuration, see [Kubernetes](#kubernetes-alpha)
- For Kubernetes configuration, see [Kubernetes](#kubernetes-alpha).
- For alerting configuration, see [Alerting](#alerting).
### Conditions
@@ -179,6 +159,8 @@ Here are some examples of conditions you can use:
| `[BODY].age == [BODY].id` | JSONPath value of `$.age` is equal to JSONPath value of `$.id` | `{"age":1,"id":1}` | |
| `len([BODY].data) < 5` | Array at JSONPath `$.data` has less than 5 elements | `{"data":[{"id":1}]}` | |
| `len([BODY].name) == 8` | String at JSONPath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
| `has([BODY].errors) == false` | JSONPath `$.errors` does not exist | `{"name":"john.doe"}` | `{"errors":[]}` |
| `has([BODY].users) == true` | JSONPath `$.users` exists | `{"users":[]}` | `{}` |
| `[BODY].name == pat(john*)` | String at JSONPath `$.name` matches pattern `john*` | `{"name":"john.doe"}` | `{"name":"bob"}` |
| `[BODY].id == any(1, 2)` | Value at JSONPath `$.id` is equal to `1` or `2` | 1, 2 | 3, 4, 5 |
| `[CERTIFICATE_EXPIRATION] > 48h` | Certificate expiration is more than 48h away | 49h, 50h, 123h | 1h, 24h, ... |
@@ -202,6 +184,7 @@ Here are some examples of conditions you can use:
| Function | Description | Example |
|:-----------|:---------------------------------------------------------------------------------------------------------------- |:-------------------------- |
| `len` | Returns the length of the object/slice. Works only with the `[BODY]` placeholder. | `len([BODY].username) > 8`
| `has` | Returns `true` or `false` based on whether a given path is valid. Works only with the `[BODY]` placeholder. | `has([BODY].errors) == false`
| `pat` | Specifies that the string passed as parameter should be evaluated as a pattern. Works only with `==` and `!=`. | `[IP] == pat(192.168.*)`
| `any` | Specifies that any one of the values passed as parameters is a valid value. Works only with `==` and `!=`. | `[BODY].ip == any(127.0.0.1, ::1)`
@@ -216,6 +199,33 @@ individual services with configurable descriptions and thresholds.
Note that if an alerting provider is not configured properly, all alerts configured with the provider's type will be
ignored.
| Parameter | Description | Default |
|:---------------------------------------- |:----------------------------------------------------------------------------- |:-------------- |
| `alerting.slack` | Configuration for alerts of type `slack` | `{}` |
| `alerting.slack.webhook-url` | Slack Webhook URL | Required `""` |
| `alerting.discord` | Configuration for alerts of type `discord` | `{}` |
| `alerting.discord.webhook-url` | Discord Webhook URL | Required `""` |
| `alerting.pagerduty` | Configuration for alerts of type `pagerduty` | `{}` |
| `alerting.pagerduty.integration-key` | PagerDuty Events API v2 integration key. | Required `""` |
| `alerting.twilio` | Settings for alerts of type `twilio` | `{}` |
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
| `alerting.twilio.token` | Twilio auth token | Required `""` |
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
| `alerting.twilio.to` | Number to send Twilio alerts to | Required `""` |
| `alerting.mattermost` | Configuration for alerts of type `mattermost` | `{}` |
| `alerting.mattermost.webhook-url` | Mattermost Webhook URL | Required `""` |
| `alerting.mattermost.insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
| `alerting.messagebird` | Settings for alerts of type `messagebird` | `{}` |
| `alerting.messagebird.access-key` | Messagebird access key | Required `""` |
| `alerting.messagebird.originator` | The sender of the message | Required `""` |
| `alerting.messagebird.recipients` | The recipients of the message | Required `""` |
| `alerting.custom` | Configuration for custom actions on failure or alerts | `{}` |
| `alerting.custom.url` | Custom alerting request url | Required `""` |
| `alerting.custom.method` | Request method | `GET` |
| `alerting.custom.insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
| `alerting.custom.body` | Custom alerting request body. | `""` |
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
#### Configuring Slack alerts
@@ -223,6 +233,7 @@ ignored.
alerting:
slack:
webhook-url: "https://hooks.slack.com/services/**********/**********/**********"
services:
- name: twinnation
url: "https://twinnation.org/health"
@@ -248,6 +259,29 @@ Here's an example of what the notifications look like:
![Slack notifications](.github/assets/slack-alerts.png)
#### Configuring Discord alerts
```yaml
alerting:
discord:
webhook-url: "https://discord.com/api/webhooks/**********/**********"
services:
- name: twinnation
url: "https://twinnation.org/health"
interval: 30s
alerts:
- type: discord
enabled: true
description: "healthcheck failed"
send-on-resolved: true
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 300"
```
#### Configuring PagerDuty alerts
It is highly recommended to set `services[].alerts[].send-on-resolved` to `true` for alerts
@@ -259,6 +293,7 @@ PagerDuty instead.
alerting:
pagerduty:
integration-key: "********************************"
services:
- name: twinnation
url: "https://twinnation.org/health"
@@ -269,7 +304,7 @@ services:
failure-threshold: 3
success-threshold: 5
send-on-resolved: true
description: "healthcheck failed 3 times in a row"
description: "healthcheck failed"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
@@ -286,6 +321,7 @@ alerting:
token: "..."
from: "+1-234-567-8901"
to: "+1-234-567-8901"
services:
- name: twinnation
interval: 30s
@@ -295,7 +331,7 @@ services:
enabled: true
failure-threshold: 5
send-on-resolved: true
description: "healthcheck failed 5 times in a row"
description: "healthcheck failed"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
@@ -310,6 +346,7 @@ alerting:
mattermost:
webhook-url: "http://**********/hooks/**********"
insecure: true
services:
- name: twinnation
url: "https://twinnation.org/health"
@@ -317,7 +354,7 @@ services:
alerts:
- type: mattermost
enabled: true
description: "healthcheck failed 3 times in a row"
description: "healthcheck failed"
send-on-resolved: true
conditions:
- "[STATUS] == 200"
@@ -349,7 +386,7 @@ services:
enabled: true
failure-threshold: 3
send-on-resolved: true
description: "healthcheck failed 3 times in a row"
description: "healthcheck failed"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
@@ -395,7 +432,7 @@ services:
failure-threshold: 10
success-threshold: 3
send-on-resolved: true
description: "healthcheck failed 10 times in a row"
description: "healthcheck failed"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
@@ -414,6 +451,7 @@ alerting:
As a result, the `[ALERT_TRIGGERED_OR_RESOLVED]` in the body of the first example of this section would be replaced by
`partial_outage` when an alert is triggered and `operational` when an alert is resolved.
### Kubernetes (ALPHA)
> **WARNING**: This feature is in ALPHA. This means that it is very likely to change in the near future, which means that
@@ -527,8 +565,6 @@ services:
avatar
}
}
headers:
Content-Type: application/json # XXX: as of v1.9.2, this header is automatically added when graphql is set to true
conditions:
- "[STATUS] == 200"
- "[BODY].data.users[0].gender == female"

View File

@@ -2,6 +2,7 @@ package alerting
import (
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
"github.com/TwinProduction/gatus/alerting/provider/messagebird"
"github.com/TwinProduction/gatus/alerting/provider/pagerduty"
@@ -11,8 +12,11 @@ import (
// Config is the configuration for alerting providers
type Config struct {
// Slack is the configuration for the slack alerting provider
Slack *slack.AlertProvider `yaml:"slack"`
// Custom is the configuration for the custom alerting provider
Custom *custom.AlertProvider `yaml:"custom"`
// Discord is the configuration for the discord alerting provider
Discord *discord.AlertProvider `yaml:"discord"`
// Mattermost is the configuration for the mattermost alerting provider
Mattermost *mattermost.AlertProvider `yaml:"mattermost"`
@@ -20,12 +24,12 @@ type Config struct {
// Messagebird is the configuration for the messagebird alerting provider
Messagebird *messagebird.AlertProvider `yaml:"messagebird"`
// Pagerduty is the configuration for the pagerduty alerting provider
// PagerDuty is the configuration for the pagerduty alerting provider
PagerDuty *pagerduty.AlertProvider `yaml:"pagerduty"`
// Slack is the configuration for the slack alerting provider
Slack *slack.AlertProvider `yaml:"slack"`
// Twilio is the configuration for the twilio alerting provider
Twilio *twilio.AlertProvider `yaml:"twilio"`
// Custom is the configuration for the custom alerting provider
Custom *custom.AlertProvider `yaml:"custom"`
}

View File

@@ -0,0 +1,63 @@
package discord
import (
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/TwinProduction/gatus/alerting/provider/custom"
	"github.com/TwinProduction/gatus/core"
)
// AlertProvider holds the settings required to send an alert through a Discord webhook
type AlertProvider struct {
	// WebhookURL is the Discord webhook URL, read from the `webhook-url` YAML key
	WebhookURL string `yaml:"webhook-url"`
}

// IsValid reports whether the provider is usable, i.e. whether a webhook URL has been configured
func (provider *AlertProvider) IsValid() bool {
	return provider.WebhookURL != ""
}
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
//
// The returned provider POSTs a JSON payload with a single embed to the configured webhook URL.
// The embed contains a message stating whether the alert was triggered or resolved, the alert's
// description, and one line per condition result.
//
// service provides the name shown in the message, alert provides the thresholds and the
// description, result provides the condition results, and resolved selects between the
// "resolved" and "triggered" variants of the message.
//
// All dynamic text (service name, alert description, conditions) is JSON-escaped before being
// inserted in the payload, so that values containing quotes or backslashes (for instance the
// condition `[BODY].status == "UP"`) cannot produce an invalid JSON body.
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
	var message, results string
	var colorCode int
	if resolved {
		message = fmt.Sprintf("An alert for **%s** has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
		colorCode = 3066993 // decimal form of 0x2ECC71
	} else {
		message = fmt.Sprintf("An alert for **%s** has been triggered due to having failed %d time(s) in a row", service.Name, alert.FailureThreshold)
		colorCode = 15158332 // decimal form of 0xE74C3C
	}
	for _, conditionResult := range result.ConditionResults {
		var prefix string
		if conditionResult.Success {
			prefix = ":white_check_mark:"
		} else {
			prefix = ":x:"
		}
		line := fmt.Sprintf("%s - `%s`", prefix, conditionResult.Condition)
		// The trailing `\n` is a JSON escape sequence (backslash followed by n), not a raw newline,
		// which is why it is appended after the line itself has been escaped.
		results += escapeJSONString(line) + `\n`
	}
	return &custom.AlertProvider{
		URL:    provider.WebhookURL,
		Method: http.MethodPost,
		Body: fmt.Sprintf(`{
"content": "",
"embeds": [
{
"title": ":helmet_with_white_cross: Gatus",
"description": "%s:\n> %s",
"color": %d,
"fields": [
{
"name": "Condition results",
"value": "%s",
"inline": false
}
]
}
]
}`, escapeJSONString(message), escapeJSONString(alert.Description), colorCode, results),
		Headers: map[string]string{"Content-Type": "application/json"},
	}
}

// escapeJSONString returns s escaped so that it can be placed between the double quotes of a
// JSON string literal. The surrounding quotes added by the encoder are stripped.
func escapeJSONString(s string) string {
	encoded, err := json.Marshal(s)
	if err != nil || len(encoded) < 2 {
		// Encoding a plain string is not expected to fail; fall back to an empty value rather
		// than risk emitting text that would corrupt the payload.
		return ""
	}
	return string(encoded[1 : len(encoded)-1])
}

View File

@@ -0,0 +1,65 @@
package discord
import (
"encoding/json"
"net/http"
"strings"
"testing"
"github.com/TwinProduction/gatus/core"
)
// TestAlertProvider_IsValid checks that a provider is only considered valid when a webhook URL is set
func TestAlertProvider_IsValid(t *testing.T) {
	// A provider without a webhook URL must be rejected
	if (&AlertProvider{WebhookURL: ""}).IsValid() {
		t.Error("provider shouldn't have been valid")
	}
	// A provider with a webhook URL must be accepted
	if !(&AlertProvider{WebhookURL: "http://example.com"}).IsValid() {
		t.Error("provider should've been valid")
	}
}
func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "SUCCESSFUL_CONDITION", Success: true}}}, true)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
if !strings.Contains(customAlertProvider.Body, "resolved") {
t.Error("customAlertProvider.Body should've contained the substring resolved")
}
if customAlertProvider.URL != "http://example.com" {
t.Errorf("expected URL to be %s, got %s", "http://example.com", customAlertProvider.URL)
}
if customAlertProvider.Method != http.MethodPost {
t.Errorf("expected method to be %s, got %s", http.MethodPost, customAlertProvider.Method)
}
body := make(map[string]interface{})
err := json.Unmarshal([]byte(customAlertProvider.Body), &body)
if err != nil {
t.Error("expected body to be valid JSON, got error:", err.Error())
}
}
func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) {
provider := AlertProvider{WebhookURL: "http://example.com"}
customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false)
if customAlertProvider == nil {
t.Fatal("customAlertProvider shouldn't have been nil")
}
if !strings.Contains(customAlertProvider.Body, "triggered") {
t.Error("customAlertProvider.Body should've contained the substring triggered")
}
if customAlertProvider.URL != "http://example.com" {
t.Errorf("expected URL to be %s, got %s", "http://example.com", customAlertProvider.URL)
}
if customAlertProvider.Method != http.MethodPost {
t.Errorf("expected method to be %s, got %s", http.MethodPost, customAlertProvider.Method)
}
body := make(map[string]interface{})
err := json.Unmarshal([]byte(customAlertProvider.Body), &body)
if err != nil {
t.Error("expected body to be valid JSON, got error:", err.Error())
}
}

View File

@@ -2,6 +2,7 @@ package provider
import (
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
"github.com/TwinProduction/gatus/alerting/provider/messagebird"
"github.com/TwinProduction/gatus/alerting/provider/pagerduty"
@@ -22,9 +23,10 @@ type AlertProvider interface {
var (
// Validate interface implementation on compile
_ AlertProvider = (*custom.AlertProvider)(nil)
_ AlertProvider = (*twilio.AlertProvider)(nil)
_ AlertProvider = (*slack.AlertProvider)(nil)
_ AlertProvider = (*discord.AlertProvider)(nil)
_ AlertProvider = (*mattermost.AlertProvider)(nil)
_ AlertProvider = (*messagebird.AlertProvider)(nil)
_ AlertProvider = (*pagerduty.AlertProvider)(nil)
_ AlertProvider = (*slack.AlertProvider)(nil)
_ AlertProvider = (*twilio.AlertProvider)(nil)
)

View File

@@ -228,12 +228,13 @@ func validateAlertingConfig(config *Config) {
return
}
alertTypes := []core.AlertType{
core.SlackAlert,
core.CustomAlert,
core.DiscordAlert,
core.MattermostAlert,
core.MessagebirdAlert,
core.TwilioAlert,
core.PagerDutyAlert,
core.CustomAlert,
core.SlackAlert,
core.TwilioAlert,
}
var validProviders, invalidProviders []core.AlertType
for _, alertType := range alertTypes {
@@ -255,12 +256,18 @@ func validateAlertingConfig(config *Config) {
// GetAlertingProviderByAlertType returns a provider.AlertProvider by its corresponding core.AlertType
func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider {
switch alertType {
case core.SlackAlert:
if config.Alerting.Slack == nil {
case core.CustomAlert:
if config.Alerting.Custom == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Slack
return config.Alerting.Custom
case core.DiscordAlert:
if config.Alerting.Discord == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Discord
case core.MattermostAlert:
if config.Alerting.Mattermost == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
@@ -273,24 +280,24 @@ func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) pr
return nil
}
return config.Alerting.Messagebird
case core.TwilioAlert:
if config.Alerting.Twilio == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Twilio
case core.PagerDutyAlert:
if config.Alerting.PagerDuty == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.PagerDuty
case core.CustomAlert:
if config.Alerting.Custom == nil {
case core.SlackAlert:
if config.Alerting.Slack == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Custom
return config.Alerting.Slack
case core.TwilioAlert:
if config.Alerting.Twilio == nil {
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
return nil
}
return config.Alerting.Twilio
}
return nil
}

View File

@@ -6,6 +6,14 @@ import (
"testing"
"time"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/alerting/provider/discord"
"github.com/TwinProduction/gatus/alerting/provider/mattermost"
"github.com/TwinProduction/gatus/alerting/provider/messagebird"
"github.com/TwinProduction/gatus/alerting/provider/pagerduty"
"github.com/TwinProduction/gatus/alerting/provider/slack"
"github.com/TwinProduction/gatus/alerting/provider/twilio"
"github.com/TwinProduction/gatus/core"
"github.com/TwinProduction/gatus/k8stest"
v1 "k8s.io/api/core/v1"
@@ -338,6 +346,8 @@ debug: true
alerting:
slack:
webhook-url: "http://example.com"
discord:
webhook-url: "http://example.org"
pagerduty:
integration-key: "00000000000000000000000000000000"
messagebird:
@@ -356,7 +366,9 @@ services:
success-threshold: 5
description: "Healthcheck failed 7 times in a row"
- type: messagebird
- type: discord
enabled: true
failure-threshold: 10
conditions:
- "[STATUS] == 200"
`))
@@ -369,6 +381,7 @@ services:
if config.Metrics {
t.Error("Metrics should've been false by default")
}
// Alerting providers
if config.Alerting == nil {
t.Fatal("config.Alerting shouldn't have been nil")
}
@@ -396,6 +409,16 @@ services:
if config.Alerting.Messagebird.Recipients != "31619191919" {
t.Errorf("Messagebird to recipients should've been %s, but was %s", "31619191919", config.Alerting.Messagebird.Recipients)
}
if config.Alerting.Discord == nil || !config.Alerting.Discord.IsValid() {
t.Fatal("Discord alerting config should've been valid")
}
if config.Alerting.Discord.WebhookURL != "http://example.org" {
t.Errorf("Discord webhook should've been %s, but was %s", "http://example.org", config.Alerting.Discord.WebhookURL)
}
if GetAlertingProviderByAlertType(config, core.DiscordAlert) != config.Alerting.Discord {
t.Error("expected discord configuration")
}
// Services
if len(config.Services) != 1 {
t.Error("There should've been 1 service")
}
@@ -405,11 +428,12 @@ services:
if config.Services[0].Interval != 60*time.Second {
t.Errorf("Interval should have been %s, because it is the default value", 60*time.Second)
}
if config.Services[0].Alerts == nil {
t.Fatal("The service alerts shouldn't have been nil")
if len(config.Services[0].Alerts) != 4 {
t.Fatal("There should've been 4 alerts configured")
}
if len(config.Services[0].Alerts) != 3 {
t.Fatal("There should've been 3 alert configured")
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
}
if !config.Services[0].Alerts[0].Enabled {
t.Error("The alert should've been enabled")
@@ -420,23 +444,35 @@ services:
if config.Services[0].Alerts[0].SuccessThreshold != 2 {
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold)
}
if config.Services[0].Alerts[1].Type != core.PagerDutyAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type)
}
if config.Services[0].Alerts[1].Description != "Healthcheck failed 7 times in a row" {
t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[1].Description)
}
if config.Services[0].Alerts[1].FailureThreshold != 7 {
t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[1].FailureThreshold)
}
if config.Services[0].Alerts[1].SuccessThreshold != 5 {
t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold)
}
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
}
if config.Services[0].Alerts[1].Type != core.PagerDutyAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type)
}
if config.Services[0].Alerts[1].Description != "Healthcheck failed 7 times in a row" {
t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[0].Description)
}
if config.Services[0].Alerts[2].Type != core.MessagebirdAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[1].Type)
t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[2].Type)
}
if config.Services[0].Alerts[2].Enabled {
t.Error("The alert should've been disabled")
}
if config.Services[0].Alerts[3].Type != core.DiscordAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[3].Type)
}
if config.Services[0].Alerts[3].FailureThreshold != 10 {
t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 10, config.Services[0].Alerts[3].FailureThreshold)
}
if config.Services[0].Alerts[3].SuccessThreshold != 2 {
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[3].SuccessThreshold)
}
}
@@ -809,3 +845,38 @@ kubernetes:
// TODO: find a way to test this?
t.Error("Function should've panicked because testing with ClusterModeIn isn't supported")
}
// TestGetAlertingProviderByAlertType checks that each alert type is resolved to the provider
// configured for it in the alerting configuration
func TestGetAlertingProviderByAlertType(t *testing.T) {
	cfg := &Config{
		Alerting: &alerting.Config{
			Custom:      &custom.AlertProvider{},
			Discord:     &discord.AlertProvider{},
			Mattermost:  &mattermost.AlertProvider{},
			Messagebird: &messagebird.AlertProvider{},
			PagerDuty:   &pagerduty.AlertProvider{},
			Slack:       &slack.AlertProvider{},
			Twilio:      &twilio.AlertProvider{},
		},
	}
	scenarios := []struct {
		alertType core.AlertType
		expected  interface{}
		failure   string
	}{
		{core.CustomAlert, cfg.Alerting.Custom, "expected Custom configuration"},
		{core.DiscordAlert, cfg.Alerting.Discord, "expected Discord configuration"},
		{core.MattermostAlert, cfg.Alerting.Mattermost, "expected Mattermost configuration"},
		{core.MessagebirdAlert, cfg.Alerting.Messagebird, "expected Messagebird configuration"},
		{core.PagerDutyAlert, cfg.Alerting.PagerDuty, "expected PagerDuty configuration"},
		{core.SlackAlert, cfg.Alerting.Slack, "expected Slack configuration"},
		{core.TwilioAlert, cfg.Alerting.Twilio, "expected Twilio configuration"},
	}
	for _, scenario := range scenarios {
		// Both sides are interface values: they are equal only if they hold the very same provider pointer
		if GetAlertingProviderByAlertType(cfg, scenario.alertType) != scenario.expected {
			t.Error(scenario.failure)
		}
	}
}

View File

@@ -26,7 +26,7 @@ const (
)
var (
cache = gocache.NewCache().WithMaxSize(100).WithEvictionPolicy(gocache.LeastRecentlyUsed)
cache = gocache.NewCache().WithMaxSize(100).WithEvictionPolicy(gocache.FirstInFirstOut)
// staticFolder is the path to the location of the static folder from the root path of the project
// The only reason this is exposed is to allow running tests from a different path than the root path of the project

View File

@@ -38,7 +38,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: nil,
Connected: true,
Success: true,
@@ -64,7 +63,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: []string{"error-1", "error-2"},
Connected: true,
Success: false,

View File

@@ -40,8 +40,11 @@ type Alert struct {
type AlertType string
const (
// SlackAlert is the AlertType for the slack alerting provider
SlackAlert AlertType = "slack"
// CustomAlert is the AlertType for the custom alerting provider
CustomAlert AlertType = "custom"
// DiscordAlert is the AlertType for the discord alerting provider
DiscordAlert AlertType = "discord"
// MattermostAlert is the AlertType for the mattermost alerting provider
MattermostAlert AlertType = "mattermost"
@@ -52,9 +55,9 @@ const (
// PagerDutyAlert is the AlertType for the pagerduty alerting provider
PagerDutyAlert AlertType = "pagerduty"
// SlackAlert is the AlertType for the slack alerting provider
SlackAlert AlertType = "slack"
// TwilioAlert is the AlertType for the twilio alerting provider
TwilioAlert AlertType = "twilio"
// CustomAlert is the AlertType for the custom alerting provider
CustomAlert AlertType = "custom"
)

View File

@@ -23,7 +23,7 @@ const (
// DNSRCodePlaceholder is a placeholder for DNS_RCODE
//
// Values that could be NOERROR, FORMERR, SERVFAIL, NXDOMAIN, NOTIMP and REFUSED
// Values that could replace the placeholder: NOERROR, FORMERR, SERVFAIL, NXDOMAIN, NOTIMP, REFUSED
DNSRCodePlaceholder = "[DNS_RCODE]"
// ResponseTimePlaceholder is a placeholder for the request response time, in milliseconds.
@@ -51,14 +51,19 @@ const (
// Usage: len([BODY].articles) == 10, len([BODY].name) > 5
LengthFunctionPrefix = "len("
// HasFunctionPrefix is the prefix for the has function
//
// Usage: has([BODY].errors) == true
HasFunctionPrefix = "has("
// PatternFunctionPrefix is the prefix for the pattern function
//
// Usage: pat(192.168.*.*)
// Usage: [IP] == pat(192.168.*.*)
PatternFunctionPrefix = "pat("
// AnyFunctionPrefix is the prefix for the any function
//
// Usage: any(1.1.1.1, 1.0.0.1)
// Usage: [IP] == any(1.1.1.1, 1.0.0.1)
AnyFunctionPrefix = "any("
// FunctionSuffix is the suffix for all functions
@@ -123,6 +128,12 @@ func (c Condition) evaluate(result *Result) bool {
return success
}
// hasBodyPlaceholder reports whether the condition references the BodyPlaceholder.
//
// It is used to decide whether the response body has to be read at all.
func (c Condition) hasBodyPlaceholder() bool {
	raw := string(c)
	return strings.Contains(raw, BodyPlaceholder)
}
// isEqual compares two strings.
//
// Supports the pattern and the any functions.
@@ -181,7 +192,7 @@ func isEqual(first, second string) bool {
func sanitizeAndResolve(elements []string, result *Result) ([]string, []string) {
parameters := make([]string, len(elements))
resolvedParameters := make([]string, len(elements))
body := strings.TrimSpace(string(result.Body))
body := strings.TrimSpace(string(result.body))
for i, element := range elements {
element = strings.TrimSpace(element)
parameters[i] = element
@@ -203,26 +214,39 @@ func sanitizeAndResolve(elements []string, result *Result) ([]string, []string)
default:
// if contains the BodyPlaceholder, then evaluate json path
if strings.Contains(element, BodyPlaceholder) {
wantLength := false
checkingForLength := false
checkingForExistence := false
if strings.HasPrefix(element, LengthFunctionPrefix) && strings.HasSuffix(element, FunctionSuffix) {
wantLength = true
checkingForLength = true
element = strings.TrimSuffix(strings.TrimPrefix(element, LengthFunctionPrefix), FunctionSuffix)
}
resolvedElement, resolvedElementLength, err := jsonpath.Eval(strings.TrimPrefix(element, BodyPlaceholder+"."), result.Body)
if err != nil {
if err.Error() != "unexpected end of JSON input" {
result.Errors = append(result.Errors, err.Error())
}
if wantLength {
element = LengthFunctionPrefix + element + FunctionSuffix + " " + InvalidConditionElementSuffix
if strings.HasPrefix(element, HasFunctionPrefix) && strings.HasSuffix(element, FunctionSuffix) {
checkingForExistence = true
element = strings.TrimSuffix(strings.TrimPrefix(element, HasFunctionPrefix), FunctionSuffix)
}
resolvedElement, resolvedElementLength, err := jsonpath.Eval(strings.TrimPrefix(element, BodyPlaceholder+"."), result.body)
if checkingForExistence {
if err != nil {
element = "false"
} else {
element = element + " " + InvalidConditionElementSuffix
element = "true"
}
} else {
if wantLength {
element = strconv.Itoa(resolvedElementLength)
if err != nil {
if err.Error() != "unexpected end of JSON input" {
result.Errors = append(result.Errors, err.Error())
}
if checkingForLength {
element = LengthFunctionPrefix + element + FunctionSuffix + " " + InvalidConditionElementSuffix
} else {
element = element + " " + InvalidConditionElementSuffix
}
} else {
element = resolvedElement
if checkingForLength {
element = strconv.Itoa(resolvedElementLength)
} else {
element = resolvedElement
}
}
}
}

View File

@@ -5,7 +5,7 @@ import "testing"
func BenchmarkCondition_evaluateWithBodyStringAny(b *testing.B) {
condition := Condition("[BODY].name == any(john.doe, jane.doe)")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
result := &Result{body: []byte("{\"name\": \"john.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()
@@ -14,7 +14,7 @@ func BenchmarkCondition_evaluateWithBodyStringAny(b *testing.B) {
func BenchmarkCondition_evaluateWithBodyStringAnyFailure(b *testing.B) {
condition := Condition("[BODY].name == any(john.doe, jane.doe)")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"bob.doe\"}")}
result := &Result{body: []byte("{\"name\": \"bob.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()
@@ -23,7 +23,7 @@ func BenchmarkCondition_evaluateWithBodyStringAnyFailure(b *testing.B) {
func BenchmarkCondition_evaluateWithBodyString(b *testing.B) {
condition := Condition("[BODY].name == john.doe")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
result := &Result{body: []byte("{\"name\": \"john.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()
@@ -32,7 +32,7 @@ func BenchmarkCondition_evaluateWithBodyString(b *testing.B) {
func BenchmarkCondition_evaluateWithBodyStringFailure(b *testing.B) {
condition := Condition("[BODY].name == john.doe")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"bob.doe\"}")}
result := &Result{body: []byte("{\"name\": \"bob.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()
@@ -41,7 +41,7 @@ func BenchmarkCondition_evaluateWithBodyStringFailure(b *testing.B) {
func BenchmarkCondition_evaluateWithBodyStringLen(b *testing.B) {
condition := Condition("len([BODY].name) == 8")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
result := &Result{body: []byte("{\"name\": \"john.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()
@@ -50,7 +50,7 @@ func BenchmarkCondition_evaluateWithBodyStringLen(b *testing.B) {
func BenchmarkCondition_evaluateWithBodyStringLenFailure(b *testing.B) {
condition := Condition("len([BODY].name) == 8")
for n := 0; n < b.N; n++ {
result := &Result{Body: []byte("{\"name\": \"bob.doe\"}")}
result := &Result{body: []byte("{\"name\": \"bob.doe\"}")}
condition.evaluate(result)
}
b.ReportAllocs()

File diff suppressed because it is too large Load Diff

View File

@@ -60,26 +60,26 @@ func (d *DNS) query(url string, result *Result) {
switch rr.Header().Rrtype {
case dns.TypeA:
if a, ok := rr.(*dns.A); ok {
result.Body = []byte(a.A.String())
result.body = []byte(a.A.String())
}
case dns.TypeAAAA:
if aaaa, ok := rr.(*dns.AAAA); ok {
result.Body = []byte(aaaa.AAAA.String())
result.body = []byte(aaaa.AAAA.String())
}
case dns.TypeCNAME:
if cname, ok := rr.(*dns.CNAME); ok {
result.Body = []byte(cname.Target)
result.body = []byte(cname.Target)
}
case dns.TypeMX:
if mx, ok := rr.(*dns.MX); ok {
result.Body = []byte(mx.Mx)
result.body = []byte(mx.Mx)
}
case dns.TypeNS:
if ns, ok := rr.(*dns.NS); ok {
result.Body = []byte(ns.Ns)
result.body = []byte(ns.Ns)
}
default:
result.Body = []byte("query type is not supported yet")
result.body = []byte("query type is not supported yet")
}
}
}

View File

@@ -91,12 +91,12 @@ func TestIntegrationQuery(t *testing.T) {
if test.inputDNS.QueryType == "NS" {
// Because there are often multiple nameservers backing a single domain, we'll only look at the suffix
if !pattern.Match(test.expectedBody, string(result.Body)) {
t.Errorf("got %s, expected result %s,", string(result.Body), test.expectedBody)
if !pattern.Match(test.expectedBody, string(result.body)) {
t.Errorf("got %s, expected result %s,", string(result.body), test.expectedBody)
}
} else {
if string(result.Body) != test.expectedBody {
t.Errorf("got %s, expected result %s,", string(result.Body), test.expectedBody)
if string(result.body) != test.expectedBody {
t.Errorf("got %s, expected result %s,", string(result.body), test.expectedBody)
}
}
})

View File

@@ -12,10 +12,7 @@ type Result struct {
// DNSRCode is the response code of a DNS query in a human readable format
DNSRCode string `json:"-"`
// Body is the response body
Body []byte `json:"-"`
// Hostname extracted from the Service URL
// Hostname extracted from Service.URL
Hostname string `json:"hostname"`
// IP resolved from the Service URL
@@ -41,4 +38,11 @@ type Result struct {
// CertificateExpiration is the duration before the certificate expires
CertificateExpiration time.Duration `json:"-"`
// body is the response body
//
// Note that this variable is only used during the evaluation of a service's health.
// This means that the call Service.EvaluateHealth both populates the body (if necessary)
// and sets it to nil after the evaluation has been completed.
body []byte
}

View File

@@ -58,22 +58,10 @@ func NewServiceStatus(service *Service) *ServiceStatus {
}
}
// ShallowCopy creates a shallow copy of ServiceStatus
func (ss *ServiceStatus) ShallowCopy() *ServiceStatus {
return &ServiceStatus{
Name: ss.Name,
Group: ss.Group,
Key: ss.Key,
Results: ss.Results,
Events: ss.Events,
Uptime: ss.Uptime,
}
}
// WithResultPagination makes a shallow copy of the ServiceStatus with only the results
// WithResultPagination returns a shallow copy of the ServiceStatus with only the results
// within the range defined by the page and pageSize parameters
func (ss *ServiceStatus) WithResultPagination(page, pageSize int) *ServiceStatus {
shallowCopy := ss.ShallowCopy()
func (ss ServiceStatus) WithResultPagination(page, pageSize int) *ServiceStatus {
shallowCopy := ss
numberOfResults := len(shallowCopy.Results)
start := numberOfResults - (page * pageSize)
end := numberOfResults - ((page - 1) * pageSize)
@@ -90,7 +78,7 @@ func (ss *ServiceStatus) WithResultPagination(page, pageSize int) *ServiceStatus
} else {
shallowCopy.Results = shallowCopy.Results[start:end]
}
return shallowCopy
return &shallowCopy
}
// AddResult adds a Result to ServiceStatus.Results and makes sure that there are

View File

@@ -0,0 +1,92 @@
package core
import (
"testing"
"time"
)
// Shared fixtures for the tests and benchmarks of this package.
var (
// Conditions attached to testService; the fixture results below carry one
// ConditionResult per condition, in the same order.
firstCondition = Condition("[STATUS] == 200")
secondCondition = Condition("[RESPONSE_TIME] < 500")
thirdCondition = Condition("[CERTIFICATE_EXPIRATION] < 72h")
// timestamp is captured once at package initialization and shared by both fixture results.
timestamp = time.Now()
// testService is a fixture service configured with the three conditions above and no alerts.
testService = Service{
Name: "name",
Group: "group",
URL: "https://example.org/what/ever",
Method: "GET",
Body: "body",
Interval: 30 * time.Second,
Conditions: []*Condition{&firstCondition, &secondCondition, &thirdCondition},
Alerts: nil,
Insecure: false,
NumberOfFailuresInARow: 0,
NumberOfSuccessesInARow: 0,
}
// testSuccessfulResult is a fixture result flagged as successful, with no errors
// and every condition result marked as a success.
testSuccessfulResult = Result{
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
body: []byte("body"),
Errors: nil,
Connected: true,
Success: true,
Timestamp: timestamp,
Duration: 150 * time.Millisecond,
CertificateExpiration: 10 * time.Hour,
ConditionResults: []*ConditionResult{
{
Condition: "[STATUS] == 200",
Success: true,
},
{
Condition: "[RESPONSE_TIME] < 500",
Success: true,
},
{
Condition: "[CERTIFICATE_EXPIRATION] < 72h",
Success: true,
},
},
}
// testUnsuccessfulResult is a fixture result flagged as unsuccessful, carrying two
// errors and with only the status condition result marked as a success.
testUnsuccessfulResult = Result{
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
body: []byte("body"),
Errors: []string{"error-1", "error-2"},
Connected: true,
Success: false,
Timestamp: timestamp,
Duration: 750 * time.Millisecond,
CertificateExpiration: 10 * time.Hour,
ConditionResults: []*ConditionResult{
{
Condition: "[STATUS] == 200",
Success: true,
},
{
Condition: "[RESPONSE_TIME] < 500",
Success: false,
},
{
Condition: "[CERTIFICATE_EXPIRATION] < 72h",
Success: false,
},
},
}
)
func BenchmarkServiceStatus_WithResultPagination(b *testing.B) {
service := &testService
serviceStatus := NewServiceStatus(service)
for i := 0; i < MaximumNumberOfResults; i++ {
serviceStatus.AddResult(&testSuccessfulResult)
}
for n := 0; n < b.N; n++ {
serviceStatus.WithResultPagination(1, 20)
}
b.ReportAllocs()
}

View File

@@ -26,7 +26,7 @@ func TestServiceStatus_AddResult(t *testing.T) {
serviceStatus.AddResult(&Result{Timestamp: time.Now()})
}
if len(serviceStatus.Results) != MaximumNumberOfResults {
t.Errorf("expected serviceStatus.Results to not exceed a length of 20")
t.Errorf("expected serviceStatus.Results to not exceed a length of %d", MaximumNumberOfResults)
}
}

View File

@@ -80,7 +80,7 @@ type Service struct {
// NumberOfFailuresInARow is the number of unsuccessful evaluations in a row
NumberOfFailuresInARow int
// NumberOfFailuresInARow is the number of successful evaluations in a row
// NumberOfSuccessesInARow is the number of successful evaluations in a row
NumberOfSuccessesInARow int
}
@@ -149,6 +149,8 @@ func (service *Service) EvaluateHealth() *Result {
}
}
result.Timestamp = time.Now()
// No need to keep the body after the service has been evaluated
result.body = nil
return result
}
@@ -220,9 +222,12 @@ func (service *Service) call(result *Result) {
}
result.HTTPStatus = response.StatusCode
result.Connected = response.StatusCode > 0
result.Body, err = ioutil.ReadAll(response.Body)
if err != nil {
result.Errors = append(result.Errors, err.Error())
// Only read the body if there's a condition that uses the BodyPlaceholder
if service.needsToReadBody() {
result.body, err = ioutil.ReadAll(response.Body)
if err != nil {
result.Errors = append(result.Errors, err.Error())
}
}
}
}
@@ -247,3 +252,13 @@ func (service *Service) buildHTTPRequest() *http.Request {
}
return request
}
// needsToReadBody reports whether at least one of the service's conditions
// references the response body, in which case the body has to be read.
func (service *Service) needsToReadBody() bool {
	for i := range service.Conditions {
		if service.Conditions[i].hasBodyPlaceholder() {
			return true
		}
	}
	return false
}

View File

@@ -10,7 +10,7 @@ import (
func TestService_ValidateAndSetDefaults(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Alerts: []*Alert{{Type: PagerDutyAlert}},
@@ -94,7 +94,7 @@ func TestService_ValidateAndSetDefaultsWithDNS(t *testing.T) {
func TestService_GetAlertsTriggered(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Alerts: []*Alert{{Type: PagerDutyAlert, Enabled: true}},
@@ -118,7 +118,7 @@ func TestService_GetAlertsTriggered(t *testing.T) {
func TestService_buildHTTPRequest(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
}
@@ -138,7 +138,7 @@ func TestService_buildHTTPRequest(t *testing.T) {
func TestService_buildHTTPRequestWithCustomUserAgent(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Headers: map[string]string{
@@ -161,7 +161,7 @@ func TestService_buildHTTPRequestWithCustomUserAgent(t *testing.T) {
func TestService_buildHTTPRequestWithHostHeader(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Method: "POST",
Conditions: []*Condition{&condition},
@@ -182,13 +182,13 @@ func TestService_buildHTTPRequestWithHostHeader(t *testing.T) {
func TestService_buildHTTPRequestWithGraphQLEnabled(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-graphql",
URL: "https://twinnation.org/graphql",
Method: "POST",
Conditions: []*Condition{&condition},
GraphQL: true,
Body: `{
user(gender: "female") {
users(gender: "female") {
id
name
gender
@@ -206,16 +206,17 @@ func TestService_buildHTTPRequestWithGraphQLEnabled(t *testing.T) {
}
body, _ := ioutil.ReadAll(request.Body)
if !strings.HasPrefix(string(body), "{\"query\":") {
t.Error("request.Body should've started with '{\"query\":', but it didn't:", string(body))
t.Error("request.body should've started with '{\"query\":', but it didn't:", string(body))
}
}
func TestIntegrationEvaluateHealth(t *testing.T) {
condition := Condition("[STATUS] == 200")
bodyCondition := Condition("[BODY].status == UP")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
Conditions: []*Condition{&condition, &bodyCondition},
}
result := service.EvaluateHealth()
if !result.ConditionResults[0].Success {
@@ -232,7 +233,7 @@ func TestIntegrationEvaluateHealth(t *testing.T) {
func TestIntegrationEvaluateHealthWithFailure(t *testing.T) {
condition := Condition("[STATUS] == 500")
service := Service{
Name: "TwiNNatioN",
Name: "twinnation-health",
URL: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
}
@@ -252,7 +253,7 @@ func TestIntegrationEvaluateHealthForDNS(t *testing.T) {
conditionSuccess := Condition("[DNS_RCODE] == NOERROR")
conditionBody := Condition("[BODY] == 93.184.216.34")
service := Service{
Name: "TwiNNatioN",
Name: "example",
URL: "8.8.8.8",
DNS: &DNS{
QueryType: "A",
@@ -275,7 +276,7 @@ func TestIntegrationEvaluateHealthForDNS(t *testing.T) {
func TestIntegrationEvaluateHealthForICMP(t *testing.T) {
conditionSuccess := Condition("[CONNECTED] == true")
service := Service{
Name: "ICMP test",
Name: "icmp-test",
URL: "icmp://127.0.0.1",
Conditions: []*Condition{&conditionSuccess},
}
@@ -294,7 +295,7 @@ func TestIntegrationEvaluateHealthForICMP(t *testing.T) {
func TestService_getIP(t *testing.T) {
conditionSuccess := Condition("[CONNECTED] == true")
service := Service{
Name: "Invalid URL test",
Name: "invalid-url-test",
URL: "",
Conditions: []*Condition{&conditionSuccess},
}
@@ -304,3 +305,27 @@ func TestService_getIP(t *testing.T) {
t.Error("service.getIP(result) should've thrown an error because the URL is invalid, thus cannot be parsed")
}
}
// TestService_NeedsToReadBody verifies that needsToReadBody only returns true
// when at least one condition references the body, regardless of its position
// in the list of conditions.
func TestService_NeedsToReadBody(t *testing.T) {
	status := Condition("[STATUS] == 200")
	body := Condition("[BODY].status == UP")
	bodyLength := Condition("len([BODY].tags) > 0")
	scenarios := []struct {
		conditions []*Condition
		expected   bool
	}{
		{conditions: []*Condition{&status}, expected: false},
		{conditions: []*Condition{&body}, expected: true},
		{conditions: []*Condition{&bodyLength}, expected: true},
		{conditions: []*Condition{&status, &body}, expected: true},
		{conditions: []*Condition{&body, &status}, expected: true},
		{conditions: []*Condition{&bodyLength, &status}, expected: true},
	}
	for _, scenario := range scenarios {
		service := &Service{Conditions: scenario.conditions}
		actual := service.needsToReadBody()
		if actual == scenario.expected {
			continue
		}
		if actual {
			t.Error("expected false, got true")
		} else {
			t.Error("expected true, got false")
		}
	}
}

View File

@@ -1,15 +1,10 @@
package core
import (
"log"
"time"
)
const (
// RFC3339WithoutMinutesAndSeconds is the format defined by RFC3339 (see time.RFC3339) but with the minutes
// and seconds hardcoded to 0.
RFC3339WithoutMinutesAndSeconds = "2006-01-02T15:00:00Z07:00"
numberOfHoursInTenDays = 10 * 24
sevenDays = 7 * 24 * time.Hour
)
@@ -25,47 +20,45 @@ type Uptime struct {
// LastHour is the uptime percentage over the past hour
LastHour float64 `json:"1h"`
// SuccessCountPerHour is a map containing the number of successes per hour, per timestamp following the
// custom RFC3339WithoutMinutesAndSeconds format
SuccessCountPerHour map[string]uint64 `json:"-"`
// SuccessfulExecutionsPerHour is a map containing the number of successes (value)
// for each hourly unix timestamp (key)
SuccessfulExecutionsPerHour map[int64]uint64 `json:"-"`
// TotalCountPerHour is a map containing the total number of checks per hour, per timestamp following the
// custom RFC3339WithoutMinutesAndSeconds format
TotalCountPerHour map[string]uint64 `json:"-"`
// TotalExecutionsPerHour is a map containing the total number of checks (value)
// for each hourly unix timestamp (key)
TotalExecutionsPerHour map[int64]uint64 `json:"-"`
}
// NewUptime creates a new Uptime
func NewUptime() *Uptime {
return &Uptime{
SuccessCountPerHour: make(map[string]uint64),
TotalCountPerHour: make(map[string]uint64),
SuccessfulExecutionsPerHour: make(map[int64]uint64),
TotalExecutionsPerHour: make(map[int64]uint64),
}
}
// ProcessResult processes the result by extracting the relevant information from the result and recalculating the uptime
// if necessary
func (uptime *Uptime) ProcessResult(result *Result) {
timestampDateWithHour := result.Timestamp.Format(RFC3339WithoutMinutesAndSeconds)
if result.Success {
uptime.SuccessCountPerHour[timestampDateWithHour]++
if uptime.SuccessfulExecutionsPerHour == nil || uptime.TotalExecutionsPerHour == nil {
uptime.SuccessfulExecutionsPerHour = make(map[int64]uint64)
uptime.TotalExecutionsPerHour = make(map[int64]uint64)
}
uptime.TotalCountPerHour[timestampDateWithHour]++
unixTimestampFlooredAtHour := result.Timestamp.Unix() - (result.Timestamp.Unix() % 3600)
if result.Success {
uptime.SuccessfulExecutionsPerHour[unixTimestampFlooredAtHour]++
}
uptime.TotalExecutionsPerHour[unixTimestampFlooredAtHour]++
// Clean up only when we're starting to have too many useless keys
// Note that this is only triggered when there are more entries than there should be after
// 10 days, despite the fact that we are deleting everything that's older than 7 days.
// This is to prevent re-iterating on every `ProcessResult` as soon as the uptime has been logged for 7 days.
if len(uptime.TotalCountPerHour) > numberOfHoursInTenDays {
sevenDaysAgo := time.Now().Add(-(sevenDays + time.Hour))
for k := range uptime.TotalCountPerHour {
dateWithHour, err := time.Parse(time.RFC3339, k)
if err != nil {
// This shouldn't happen, but we'll log it in case it does happen
log.Println("[uptime][ProcessResult] Failed to parse programmatically generated timestamp:", err.Error())
continue
}
if sevenDaysAgo.Unix() > dateWithHour.Unix() {
delete(uptime.TotalCountPerHour, k)
delete(uptime.SuccessCountPerHour, k)
if len(uptime.TotalExecutionsPerHour) > numberOfHoursInTenDays {
sevenDaysAgo := time.Now().Add(-(sevenDays + time.Hour)).Unix()
for hourlyUnixTimestamp := range uptime.TotalExecutionsPerHour {
if sevenDaysAgo > hourlyUnixTimestamp {
delete(uptime.TotalExecutionsPerHour, hourlyUnixTimestamp)
delete(uptime.SuccessfulExecutionsPerHour, hourlyUnixTimestamp)
}
}
}
@@ -92,9 +85,9 @@ func (uptime *Uptime) recalculate() {
// The oldest uptime bracket starts 7 days ago, so we'll start from there
timestamp := now.Add(-sevenDays)
for now.Sub(timestamp) >= 0 {
timestampDateWithHour := timestamp.Format(RFC3339WithoutMinutesAndSeconds)
successCountForTimestamp := uptime.SuccessCountPerHour[timestampDateWithHour]
totalCountForTimestamp := uptime.TotalCountPerHour[timestampDateWithHour]
hourlyUnixTimestamp := timestamp.Unix() - (timestamp.Unix() % 3600)
successCountForTimestamp := uptime.SuccessfulExecutionsPerHour[hourlyUnixTimestamp]
totalCountForTimestamp := uptime.TotalExecutionsPerHour[hourlyUnixTimestamp]
uptimeBrackets["7d_success"] += successCountForTimestamp
uptimeBrackets["7d_total"] += totalCountForTimestamp
if now.Sub(timestamp) <= 24*time.Hour {

View File

@@ -51,10 +51,9 @@ func TestServiceStatus_AddResultUptimeIsCleaningUpAfterItself(t *testing.T) {
timestamp := now.Add(-12 * 24 * time.Hour)
for timestamp.Unix() <= now.Unix() {
serviceStatus.AddResult(&Result{Timestamp: timestamp, Success: true})
if len(serviceStatus.Uptime.SuccessCountPerHour) > numberOfHoursInTenDays {
t.Errorf("At no point in time should there be more than %d entries in serviceStatus.SuccessCountPerHour", numberOfHoursInTenDays)
if len(serviceStatus.Uptime.SuccessfulExecutionsPerHour) > numberOfHoursInTenDays {
t.Errorf("At no point in time should there be more than %d entries in serviceStatus.SuccessfulExecutionsPerHour, but there are %d", numberOfHoursInTenDays, len(serviceStatus.Uptime.SuccessfulExecutionsPerHour))
}
//fmt.Printf("timestamp=%s; uptimeDuringLastHour=%f; timeAgo=%s\n", timestamp.Format(time.RFC3339), serviceStatus.UptimeDuringLastHour, time.Since(timestamp))
if now.Sub(timestamp) > time.Hour && serviceStatus.Uptime.LastHour != 0 {
t.Error("most recent timestamp > 1h ago, expected serviceStatus.Uptime.LastHour to be 0, got", serviceStatus.Uptime.LastHour)
}

View File

@@ -20,6 +20,15 @@ func TestEval(t *testing.T) {
}
}
// TestEvalWithInvalidPath ensures that Eval returns an error when the path
// does not exist in the JSON document being evaluated.
func TestEvalWithInvalidPath(t *testing.T) {
	const path = "errors"
	data := []byte(`{}`)
	if _, _, err := Eval(path, data); err == nil {
		t.Error("Expected error, but got", err)
	}
}
func TestEvalWithLongSimpleWalk(t *testing.T) {
path := "long.simple.walk"
data := `{"long": {"simple": {"walk": "value"}}}`

View File

@@ -58,7 +58,7 @@ func (s *Store) GetServiceStatusByKey(key string) *core.ServiceStatus {
if serviceStatus == nil {
return nil
}
return serviceStatus.(*core.ServiceStatus).ShallowCopy()
return serviceStatus.(*core.ServiceStatus)
}
// Insert adds the observed result for the specified service into the store

View File

@@ -33,7 +33,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: nil,
Connected: true,
Success: true,
@@ -59,7 +58,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: []string{"error-1", "error-2"},
Connected: true,
Success: false,
@@ -107,9 +105,6 @@ func TestStore_Insert(t *testing.T) {
if r.DNSRCode != expectedResult.DNSRCode {
t.Errorf("Result at index %d should've had a DNSRCode of %s, but was actually %s", i, expectedResult.DNSRCode, r.DNSRCode)
}
if len(r.Body) != len(expectedResult.Body) {
t.Errorf("Result at index %d should've had a body of length %d, but was actually %d", i, len(expectedResult.Body), len(r.Body))
}
if r.Hostname != expectedResult.Hostname {
t.Errorf("Result at index %d should've had a Hostname of %s, but was actually %s", i, expectedResult.Hostname, r.Hostname)
}

View File

@@ -32,7 +32,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: nil,
Connected: true,
Success: true,
@@ -58,7 +57,6 @@ var (
Hostname: "example.org",
IP: "127.0.0.1",
HTTPStatus: 200,
Body: []byte("body"),
Errors: []string{"error-1", "error-2"},
Connected: true,
Success: false,
@@ -127,8 +125,11 @@ func BenchmarkStore_Insert(b *testing.B) {
for _, scenario := range scenarios {
b.Run(scenario.Name, func(b *testing.B) {
for n := 0; n < b.N; n++ {
scenario.Store.Insert(&testService, &testSuccessfulResult)
scenario.Store.Insert(&testService, &testUnsuccessfulResult)
if n%100 == 0 {
scenario.Store.Insert(&testService, &testSuccessfulResult)
} else {
scenario.Store.Insert(&testService, &testUnsuccessfulResult)
}
}
b.ReportAllocs()
})

View File

@@ -1,7 +1,6 @@
package watchdog
import (
"fmt"
"log"
"sync"
"time"
@@ -37,26 +36,22 @@ func monitor(service *core.Service) {
monitoringMutex.Lock()
}
if cfg.Debug {
log.Printf("[watchdog][monitor] Monitoring serviceName=%s", service.Name)
log.Printf("[watchdog][monitor] Monitoring group=%s; service=%s", service.Group, service.Name)
}
result := service.EvaluateHealth()
metric.PublishMetricsForService(service, result)
UpdateServiceStatuses(service, result)
var extra string
if !result.Success {
extra = fmt.Sprintf("responseBody=%s", result.Body)
}
log.Printf(
"[watchdog][monitor] Monitored serviceName=%s; success=%v; errors=%d; requestDuration=%s; %s",
"[watchdog][monitor] Monitored group=%s; service=%s; success=%v; errors=%d; duration=%s",
service.Group,
service.Name,
result.Success,
len(result.Errors),
result.Duration.Round(time.Millisecond),
extra,
)
HandleAlerting(service, result)
if cfg.Debug {
log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s again", service.Interval, service.Name)
log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring group=%s service=%s again", service.Interval, service.Group, service.Name)
}
if !cfg.DisableMonitoringLock {
monitoringMutex.Unlock()