Compare commits

..

7 Commits

Author SHA1 Message Date
TwinProduction
ac5ad9d173 Work on common provider interface to make adding new providers easier 2020-09-26 14:23:43 -04:00
TwinProduction
7dcd462883 Clean up old code 2020-09-25 18:23:30 -04:00
TwinProduction
5655661778 Add missing provider.go file 2020-09-24 19:54:15 -04:00
TwinProduction
fac7b8551a Start working on implementing common provider interface 2020-09-24 19:52:59 -04:00
TwinProduction
59fed008e0 Rename service's EvaluateConditions() to EvaluateHealth() 2020-09-24 19:49:32 -04:00
TwinProduction
286e8f8590 Add config validation logs on application start 2020-09-22 17:46:40 -04:00
TwinProduction
4daf261d95 Minor update 2020-09-21 10:41:23 -04:00
13 changed files with 193 additions and 95 deletions

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"github.com/TwinProduction/gatus/client"
"github.com/TwinProduction/gatus/core"
"io/ioutil"
"net/http"
"strings"
@@ -20,6 +21,10 @@ func (provider *AlertProvider) IsValid() bool {
return len(provider.Url) > 0
}
// ToCustomAlertProvider returns the receiver itself, unchanged.
// The service, alert, result and resolved arguments are not used here.
// NOTE(review): presumably this method exists so that this provider exposes the
// same ToCustomAlertProvider signature as the other alert providers - confirm
// against the common provider interface.
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *AlertProvider {
return provider
}
func (provider *AlertProvider) buildRequest(serviceName, alertDescription string, resolved bool) *http.Request {
body := provider.Body
providerUrl := provider.Url

View File

@@ -15,7 +15,17 @@ func (provider *AlertProvider) IsValid() bool {
}
// https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
func (provider *AlertProvider) ToCustomAlertProvider(eventAction, resolveKey string, service *core.Service, message string) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message, eventAction, resolveKey string
if resolved {
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
eventAction = "resolve"
resolveKey = alert.ResolveKey
} else {
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
eventAction = "trigger"
resolveKey = ""
}
return &custom.AlertProvider{
Url: "https://events.pagerduty.com/v2/enqueue",
Method: "POST",

View File

@@ -0,0 +1,11 @@
package provider
import (
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
)
// AlertProvider is the interface shared by the alerting providers.
type AlertProvider interface {
// IsValid reports whether the provider's configuration is usable.
IsValid() bool
// ToCustomAlertProvider builds the custom.AlertProvider used to send an alert
// for the given service, alert and result. Callers pass resolved=true when the
// alert is being resolved and resolved=false when it is being triggered.
ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider
}

View File

@@ -4,6 +4,7 @@ import (
"encoding/base64"
"fmt"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/core"
"net/url"
)
@@ -18,7 +19,13 @@ func (provider *AlertProvider) IsValid() bool {
return len(provider.Token) > 0 && len(provider.SID) > 0 && len(provider.From) > 0 && len(provider.To) > 0
}
func (provider *AlertProvider) ToCustomAlertProvider(message string) *custom.AlertProvider {
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
var message string
if resolved {
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
} else {
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
}
return &custom.AlertProvider{
Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", provider.SID),
Method: "POST",

13
client/client_test.go Normal file
View File

@@ -0,0 +1,13 @@
package client
import "testing"
// TestGetHttpClient checks that the package's client variable is nil before
// GetHttpClient has been called, and non-nil once it has been called.
func TestGetHttpClient(t *testing.T) {
	// Before the first call, nothing should have initialized the client.
	if client != nil {
		t.Error("client should've been nil since it hasn't been called a single time yet")
	}
	// The returned value is irrelevant here; only the effect on client matters.
	if GetHttpClient(); client == nil {
		t.Error("client shouldn't have been nil, since it has been called once")
	}
}

View File

@@ -1,15 +1,15 @@
metrics: true
services:
- name: twinnation
url: "https://twinnation.org/health"
interval: 30s
url: https://twinnation.org/health
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 1000"
- name: cat-fact
interval: 1m
url: "https://cat-fact.herokuapp.com/facts/random"
interval: 1m
conditions:
- "[STATUS] == 200"
- "[BODY].deleted == false"

View File

@@ -3,6 +3,7 @@ package config
import (
"errors"
"github.com/TwinProduction/gatus/alerting"
"github.com/TwinProduction/gatus/alerting/provider"
"github.com/TwinProduction/gatus/core"
"gopkg.in/yaml.v2"
"io/ioutil"
@@ -36,7 +37,7 @@ func Get() *Config {
}
func Load(configFile string) error {
log.Printf("[config][Load] Attempting to load config from configFile=%s", configFile)
log.Printf("[config][Load] Reading configuration from configFile=%s", configFile)
cfg, err := readConfigurationFile(configFile)
if err != nil {
if os.IsNotExist(err) {
@@ -75,13 +76,79 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
// Parse configuration file
err = yaml.Unmarshal(yamlBytes, &config)
// Check if the configuration file at least has services.
if config == nil || len(config.Services) == 0 {
if config == nil || config.Services == nil || len(config.Services) == 0 {
err = ErrNoServiceInConfig
} else {
// Set the default values if they aren't set
for _, service := range config.Services {
service.Validate()
}
validateAlertingConfig(config)
validateServicesConfig(config)
}
return
}
// validateServicesConfig calls ValidateAndSetDefaults on every service of the
// given configuration, then logs how many services were processed.
// When config.Debug is set, the name of each service is logged before it is validated.
func validateServicesConfig(config *Config) {
	total := len(config.Services)
	for i := 0; i < total; i++ {
		current := config.Services[i]
		if config.Debug {
			log.Printf("[config][validateServicesConfig] Validating service '%s'", current.Name)
		}
		current.ValidateAndSetDefaults()
	}
	log.Printf("[config][validateServicesConfig] Validated %d services", total)
}
// validateAlertingConfig inspects every known alert type and logs which
// providers are configured with a valid configuration and which are ignored.
// A provider is reported as ignored when it is either absent from the
// configuration or present but failing IsValid(); only the latter case gets
// its own "Ignoring provider" log line.
func validateAlertingConfig(config *Config) {
	if config.Alerting == nil {
		log.Printf("[config][validateAlertingConfig] Alerting is not configured")
		return
	}
	var configured, ignored []core.AlertType
	for _, kind := range []core.AlertType{
		core.SlackAlert,
		core.TwilioAlert,
		core.PagerDutyAlert,
		core.CustomAlert,
	} {
		switch candidate := GetAlertingProviderByAlertType(config, kind); {
		case candidate == nil:
			// Provider not present in the configuration at all.
			ignored = append(ignored, kind)
		case candidate.IsValid():
			configured = append(configured, kind)
		default:
			// Provider present, but its configuration did not pass validation.
			log.Printf("[config][validateAlertingConfig] Ignoring provider=%s because configuration is invalid", kind)
			ignored = append(ignored, kind)
		}
	}
	log.Printf("[config][validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", configured, ignored)
}
// GetAlertingProviderByAlertType returns the alerting provider configured for
// the given alert type.
//
// It returns a nil interface when config is nil, when no alerting section is
// configured, when the provider for alertType is not configured, or when
// alertType is not one of the known alert types. Callers can therefore rely on
// a plain `!= nil` check on the result.
func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider {
	// Alerting is optional in the configuration; without this guard the field
	// accesses below would panic on a nil pointer dereference.
	if config == nil || config.Alerting == nil {
		return nil
	}
	// In every case below, a nil provider pointer must be turned into an
	// explicit nil return: an interface wrapping a typed nil pointer would not
	// compare equal to nil on the caller's side.
	switch alertType {
	case core.SlackAlert:
		if config.Alerting.Slack == nil {
			return nil
		}
		return config.Alerting.Slack
	case core.TwilioAlert:
		if config.Alerting.Twilio == nil {
			return nil
		}
		return config.Alerting.Twilio
	case core.PagerDutyAlert:
		if config.Alerting.PagerDuty == nil {
			return nil
		}
		return config.Alerting.PagerDuty
	case core.CustomAlert:
		if config.Alerting.Custom == nil {
			return nil
		}
		return config.Alerting.Custom
	}
	return nil
}

View File

@@ -1,7 +1,6 @@
package config
import (
"fmt"
"github.com/TwinProduction/gatus/core"
"testing"
"time"
@@ -36,7 +35,6 @@ services:
if config.Services[1].Url != "https://api.github.com/healthz" {
t.Errorf("URL should have been %s", "https://api.github.com/healthz")
}
fmt.Println(config.Services[0].Interval)
if config.Services[0].Interval != 15*time.Second {
t.Errorf("Interval should have been %s", 15*time.Second)
}
@@ -123,6 +121,8 @@ func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
alerting:
slack:
webhook-url: "http://example.com"
pagerduty:
integration-key: "00000000000000000000000000000000"
services:
- name: twinnation
url: https://twinnation.org/actuator/health
@@ -144,10 +144,19 @@ services:
t.Error("Metrics should've been false by default")
}
if config.Alerting == nil {
t.Fatal("config.AlertingConfig shouldn't have been nil")
t.Fatal("config.Alerting shouldn't have been nil")
}
if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() {
t.Fatal("Slack alerting config should've been valid")
}
if config.Alerting.Slack.WebhookUrl != "http://example.com" {
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack.WebhookUrl)
}
if config.Alerting.PagerDuty == nil || !config.Alerting.PagerDuty.IsValid() {
t.Fatal("PagerDuty alerting config should've been valid")
}
if config.Alerting.PagerDuty.IntegrationKey != "00000000000000000000000000000000" {
t.Errorf("PagerDuty integration key should've been %s, but was %s", "00000000000000000000000000000000", config.Alerting.PagerDuty.IntegrationKey)
}
if len(config.Services) != 1 {
t.Error("There should've been 1 service")
@@ -180,3 +189,31 @@ services:
t.Errorf("The type of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[0].Description)
}
}
// TestParseAndValidateConfigBytesWithInvalidPagerDutyAlertingConfig parses a
// configuration whose PagerDuty integration key is "INVALID_KEY" and checks that
// parsing still succeeds, that the PagerDuty provider is present in the parsed
// configuration, and that the provider reports itself as invalid.
// NOTE(review): presumably the key is rejected because of its format (the valid
// case elsewhere in this file uses a 32-character key) - confirm against the
// PagerDuty provider's IsValid implementation.
func TestParseAndValidateConfigBytesWithInvalidPagerDutyAlertingConfig(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(`
alerting:
pagerduty:
integration-key: "INVALID_KEY"
services:
- name: twinnation
url: https://twinnation.org/actuator/health
conditions:
- "[STATUS] == 200"
`))
// An invalid alerting provider must not make the whole configuration fail to load.
if err != nil {
t.Error("No error should've been returned")
}
// The checks below dereference config, so stop immediately if anything is nil.
if config == nil {
t.Fatal("Config shouldn't have been nil")
}
if config.Alerting == nil {
t.Fatal("config.Alerting shouldn't have been nil")
}
if config.Alerting.PagerDuty == nil {
t.Fatal("PagerDuty alerting config shouldn't have been nil")
}
// The provider is kept in the configuration, but must report itself as invalid.
if config.Alerting.PagerDuty.IsValid() {
t.Fatal("PagerDuty alerting config should've been invalid")
}
}

View File

@@ -50,7 +50,8 @@ type Service struct {
NumberOfSuccessesInARow int
}
func (service *Service) Validate() {
// ValidateAndSetDefaults validates the service's configuration and sets the default value of fields that have one
func (service *Service) ValidateAndSetDefaults() {
// Set default values
if service.Interval == 0 {
service.Interval = 1 * time.Minute
@@ -83,7 +84,8 @@ func (service *Service) Validate() {
}
}
func (service *Service) EvaluateConditions() *Result {
// EvaluateHealth sends a request to the service's URL and evaluates the conditions of the service.
func (service *Service) EvaluateHealth() *Result {
result := &Result{Success: true, Errors: []string{}}
service.getIp(result)
if len(result.Errors) == 0 {

View File

@@ -4,14 +4,14 @@ import (
"testing"
)
func TestIntegrationEvaluateConditions(t *testing.T) {
func TestIntegrationEvaluateHealth(t *testing.T) {
condition := Condition("[STATUS] == 200")
service := Service{
Name: "TwiNNatioN",
Url: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
}
result := service.EvaluateConditions()
result := service.EvaluateHealth()
if !result.ConditionResults[0].Success {
t.Errorf("Condition '%s' should have been a success", condition)
}
@@ -20,14 +20,14 @@ func TestIntegrationEvaluateConditions(t *testing.T) {
}
}
func TestIntegrationEvaluateConditionsWithFailure(t *testing.T) {
func TestIntegrationEvaluateHealthWithFailure(t *testing.T) {
condition := Condition("[STATUS] == 500")
service := Service{
Name: "TwiNNatioN",
Url: "https://twinnation.org/health",
Conditions: []*Condition{&condition},
}
result := service.EvaluateConditions()
result := service.EvaluateHealth()
if result.ConditionResults[0].Success {
t.Errorf("Condition '%s' should have been a failure", condition)
}

View File

@@ -49,10 +49,10 @@ spec:
protocol: TCP
resources:
limits:
cpu: 50m
cpu: 200m
memory: 50M
requests:
cpu: 20m
cpu: 50m
memory: 20M
volumeMounts:
- mountPath: /config

View File

@@ -2,8 +2,6 @@ package watchdog
import (
"encoding/json"
"fmt"
"github.com/TwinProduction/gatus/alerting/provider/custom"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/core"
"log"
@@ -36,42 +34,16 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
}
continue
}
var alertProvider *custom.AlertProvider
if alert.Type == core.SlackAlert {
if cfg.Alerting.Slack != nil && cfg.Alerting.Slack.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending Slack alert because alert with description='%s' has been triggered", alert.Description)
alertProvider = cfg.Alerting.Slack.ToCustomAlertProvider(service, alert, result, false)
} else {
log.Printf("[watchdog][handleAlertsToTrigger] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
}
} else if alert.Type == core.PagerDutyAlert {
if cfg.Alerting.PagerDuty != nil && cfg.Alerting.PagerDuty.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending PagerDuty alert because alert with description='%s' has been triggered", alert.Description)
alertProvider = cfg.Alerting.PagerDuty.ToCustomAlertProvider("trigger", "", service, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
} else {
log.Printf("[watchdog][handleAlertsToTrigger] Not sending PagerDuty alert despite being triggered, because PagerDuty isn't configured properly")
}
} else if alert.Type == core.TwilioAlert {
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending Twilio alert because alert with description='%s' has been triggered", alert.Description)
alertProvider = cfg.Alerting.Twilio.ToCustomAlertProvider(fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
} else {
log.Printf("[watchdog][handleAlertsToTrigger] Not sending Twilio alert despite being triggered, because Twilio config settings missing")
}
} else if alert.Type == core.CustomAlert {
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending custom alert because alert with description='%s' has been triggered", alert.Description)
alertProvider = cfg.Alerting.Custom
} else {
log.Printf("[watchdog][handleAlertsToTrigger] Not sending custom alert despite being triggered, because there is no custom url configured")
}
}
if alertProvider != nil {
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
if alertProvider != nil && alertProvider.IsValid() {
log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert with description='%s' has been triggered", alert.Type, alert.Description)
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false)
// TODO: retry on error
var err error
// We need to extract the DedupKey from PagerDuty's response
if alert.Type == core.PagerDutyAlert {
var body []byte
body, err = alertProvider.Send(service.Name, alert.Description, true)
body, err = customAlertProvider.Send(service.Name, alert.Description, false)
if err == nil {
var response pagerDutyResponse
err = json.Unmarshal(body, &response)
@@ -82,13 +54,17 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
}
}
} else {
_, err = alertProvider.Send(service.Name, alert.Description, false)
// All other alert types don't need to extract anything from the body, so we can just send the request right away
_, err = customAlertProvider.Send(service.Name, alert.Description, false)
}
if err != nil {
log.Printf("[watchdog][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error())
} else {
alert.Triggered = true
}
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being triggered, because the provider wasn't configured properly", alert.Type)
}
}
}
@@ -103,44 +79,12 @@ func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *conf
if !alert.SendOnResolved {
continue
}
var alertProvider *custom.AlertProvider
if alert.Type == core.SlackAlert {
if cfg.Alerting.Slack != nil && cfg.Alerting.Slack.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending Slack alert because alert with description='%s' has been resolved", alert.Description)
alertProvider = cfg.Alerting.Slack.ToCustomAlertProvider(service, alert, result, true)
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending Slack alert despite being resolved, because there is no Slack webhook configured")
}
} else if alert.Type == core.PagerDutyAlert {
if cfg.Alerting.PagerDuty != nil && cfg.Alerting.PagerDuty.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending PagerDuty alert because alert with description='%s' has been resolved", alert.Description)
alertProvider = cfg.Alerting.PagerDuty.ToCustomAlertProvider("resolve", alert.ResolveKey, service, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending PagerDuty alert despite being resolved, because PagerDuty isn't configured properly")
}
} else if alert.Type == core.TwilioAlert {
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending Twilio alert because alert with description='%s' has been resolved", alert.Description)
alertProvider = cfg.Alerting.Twilio.ToCustomAlertProvider(fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly")
}
} else if alert.Type == core.CustomAlert {
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending custom alert because alert with description='%s' has been resolved", alert.Description)
alertProvider = &custom.AlertProvider{
Url: cfg.Alerting.Custom.Url,
Method: cfg.Alerting.Custom.Method,
Body: cfg.Alerting.Custom.Body,
Headers: cfg.Alerting.Custom.Headers,
}
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
}
}
if alertProvider != nil {
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
if alertProvider != nil && alertProvider.IsValid() {
log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert with description='%s' has been resolved", alert.Type, alert.Description)
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true)
// TODO: retry on error
_, err := alertProvider.Send(service.Name, alert.Description, true)
_, err := customAlertProvider.Send(service.Name, alert.Description, true)
if err != nil {
log.Printf("[watchdog][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
} else {
@@ -148,6 +92,8 @@ func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *conf
alert.ResolveKey = ""
}
}
} else {
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being resolved, because the provider wasn't configured properly", alert.Type)
}
}
service.NumberOfFailuresInARow = 0

View File

@@ -50,7 +50,7 @@ func monitor(service *core.Service) {
if cfg.Debug {
log.Printf("[watchdog][monitor] Monitoring serviceName=%s", service.Name)
}
result := service.EvaluateConditions()
result := service.EvaluateHealth()
metric.PublishMetricsForService(service, result)
serviceResultsMutex.Lock()
serviceResults[service.Name] = append(serviceResults[service.Name], result)