Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ac5ad9d173 | ||
|
|
7dcd462883 | ||
|
|
5655661778 | ||
|
|
fac7b8551a | ||
|
|
59fed008e0 | ||
|
|
286e8f8590 | ||
|
|
4daf261d95 | ||
|
|
b94f494b24 | ||
|
|
4c72746286 | ||
|
|
ae2c4b1ea9 | ||
|
|
abd49bca8e | ||
|
|
5820a17659 | ||
|
|
a6a8cca427 | ||
|
|
faf478be99 | ||
|
|
f7c2905aa4 | ||
|
|
e283c6eba1 | ||
|
|
39d3459555 | ||
|
|
f0ceebf55d |
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
* text=lf
|
||||
BIN
.github/assets/pagerduty-integration-key.png
vendored
Normal file
BIN
.github/assets/pagerduty-integration-key.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 86 KiB |
49
README.md
49
README.md
@@ -35,7 +35,7 @@ The main features of Gatus are:
|
||||
- **Highly flexible health check conditions**: While checking the response status may be enough for some use cases, Gatus goes much further and allows you to add conditions on the response time, the response body and even the IP address.
|
||||
- **Ability to use Gatus for user acceptance tests**: Thanks to the point above, you can leverage this application to create automated user acceptance tests.
|
||||
- **Very easy to configure**: Not only is the configuration designed to be as readable as possible, it's also extremely easy to add a new service or a new endpoint to monitor.
|
||||
- **Alerting**: While having a pretty visual dashboard is useful to keep track of the state of your application(s), you probably don't want to stare at it all day. Thus, notifications via Slack are supported out of the box with the ability to configure a custom alerting provider for any needs you might have, whether it be a different provider like PagerDuty or a custom application that manages automated rollbacks.
|
||||
- **Alerting**: While having a pretty visual dashboard is useful to keep track of the state of your application(s), you probably don't want to stare at it all day. Thus, notifications via Slack, PagerDuty and Twilio are supported out of the box with the ability to configure a custom alerting provider for any needs you might have, whether it be a different provider or a custom application that manages automated rollbacks.
|
||||
- **Metrics**
|
||||
- **Low resource consumption**: As with most Go applications, the resource footprint that this application requires is negligibly small.
|
||||
|
||||
@@ -94,15 +94,17 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved | `false` |
|
||||
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
|
||||
| `alerting` | Configuration for alerting | `{}` |
|
||||
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
|
||||
| `alerting.pagerduty` | PagerDuty Events API v2 integration key. Used for alerts of type `pagerduty` | `""` |
|
||||
| `alerting.slack` | Configuration for alerts of type `slack` | `""` |
|
||||
| `alerting.slack.webhook-url` | Slack Webhook URL | Required `""` |
|
||||
| `alerting.pagerduty` | Configuration for alerts of type `pagerduty` | `""` |
|
||||
| `alerting.pagerduty.integration-key` | PagerDuty Events API v2 integration key. | Required `""` |
|
||||
| `alerting.twilio` | Settings for alerts of type `twilio` | `""` |
|
||||
| `alerting.twilio.sid` | Twilio account SID | Required `""` |
|
||||
| `alerting.twilio.token` | Twilio auth token | Required `""` |
|
||||
| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` |
|
||||
| `alerting.twilio.to` | Number to send Twilio alerts to | Required `""` |
|
||||
| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` |
|
||||
| `alerting.custom.url` | Custom alerting request url | `""` |
|
||||
| `alerting.custom.url` | Custom alerting request url | Required `""` |
|
||||
| `alerting.custom.body` | Custom alerting request body. | `""` |
|
||||
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
|
||||
|
||||
@@ -111,18 +113,18 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
|
||||
Here are some examples of conditions you can use:
|
||||
|
||||
| Condition | Description | Passing values | Failing values |
|
||||
| -----------------------------| ------------------------------------------------------- | ------------------------ | -------------- |
|
||||
| `[STATUS] == 200` | Status must be equal to 200 | 200 | 201, 404, ... |
|
||||
| `[STATUS] < 300` | Status must be lower than 300 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] <= 299` | Status must be less than or equal to 299 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] > 400` | Status must be greater than 400 | 401, 402, 403, 404 | 400, 200, ... |
|
||||
| `[RESPONSE_TIME] < 500` | Response time must be below 500ms | 100ms, 200ms, 300ms | 500ms, 501ms |
|
||||
| `[BODY] == 1` | The body must be equal to 1 | 1 | Anything else |
|
||||
| `[BODY].data.id == 1` | The jsonpath `$.data.id` is equal to 1 | `{"data":{"id":1}}` | |
|
||||
| `[BODY].data[0].id == 1` | The jsonpath `$.data[0].id` is equal to 1 | `{"data":[{"id":1}]}` | |
|
||||
| `len([BODY].data) > 0` | Array at jsonpath `$.data` has less than 5 elements | `{"data":[{"id":1}]}` | |
|
||||
| `len([BODY].name) == 8` | String at jsonpath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
|
||||
| Condition | Description | Passing values | Failing values |
|
||||
| -----------------------------| ------------------------------------------------------- | -------------------------- | -------------- |
|
||||
| `[STATUS] == 200` | Status must be equal to 200 | 200 | 201, 404, ... |
|
||||
| `[STATUS] < 300` | Status must be lower than 300 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] <= 299` | Status must be less than or equal to 299 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] > 400` | Status must be greater than 400 | 401, 402, 403, 404 | 400, 200, ... |
|
||||
| `[RESPONSE_TIME] < 500` | Response time must be below 500ms | 100ms, 200ms, 300ms | 500ms, 501ms |
|
||||
| `[BODY] == 1` | The body must be equal to 1 | 1 | Anything else |
|
||||
| `[BODY].user.name == john` | JSONPath value of `$.user.name` is equal to `john` | `{"user":{"name":"john"}}` | |
|
||||
| `[BODY].data[0].id == 1` | JSONPath value of `$.data[0].id` is equal to 1 | `{"data":[{"id":1}]}` | |
|
||||
| `len([BODY].data) < 5` | Array at JSONPath `$.data` has fewer than 5 elements | `{"data":[{"id":1}]}` | |
|
||||
| `len([BODY].name) == 8` | String at JSONPath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
|
||||
|
||||
|
||||
### Alerting
|
||||
@@ -133,7 +135,8 @@ Here are some examples of conditions you can use:
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
slack: "https://hooks.slack.com/services/**********/**********/**********"
|
||||
slack:
|
||||
webhook-url: "https://hooks.slack.com/services/**********/**********/**********"
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
@@ -168,7 +171,8 @@ PagerDuty instead.
|
||||
|
||||
```yaml
|
||||
alerting:
|
||||
pagerduty: "********************************"
|
||||
pagerduty:
|
||||
integration-key: "********************************"
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
@@ -259,10 +263,17 @@ services:
|
||||
|
||||
## Docker
|
||||
|
||||
Other than using one of the examples provided in the `examples` folder, you can also try it out locally by
|
||||
creating a configuration file - we'll call it `config.yaml` for this example - and running the following
|
||||
command:
|
||||
```
|
||||
docker run -p 8080:8080 --name gatus twinproduction/gatus
|
||||
docker run -p 8080:8080 --mount type=bind,source="$(pwd)"/config.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
```
|
||||
|
||||
If you're on Windows, replace `"$(pwd)"` with the absolute path to your current directory, e.g.:
|
||||
```
|
||||
docker run -p 8080:8080 --mount type=bind,source=E:/Go/src/github.com/TwinProduction/gatus/config.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
```
|
||||
|
||||
## Running the tests
|
||||
|
||||
|
||||
@@ -1,158 +0,0 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/config"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"log"
|
||||
)
|
||||
|
||||
// Handle takes care of alerts to resolve and alerts to trigger based on result success or failure
|
||||
func Handle(service *core.Service, result *core.Result) {
|
||||
cfg := config.Get()
|
||||
if cfg.Alerting == nil {
|
||||
return
|
||||
}
|
||||
if result.Success {
|
||||
handleAlertsToResolve(service, result, cfg)
|
||||
} else {
|
||||
handleAlertsToTrigger(service, result, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||
service.NumberOfSuccessesInARow = 0
|
||||
service.NumberOfFailuresInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
// If the alert hasn't been triggered, move to the next one
|
||||
if !alert.Enabled || alert.FailureThreshold != service.NumberOfFailuresInARow {
|
||||
continue
|
||||
}
|
||||
if alert.Triggered {
|
||||
if cfg.Debug {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Alert with description='%s' has already been triggered, skipping", alert.Description)
|
||||
}
|
||||
continue
|
||||
}
|
||||
var alertProvider *core.CustomAlertProvider
|
||||
if alert.Type == core.SlackAlert {
|
||||
if len(cfg.Alerting.Slack) > 0 {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Sending Slack alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, false)
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
|
||||
}
|
||||
} else if alert.Type == core.PagerDutyAlert {
|
||||
if len(cfg.Alerting.PagerDuty) > 0 {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Sending PagerDuty alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "trigger", "", service, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Not sending PagerDuty alert despite being triggered, because PagerDuty isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.TwilioAlert {
|
||||
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Sending Twilio alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Not sending Twilio alert despite being triggered, because Twilio config settings missing")
|
||||
}
|
||||
} else if alert.Type == core.CustomAlert {
|
||||
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Sending custom alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = &core.CustomAlertProvider{
|
||||
Url: cfg.Alerting.Custom.Url,
|
||||
Method: cfg.Alerting.Custom.Method,
|
||||
Body: cfg.Alerting.Custom.Body,
|
||||
Headers: cfg.Alerting.Custom.Headers,
|
||||
}
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Not sending custom alert despite being triggered, because there is no custom url configured")
|
||||
}
|
||||
}
|
||||
if alertProvider != nil {
|
||||
// TODO: retry on error
|
||||
var err error
|
||||
if alert.Type == core.PagerDutyAlert {
|
||||
var body []byte
|
||||
body, err = alertProvider.Send(service.Name, alert.Description, true)
|
||||
if err == nil {
|
||||
var response pagerDutyResponse
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Ran into error unmarshaling pager duty response: %s", err.Error())
|
||||
} else {
|
||||
alert.ResolveKey = response.DedupKey
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, err = alertProvider.Send(service.Name, alert.Description, false)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[alerting][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
alert.Triggered = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||
service.NumberOfSuccessesInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
if !alert.Enabled || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow {
|
||||
continue
|
||||
}
|
||||
alert.Triggered = false
|
||||
if !alert.SendOnResolved {
|
||||
continue
|
||||
}
|
||||
var alertProvider *core.CustomAlertProvider
|
||||
if alert.Type == core.SlackAlert {
|
||||
if len(cfg.Alerting.Slack) > 0 {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Sending Slack alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, true)
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Not sending Slack alert despite being resolved, because there is no Slack webhook configured")
|
||||
}
|
||||
} else if alert.Type == core.PagerDutyAlert {
|
||||
if len(cfg.Alerting.PagerDuty) > 0 {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Sending PagerDuty alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "resolve", alert.ResolveKey, service, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Not sending PagerDuty alert despite being resolved, because PagerDuty isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.TwilioAlert {
|
||||
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Sending Twilio alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.CustomAlert {
|
||||
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Sending custom alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = &core.CustomAlertProvider{
|
||||
Url: cfg.Alerting.Custom.Url,
|
||||
Method: cfg.Alerting.Custom.Method,
|
||||
Body: cfg.Alerting.Custom.Body,
|
||||
Headers: cfg.Alerting.Custom.Headers,
|
||||
}
|
||||
} else {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
|
||||
}
|
||||
}
|
||||
if alertProvider != nil {
|
||||
// TODO: retry on error
|
||||
_, err := alertProvider.Send(service.Name, alert.Description, true)
|
||||
if err != nil {
|
||||
log.Printf("[alerting][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
if alert.Type == core.PagerDutyAlert {
|
||||
alert.ResolveKey = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
service.NumberOfFailuresInARow = 0
|
||||
}
|
||||
15
alerting/config.go
Normal file
15
alerting/config.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/pagerduty"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/slack"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/twilio"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Slack *slack.AlertProvider `yaml:"slack"`
|
||||
PagerDuty *pagerduty.AlertProvider `yaml:"pagerduty"`
|
||||
Twilio *twilio.AlertProvider `yaml:"twilio"`
|
||||
Custom *custom.AlertProvider `yaml:"custom"`
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
package alerting
|
||||
|
||||
// pagerDutyResponse holds the fields decoded from the JSON body of a
// PagerDuty response: "status", "message" and "dedup_key".
type pagerDutyResponse struct {
	Status   string `json:"status"`
	Message  string `json:"message"`
	DedupKey string `json:"dedup_key"`
}
|
||||
85
alerting/provider/custom/custom.go
Normal file
85
alerting/provider/custom/custom.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package custom
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/client"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration of a custom alert provider: the URL,
// HTTP method, body and headers of the request sent for an alert.
type AlertProvider struct {
	Url     string            `yaml:"url"`
	Method  string            `yaml:"method,omitempty"`
	Body    string            `yaml:"body,omitempty"`
	Headers map[string]string `yaml:"headers,omitempty"`
}

// IsValid reports whether the provider has a non-empty Url.
func (p *AlertProvider) IsValid() bool {
	return p.Url != ""
}
|
||||
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *AlertProvider {
|
||||
return provider
|
||||
}
|
||||
|
||||
func (provider *AlertProvider) buildRequest(serviceName, alertDescription string, resolved bool) *http.Request {
|
||||
body := provider.Body
|
||||
providerUrl := provider.Url
|
||||
method := provider.Method
|
||||
if strings.Contains(body, "[ALERT_DESCRIPTION]") {
|
||||
body = strings.ReplaceAll(body, "[ALERT_DESCRIPTION]", alertDescription)
|
||||
}
|
||||
if strings.Contains(body, "[SERVICE_NAME]") {
|
||||
body = strings.ReplaceAll(body, "[SERVICE_NAME]", serviceName)
|
||||
}
|
||||
if strings.Contains(body, "[ALERT_TRIGGERED_OR_RESOLVED]") {
|
||||
if resolved {
|
||||
body = strings.ReplaceAll(body, "[ALERT_TRIGGERED_OR_RESOLVED]", "RESOLVED")
|
||||
} else {
|
||||
body = strings.ReplaceAll(body, "[ALERT_TRIGGERED_OR_RESOLVED]", "TRIGGERED")
|
||||
}
|
||||
}
|
||||
if strings.Contains(providerUrl, "[ALERT_DESCRIPTION]") {
|
||||
providerUrl = strings.ReplaceAll(providerUrl, "[ALERT_DESCRIPTION]", alertDescription)
|
||||
}
|
||||
if strings.Contains(providerUrl, "[SERVICE_NAME]") {
|
||||
providerUrl = strings.ReplaceAll(providerUrl, "[SERVICE_NAME]", serviceName)
|
||||
}
|
||||
if strings.Contains(providerUrl, "[ALERT_TRIGGERED_OR_RESOLVED]") {
|
||||
if resolved {
|
||||
providerUrl = strings.ReplaceAll(providerUrl, "[ALERT_TRIGGERED_OR_RESOLVED]", "RESOLVED")
|
||||
} else {
|
||||
providerUrl = strings.ReplaceAll(providerUrl, "[ALERT_TRIGGERED_OR_RESOLVED]", "TRIGGERED")
|
||||
}
|
||||
}
|
||||
if len(method) == 0 {
|
||||
method = "GET"
|
||||
}
|
||||
bodyBuffer := bytes.NewBuffer([]byte(body))
|
||||
request, _ := http.NewRequest(method, providerUrl, bodyBuffer)
|
||||
for k, v := range provider.Headers {
|
||||
request.Header.Set(k, v)
|
||||
}
|
||||
return request
|
||||
}
|
||||
|
||||
// Send a request to the alert provider and return the body
|
||||
func (provider *AlertProvider) Send(serviceName, alertDescription string, resolved bool) ([]byte, error) {
|
||||
request := provider.buildRequest(serviceName, alertDescription, resolved)
|
||||
response, err := client.GetHttpClient().Do(request)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if response.StatusCode > 399 {
|
||||
body, err := ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
|
||||
} else {
|
||||
return nil, fmt.Errorf("call to provider alert returned status code %d: %s", response.StatusCode, string(body))
|
||||
}
|
||||
}
|
||||
return ioutil.ReadAll(response.Body)
|
||||
}
|
||||
@@ -1,16 +1,27 @@
|
||||
package core
|
||||
package custom
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCustomAlertProvider_buildRequestWhenResolved(t *testing.T) {
|
||||
func TestAlertProvider_IsValid(t *testing.T) {
|
||||
invalidProvider := AlertProvider{Url: ""}
|
||||
if invalidProvider.IsValid() {
|
||||
t.Error("provider shouldn't have been valid")
|
||||
}
|
||||
validProvider := AlertProvider{Url: "http://example.com"}
|
||||
if !validProvider.IsValid() {
|
||||
t.Error("provider should've been valid")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertProvider_buildRequestWhenResolved(t *testing.T) {
|
||||
const (
|
||||
ExpectedUrl = "http://example.com/service-name"
|
||||
ExpectedBody = "service-name,alert-description,RESOLVED"
|
||||
)
|
||||
customAlertProvider := &CustomAlertProvider{
|
||||
customAlertProvider := &AlertProvider{
|
||||
Url: "http://example.com/[SERVICE_NAME]",
|
||||
Method: "GET",
|
||||
Body: "[SERVICE_NAME],[ALERT_DESCRIPTION],[ALERT_TRIGGERED_OR_RESOLVED]",
|
||||
@@ -26,12 +37,12 @@ func TestCustomAlertProvider_buildRequestWhenResolved(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCustomAlertProvider_buildRequestWhenTriggered(t *testing.T) {
|
||||
func TestAlertProvider_buildRequestWhenTriggered(t *testing.T) {
|
||||
const (
|
||||
ExpectedUrl = "http://example.com/service-name"
|
||||
ExpectedBody = "service-name,alert-description,TRIGGERED"
|
||||
)
|
||||
customAlertProvider := &CustomAlertProvider{
|
||||
customAlertProvider := &AlertProvider{
|
||||
Url: "http://example.com/[SERVICE_NAME]",
|
||||
Method: "GET",
|
||||
Body: "[SERVICE_NAME],[ALERT_DESCRIPTION],[ALERT_TRIGGERED_OR_RESOLVED]",
|
||||
46
alerting/provider/pagerduty/pagerduty.go
Normal file
46
alerting/provider/pagerduty/pagerduty.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package pagerduty
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration of the PagerDuty alert provider.
type AlertProvider struct {
	IntegrationKey string `yaml:"integration-key"`
}

// IsValid reports whether the integration key is exactly 32 bytes long.
func (p *AlertProvider) IsValid() bool {
	const integrationKeyLength = 32
	return len(p.IntegrationKey) == integrationKeyLength
}
|
||||
|
||||
// https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message, eventAction, resolveKey string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
|
||||
eventAction = "resolve"
|
||||
resolveKey = alert.ResolveKey
|
||||
} else {
|
||||
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
|
||||
eventAction = "trigger"
|
||||
resolveKey = ""
|
||||
}
|
||||
return &custom.AlertProvider{
|
||||
Url: "https://events.pagerduty.com/v2/enqueue",
|
||||
Method: "POST",
|
||||
Body: fmt.Sprintf(`{
|
||||
"routing_key": "%s",
|
||||
"dedup_key": "%s",
|
||||
"event_action": "%s",
|
||||
"payload": {
|
||||
"summary": "%s",
|
||||
"source": "%s",
|
||||
"severity": "critical"
|
||||
}
|
||||
}`, provider.IntegrationKey, resolveKey, eventAction, message, service.Name),
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}
|
||||
}
|
||||
14
alerting/provider/pagerduty/pagerduty_test.go
Normal file
14
alerting/provider/pagerduty/pagerduty_test.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package pagerduty
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestAlertProvider_IsValid(t *testing.T) {
|
||||
invalidProvider := AlertProvider{IntegrationKey: ""}
|
||||
if invalidProvider.IsValid() {
|
||||
t.Error("provider shouldn't have been valid")
|
||||
}
|
||||
validProvider := AlertProvider{IntegrationKey: "00000000000000000000000000000000"}
|
||||
if !validProvider.IsValid() {
|
||||
t.Error("provider should've been valid")
|
||||
}
|
||||
}
|
||||
11
alerting/provider/provider.go
Normal file
11
alerting/provider/provider.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package provider
|
||||
|
||||
import (
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
type AlertProvider interface {
|
||||
IsValid() bool
|
||||
ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider
|
||||
}
|
||||
60
alerting/provider/slack/slack.go
Normal file
60
alerting/provider/slack/slack.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package slack
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration of the Slack alert provider.
type AlertProvider struct {
	WebhookUrl string `yaml:"webhook-url"`
}

// IsValid reports whether the provider has a non-empty webhook URL.
func (p *AlertProvider) IsValid() bool {
	return p.WebhookUrl != ""
}
|
||||
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message string
|
||||
var color string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
|
||||
color = "#36A64F"
|
||||
} else {
|
||||
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.FailureThreshold)
|
||||
color = "#DD0000"
|
||||
}
|
||||
var results string
|
||||
for _, conditionResult := range result.ConditionResults {
|
||||
var prefix string
|
||||
if conditionResult.Success {
|
||||
prefix = ":heavy_check_mark:"
|
||||
} else {
|
||||
prefix = ":x:"
|
||||
}
|
||||
results += fmt.Sprintf("%s - `%s`\n", prefix, conditionResult.Condition)
|
||||
}
|
||||
return &custom.AlertProvider{
|
||||
Url: provider.WebhookUrl,
|
||||
Method: "POST",
|
||||
Body: fmt.Sprintf(`{
|
||||
"text": "",
|
||||
"attachments": [
|
||||
{
|
||||
"title": ":helmet_with_white_cross: Gatus",
|
||||
"text": "%s:\n> %s",
|
||||
"short": false,
|
||||
"color": "%s",
|
||||
"fields": [
|
||||
{
|
||||
"title": "Condition results",
|
||||
"value": "%s",
|
||||
"short": false
|
||||
}
|
||||
]
|
||||
},
|
||||
]
|
||||
}`, message, alert.Description, color, results),
|
||||
Headers: map[string]string{"Content-Type": "application/json"},
|
||||
}
|
||||
}
|
||||
14
alerting/provider/slack/slack_test.go
Normal file
14
alerting/provider/slack/slack_test.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package slack
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestAlertProvider_IsValid(t *testing.T) {
|
||||
invalidProvider := AlertProvider{WebhookUrl: ""}
|
||||
if invalidProvider.IsValid() {
|
||||
t.Error("provider shouldn't have been valid")
|
||||
}
|
||||
validProvider := AlertProvider{WebhookUrl: "http://example.com"}
|
||||
if !validProvider.IsValid() {
|
||||
t.Error("provider should've been valid")
|
||||
}
|
||||
}
|
||||
42
alerting/provider/twilio/twilio.go
Normal file
42
alerting/provider/twilio/twilio.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package twilio
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration of the Twilio alert provider: the
// account SID and token, and the numbers to send from and to.
type AlertProvider struct {
	SID   string `yaml:"sid"`
	Token string `yaml:"token"`
	From  string `yaml:"from"`
	To    string `yaml:"to"`
}

// IsValid reports whether the token, SID, from and to values are all non-empty.
func (p *AlertProvider) IsValid() bool {
	for _, value := range []string{p.Token, p.SID, p.From, p.To} {
		if value == "" {
			return false
		}
	}
	return true
}
|
||||
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
|
||||
} else {
|
||||
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
|
||||
}
|
||||
return &custom.AlertProvider{
|
||||
Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", provider.SID),
|
||||
Method: "POST",
|
||||
Body: url.Values{
|
||||
"To": {provider.To},
|
||||
"From": {provider.From},
|
||||
"Body": {message},
|
||||
}.Encode(),
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Authorization": fmt.Sprintf("Basic %s", base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", provider.SID, provider.Token)))),
|
||||
},
|
||||
}
|
||||
}
|
||||
19
alerting/provider/twilio/twilio_test.go
Normal file
19
alerting/provider/twilio/twilio_test.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package twilio
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTwilioAlertProvider_IsValid(t *testing.T) {
|
||||
invalidProvider := AlertProvider{}
|
||||
if invalidProvider.IsValid() {
|
||||
t.Error("provider shouldn't have been valid")
|
||||
}
|
||||
validProvider := AlertProvider{
|
||||
SID: "1",
|
||||
Token: "1",
|
||||
From: "1",
|
||||
To: "1",
|
||||
}
|
||||
if !validProvider.IsValid() {
|
||||
t.Error("provider should've been valid")
|
||||
}
|
||||
}
|
||||
13
client/client_test.go
Normal file
13
client/client_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package client
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestGetHttpClient(t *testing.T) {
|
||||
if client != nil {
|
||||
t.Error("client should've been nil since it hasn't been called a single time yet")
|
||||
}
|
||||
_ = GetHttpClient()
|
||||
if client == nil {
|
||||
t.Error("client shouldn't have been nil, since it has been called once")
|
||||
}
|
||||
}
|
||||
@@ -1,15 +1,15 @@
|
||||
metrics: true
|
||||
services:
|
||||
- name: twinnation
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s
|
||||
url: https://twinnation.org/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 1000"
|
||||
- name: cat-fact
|
||||
interval: 1m
|
||||
url: "https://cat-fact.herokuapp.com/facts/random"
|
||||
interval: 1m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].deleted == false"
|
||||
|
||||
@@ -2,6 +2,8 @@ package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/TwinProduction/gatus/alerting"
|
||||
"github.com/TwinProduction/gatus/alerting/provider"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
@@ -21,10 +23,10 @@ var (
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Metrics bool `yaml:"metrics"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Alerting *core.AlertingConfig `yaml:"alerting"`
|
||||
Services []*core.Service `yaml:"services"`
|
||||
Metrics bool `yaml:"metrics"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Alerting *alerting.Config `yaml:"alerting"`
|
||||
Services []*core.Service `yaml:"services"`
|
||||
}
|
||||
|
||||
func Get() *Config {
|
||||
@@ -35,7 +37,7 @@ func Get() *Config {
|
||||
}
|
||||
|
||||
func Load(configFile string) error {
|
||||
log.Printf("[config][Load] Attempting to load config from configFile=%s", configFile)
|
||||
log.Printf("[config][Load] Reading configuration from configFile=%s", configFile)
|
||||
cfg, err := readConfigurationFile(configFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
@@ -74,13 +76,79 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
||||
// Parse configuration file
|
||||
err = yaml.Unmarshal(yamlBytes, &config)
|
||||
// Check if the configuration file at least has services.
|
||||
if config == nil || len(config.Services) == 0 {
|
||||
if config == nil || config.Services == nil || len(config.Services) == 0 {
|
||||
err = ErrNoServiceInConfig
|
||||
} else {
|
||||
// Set the default values if they aren't set
|
||||
for _, service := range config.Services {
|
||||
service.Validate()
|
||||
}
|
||||
validateAlertingConfig(config)
|
||||
validateServicesConfig(config)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func validateServicesConfig(config *Config) {
|
||||
for _, service := range config.Services {
|
||||
if config.Debug {
|
||||
log.Printf("[config][validateServicesConfig] Validating service '%s'", service.Name)
|
||||
}
|
||||
service.ValidateAndSetDefaults()
|
||||
}
|
||||
log.Printf("[config][validateServicesConfig] Validated %d services", len(config.Services))
|
||||
}
|
||||
|
||||
func validateAlertingConfig(config *Config) {
|
||||
if config.Alerting == nil {
|
||||
log.Printf("[config][validateAlertingConfig] Alerting is not configured")
|
||||
return
|
||||
}
|
||||
alertTypes := []core.AlertType{
|
||||
core.SlackAlert,
|
||||
core.TwilioAlert,
|
||||
core.PagerDutyAlert,
|
||||
core.CustomAlert,
|
||||
}
|
||||
var validProviders, invalidProviders []core.AlertType
|
||||
for _, alertType := range alertTypes {
|
||||
alertProvider := GetAlertingProviderByAlertType(config, alertType)
|
||||
if alertProvider != nil {
|
||||
if alertProvider.IsValid() {
|
||||
validProviders = append(validProviders, alertType)
|
||||
} else {
|
||||
log.Printf("[config][validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
}
|
||||
} else {
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
}
|
||||
}
|
||||
log.Printf("[config][validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
|
||||
}
|
||||
|
||||
func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider {
|
||||
switch alertType {
|
||||
case core.SlackAlert:
|
||||
if config.Alerting.Slack == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Slack
|
||||
case core.TwilioAlert:
|
||||
if config.Alerting.Twilio == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Twilio
|
||||
case core.PagerDutyAlert:
|
||||
if config.Alerting.PagerDuty == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.PagerDuty
|
||||
case core.CustomAlert:
|
||||
if config.Alerting.Custom == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Custom
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -36,7 +35,6 @@ services:
|
||||
if config.Services[1].Url != "https://api.github.com/healthz" {
|
||||
t.Errorf("URL should have been %s", "https://api.github.com/healthz")
|
||||
}
|
||||
fmt.Println(config.Services[0].Interval)
|
||||
if config.Services[0].Interval != 15*time.Second {
|
||||
t.Errorf("Interval should have been %s", 15*time.Second)
|
||||
}
|
||||
@@ -121,7 +119,10 @@ badconfig:
|
||||
func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
alerting:
|
||||
slack: "http://example.com"
|
||||
slack:
|
||||
webhook-url: "http://example.com"
|
||||
pagerduty:
|
||||
integration-key: "00000000000000000000000000000000"
|
||||
services:
|
||||
- name: twinnation
|
||||
url: https://twinnation.org/actuator/health
|
||||
@@ -143,10 +144,19 @@ services:
|
||||
t.Error("Metrics should've been false by default")
|
||||
}
|
||||
if config.Alerting == nil {
|
||||
t.Fatal("config.AlertingConfig shouldn't have been nil")
|
||||
t.Fatal("config.Alerting shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.Slack != "http://example.com" {
|
||||
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
|
||||
if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() {
|
||||
t.Fatal("Slack alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.Slack.WebhookUrl != "http://example.com" {
|
||||
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack.WebhookUrl)
|
||||
}
|
||||
if config.Alerting.PagerDuty == nil || !config.Alerting.PagerDuty.IsValid() {
|
||||
t.Fatal("PagerDuty alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.PagerDuty.IntegrationKey != "00000000000000000000000000000000" {
|
||||
t.Errorf("PagerDuty integration key should've been %s, but was %s", "00000000000000000000000000000000", config.Alerting.PagerDuty.IntegrationKey)
|
||||
}
|
||||
if len(config.Services) != 1 {
|
||||
t.Error("There should've been 1 service")
|
||||
@@ -179,3 +189,31 @@ services:
|
||||
t.Errorf("The type of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[0].Description)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseAndValidateConfigBytesWithInvalidPagerDutyAlertingConfig checks that
// a PagerDuty integration key rejected by IsValid does not make parsing fail:
// no error is returned, the configuration and its PagerDuty section are
// populated, and the PagerDuty section reports itself as invalid.
func TestParseAndValidateConfigBytesWithInvalidPagerDutyAlertingConfig(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
alerting:
|
||||
pagerduty:
|
||||
integration-key: "INVALID_KEY"
|
||||
services:
|
||||
- name: twinnation
|
||||
url: https://twinnation.org/actuator/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
`))
|
||||
// An invalid provider is not a parsing error.
if err != nil {
|
||||
t.Error("No error should've been returned")
|
||||
}
|
||||
// Fatal (rather than Error) because the checks below dereference config.
if config == nil {
|
||||
t.Fatal("Config shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting == nil {
|
||||
t.Fatal("config.Alerting shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.PagerDuty == nil {
|
||||
t.Fatal("PagerDuty alerting config shouldn't have been nil")
|
||||
}
|
||||
// The section is kept as parsed, but must be flagged as unusable.
if config.Alerting.PagerDuty.IsValid() {
|
||||
t.Fatal("PagerDuty alerting config should've been invalid")
|
||||
}
|
||||
}
|
||||
|
||||
178
core/alerting.go
178
core/alerting.go
@@ -1,178 +0,0 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/client"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type AlertingConfig struct {
|
||||
Slack string `yaml:"slack"`
|
||||
PagerDuty string `yaml:"pagerduty"`
|
||||
Twilio *TwilioAlertProvider `yaml:"twilio"`
|
||||
Custom *CustomAlertProvider `yaml:"custom"`
|
||||
}
|
||||
|
||||
// TwilioAlertProvider holds the four values read from the `sid`, `token`,
// `from` and `to` YAML keys.
type TwilioAlertProvider struct {
	SID   string `yaml:"sid"`
	Token string `yaml:"token"`
	From  string `yaml:"from"`
	To    string `yaml:"to"`
}

// IsValid reports whether Token, SID, From and To are all non-empty.
func (provider *TwilioAlertProvider) IsValid() bool {
	required := []string{provider.Token, provider.SID, provider.From, provider.To}
	for _, value := range required {
		if value == "" {
			return false
		}
	}
	return true
}
|
||||
|
||||
// CustomAlertProvider describes an arbitrary HTTP request to perform when an
// alert is sent. Url and Body may contain the placeholders expanded by
// buildRequest.
type CustomAlertProvider struct {
	Url     string            `yaml:"url"`
	Method  string            `yaml:"method,omitempty"`
	Body    string            `yaml:"body,omitempty"`
	Headers map[string]string `yaml:"headers,omitempty"`
}

// IsValid reports whether the provider has a non-empty Url.
func (provider *CustomAlertProvider) IsValid() bool {
	return len(provider.Url) > 0
}

// buildRequest creates the HTTP request for this provider.
//
// The following placeholders are expanded, in this order, in both the URL and
// the body:
//   - [ALERT_DESCRIPTION]           -> alertDescription
//   - [SERVICE_NAME]                -> serviceName
//   - [ALERT_TRIGGERED_OR_RESOLVED] -> "RESOLVED" if resolved, "TRIGGERED" otherwise
//
// Every entry of Headers is set on the returned request.
func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string, resolved bool) *http.Request {
	status := "TRIGGERED"
	if resolved {
		status = "RESOLVED"
	}
	// ReplaceAll returns its input unchanged when the placeholder is absent, so
	// no prior Contains check is needed. The same expansion serves URL and body.
	expand := func(text string) string {
		text = strings.ReplaceAll(text, "[ALERT_DESCRIPTION]", alertDescription)
		text = strings.ReplaceAll(text, "[SERVICE_NAME]", serviceName)
		return strings.ReplaceAll(text, "[ALERT_TRIGGERED_OR_RESOLVED]", status)
	}
	bodyBuffer := bytes.NewBufferString(expand(provider.Body))
	// NOTE(review): the error from NewRequest is ignored, as it always has been.
	// With an invalid Method or an unparsable expanded URL the request is nil and
	// the header loop below panics; surfacing this requires changing the
	// signature, which is left to a dedicated change.
	request, _ := http.NewRequest(provider.Method, expand(provider.Url), bodyBuffer)
	for name, value := range provider.Headers {
		request.Header.Set(name, value)
	}
	return request
}
|
||||
|
||||
// Send a request to the alert provider and return the body
|
||||
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string, resolved bool) ([]byte, error) {
|
||||
request := provider.buildRequest(serviceName, alertDescription, resolved)
|
||||
response, err := client.GetHttpClient().Do(request)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if response.StatusCode > 399 {
|
||||
body, err := ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
|
||||
} else {
|
||||
return nil, fmt.Errorf("call to provider alert returned status code %d: %s", response.StatusCode, string(body))
|
||||
}
|
||||
}
|
||||
return ioutil.ReadAll(response.Body)
|
||||
}
|
||||
|
||||
// CreateSlackCustomAlertProvider builds a CustomAlertProvider that POSTs a JSON
// message to the given Slack webhook URL.
//
// The message text and attachment color depend on whether the alert was
// resolved or triggered, and one line per condition result is listed in the
// "Condition results" field, prefixed with :heavy_check_mark: or :x:.
func CreateSlackCustomAlertProvider(slackWebHookUrl string, service *Service, alert *Alert, result *Result, resolved bool) *CustomAlertProvider {
|
||||
var message string
|
||||
var color string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessThreshold)
|
||||
color = "#36A64F"
|
||||
} else {
|
||||
message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.FailureThreshold)
|
||||
color = "#DD0000"
|
||||
}
|
||||
// One line per evaluated condition, each terminated by a newline character.
var results string
|
||||
for _, conditionResult := range result.ConditionResults {
|
||||
var prefix string
|
||||
if conditionResult.Success {
|
||||
prefix = ":heavy_check_mark:"
|
||||
} else {
|
||||
prefix = ":x:"
|
||||
}
|
||||
results += fmt.Sprintf("%s - `%s`\n", prefix, conditionResult.Condition)
|
||||
}
|
||||
// NOTE(review): the values below are interpolated into the JSON template
// without escaping, and the template has a trailing comma after the attachment
// object; confirm the receiving endpoint tolerates both.
return &CustomAlertProvider{
|
||||
Url: slackWebHookUrl,
|
||||
Method: "POST",
|
||||
Body: fmt.Sprintf(`{
|
||||
"text": "",
|
||||
"attachments": [
|
||||
{
|
||||
"title": ":helmet_with_white_cross: Gatus",
|
||||
"text": "%s:\n> %s",
|
||||
"short": false,
|
||||
"color": "%s",
|
||||
"fields": [
|
||||
{
|
||||
"title": "Condition results",
|
||||
"value": "%s",
|
||||
"short": false
|
||||
}
|
||||
]
|
||||
},
|
||||
]
|
||||
}`, message, alert.Description, color, results),
|
||||
Headers: map[string]string{"Content-Type": "application/json"},
|
||||
}
|
||||
}
|
||||
|
||||
func CreateTwilioCustomAlertProvider(provider *TwilioAlertProvider, message string) *CustomAlertProvider {
|
||||
return &CustomAlertProvider{
|
||||
Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", provider.SID),
|
||||
Method: "POST",
|
||||
Body: url.Values{
|
||||
"To": {provider.To},
|
||||
"From": {provider.From},
|
||||
"Body": {message},
|
||||
}.Encode(),
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Authorization": fmt.Sprintf("Basic %s", base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", provider.SID, provider.Token)))),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// CreatePagerDutyCustomAlertProvider builds a CustomAlertProvider that POSTs an
// event to the PagerDuty enqueue endpoint.
//
// routingKey, resolveKey and eventAction fill the "routing_key", "dedup_key"
// and "event_action" fields; message becomes the payload summary and the
// service name the payload source. The severity is always "critical".
//
// NOTE(review): values are interpolated into the JSON template without
// escaping — confirm callers never pass text containing quotes or newlines.
//
// https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
|
||||
func CreatePagerDutyCustomAlertProvider(routingKey, eventAction, resolveKey string, service *Service, message string) *CustomAlertProvider {
|
||||
return &CustomAlertProvider{
|
||||
Url: "https://events.pagerduty.com/v2/enqueue",
|
||||
Method: "POST",
|
||||
Body: fmt.Sprintf(`{
|
||||
"routing_key": "%s",
|
||||
"dedup_key": "%s",
|
||||
"event_action": "%s",
|
||||
"payload": {
|
||||
"summary": "%s",
|
||||
"source": "%s",
|
||||
"severity": "critical"
|
||||
}
|
||||
}`, routingKey, resolveKey, eventAction, message, service.Name),
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,8 @@ type Service struct {
|
||||
NumberOfSuccessesInARow int
|
||||
}
|
||||
|
||||
func (service *Service) Validate() {
|
||||
// ValidateAndSetDefaults validates the service's configuration and sets the default value of fields that have one
|
||||
func (service *Service) ValidateAndSetDefaults() {
|
||||
// Set default values
|
||||
if service.Interval == 0 {
|
||||
service.Interval = 1 * time.Minute
|
||||
@@ -83,7 +84,8 @@ func (service *Service) Validate() {
|
||||
}
|
||||
}
|
||||
|
||||
func (service *Service) EvaluateConditions() *Result {
|
||||
// EvaluateHealth sends a request to the service's URL and evaluates the conditions of the service.
|
||||
func (service *Service) EvaluateHealth() *Result {
|
||||
result := &Result{Success: true, Errors: []string{}}
|
||||
service.getIp(result)
|
||||
if len(result.Errors) == 0 {
|
||||
|
||||
@@ -4,14 +4,14 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIntegrationEvaluateConditions(t *testing.T) {
|
||||
func TestIntegrationEvaluateHealth(t *testing.T) {
|
||||
condition := Condition("[STATUS] == 200")
|
||||
service := Service{
|
||||
Name: "TwiNNatioN",
|
||||
Url: "https://twinnation.org/health",
|
||||
Conditions: []*Condition{&condition},
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
@@ -20,14 +20,14 @@ func TestIntegrationEvaluateConditions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegrationEvaluateConditionsWithFailure(t *testing.T) {
|
||||
func TestIntegrationEvaluateHealthWithFailure(t *testing.T) {
|
||||
condition := Condition("[STATUS] == 500")
|
||||
service := Service{
|
||||
Name: "TwiNNatioN",
|
||||
Url: "https://twinnation.org/health",
|
||||
Conditions: []*Condition{&condition},
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
|
||||
74
docs/pagerduty-integration-guide.md
Normal file
74
docs/pagerduty-integration-guide.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# PagerDuty + Gatus Integration Benefits
|
||||
- Notify on-call responders based on alerts sent from Gatus.
|
||||
- Incidents will automatically resolve in PagerDuty when the service that caused the incident in Gatus returns to a healthy state.
|
||||
|
||||
|
||||
# How it Works
|
||||
- Services that do not meet the user-specified conditions and that are configured with alerts of type `pagerduty` will trigger a new incident on the corresponding PagerDuty service when the alert's defined `failure-threshold` has been reached.
|
||||
- Once the unhealthy services have returned to a healthy state for the number of executions defined in `success-threshold`, the previously triggered incident will be automatically resolved.
|
||||
|
||||
|
||||
# Requirements
|
||||
- PagerDuty integrations require an Admin base role for account authorization. If you do not have this role, please reach out to an Admin or Account Owner within your organization to configure the integration.
|
||||
|
||||
|
||||
# Support
|
||||
If you need help with this integration, please create an issue at https://github.com/TwinProduction/gatus/issues
|
||||
|
||||
|
||||
# Integration Walkthrough
|
||||
## In PagerDuty
|
||||
### Integrating With a PagerDuty Service
|
||||
1. From the **Configuration** menu, select **Services**.
|
||||
2. There are two ways to add an integration to a service:
|
||||
* **If you are adding your integration to an existing service**: Click the **name** of the service you want to add the integration to. Then, select the **Integrations** tab and click the **New Integration** button.
|
||||
* **If you are creating a new service for your integration**: Please read our documentation in section [Configuring Services and Integrations](https://support.pagerduty.com/docs/services-and-integrations#section-configuring-services-and-integrations) and follow the steps outlined in the [Create a New Service](https://support.pagerduty.com/docs/services-and-integrations#section-create-a-new-service) section, selecting **Gatus** as the **Integration Type** in step 4. Continue with the In Gatus section (below) once you have finished these steps.
|
||||
3. Enter an **Integration Name** in the format `gatus-service-name` (e.g. `Gatus-Shopping-Cart`) and select **Gatus** from the Integration Type menu.
|
||||
4. Click the **Add Integration** button to save your new integration. You will be redirected to the Integrations tab for your service.
|
||||
5. An **Integration Key** will be generated on this screen. Keep this key saved in a safe place, as it will be used when you configure the integration with **Gatus** in the next section.
|
||||

|
||||
|
||||
|
||||
## In Gatus
|
||||
In your configuration file, you must first specify the integration key at `alerting.pagerduty.integration-key`, like so:
|
||||
```yaml
|
||||
alerting:
|
||||
pagerduty:
|
||||
integration-key: "********************************"
|
||||
```
|
||||
You can now add alerts of type `pagerduty` in the services you've defined, like so:
|
||||
```yaml
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: "https://twinnation.org/health"
|
||||
alerts:
|
||||
- type: pagerduty
|
||||
enabled: true
|
||||
failure-threshold: 3
|
||||
success-threshold: 5
|
||||
description: "healthcheck failed 3 times in a row"
|
||||
send-on-resolved: true
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 300"
|
||||
```
|
||||
|
||||
The sample above will do the following:
|
||||
- Send a request to `https://twinnation.org/health` (`services[].url`) every **30s** (`services[].interval`)
|
||||
- Evaluate the conditions to determine whether the service is "healthy" or not
|
||||
- **If the service is unhealthy (i.e. not all conditions are met) 3 (`services[].alerts[].failure-threshold`) times in a row**: Gatus will create a new incident
|
||||
- **If, after an incident has been triggered, all conditions are met 5 (`services[].alerts[].success-threshold`) times in a row _AND_ `services[].alerts[].send-on-resolved` is set to `true`**: Gatus will resolve the triggered incident
|
||||
|
||||
It is highly recommended to set `services[].alerts[].send-on-resolved` to true for alerts of type `pagerduty`.
|
||||
|
||||
|
||||
# How to Uninstall
|
||||
1. Navigate to the PagerDuty service you'd like to uninstall the Gatus integration from
|
||||
2. Click on the **Integrations** tab
|
||||
3. Click on the **Gatus** integration
|
||||
4. Click on **Delete Integration**
|
||||
|
||||
While the above will prevent incidents from being created, you are also highly encouraged to disable the alerts
|
||||
in your Gatus configuration files or simply remove the integration key from the configuration file.
|
||||
@@ -49,10 +49,10 @@ spec:
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
cpu: 50m
|
||||
cpu: 200m
|
||||
memory: 50M
|
||||
requests:
|
||||
cpu: 20m
|
||||
cpu: 50m
|
||||
memory: 20M
|
||||
volumeMounts:
|
||||
- mountPath: /config
|
||||
|
||||
106
watchdog/alerting.go
Normal file
106
watchdog/alerting.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package watchdog
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"github.com/TwinProduction/gatus/config"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"log"
|
||||
)
|
||||
|
||||
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
|
||||
func HandleAlerting(service *core.Service, result *core.Result) {
|
||||
cfg := config.Get()
|
||||
if cfg.Alerting == nil {
|
||||
return
|
||||
}
|
||||
if result.Success {
|
||||
handleAlertsToResolve(service, result, cfg)
|
||||
} else {
|
||||
handleAlertsToTrigger(service, result, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||
service.NumberOfSuccessesInARow = 0
|
||||
service.NumberOfFailuresInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
// If the alert hasn't been triggered, move to the next one
|
||||
if !alert.Enabled || alert.FailureThreshold != service.NumberOfFailuresInARow {
|
||||
continue
|
||||
}
|
||||
if alert.Triggered {
|
||||
if cfg.Debug {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Alert with description='%s' has already been triggered, skipping", alert.Description)
|
||||
}
|
||||
continue
|
||||
}
|
||||
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
|
||||
if alertProvider != nil && alertProvider.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert with description='%s' has been triggered", alert.Type, alert.Description)
|
||||
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false)
|
||||
// TODO: retry on error
|
||||
var err error
|
||||
// We need to extract the DedupKey from PagerDuty's response
|
||||
if alert.Type == core.PagerDutyAlert {
|
||||
var body []byte
|
||||
body, err = customAlertProvider.Send(service.Name, alert.Description, false)
|
||||
if err == nil {
|
||||
var response pagerDutyResponse
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Ran into error unmarshaling pager duty response: %s", err.Error())
|
||||
} else {
|
||||
alert.ResolveKey = response.DedupKey
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// All other alert types don't need to extract anything from the body, so we can just send the request right away
|
||||
_, err = customAlertProvider.Send(service.Name, alert.Description, false)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
alert.Triggered = true
|
||||
}
|
||||
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being triggered, because the provider wasn't configured properly", alert.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) {
|
||||
service.NumberOfSuccessesInARow++
|
||||
for _, alert := range service.Alerts {
|
||||
if !alert.Enabled || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow {
|
||||
continue
|
||||
}
|
||||
alert.Triggered = false
|
||||
if !alert.SendOnResolved {
|
||||
continue
|
||||
}
|
||||
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
|
||||
if alertProvider != nil && alertProvider.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert with description='%s' has been resolved", alert.Type, alert.Description)
|
||||
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true)
|
||||
// TODO: retry on error
|
||||
_, err := customAlertProvider.Send(service.Name, alert.Description, true)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
if alert.Type == core.PagerDutyAlert {
|
||||
alert.ResolveKey = ""
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being resolved, because the provider wasn't configured properly", alert.Type)
|
||||
}
|
||||
}
|
||||
service.NumberOfFailuresInARow = 0
|
||||
}
|
||||
|
||||
// pagerDutyResponse is the JSON structure decoded from the body returned by
// PagerDuty after an event has been sent.
type pagerDutyResponse struct {
	// Status is decoded from the "status" key.
	Status string `json:"status"`

	// Message is decoded from the "message" key.
	Message string `json:"message"`

	// DedupKey is decoded from the "dedup_key" key.
	DedupKey string `json:"dedup_key"`
}
|
||||
@@ -3,7 +3,6 @@ package watchdog
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting"
|
||||
"github.com/TwinProduction/gatus/config"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"github.com/TwinProduction/gatus/metric"
|
||||
@@ -51,7 +50,7 @@ func monitor(service *core.Service) {
|
||||
if cfg.Debug {
|
||||
log.Printf("[watchdog][monitor] Monitoring serviceName=%s", service.Name)
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
metric.PublishMetricsForService(service, result)
|
||||
serviceResultsMutex.Lock()
|
||||
serviceResults[service.Name] = append(serviceResults[service.Name], result)
|
||||
@@ -71,7 +70,7 @@ func monitor(service *core.Service) {
|
||||
result.Duration.Round(time.Millisecond),
|
||||
extra,
|
||||
)
|
||||
alerting.Handle(service, result)
|
||||
HandleAlerting(service, result)
|
||||
if cfg.Debug {
|
||||
log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s again", service.Interval, service.Name)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user