Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
230c8821ec | ||
|
|
ecef3d9538 | ||
|
|
3ecfe4d322 | ||
|
|
6a3f65db7f | ||
|
|
ed490669b1 | ||
|
|
187b38ee22 | ||
|
|
261fe27b2e | ||
|
|
a29f175cba | ||
|
|
f26dacaf22 | ||
|
|
b4d81a9661 | ||
|
|
81c59f02d8 | ||
|
|
8101646ba5 | ||
|
|
93b7220d5f | ||
|
|
18d3236586 | ||
|
|
bc16ae1794 | ||
|
|
d019942278 | ||
|
|
8c5551f93b | ||
|
|
3b15feaf7d | ||
|
|
326ad2c80d | ||
|
|
b4b9f5c1be | ||
|
|
ac5ad9d173 | ||
|
|
7dcd462883 | ||
|
|
5655661778 | ||
|
|
fac7b8551a | ||
|
|
59fed008e0 | ||
|
|
286e8f8590 | ||
|
|
4daf261d95 |
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@@ -14,13 +14,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Set up Go 1.14
|
||||
- name: Set up Go 1.15
|
||||
uses: actions/setup-go@v1
|
||||
with:
|
||||
go-version: 1.14
|
||||
go-version: 1.15
|
||||
id: go
|
||||
- name: Check out code into the Go module directory
|
||||
uses: actions/checkout@v1
|
||||
uses: actions/checkout@v2
|
||||
- name: Build binary to make sure it works
|
||||
run: go build -mod vendor
|
||||
- name: Test
|
||||
|
||||
114
README.md
114
README.md
@@ -1,13 +1,13 @@
|
||||

|
||||
|
||||

|
||||
[](https://goreportcard.com/report/github.com/TwinProduction/gatus)
|
||||
[](https://goreportcard.com/report/github.com/TwinProduction/gatus)
|
||||
[](https://cloud.docker.com/repository/docker/twinproduction/gatus)
|
||||
|
||||
A service health dashboard in Go that is meant to be used as a docker
|
||||
image with a custom configuration file.
|
||||
|
||||
I personally deploy it in my Kubernetes cluster and have it monitor the status of my
|
||||
I personally deploy it in my Kubernetes cluster and let it monitor the status of my
|
||||
core applications: https://status.twinnation.org/
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@ core applications: https://status.twinnation.org/
|
||||
- [Usage](#usage)
|
||||
- [Configuration](#configuration)
|
||||
- [Conditions](#conditions)
|
||||
- [Placeholders](#placeholders)
|
||||
- [Functions](#functions)
|
||||
- [Alerting](#alerting)
|
||||
- [Configuring Slack alerts](#configuring-slack-alerts)
|
||||
- [Configuring PagerDuty alerts](#configuring-pagerduty-alerts)
|
||||
@@ -27,6 +29,9 @@ core applications: https://status.twinnation.org/
|
||||
- [Using in Production](#using-in-production)
|
||||
- [FAQ](#faq)
|
||||
- [Sending a GraphQL request](#sending-a-graphql-request)
|
||||
- [Recommended interval](#recommended-interval)
|
||||
- [Default timeouts](#default-timeouts)
|
||||
- [Monitoring a TCP service](#monitoring-a-tcp-service)
|
||||
|
||||
|
||||
## Features
|
||||
@@ -75,13 +80,14 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
### Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| ---------------------------------------- | ----------------------------------------------------------------------------- | -------------- |
|
||||
|:---------------------------------------- |:----------------------------------------------------------------------------- |:-------------- |
|
||||
| `debug` | Whether to enable debug logs | `false` |
|
||||
| `metrics` | Whether to expose metrics at /metrics | `false` |
|
||||
| `services` | List of services to monitor | Required `[]` |
|
||||
| `services[].name` | Name of the service. Can be anything. | Required `""` |
|
||||
| `services[].url` | URL to send the request to | Required `""` |
|
||||
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
|
||||
| `services[].insecure` | Whether to skip verifying the server's certificate chain and host name | `false` |
|
||||
| `services[].interval` | Duration to wait between every status check | `60s` |
|
||||
| `services[].method` | Request method | `GET` |
|
||||
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
|
||||
@@ -114,23 +120,48 @@ Note that you can also add environment variables in the configuration file (i.e.
|
||||
Here are some examples of conditions you can use:
|
||||
|
||||
| Condition | Description | Passing values | Failing values |
|
||||
| -----------------------------| ------------------------------------------------------- | -------------------------- | -------------- |
|
||||
|:-----------------------------|:------------------------------------------------------- |:-------------------------- | -------------- |
|
||||
| `[STATUS] == 200` | Status must be equal to 200 | 200 | 201, 404, ... |
|
||||
| `[STATUS] < 300` | Status must lower than 300 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] <= 299` | Status must be less than or equal to 299 | 200, 201, 299 | 301, 302, ... |
|
||||
| `[STATUS] > 400` | Status must be greater than 400 | 401, 402, 403, 404 | 400, 200, ... |
|
||||
| `[CONNECTED] == true` | Connection to host must've been successful | true, false | |
|
||||
| `[RESPONSE_TIME] < 500` | Response time must be below 500ms | 100ms, 200ms, 300ms | 500ms, 501ms |
|
||||
| `[BODY] == 1` | The body must be equal to 1 | 1 | Anything else |
|
||||
| `[IP] == 127.0.0.1` | Target IP must be 127.0.0.1 | 127.0.0.1 | 0.0.0.0 |
|
||||
| `[BODY] == 1` | The body must be equal to 1 | 1 | `{}`, `2`, ... |
|
||||
| `[BODY].user.name == john` | JSONPath value of `$.user.name` is equal to `john` | `{"user":{"name":"john"}}` | |
|
||||
| `[BODY].data[0].id == 1` | JSONPath value of `$.data[0].id` is equal to 1 | `{"data":[{"id":1}]}` | |
|
||||
| `[BODY].age == [BODY].id` | JSONPath value of `$.age` is equal JSONPath `$.id` | `{"age":1,"id":1}` | |
|
||||
| `len([BODY].data) < 5` | Array at JSONPath `$.data` has less than 5 elements | `{"data":[{"id":1}]}` | |
|
||||
| `len([BODY].name) == 8` | String at JSONPath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
|
||||
| `[BODY].name == pat(john*)` | String at JSONPath `$.name` matches pattern `john*` | `{"name":"john.doe"}` | `{"name":"bob"}` |
|
||||
|
||||
|
||||
#### Placeholders
|
||||
|
||||
| Placeholder | Description | Example of resolved value |
|
||||
|:----------------------- |:------------------------------------------------------- |:------------------------- |
|
||||
| `[STATUS]` | Resolves into the HTTP status of the request | 404
|
||||
| `[RESPONSE_TIME]` | Resolves into the response time the request took, in ms | 10
|
||||
| `[IP]` | Resolves into the IP of the target host | 192.168.0.232
|
||||
| `[BODY]` | Resolves into the response body. Supports JSONPath. | `{"name":"john.doe"}`
|
||||
| `[CONNECTED]` | Resolves into whether a connection could be established | `true`
|
||||
|
||||
|
||||
#### Functions
|
||||
|
||||
| Function | Description | Example |
|
||||
|:-----------|:---------------------------------------------------------------------------------------------------------------- |:-------------------------- |
|
||||
| `len` | Returns the length of the object/slice. Works only with the `[BODY]` placeholder. | `len([BODY].username) > 8`
|
||||
| `pat` | Specifies that the string passed as parameter should be evaluated as a pattern. Works only with `==` and `!=`. | `[IP] == pat(192.168.*)`
|
||||
|
||||
**NOTE**: Use `pat` only when you need to. `[STATUS] == pat(2*)` is a lot more expensive than `[STATUS] < 300`.
|
||||
|
||||
|
||||
|
||||
### Alerting
|
||||
|
||||
|
||||
|
||||
#### Configuring Slack alerts
|
||||
|
||||
```yaml
|
||||
@@ -139,8 +170,8 @@ alerting:
|
||||
webhook-url: "https://hooks.slack.com/services/**********/**********/**********"
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s
|
||||
alerts:
|
||||
- type: slack
|
||||
enabled: true
|
||||
@@ -175,15 +206,15 @@ alerting:
|
||||
integration-key: "********************************"
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s
|
||||
alerts:
|
||||
- type: pagerduty
|
||||
enabled: true
|
||||
failure-threshold: 3
|
||||
success-threshold: 5
|
||||
description: "healthcheck failed 3 times in a row"
|
||||
send-on-resolved: true
|
||||
description: "healthcheck failed 3 times in a row"
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
@@ -208,8 +239,8 @@ services:
|
||||
- type: twilio
|
||||
enabled: true
|
||||
failure-threshold: 5
|
||||
description: "healthcheck failed 5 times in a row"
|
||||
send-on-resolved: true
|
||||
description: "healthcheck failed 5 times in a row"
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
@@ -245,8 +276,8 @@ alerting:
|
||||
}
|
||||
services:
|
||||
- name: twinnation
|
||||
interval: 30s
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s
|
||||
alerts:
|
||||
- type: custom
|
||||
enabled: true
|
||||
@@ -267,12 +298,12 @@ Other than using one of the examples provided in the `examples` folder, you can
|
||||
creating a configuration file - we'll call it `config.yaml` for this example - and running the following
|
||||
command:
|
||||
```
|
||||
docker run -p 8080:8080 --mount type=bind,source="$(pwd)"/test.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
docker run -p 8080:8080 --mount type=bind,source="$(pwd)"/config.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
```
|
||||
|
||||
If you're on Windows, replace `"$(pwd)"` by the absolute path to your current directory, e.g.:
|
||||
```
|
||||
docker run -p 8080:8080 --mount type=bind,source=E:/Go/src/github.com/TwinProduction/gatus/test.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
docker run -p 8080:8080 --mount type=bind,source=C:/Users/Chris/Desktop/config.yaml,target=/config/config.yaml --name gatus twinproduction/gatus
|
||||
```
|
||||
|
||||
## Running the tests
|
||||
@@ -320,3 +351,60 @@ will send a `POST` request to `http://localhost:8080/playground` with the follow
|
||||
```json
|
||||
{"query":" {\n user(gender: \"female\") {\n id\n name\n gender\n avatar\n }\n }"}
|
||||
```
|
||||
|
||||
|
||||
### Recommended interval
|
||||
|
||||
To ensure that Gatus provides reliable and accurate results (i.e. response time), Gatus only evaluates one service at a time.
|
||||
In other words, even if you have multiple services with the exact same interval, they will not execute at the same time.
|
||||
|
||||
You can test this yourself by running Gatus with several services configured with a very short, unrealistic interval,
|
||||
such as 1ms. You'll notice that the response time does not fluctuate - that is because while services are evaluated on
|
||||
different goroutines, there's a global lock that prevents multiple services from running at the same time.
|
||||
|
||||
Unfortunately, there is a drawback. If you have a lot of services, including some that are very slow or prone to time out (the default
|
||||
time out is 10s for HTTP and 5s for TCP), then it means that for the entire duration of the request, no other services can be evaluated.
|
||||
|
||||
**This does mean that Gatus will be unable to evaluate the health of other services**.
|
||||
The interval does not include the duration of the request itself, which means that if a service has an interval of 30s
|
||||
and the request takes 2s to complete, the timestamp between two evaluations will be 32s, not 30s.
|
||||
|
||||
While this does not prevent Gatus' from performing health checks on all other services, it may cause Gatus to be unable
|
||||
to respect the configured interval, for instance:
|
||||
- Service A has an interval of 5s, and times out after 10s to complete
|
||||
- Service B has an interval of 5s, and takes 1ms to complete
|
||||
- Service B will be unable to run every 5s, because service A's health evaluation takes longer than its interval
|
||||
|
||||
To sum it up, while Gatus can really handle any interval you throw at it, you're better off having slow requests with
|
||||
higher interval.
|
||||
|
||||
As a rule of the thumb, I personally set interval for more complex health checks to `5m` (5 minutes) and
|
||||
simple health checks used for alerting (PagerDuty/Twilio) to `30s`.
|
||||
|
||||
|
||||
### Default timeouts
|
||||
|
||||
| Protocol | Timeout |
|
||||
|:-------- |:------- |
|
||||
| HTTP | 10s
|
||||
| TCP | 5s
|
||||
|
||||
|
||||
### Monitoring a TCP service
|
||||
|
||||
By prefixing `services[].url` with `tcp:\\`, you can monitor TCP services at a very basic level:
|
||||
|
||||
```yaml
|
||||
- name: redis
|
||||
url: "tcp://127.0.0.1:6379"
|
||||
interval: 30s
|
||||
conditions:
|
||||
- "[CONNECTED] == true"
|
||||
```
|
||||
|
||||
Placeholders `[STATUS]` and `[BODY]` as well as the fields `services[].body`, `services[].insecure`,
|
||||
`services[].headers`, `services[].method` and `services[].graphql` are not supported for TCP services.
|
||||
|
||||
**NOTE**: `[CONNECTED] == true` does not guarantee that the service itself is healthy - it only guarantees that there's
|
||||
something at the given address listening to the given port, and that a connection to that address was successfully
|
||||
established.
|
||||
|
||||
@@ -4,11 +4,14 @@ import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/client"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration necessary for sending an alert using a custom HTTP request
|
||||
// Technically, all alert providers should be reachable using the custom alert provider
|
||||
type AlertProvider struct {
|
||||
Url string `yaml:"url"`
|
||||
Method string `yaml:"method,omitempty"`
|
||||
@@ -16,10 +19,16 @@ type AlertProvider struct {
|
||||
Headers map[string]string `yaml:"headers,omitempty"`
|
||||
}
|
||||
|
||||
// IsValid returns whether the provider's configuration is valid
|
||||
func (provider *AlertProvider) IsValid() bool {
|
||||
return len(provider.Url) > 0
|
||||
}
|
||||
|
||||
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *AlertProvider {
|
||||
return provider
|
||||
}
|
||||
|
||||
func (provider *AlertProvider) buildRequest(serviceName, alertDescription string, resolved bool) *http.Request {
|
||||
body := provider.Body
|
||||
providerUrl := provider.Url
|
||||
@@ -64,7 +73,7 @@ func (provider *AlertProvider) buildRequest(serviceName, alertDescription string
|
||||
// Send a request to the alert provider and return the body
|
||||
func (provider *AlertProvider) Send(serviceName, alertDescription string, resolved bool) ([]byte, error) {
|
||||
request := provider.buildRequest(serviceName, alertDescription, resolved)
|
||||
response, err := client.GetHttpClient().Do(request)
|
||||
response, err := client.GetHttpClient(false).Do(request)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -6,16 +6,28 @@ import (
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration necessary for sending an alert using PagerDuty
|
||||
type AlertProvider struct {
|
||||
IntegrationKey string `yaml:"integration-key"`
|
||||
}
|
||||
|
||||
// IsValid returns whether the provider's configuration is valid
|
||||
func (provider *AlertProvider) IsValid() bool {
|
||||
return len(provider.IntegrationKey) == 32
|
||||
}
|
||||
|
||||
// https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(eventAction, resolveKey string, service *core.Service, message string) *custom.AlertProvider {
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message, eventAction, resolveKey string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
|
||||
eventAction = "resolve"
|
||||
resolveKey = alert.ResolveKey
|
||||
} else {
|
||||
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
|
||||
eventAction = "trigger"
|
||||
resolveKey = ""
|
||||
}
|
||||
return &custom.AlertProvider{
|
||||
Url: "https://events.pagerduty.com/v2/enqueue",
|
||||
Method: "POST",
|
||||
|
||||
15
alerting/provider/provider.go
Normal file
15
alerting/provider/provider.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package provider
|
||||
|
||||
import (
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
// AlertProvider is the interface that each providers should implement
|
||||
type AlertProvider interface {
|
||||
// IsValid returns whether the provider's configuration is valid
|
||||
IsValid() bool
|
||||
|
||||
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
|
||||
ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider
|
||||
}
|
||||
@@ -6,14 +6,17 @@ import (
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration necessary for sending an alert using Slack
|
||||
type AlertProvider struct {
|
||||
WebhookUrl string `yaml:"webhook-url"`
|
||||
}
|
||||
|
||||
// IsValid returns whether the provider's configuration is valid
|
||||
func (provider *AlertProvider) IsValid() bool {
|
||||
return len(provider.WebhookUrl) > 0
|
||||
}
|
||||
|
||||
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message string
|
||||
var color string
|
||||
|
||||
@@ -4,9 +4,11 @@ import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// AlertProvider is the configuration necessary for sending an alert using Twilio
|
||||
type AlertProvider struct {
|
||||
SID string `yaml:"sid"`
|
||||
Token string `yaml:"token"`
|
||||
@@ -14,11 +16,19 @@ type AlertProvider struct {
|
||||
To string `yaml:"to"`
|
||||
}
|
||||
|
||||
// IsValid returns whether the provider's configuration is valid
|
||||
func (provider *AlertProvider) IsValid() bool {
|
||||
return len(provider.Token) > 0 && len(provider.SID) > 0 && len(provider.From) > 0 && len(provider.To) > 0
|
||||
}
|
||||
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(message string) *custom.AlertProvider {
|
||||
// ToCustomAlertProvider converts the provider into a custom.AlertProvider
|
||||
func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider {
|
||||
var message string
|
||||
if resolved {
|
||||
message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)
|
||||
} else {
|
||||
message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)
|
||||
}
|
||||
return &custom.AlertProvider{
|
||||
Url: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", provider.SID),
|
||||
Method: "POST",
|
||||
|
||||
@@ -1,19 +1,45 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
client *http.Client
|
||||
secureHttpClient *http.Client
|
||||
insecureHttpClient *http.Client
|
||||
)
|
||||
|
||||
func GetHttpClient() *http.Client {
|
||||
if client == nil {
|
||||
client = &http.Client{
|
||||
Timeout: time.Second * 10,
|
||||
func GetHttpClient(insecure bool) *http.Client {
|
||||
if insecure {
|
||||
if insecureHttpClient == nil {
|
||||
insecureHttpClient = &http.Client{
|
||||
Timeout: time.Second * 10,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
return insecureHttpClient
|
||||
} else {
|
||||
if secureHttpClient == nil {
|
||||
secureHttpClient = &http.Client{
|
||||
Timeout: time.Second * 10,
|
||||
}
|
||||
}
|
||||
return secureHttpClient
|
||||
}
|
||||
return client
|
||||
}
|
||||
|
||||
func CanCreateConnectionToTcpService(address string) bool {
|
||||
conn, err := net.DialTimeout("tcp", address, 5*time.Second)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = conn.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
28
client/client_test.go
Normal file
28
client/client_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetHttpClient(t *testing.T) {
|
||||
if secureHttpClient != nil {
|
||||
t.Error("secureHttpClient should've been nil since it hasn't been called a single time yet")
|
||||
}
|
||||
if insecureHttpClient != nil {
|
||||
t.Error("insecureHttpClient should've been nil since it hasn't been called a single time yet")
|
||||
}
|
||||
_ = GetHttpClient(false)
|
||||
if secureHttpClient == nil {
|
||||
t.Error("secureHttpClient shouldn't have been nil, since it has been called once")
|
||||
}
|
||||
if insecureHttpClient != nil {
|
||||
t.Error("insecureHttpClient should've been nil since it hasn't been called a single time yet")
|
||||
}
|
||||
_ = GetHttpClient(true)
|
||||
if secureHttpClient == nil {
|
||||
t.Error("secureHttpClient shouldn't have been nil, since it has been called once")
|
||||
}
|
||||
if insecureHttpClient == nil {
|
||||
t.Error("insecureHttpClient shouldn't have been nil, since it has been called once")
|
||||
}
|
||||
}
|
||||
@@ -1,15 +1,15 @@
|
||||
metrics: true
|
||||
services:
|
||||
- name: twinnation
|
||||
url: "https://twinnation.org/health"
|
||||
interval: 30s
|
||||
url: https://twinnation.org/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].status == UP"
|
||||
- "[RESPONSE_TIME] < 1000"
|
||||
- name: cat-fact
|
||||
interval: 1m
|
||||
url: "https://cat-fact.herokuapp.com/facts/random"
|
||||
interval: 1m
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY].deleted == false"
|
||||
|
||||
@@ -3,6 +3,7 @@ package config
|
||||
import (
|
||||
"errors"
|
||||
"github.com/TwinProduction/gatus/alerting"
|
||||
"github.com/TwinProduction/gatus/alerting/provider"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
@@ -11,6 +12,8 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultConfigurationFilePath is the default path that will be used to search for the configuration file
|
||||
// if a custom path isn't configured through the GATUS_CONFIG_FILE environment variable
|
||||
DefaultConfigurationFilePath = "config/config.yaml"
|
||||
)
|
||||
|
||||
@@ -21,6 +24,7 @@ var (
|
||||
config *Config
|
||||
)
|
||||
|
||||
// Config is the main configuration structure
|
||||
type Config struct {
|
||||
Metrics bool `yaml:"metrics"`
|
||||
Debug bool `yaml:"debug"`
|
||||
@@ -36,7 +40,7 @@ func Get() *Config {
|
||||
}
|
||||
|
||||
func Load(configFile string) error {
|
||||
log.Printf("[config][Load] Attempting to load config from configFile=%s", configFile)
|
||||
log.Printf("[config][Load] Reading configuration from configFile=%s", configFile)
|
||||
cfg, err := readConfigurationFile(configFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
@@ -75,13 +79,79 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
||||
// Parse configuration file
|
||||
err = yaml.Unmarshal(yamlBytes, &config)
|
||||
// Check if the configuration file at least has services.
|
||||
if config == nil || len(config.Services) == 0 {
|
||||
if config == nil || config.Services == nil || len(config.Services) == 0 {
|
||||
err = ErrNoServiceInConfig
|
||||
} else {
|
||||
// Set the default values if they aren't set
|
||||
for _, service := range config.Services {
|
||||
service.Validate()
|
||||
}
|
||||
validateAlertingConfig(config)
|
||||
validateServicesConfig(config)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func validateServicesConfig(config *Config) {
|
||||
for _, service := range config.Services {
|
||||
if config.Debug {
|
||||
log.Printf("[config][validateServicesConfig] Validating service '%s'", service.Name)
|
||||
}
|
||||
service.ValidateAndSetDefaults()
|
||||
}
|
||||
log.Printf("[config][validateServicesConfig] Validated %d services", len(config.Services))
|
||||
}
|
||||
|
||||
func validateAlertingConfig(config *Config) {
|
||||
if config.Alerting == nil {
|
||||
log.Printf("[config][validateAlertingConfig] Alerting is not configured")
|
||||
return
|
||||
}
|
||||
alertTypes := []core.AlertType{
|
||||
core.SlackAlert,
|
||||
core.TwilioAlert,
|
||||
core.PagerDutyAlert,
|
||||
core.CustomAlert,
|
||||
}
|
||||
var validProviders, invalidProviders []core.AlertType
|
||||
for _, alertType := range alertTypes {
|
||||
alertProvider := GetAlertingProviderByAlertType(config, alertType)
|
||||
if alertProvider != nil {
|
||||
if alertProvider.IsValid() {
|
||||
validProviders = append(validProviders, alertType)
|
||||
} else {
|
||||
log.Printf("[config][validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType)
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
}
|
||||
} else {
|
||||
invalidProviders = append(invalidProviders, alertType)
|
||||
}
|
||||
}
|
||||
log.Printf("[config][validateAlertingConfig] configuredProviders=%s; ignoredProviders=%s", validProviders, invalidProviders)
|
||||
}
|
||||
|
||||
func GetAlertingProviderByAlertType(config *Config, alertType core.AlertType) provider.AlertProvider {
|
||||
switch alertType {
|
||||
case core.SlackAlert:
|
||||
if config.Alerting.Slack == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Slack
|
||||
case core.TwilioAlert:
|
||||
if config.Alerting.Twilio == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Twilio
|
||||
case core.PagerDutyAlert:
|
||||
if config.Alerting.PagerDuty == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.PagerDuty
|
||||
case core.CustomAlert:
|
||||
if config.Alerting.Custom == nil {
|
||||
// Since we're returning an interface, we need to explicitly return nil, even if the provider itself is nil
|
||||
return nil
|
||||
}
|
||||
return config.Alerting.Custom
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -36,7 +35,6 @@ services:
|
||||
if config.Services[1].Url != "https://api.github.com/healthz" {
|
||||
t.Errorf("URL should have been %s", "https://api.github.com/healthz")
|
||||
}
|
||||
fmt.Println(config.Services[0].Interval)
|
||||
if config.Services[0].Interval != 15*time.Second {
|
||||
t.Errorf("Interval should have been %s", 15*time.Second)
|
||||
}
|
||||
@@ -123,6 +121,8 @@ func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
|
||||
alerting:
|
||||
slack:
|
||||
webhook-url: "http://example.com"
|
||||
pagerduty:
|
||||
integration-key: "00000000000000000000000000000000"
|
||||
services:
|
||||
- name: twinnation
|
||||
url: https://twinnation.org/actuator/health
|
||||
@@ -144,10 +144,19 @@ services:
|
||||
t.Error("Metrics should've been false by default")
|
||||
}
|
||||
if config.Alerting == nil {
|
||||
t.Fatal("config.AlertingConfig shouldn't have been nil")
|
||||
t.Fatal("config.Alerting shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() {
|
||||
t.Fatal("Slack alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.Slack.WebhookUrl != "http://example.com" {
|
||||
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
|
||||
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack.WebhookUrl)
|
||||
}
|
||||
if config.Alerting.PagerDuty == nil || !config.Alerting.PagerDuty.IsValid() {
|
||||
t.Fatal("PagerDuty alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.PagerDuty.IntegrationKey != "00000000000000000000000000000000" {
|
||||
t.Errorf("PagerDuty integration key should've been %s, but was %s", "00000000000000000000000000000000", config.Alerting.PagerDuty.IntegrationKey)
|
||||
}
|
||||
if len(config.Services) != 1 {
|
||||
t.Error("There should've been 1 service")
|
||||
@@ -180,3 +189,31 @@ services:
|
||||
t.Errorf("The type of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[0].Description)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndValidateConfigBytesWithInvalidPagerDutyAlertingConfig(t *testing.T) {
|
||||
config, err := parseAndValidateConfigBytes([]byte(`
|
||||
alerting:
|
||||
pagerduty:
|
||||
integration-key: "INVALID_KEY"
|
||||
services:
|
||||
- name: twinnation
|
||||
url: https://twinnation.org/actuator/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
`))
|
||||
if err != nil {
|
||||
t.Error("No error should've been returned")
|
||||
}
|
||||
if config == nil {
|
||||
t.Fatal("Config shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting == nil {
|
||||
t.Fatal("config.Alerting shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.PagerDuty == nil {
|
||||
t.Fatal("PagerDuty alerting config shouldn't have been nil")
|
||||
}
|
||||
if config.Alerting.PagerDuty.IsValid() {
|
||||
t.Fatal("PagerDuty alerting config should've been invalid")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,23 +2,41 @@ package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/jsonpath"
|
||||
"github.com/TwinProduction/gatus/pattern"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
StatusPlaceholder = "[STATUS]"
|
||||
IPPlaceHolder = "[IP]"
|
||||
ResponseTimePlaceHolder = "[RESPONSE_TIME]"
|
||||
BodyPlaceHolder = "[BODY]"
|
||||
ConnectedPlaceHolder = "[CONNECTED]"
|
||||
|
||||
LengthFunctionPrefix = "len("
|
||||
PatternFunctionPrefix = "pat("
|
||||
FunctionSuffix = ")"
|
||||
|
||||
InvalidConditionElementSuffix = "(INVALID)"
|
||||
)
|
||||
|
||||
type Condition string
|
||||
|
||||
// evaluate the Condition with the Result of the health check
|
||||
func (c *Condition) evaluate(result *Result) bool {
|
||||
condition := string(*c)
|
||||
success := false
|
||||
var resolvedCondition string
|
||||
if strings.Contains(condition, "==") {
|
||||
parts := sanitizeAndResolve(strings.Split(condition, "=="), result)
|
||||
success = parts[0] == parts[1]
|
||||
success = isEqual(parts[0], parts[1])
|
||||
resolvedCondition = fmt.Sprintf("%v == %v", parts[0], parts[1])
|
||||
} else if strings.Contains(condition, "!=") {
|
||||
parts := sanitizeAndResolve(strings.Split(condition, "!="), result)
|
||||
success = parts[0] != parts[1]
|
||||
success = !isEqual(parts[0], parts[1])
|
||||
resolvedCondition = fmt.Sprintf("%v != %v", parts[0], parts[1])
|
||||
} else if strings.Contains(condition, "<=") {
|
||||
parts := sanitizeAndResolveNumerical(strings.Split(condition, "<="), result)
|
||||
@@ -49,3 +67,83 @@ func (c *Condition) evaluate(result *Result) bool {
|
||||
result.ConditionResults = append(result.ConditionResults, &ConditionResult{Condition: conditionToDisplay, Success: success})
|
||||
return success
|
||||
}
|
||||
|
||||
// isEqual compares two strings.
|
||||
//
|
||||
// It also supports the pattern function. That is to say, if one of the strings starts with PatternFunctionPrefix
|
||||
// and ends with FunctionSuffix, it will be treated like a pattern.
|
||||
func isEqual(first, second string) bool {
|
||||
var isFirstPattern, isSecondPattern bool
|
||||
if strings.HasPrefix(first, PatternFunctionPrefix) && strings.HasSuffix(first, FunctionSuffix) {
|
||||
isFirstPattern = true
|
||||
first = strings.TrimSuffix(strings.TrimPrefix(first, PatternFunctionPrefix), FunctionSuffix)
|
||||
}
|
||||
if strings.HasPrefix(second, PatternFunctionPrefix) && strings.HasSuffix(second, FunctionSuffix) {
|
||||
isSecondPattern = true
|
||||
second = strings.TrimSuffix(strings.TrimPrefix(second, PatternFunctionPrefix), FunctionSuffix)
|
||||
}
|
||||
if isFirstPattern && !isSecondPattern {
|
||||
return pattern.Match(first, second)
|
||||
} else if !isFirstPattern && isSecondPattern {
|
||||
return pattern.Match(second, first)
|
||||
} else {
|
||||
return first == second
|
||||
}
|
||||
}
|
||||
|
||||
// sanitizeAndResolve sanitizes and resolves a list of element and returns the list of resolved elements
|
||||
func sanitizeAndResolve(list []string, result *Result) []string {
|
||||
var sanitizedList []string
|
||||
body := strings.TrimSpace(string(result.Body))
|
||||
for _, element := range list {
|
||||
element = strings.TrimSpace(element)
|
||||
switch strings.ToUpper(element) {
|
||||
case StatusPlaceholder:
|
||||
element = strconv.Itoa(result.HttpStatus)
|
||||
case IPPlaceHolder:
|
||||
element = result.Ip
|
||||
case ResponseTimePlaceHolder:
|
||||
element = strconv.Itoa(int(result.Duration.Milliseconds()))
|
||||
case BodyPlaceHolder:
|
||||
element = body
|
||||
case ConnectedPlaceHolder:
|
||||
element = strconv.FormatBool(result.Connected)
|
||||
default:
|
||||
// if contains the BodyPlaceHolder, then evaluate json path
|
||||
if strings.Contains(element, BodyPlaceHolder) {
|
||||
wantLength := false
|
||||
if strings.HasPrefix(element, LengthFunctionPrefix) && strings.HasSuffix(element, FunctionSuffix) {
|
||||
wantLength = true
|
||||
element = strings.TrimSuffix(strings.TrimPrefix(element, LengthFunctionPrefix), FunctionSuffix)
|
||||
}
|
||||
resolvedElement, resolvedElementLength, err := jsonpath.Eval(strings.Replace(element, fmt.Sprintf("%s.", BodyPlaceHolder), "", 1), result.Body)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
element = fmt.Sprintf("%s %s", element, InvalidConditionElementSuffix)
|
||||
} else {
|
||||
if wantLength {
|
||||
element = fmt.Sprintf("%d", resolvedElementLength)
|
||||
} else {
|
||||
element = resolvedElement
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sanitizedList = append(sanitizedList, element)
|
||||
}
|
||||
return sanitizedList
|
||||
}
|
||||
|
||||
func sanitizeAndResolveNumerical(list []string, result *Result) []int {
|
||||
var sanitizedNumbers []int
|
||||
sanitizedList := sanitizeAndResolve(list, result)
|
||||
for _, element := range sanitizedList {
|
||||
if number, err := strconv.Atoi(element); err != nil {
|
||||
// Default to 0 if the string couldn't be converted to an integer
|
||||
sanitizedNumbers = append(sanitizedNumbers, 0)
|
||||
} else {
|
||||
sanitizedNumbers = append(sanitizedNumbers, number)
|
||||
}
|
||||
}
|
||||
return sanitizedNumbers
|
||||
}
|
||||
|
||||
@@ -113,6 +113,24 @@ func TestCondition_evaluateWithBodyJsonPathComplex(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyJsonPathDoublePlaceholders(t *testing.T) {
|
||||
condition := Condition("[BODY].user.firstName != [BODY].user.lastName")
|
||||
result := &Result{Body: []byte("{\"user\": {\"firstName\": \"john\", \"lastName\": \"doe\"}}")}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyJsonPathDoublePlaceholdersFailure(t *testing.T) {
|
||||
condition := Condition("[BODY].user.firstName == [BODY].user.lastName")
|
||||
result := &Result{Body: []byte("{\"user\": {\"firstName\": \"john\", \"lastName\": \"doe\"}}")}
|
||||
condition.evaluate(result)
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyJsonPathLongInt(t *testing.T) {
|
||||
condition := Condition("[BODY].data.id == 1")
|
||||
result := &Result{Body: []byte("{\"data\": {\"id\": 1}}")}
|
||||
@@ -184,3 +202,84 @@ func TestCondition_evaluateWithBodyStringLength(t *testing.T) {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyStringPattern(t *testing.T) {
|
||||
condition := Condition("[BODY].name == pat(*ohn*)")
|
||||
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyStringPatternFailure(t *testing.T) {
|
||||
condition := Condition("[BODY].name == pat(bob*)")
|
||||
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
|
||||
condition.evaluate(result)
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithBodyPatternFailure(t *testing.T) {
|
||||
condition := Condition("[BODY] == pat(*john*)")
|
||||
result := &Result{Body: []byte("{\"name\": \"john.doe\"}")}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithIPPattern(t *testing.T) {
|
||||
condition := Condition("[IP] == pat(10.*)")
|
||||
result := &Result{Ip: "10.0.0.0"}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithIPPatternFailure(t *testing.T) {
|
||||
condition := Condition("[IP] == pat(10.*)")
|
||||
result := &Result{Ip: "255.255.255.255"}
|
||||
condition.evaluate(result)
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithStatusPattern(t *testing.T) {
|
||||
condition := Condition("[STATUS] == pat(4*)")
|
||||
result := &Result{HttpStatus: 404}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithStatusPatternFailure(t *testing.T) {
|
||||
condition := Condition("[STATUS] != pat(4*)")
|
||||
result := &Result{HttpStatus: 404}
|
||||
condition.evaluate(result)
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithConnected(t *testing.T) {
|
||||
condition := Condition("[CONNECTED] == true")
|
||||
result := &Result{Connected: true}
|
||||
condition.evaluate(result)
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCondition_evaluateWithConnectedFailure(t *testing.T) {
|
||||
condition := Condition("[CONNECTED] == true")
|
||||
result := &Result{Connected: false}
|
||||
condition.evaluate(result)
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -46,11 +47,15 @@ type Service struct {
|
||||
// Alerts is the alerting configuration for the service in case of failure
|
||||
Alerts []*Alert `yaml:"alerts"`
|
||||
|
||||
// Insecure is whether to skip verifying the server's certificate chain and host name
|
||||
Insecure bool `yaml:"insecure,omitempty"`
|
||||
|
||||
NumberOfFailuresInARow int
|
||||
NumberOfSuccessesInARow int
|
||||
}
|
||||
|
||||
func (service *Service) Validate() {
|
||||
// ValidateAndSetDefaults validates the service's configuration and sets the default value of fields that have one
|
||||
func (service *Service) ValidateAndSetDefaults() {
|
||||
// Set default values
|
||||
if service.Interval == 0 {
|
||||
service.Interval = 1 * time.Minute
|
||||
@@ -83,7 +88,8 @@ func (service *Service) Validate() {
|
||||
}
|
||||
}
|
||||
|
||||
func (service *Service) EvaluateConditions() *Result {
|
||||
// EvaluateHealth sends a request to the service's URL and evaluates the conditions of the service.
|
||||
func (service *Service) EvaluateHealth() *Result {
|
||||
result := &Result{Success: true, Errors: []string{}}
|
||||
service.getIp(result)
|
||||
if len(result.Errors) == 0 {
|
||||
@@ -131,19 +137,30 @@ func (service *Service) getIp(result *Result) {
|
||||
}
|
||||
|
||||
func (service *Service) call(result *Result) {
|
||||
request := service.buildRequest()
|
||||
startTime := time.Now()
|
||||
response, err := client.GetHttpClient().Do(request)
|
||||
if err != nil {
|
||||
result.Duration = time.Since(startTime)
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
return
|
||||
isServiceTcp := strings.HasPrefix(service.Url, "tcp://")
|
||||
var request *http.Request
|
||||
var response *http.Response
|
||||
var err error
|
||||
if !isServiceTcp {
|
||||
request = service.buildRequest()
|
||||
}
|
||||
result.Duration = time.Since(startTime)
|
||||
result.HttpStatus = response.StatusCode
|
||||
result.Body, err = ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
startTime := time.Now()
|
||||
if isServiceTcp {
|
||||
result.Connected = client.CanCreateConnectionToTcpService(strings.TrimPrefix(service.Url, "tcp://"))
|
||||
result.Duration = time.Since(startTime)
|
||||
} else {
|
||||
response, err = client.GetHttpClient(service.Insecure).Do(request)
|
||||
result.Duration = time.Since(startTime)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
return
|
||||
}
|
||||
result.HttpStatus = response.StatusCode
|
||||
result.Connected = response.StatusCode > 0
|
||||
result.Body, err = ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,33 +4,39 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIntegrationEvaluateConditions(t *testing.T) {
|
||||
func TestIntegrationEvaluateHealth(t *testing.T) {
|
||||
condition := Condition("[STATUS] == 200")
|
||||
service := Service{
|
||||
Name: "TwiNNatioN",
|
||||
Url: "https://twinnation.org/health",
|
||||
Conditions: []*Condition{&condition},
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
if !result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a success", condition)
|
||||
}
|
||||
if !result.Connected {
|
||||
t.Error("Because the connection has been established, result.Connected should've been true")
|
||||
}
|
||||
if !result.Success {
|
||||
t.Error("Because all conditions passed, this should have been a success")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegrationEvaluateConditionsWithFailure(t *testing.T) {
|
||||
func TestIntegrationEvaluateHealthWithFailure(t *testing.T) {
|
||||
condition := Condition("[STATUS] == 500")
|
||||
service := Service{
|
||||
Name: "TwiNNatioN",
|
||||
Url: "https://twinnation.org/health",
|
||||
Conditions: []*Condition{&condition},
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
if result.ConditionResults[0].Success {
|
||||
t.Errorf("Condition '%s' should have been a failure", condition)
|
||||
}
|
||||
if !result.Connected {
|
||||
t.Error("Because the connection has been established, result.Connected should've been true")
|
||||
}
|
||||
if result.Success {
|
||||
t.Error("Because one of the conditions failed, success should have been false")
|
||||
}
|
||||
|
||||
@@ -9,16 +9,37 @@ type HealthStatus struct {
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
// Result of the evaluation of a Service
|
||||
type Result struct {
|
||||
HttpStatus int `json:"status"`
|
||||
Body []byte `json:"-"`
|
||||
Hostname string `json:"hostname"`
|
||||
Ip string `json:"-"`
|
||||
Duration time.Duration `json:"duration"`
|
||||
Errors []string `json:"errors"`
|
||||
// HttpStatus is the HTTP response status code
|
||||
HttpStatus int `json:"status"`
|
||||
|
||||
// Body is the response body
|
||||
Body []byte `json:"-"`
|
||||
|
||||
// Hostname extracted from the Service Url
|
||||
Hostname string `json:"hostname"`
|
||||
|
||||
// Ip resolved from the Service Url
|
||||
Ip string `json:"-"`
|
||||
|
||||
// Connected whether a connection to the host was established successfully
|
||||
Connected bool `json:"-"`
|
||||
|
||||
// Duration time that the request took
|
||||
Duration time.Duration `json:"duration"`
|
||||
|
||||
// Errors encountered during the evaluation of the service's health
|
||||
Errors []string `json:"errors"`
|
||||
|
||||
// ConditionResults results of the service's conditions
|
||||
ConditionResults []*ConditionResult `json:"condition-results"`
|
||||
Success bool `json:"success"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
|
||||
// Success whether the result signifies a success or not
|
||||
Success bool `json:"success"`
|
||||
|
||||
// Timestamp when the request was sent
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
|
||||
type ConditionResult struct {
|
||||
|
||||
74
core/util.go
74
core/util.go
@@ -1,74 +0,0 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/jsonpath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
StatusPlaceholder = "[STATUS]"
|
||||
IPPlaceHolder = "[IP]"
|
||||
ResponseTimePlaceHolder = "[RESPONSE_TIME]"
|
||||
BodyPlaceHolder = "[BODY]"
|
||||
|
||||
LengthFunctionPrefix = "len("
|
||||
FunctionSuffix = ")"
|
||||
|
||||
InvalidConditionElementSuffix = "(INVALID)"
|
||||
)
|
||||
|
||||
func sanitizeAndResolve(list []string, result *Result) []string {
|
||||
var sanitizedList []string
|
||||
body := strings.TrimSpace(string(result.Body))
|
||||
for _, element := range list {
|
||||
element = strings.TrimSpace(element)
|
||||
switch strings.ToUpper(element) {
|
||||
case StatusPlaceholder:
|
||||
element = strconv.Itoa(result.HttpStatus)
|
||||
case IPPlaceHolder:
|
||||
element = result.Ip
|
||||
case ResponseTimePlaceHolder:
|
||||
element = strconv.Itoa(int(result.Duration.Milliseconds()))
|
||||
case BodyPlaceHolder:
|
||||
element = body
|
||||
default:
|
||||
// if starts with BodyPlaceHolder, then evaluate json path
|
||||
if strings.Contains(element, BodyPlaceHolder) {
|
||||
wantLength := false
|
||||
if strings.HasPrefix(element, LengthFunctionPrefix) && strings.HasSuffix(element, FunctionSuffix) {
|
||||
wantLength = true
|
||||
element = strings.TrimSuffix(strings.TrimPrefix(element, LengthFunctionPrefix), FunctionSuffix)
|
||||
}
|
||||
resolvedElement, resolvedElementLength, err := jsonpath.Eval(strings.Replace(element, fmt.Sprintf("%s.", BodyPlaceHolder), "", 1), result.Body)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, err.Error())
|
||||
element = fmt.Sprintf("%s %s", element, InvalidConditionElementSuffix)
|
||||
} else {
|
||||
if wantLength {
|
||||
element = fmt.Sprintf("%d", resolvedElementLength)
|
||||
} else {
|
||||
element = resolvedElement
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sanitizedList = append(sanitizedList, element)
|
||||
}
|
||||
return sanitizedList
|
||||
}
|
||||
|
||||
func sanitizeAndResolveNumerical(list []string, result *Result) []int {
|
||||
var sanitizedNumbers []int
|
||||
sanitizedList := sanitizeAndResolve(list, result)
|
||||
for _, element := range sanitizedList {
|
||||
if number, err := strconv.Atoi(element); err != nil {
|
||||
// Default to 0 if the string couldn't be converted to an integer
|
||||
sanitizedNumbers = append(sanitizedNumbers, 0)
|
||||
} else {
|
||||
sanitizedNumbers = append(sanitizedNumbers, number)
|
||||
}
|
||||
}
|
||||
return sanitizedNumbers
|
||||
}
|
||||
@@ -49,10 +49,10 @@ spec:
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
cpu: 50m
|
||||
cpu: 200m
|
||||
memory: 50M
|
||||
requests:
|
||||
cpu: 20m
|
||||
cpu: 50m
|
||||
memory: 20M
|
||||
volumeMounts:
|
||||
- mountPath: /config
|
||||
|
||||
12
pattern/pattern.go
Normal file
12
pattern/pattern.go
Normal file
@@ -0,0 +1,12 @@
|
||||
package pattern
|
||||
|
||||
import "path/filepath"
|
||||
|
||||
// Match checks whether a string matches a pattern
|
||||
func Match(pattern, s string) bool {
|
||||
if pattern == "*" {
|
||||
return true
|
||||
}
|
||||
matched, _ := filepath.Match(pattern, s)
|
||||
return matched
|
||||
}
|
||||
37
pattern/pattern_test.go
Normal file
37
pattern/pattern_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package pattern
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestMatch(t *testing.T) {
|
||||
testMatch(t, "*", "livingroom_123", true)
|
||||
testMatch(t, "**", "livingroom_123", true)
|
||||
testMatch(t, "living*", "livingroom_123", true)
|
||||
testMatch(t, "*living*", "livingroom_123", true)
|
||||
testMatch(t, "*123", "livingroom_123", true)
|
||||
testMatch(t, "*_*", "livingroom_123", true)
|
||||
testMatch(t, "living*_*3", "livingroom_123", true)
|
||||
testMatch(t, "living*room_*3", "livingroom_123", true)
|
||||
testMatch(t, "living*room_*3", "livingroom_123", true)
|
||||
testMatch(t, "*vin*om*2*", "livingroom_123", true)
|
||||
testMatch(t, "livingroom_123", "livingroom_123", true)
|
||||
testMatch(t, "*livingroom_123*", "livingroom_123", true)
|
||||
testMatch(t, "livingroom", "livingroom_123", false)
|
||||
testMatch(t, "livingroom123", "livingroom_123", false)
|
||||
testMatch(t, "what", "livingroom_123", false)
|
||||
testMatch(t, "*what*", "livingroom_123", false)
|
||||
testMatch(t, "*.*", "livingroom_123", false)
|
||||
testMatch(t, "room*123", "livingroom_123", false)
|
||||
}
|
||||
|
||||
func testMatch(t *testing.T, pattern, key string, expectedToMatch bool) {
|
||||
matched := Match(pattern, key)
|
||||
if expectedToMatch {
|
||||
if !matched {
|
||||
t.Errorf("%s should've matched pattern '%s'", key, pattern)
|
||||
}
|
||||
} else {
|
||||
if matched {
|
||||
t.Errorf("%s shouldn't have matched pattern '%s'", key, pattern)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -90,6 +90,15 @@
|
||||
#social img:hover {
|
||||
opacity: 1;
|
||||
}
|
||||
#settings {
|
||||
position: fixed;
|
||||
left: 5px;
|
||||
bottom: 5px;
|
||||
padding: 5px;
|
||||
}
|
||||
#settings select:focus {
|
||||
box-shadow: none;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -128,9 +137,26 @@
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div id="settings">
|
||||
<div class="input-group input-group-sm">
|
||||
<div class="input-group-prepend">
|
||||
<div class="input-group-text">↻</div>
|
||||
</div>
|
||||
<select class="form-control form-control-sm" id="refresh-rate">
|
||||
<option value="10">10s</option>
|
||||
<option value="30" selected>30s</option>
|
||||
<option value="60">1m</option>
|
||||
<option value="120">2m</option>
|
||||
<option value="300">5m</option>
|
||||
<option value="600">10m</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let serviceStatuses = {};
|
||||
let timerHandler = 0;
|
||||
let refreshIntervalHandler = 0;
|
||||
let userClickedStatus = false;
|
||||
|
||||
function showTooltip(serviceName, index, element) {
|
||||
@@ -291,10 +317,19 @@
|
||||
.replace(/'/g, ''');
|
||||
}
|
||||
|
||||
refreshResults();
|
||||
setInterval(function() {
|
||||
function setRefreshInterval(seconds) {
|
||||
refreshResults();
|
||||
}, 30000);
|
||||
refreshIntervalHandler = setInterval(function() {
|
||||
refreshResults();
|
||||
}, seconds * 1000)
|
||||
}
|
||||
|
||||
$("#refresh-rate").change(function() {
|
||||
clearInterval(refreshIntervalHandler);
|
||||
setRefreshInterval($(this).val())
|
||||
});
|
||||
setRefreshInterval(30);
|
||||
$("#refresh-rate").val(30);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -2,8 +2,6 @@ package watchdog
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/TwinProduction/gatus/alerting/provider/custom"
|
||||
"github.com/TwinProduction/gatus/config"
|
||||
"github.com/TwinProduction/gatus/core"
|
||||
"log"
|
||||
@@ -32,46 +30,20 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
||||
}
|
||||
if alert.Triggered {
|
||||
if cfg.Debug {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Alert with description='%s' has already been triggered, skipping", alert.Description)
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Alert with description='%s' has already been TRIGGERED, skipping", alert.Description)
|
||||
}
|
||||
continue
|
||||
}
|
||||
var alertProvider *custom.AlertProvider
|
||||
if alert.Type == core.SlackAlert {
|
||||
if cfg.Alerting.Slack != nil && cfg.Alerting.Slack.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending Slack alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = cfg.Alerting.Slack.ToCustomAlertProvider(service, alert, result, false)
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
|
||||
}
|
||||
} else if alert.Type == core.PagerDutyAlert {
|
||||
if cfg.Alerting.PagerDuty != nil && cfg.Alerting.PagerDuty.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending PagerDuty alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = cfg.Alerting.PagerDuty.ToCustomAlertProvider("trigger", "", service, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Not sending PagerDuty alert despite being triggered, because PagerDuty isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.TwilioAlert {
|
||||
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending Twilio alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = cfg.Alerting.Twilio.ToCustomAlertProvider(fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Not sending Twilio alert despite being triggered, because Twilio config settings missing")
|
||||
}
|
||||
} else if alert.Type == core.CustomAlert {
|
||||
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending custom alert because alert with description='%s' has been triggered", alert.Description)
|
||||
alertProvider = cfg.Alerting.Custom
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Not sending custom alert despite being triggered, because there is no custom url configured")
|
||||
}
|
||||
}
|
||||
if alertProvider != nil {
|
||||
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
|
||||
if alertProvider != nil && alertProvider.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert with description='%s' has been TRIGGERED", alert.Type, alert.Description)
|
||||
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false)
|
||||
// TODO: retry on error
|
||||
var err error
|
||||
// We need to extract the DedupKey from PagerDuty's response
|
||||
if alert.Type == core.PagerDutyAlert {
|
||||
var body []byte
|
||||
body, err = alertProvider.Send(service.Name, alert.Description, true)
|
||||
body, err = customAlertProvider.Send(service.Name, alert.Description, false)
|
||||
if err == nil {
|
||||
var response pagerDutyResponse
|
||||
err = json.Unmarshal(body, &response)
|
||||
@@ -82,13 +54,17 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, err = alertProvider.Send(service.Name, alert.Description, false)
|
||||
// All other alert types don't need to extract anything from the body, so we can just send the request right away
|
||||
_, err = customAlertProvider.Send(service.Name, alert.Description, false)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
alert.Triggered = true
|
||||
}
|
||||
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", alert.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -103,44 +79,12 @@ func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *conf
|
||||
if !alert.SendOnResolved {
|
||||
continue
|
||||
}
|
||||
var alertProvider *custom.AlertProvider
|
||||
if alert.Type == core.SlackAlert {
|
||||
if cfg.Alerting.Slack != nil && cfg.Alerting.Slack.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending Slack alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = cfg.Alerting.Slack.ToCustomAlertProvider(service, alert, result, true)
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending Slack alert despite being resolved, because there is no Slack webhook configured")
|
||||
}
|
||||
} else if alert.Type == core.PagerDutyAlert {
|
||||
if cfg.Alerting.PagerDuty != nil && cfg.Alerting.PagerDuty.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending PagerDuty alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = cfg.Alerting.PagerDuty.ToCustomAlertProvider("resolve", alert.ResolveKey, service, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending PagerDuty alert despite being resolved, because PagerDuty isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.TwilioAlert {
|
||||
if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending Twilio alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = cfg.Alerting.Twilio.ToCustomAlertProvider(fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description))
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly")
|
||||
}
|
||||
} else if alert.Type == core.CustomAlert {
|
||||
if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending custom alert because alert with description='%s' has been resolved", alert.Description)
|
||||
alertProvider = &custom.AlertProvider{
|
||||
Url: cfg.Alerting.Custom.Url,
|
||||
Method: cfg.Alerting.Custom.Method,
|
||||
Body: cfg.Alerting.Custom.Body,
|
||||
Headers: cfg.Alerting.Custom.Headers,
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
|
||||
}
|
||||
}
|
||||
if alertProvider != nil {
|
||||
alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type)
|
||||
if alertProvider != nil && alertProvider.IsValid() {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert with description='%s' has been RESOLVED", alert.Type, alert.Description)
|
||||
customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true)
|
||||
// TODO: retry on error
|
||||
_, err := alertProvider.Send(service.Name, alert.Description, true)
|
||||
_, err := customAlertProvider.Send(service.Name, alert.Description, true)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
|
||||
} else {
|
||||
@@ -148,6 +92,8 @@ func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *conf
|
||||
alert.ResolveKey = ""
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", alert.Type)
|
||||
}
|
||||
}
|
||||
service.NumberOfFailuresInARow = 0
|
||||
|
||||
@@ -50,7 +50,7 @@ func monitor(service *core.Service) {
|
||||
if cfg.Debug {
|
||||
log.Printf("[watchdog][monitor] Monitoring serviceName=%s", service.Name)
|
||||
}
|
||||
result := service.EvaluateConditions()
|
||||
result := service.EvaluateHealth()
|
||||
metric.PublishMetricsForService(service, result)
|
||||
serviceResultsMutex.Lock()
|
||||
serviceResults[service.Name] = append(serviceResults[service.Name], result)
|
||||
|
||||
Reference in New Issue
Block a user