Compare commits

..

20 Commits

Author SHA1 Message Date
TwinProduction
37c4715453 Support custom alert provider 2020-08-27 22:23:21 -04:00
TwinProduction
4b57654592 Fix issue with tooltip overflowing at the top 2020-08-25 14:27:13 -04:00
TwinProduction
af6298de05 Add documentation for alerts 2020-08-22 14:15:44 -04:00
TwinProduction
22fef4e9aa Add tests for alert configuration 2020-08-22 14:15:21 -04:00
TwinProduction
9a3c9e4d61 Set default alert threshold to 3 2020-08-22 14:15:08 -04:00
TwinProduction
62f7bdbd63 Add favicon.ico and logo-small-padding.png 2020-08-21 22:17:53 -04:00
TwinProduction
04d6c8bb82 Improve mobile-friendliness and add logo 2020-08-21 22:07:46 -04:00
TwinProduction
e1721fa237 Update Go to 1.15 2020-08-21 21:57:23 -04:00
TwinProduction
6f4cf69c4e Implement Slack alerting (#2) 2020-08-20 21:11:22 -04:00
TwinProduction
6596d253aa Continue working on #2: Slack alerts 2020-08-19 19:41:01 -04:00
TwinProduction
857fe5eb8c Rename SendMessage to SendSlackMessage 2020-08-19 19:40:00 -04:00
TwinProduction
8abcab6a8f Start working on #2: Slack alerts 2020-08-18 22:24:00 -04:00
TwinProduction
0fd8bf4198 Add Go report card badge 2020-08-17 22:21:20 -04:00
TwinProduction
946101e995 Add documentation in watchdog.go 2020-08-17 20:25:29 -04:00
TwinProduction
f930687b4a Clean up code for len() function 2020-08-16 15:19:53 -04:00
TwinProduction
43aa31be58 Add missing yaml identifier to enable code highlighting 2020-08-15 18:34:05 -04:00
TwinProduction
adfee25a22 Update interval in config.yaml 2020-08-15 16:59:05 -04:00
TwinProduction
1f241ecdb3 Support Gzip and cache result to prevent wasting CPU 2020-08-15 16:44:28 -04:00
TwinProduction
7849cc6dd4 Regenerate the table only if there's a change 2020-08-15 16:42:47 -04:00
TwinProduction
a62eab58ef Update examples 2020-08-14 20:05:10 -04:00
20 changed files with 466 additions and 95 deletions

121
README.md

@@ -1,6 +1,7 @@
# gatus
![Gatus](static/logo-with-name.png)
![build](https://github.com/TwinProduction/gatus/workflows/build/badge.svg?branch=master)
[![Go Report Card](https://goreportcard.com/badge/github.com/TwinProduction/gatus)](https://goreportcard.com/report/github.com/TwinProduction/gatus)
[![Docker pulls](https://img.shields.io/docker/pulls/twinproduction/gatus.svg)](https://cloud.docker.com/repository/docker/twinproduction/gatus)
A service health dashboard in Go that is meant to be used as a docker
@@ -10,6 +11,20 @@ I personally deploy it in my Kubernetes cluster and have it monitor the status of my
core applications: https://status.twinnation.org/
## Table of Contents
- [Usage](#usage)
- [Configuration](#configuration)
- [Conditions](#conditions)
- [Docker](#docker)
- [Running the tests](#running-the-tests)
- [Using in Production](#using-in-production)
- [FAQ](#faq)
- [Sending a GraphQL request](#sending-a-graphql-request)
- [Configuring Slack alerts](#configuring-slack-alerts)
- [Configuring custom alerts](#configuring-custom-alerts)
## Usage
By default, the configuration file is expected to be at `config/config.yaml`.
@@ -22,14 +37,14 @@ Here's a simple example:
metrics: true # Whether to expose metrics at /metrics
services:
- name: twinnation # Name of your service, can be anything
url: https://twinnation.org/health
interval: 15s # Duration to wait between every status check (default: 10s)
url: "https://twinnation.org/health"
interval: 30s # Duration to wait between every status check (default: 10s)
conditions:
- "[STATUS] == 200" # Status must be 200
- "[BODY].status == UP" # The json path "$.status" must be equal to UP
- "[RESPONSE_TIME] < 300" # Response time must be under 300ms
- name: example
url: https://example.org/
url: "https://example.org/"
interval: 30s
conditions:
- "[STATUS] == 200"
@@ -40,17 +55,28 @@ Note that you can also add environment variables in your configuration file
### Configuration
| Parameter | Description | Default |
| ----------------------- | --------------------------------------------------------------- | -------------- |
| `metrics` | Whether to expose metrics at /metrics | `false` |
| `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].url` | URL to send the request to | Required `""` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
| `services[].interval` | Duration to wait between every status check | `10s` |
| `services[].method` | Request method | `GET` |
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| Parameter | Description | Default |
| --------------------------------- | --------------------------------------------------------------- | -------------- |
| `metrics` | Whether to expose metrics at /metrics | `false` |
| `services` | List of services to monitor | Required `[]` |
| `services[].name` | Name of the service. Can be anything. | Required `""` |
| `services[].url` | URL to send the request to | Required `""` |
| `services[].conditions` | Conditions used to determine the health of the service | `[]` |
| `services[].interval` | Duration to wait between every status check | `10s` |
| `services[].method` | Request method | `GET` |
| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` |
| `services[].body` | Request body | `""` |
| `services[].headers` | Request headers | `{}` |
| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `custom` | Required `""` |
| `services[].alerts[].enabled` | Whether to enable the alert | `false` |
| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` |
| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` |
| `alerting` | Configuration for alerting | `{}` |
| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` |
| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` |
| `alerting.custom.url` | Custom alerting request url | `""` |
| `alerting.custom.body` | Custom alerting request body | `""` |
| `alerting.custom.headers` | Custom alerting request headers | `{}` |
### Conditions
@@ -107,7 +133,7 @@ See the [example](example) folder.
By setting `services[].graphql` to true, the body will automatically be wrapped in the standard GraphQL `query` parameter.
For instance, the following configuration:
```
```yaml
services:
- name: filter users by gender
url: http://localhost:8080/playground
@@ -132,4 +158,67 @@ services:
will send a `POST` request to `http://localhost:8080/playground` with the following body:
```json
{"query":" {\n user(gender: \"female\") {\n id\n name\n gender\n avatar\n }\n }"}
```
### Configuring Slack alerts
```yaml
alerting:
slack: "https://hooks.slack.com/services/**********/**********/**********"
services:
- name: twinnation
interval: 30s
url: "https://twinnation.org/health"
alerts:
- type: slack
enabled: true
description: "healthcheck failed 3 times in a row"
- type: slack
enabled: true
threshold: 5
description: "healthcheck failed 5 times in a row"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 300"
```
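Based on the watchdog changes further down in this diff, the message posted to the Slack webhook for the first alert above would look roughly like this once the service has failed 3 times in a row:
```json
{"text":"*[Gatus]*\n*service:* twinnation\n*description:* healthcheck failed 3 times in a row"}
```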
### Configuring custom alerts
While they're called alerts, you can use this feature to call anything.
For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
leveraging Gatus, you could have Gatus call that application's endpoint when a service starts failing. Your application
would then check whether the service that started failing was recently deployed, and if it was, automatically
roll it back.
The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced by the alert description and the
service name, respectively, in both the body (`alerting.custom.body`) and the url (`alerting.custom.url`).
For all intents and purposes, we'll configure the custom alert with a Slack webhook here, but you can call anything you want.
```yaml
alerting:
custom:
url: "https://hooks.slack.com/services/**********/**********/**********"
method: "POST"
body: |
{
"text": "[SERVICE_NAME] - [ALERT_DESCRIPTION]"
}
services:
- name: twinnation
interval: 30s
url: "https://twinnation.org/health"
alerts:
- type: custom
enabled: true
threshold: 10
description: "healthcheck failed 10 times in a row"
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 300"
```
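With the configuration above, once the service has failed 10 times in a row, Gatus would send a `POST` request to the configured webhook with the placeholders substituted, giving a body roughly like:
```json
{
    "text": "twinnation - healthcheck failed 10 times in a row"
}
```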


@@ -1,15 +1,15 @@
metrics: true
services:
- name: twinnation
interval: 10s
interval: 30s
url: https://twinnation.org/health
conditions:
- "[STATUS] == 200"
- "[BODY].status == UP"
- "[RESPONSE_TIME] < 1000"
- name: twinnation-articles-api
interval: 10s
url: https://twinnation.org/api/v1/articles/24
interval: 30s
url: "https://twinnation.org/api/v1/articles/24"
conditions:
- "[STATUS] == 200"
- "[BODY].id == 24"


@@ -21,8 +21,9 @@ var (
)
type Config struct {
Metrics bool `yaml:"metrics"`
Services []*core.Service `yaml:"services"`
Metrics bool `yaml:"metrics"`
Alerting *core.AlertingConfig `yaml:"alerting"`
Services []*core.Service `yaml:"services"`
}
func Get() *Config {


@@ -2,6 +2,7 @@ package config
import (
"fmt"
"github.com/TwinProduction/gatus/core"
"testing"
"time"
)
@@ -23,6 +24,9 @@ services:
if err != nil {
t.Error("No error should've been returned")
}
if config == nil {
t.Fatal("Config shouldn't have been nil")
}
if len(config.Services) != 2 {
t.Error("Should have returned two services")
}
@@ -58,6 +62,9 @@ services:
if err != nil {
t.Error("No error should've been returned")
}
if config == nil {
t.Fatal("Config shouldn't have been nil")
}
if config.Metrics {
t.Error("Metrics should've been false by default")
}
@@ -81,6 +88,9 @@ services:
if err != nil {
t.Error("No error should've been returned")
}
if config == nil {
t.Fatal("Config shouldn't have been nil")
}
if !config.Metrics {
t.Error("Metrics should have been true")
}
@@ -107,3 +117,62 @@ badconfig:
t.Error("The error returned should have been of type ErrNoServiceInConfig")
}
}
func TestParseAndValidateConfigBytesWithAlerting(t *testing.T) {
config, err := parseAndValidateConfigBytes([]byte(`
alerting:
slack: "http://example.com"
services:
- name: twinnation
url: https://twinnation.org/actuator/health
alerts:
- type: slack
enabled: true
threshold: 7
description: "Healthcheck failed 7 times in a row"
conditions:
- "[STATUS] == 200"
`))
if err != nil {
t.Error("No error should've been returned")
}
if config == nil {
t.Fatal("Config shouldn't have been nil")
}
if config.Metrics {
t.Error("Metrics should've been false by default")
}
if config.Alerting == nil {
t.Fatal("config.AlertingConfig shouldn't have been nil")
}
if config.Alerting.Slack != "http://example.com" {
t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack)
}
if len(config.Services) != 1 {
t.Error("There should've been 1 service")
}
if config.Services[0].Url != "https://twinnation.org/actuator/health" {
t.Errorf("URL should have been %s", "https://twinnation.org/actuator/health")
}
if config.Services[0].Interval != 10*time.Second {
t.Errorf("Interval should have been %s, because it is the default value", 10*time.Second)
}
if config.Services[0].Alerts == nil {
t.Fatal("The service alerts shouldn't have been nil")
}
if len(config.Services[0].Alerts) != 1 {
t.Fatal("There should've been 1 alert configured")
}
if !config.Services[0].Alerts[0].Enabled {
t.Error("The alert should've been enabled")
}
if config.Services[0].Alerts[0].Threshold != 7 {
t.Errorf("The threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[0].Threshold)
}
if config.Services[0].Alerts[0].Type != core.SlackAlert {
t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type)
}
if config.Services[0].Alerts[0].Description != "Healthcheck failed 7 times in a row" {
t.Errorf("The type of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[0].Description)
}
}

23
core/alert.go Normal file

@@ -0,0 +1,23 @@
package core
// Alert is the service's alert configuration
type Alert struct {
// Type of alert
Type AlertType `yaml:"type"`
// Enabled defines whether or not the alert is enabled
Enabled bool `yaml:"enabled"`
// Threshold is the number of failures in a row needed before triggering the alert
Threshold int `yaml:"threshold"`
// Description of the alert. Will be included in the alert sent.
Description string `yaml:"description"`
}
type AlertType string
const (
SlackAlert AlertType = "slack"
CustomAlert AlertType = "custom"
)

56
core/alerting.go Normal file

@@ -0,0 +1,56 @@
package core
import (
"bytes"
"fmt"
"github.com/TwinProduction/gatus/client"
"net/http"
"strings"
)
type AlertingConfig struct {
Slack string `yaml:"slack"`
Custom *CustomAlertProvider `yaml:"custom"`
}
type CustomAlertProvider struct {
Url string `yaml:"url"`
Method string `yaml:"method,omitempty"`
Body string `yaml:"body,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
}
func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request {
body := provider.Body
url := provider.Url
if strings.Contains(provider.Body, "[ALERT_DESCRIPTION]") {
body = strings.ReplaceAll(provider.Body, "[ALERT_DESCRIPTION]", alertDescription)
}
if strings.Contains(provider.Body, "[SERVICE_NAME]") {
body = strings.ReplaceAll(provider.Body, "[SERVICE_NAME]", serviceName)
}
if strings.Contains(provider.Url, "[ALERT_DESCRIPTION]") {
url = strings.ReplaceAll(provider.Url, "[ALERT_DESCRIPTION]", alertDescription)
}
if strings.Contains(provider.Url, "[SERVICE_NAME]") {
url = strings.ReplaceAll(provider.Url, "[SERVICE_NAME]", serviceName)
}
bodyBuffer := bytes.NewBuffer([]byte(body))
request, _ := http.NewRequest(provider.Method, url, bodyBuffer)
for k, v := range provider.Headers {
request.Header.Set(k, v)
}
return request
}
func (provider *CustomAlertProvider) Send(serviceName, alertDescription string) error {
request := provider.buildRequest(serviceName, alertDescription)
response, err := client.GetHttpClient().Do(request)
if err != nil {
return err
}
if response.StatusCode > 399 {
return fmt.Errorf("call to provider alert returned status code %d", response.StatusCode)
}
return nil
}
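For reference, a minimal, hypothetical standalone usage of the new `CustomAlertProvider` might look like the sketch below; the URL is a placeholder, and any HTTP endpoint would do:
```go
package main

import (
	"log"

	"github.com/TwinProduction/gatus/core"
)

func main() {
	// Hypothetical provider; [SERVICE_NAME] and [ALERT_DESCRIPTION] are
	// substituted by buildRequest before the request is sent.
	provider := &core.CustomAlertProvider{
		Url:     "https://example.com/alerts",
		Method:  "POST",
		Body:    `{"text":"[SERVICE_NAME] - [ALERT_DESCRIPTION]"}`,
		Headers: map[string]string{"Content-Type": "application/json"},
	}
	if err := provider.Send("twinnation", "healthcheck failed 3 times in a row"); err != nil {
		log.Printf("Failed to send alert: %s", err.Error())
	}
}
```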


@@ -26,6 +26,9 @@ type Service struct {
Headers map[string]string `yaml:"headers,omitempty"`
Interval time.Duration `yaml:"interval,omitempty"`
Conditions []*Condition `yaml:"conditions"`
Alerts []*Alert `yaml:"alerts"`
numberOfFailuresInARow int
}
func (service *Service) Validate() {
@@ -39,6 +42,11 @@ func (service *Service) Validate() {
if len(service.Headers) == 0 {
service.Headers = make(map[string]string)
}
for _, alert := range service.Alerts {
if alert.Threshold <= 0 {
alert.Threshold = 3
}
}
if len(service.Url) == 0 {
panic(ErrNoUrl)
}
@@ -68,9 +76,29 @@ func (service *Service) EvaluateConditions() *Result {
}
}
result.Timestamp = time.Now()
if result.Success {
service.numberOfFailuresInARow = 0
// TODO: Send notification that alert has been resolved?
} else {
service.numberOfFailuresInARow++
}
return result
}
func (service *Service) GetAlertsTriggered() []Alert {
var alerts []Alert
if service.numberOfFailuresInARow == 0 {
return alerts
}
for _, alert := range service.Alerts {
if alert.Enabled && alert.Threshold == service.numberOfFailuresInARow {
alerts = append(alerts, *alert)
continue
}
}
return alerts
}
func (service *Service) getIp(result *Result) {
urlObject, err := url.Parse(service.Url)
if err != nil {


@@ -13,6 +13,9 @@ const (
ResponseTimePlaceHolder = "[RESPONSE_TIME]"
BodyPlaceHolder = "[BODY]"
LengthFunctionPrefix = "len("
FunctionSuffix = ")"
InvalidConditionElementSuffix = "(INVALID)"
)
@@ -34,9 +37,9 @@ func sanitizeAndResolve(list []string, result *Result) []string {
// if it starts with BodyPlaceHolder, then evaluate the json path
if strings.Contains(element, BodyPlaceHolder) {
wantLength := false
if strings.HasPrefix(element, "len(") && strings.HasSuffix(element, ")") {
if strings.HasPrefix(element, LengthFunctionPrefix) && strings.HasSuffix(element, FunctionSuffix) {
wantLength = true
element = strings.TrimSuffix(strings.TrimPrefix(element, "len("), ")")
element = strings.TrimSuffix(strings.TrimPrefix(element, LengthFunctionPrefix), FunctionSuffix)
}
resolvedElement, resolvedElementLength, err := jsonpath.Eval(strings.Replace(element, fmt.Sprintf("%s.", BodyPlaceHolder), "", 1), result.Body)
if err != nil {
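For context, the `len(` prefix handled above lets a condition evaluate the length of a resolved `[BODY]` json path; a sketch of such a condition, with a hypothetical endpoint and field path:
```yaml
services:
  - name: example
    url: "https://example.org/api/v1/users"
    conditions:
      - "[STATUS] == 200"
      - "len([BODY].data) > 0"
```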


@@ -2,11 +2,12 @@ metrics: true
services:
- name: TwiNNatioN
url: https://twinnation.org/health
interval: 10s
interval: 30s
conditions:
- "[STATUS] == 200"
- name: GitHub
url: https://api.github.com/healthz
interval: 5m
conditions:
- "[STATUS] == 200"
- name: Example


@@ -10,6 +10,7 @@ data:
- "[STATUS] == 200"
- name: GitHub
url: https://api.github.com/healthz
interval: 5m
conditions:
- "[STATUS] == 200"
- name: Example

2
go.mod

@@ -1,6 +1,6 @@
module github.com/TwinProduction/gatus
go 1.14
go 1.15
require (
github.com/prometheus/client_golang v1.2.1

44
main.go

@@ -1,6 +1,8 @@
package main
import (
"bytes"
"compress/gzip"
"encoding/json"
"github.com/TwinProduction/gatus/config"
"github.com/TwinProduction/gatus/watchdog"
@@ -8,6 +10,16 @@ import (
"log"
"net/http"
"os"
"strings"
"time"
)
const CacheTTL = 10 * time.Second
var (
cachedServiceResults []byte
cachedServiceResultsGzipped []byte
cachedServiceResultsTimestamp time.Time
)
func main() {
@@ -37,14 +49,30 @@ func loadConfiguration() *config.Config {
return config.Get()
}
func serviceResultsHandler(writer http.ResponseWriter, _ *http.Request) {
serviceResults := watchdog.GetServiceResults()
data, err := json.Marshal(serviceResults)
if err != nil {
log.Printf("[main][serviceResultsHandler] Unable to marshall object to JSON: %s", err.Error())
writer.WriteHeader(http.StatusInternalServerError)
_, _ = writer.Write([]byte("Unable to marshall object to JSON"))
return
func serviceResultsHandler(writer http.ResponseWriter, r *http.Request) {
if isExpired := cachedServiceResultsTimestamp.IsZero() || time.Now().Sub(cachedServiceResultsTimestamp) > CacheTTL; isExpired {
buffer := &bytes.Buffer{}
gzipWriter := gzip.NewWriter(buffer)
serviceResults := watchdog.GetServiceResults()
data, err := json.Marshal(serviceResults)
if err != nil {
log.Printf("[main][serviceResultsHandler] Unable to marshall object to JSON: %s", err.Error())
writer.WriteHeader(http.StatusInternalServerError)
_, _ = writer.Write([]byte("Unable to marshall object to JSON"))
return
}
gzipWriter.Write(data)
gzipWriter.Close()
cachedServiceResults = data
cachedServiceResultsGzipped = buffer.Bytes()
cachedServiceResultsTimestamp = time.Now()
}
var data []byte
if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
writer.Header().Set("Content-Encoding", "gzip")
data = cachedServiceResultsGzipped
} else {
data = cachedServiceResults
}
writer.Header().Add("Content-type", "application/json")
writer.WriteHeader(http.StatusOK)
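With the `Accept-Encoding` handling above, the cached results can be fetched compressed, assuming Gatus is listening locally on its default port (8080); the response is then served from the 10-second cache with `Content-Encoding: gzip`:
```console
curl -v --compressed http://localhost:8080/api/v1/results
```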

BIN  static/favicon.ico  (new binary file, 15 KiB)


@@ -31,11 +31,18 @@
}
.status {
cursor: pointer;
transition: opacity 500ms ease-in-out;
transition: all 500ms ease-in-out;
overflow-x: hidden;
padding: .25em 0;
color: white;
}
.title {
font-size: 2.5rem;
}
.status:hover {
opacity: 0.7;
transition: opacity 100ms ease-in-out;
color: black;
}
.status-over-time {
overflow: auto;
@@ -48,6 +55,9 @@
opacity: 0.5;
margin-top: 5px;
}
.status-min-max-ms {
overflow-x: hidden;
}
#tooltip {
position: fixed;
top: 0;
@@ -76,9 +86,16 @@
</style>
</head>
<body>
<div class="container my-3 rounded p-4 border shadow">
<div class="mb-3">
<div class="display-4">Health Status</div>
<div class="container my-3 rounded p-3 border shadow">
<div class="mb-2">
<div class="row">
<div class="col-8 text-left my-auto">
<div class="title display-4">Health Status</div>
</div>
<div class="col-4 text-right">
<img src="logo.png" alt="GaTuS" style="position: relative; min-width: 50px; max-width: 200px; width: 20%;"/>
</div>
</div>
</div>
<div id="results"></div>
</div>
@@ -136,9 +153,15 @@
let tooltipBoundingClientRect = document.querySelector('#tooltip').getBoundingClientRect();
if (targetLeftPosition + window.scrollX + tooltipBoundingClientRect.width + 50 > document.body.getBoundingClientRect().width) {
targetLeftPosition = element.getBoundingClientRect().x - tooltipBoundingClientRect.width + element.getBoundingClientRect().width;
if (targetLeftPosition < 0) {
targetLeftPosition += -targetLeftPosition;
}
}
if (targetTopPosition + window.scrollY + tooltipBoundingClientRect.height + 50 > document.body.getBoundingClientRect().height) {
targetTopPosition = element.getBoundingClientRect().y - (tooltipBoundingClientRect.height + 10)
if (targetTopPosition + window.scrollY + tooltipBoundingClientRect.height + 50 > document.body.getBoundingClientRect().height && targetTopPosition >= 0) {
targetTopPosition = element.getBoundingClientRect().y - (tooltipBoundingClientRect.height + 10);
if (targetTopPosition < 0) {
targetTopPosition = element.getBoundingClientRect().y + 30;
}
}
$("#tooltip").css({top: targetTopPosition + "px", left: targetLeftPosition + "px"});
}
@@ -160,62 +183,69 @@
function refreshResults() {
$.getJSON("/api/v1/results", function (data) {
serviceStatuses = data;
let output = "";
for (let serviceName in data) {
let serviceStatusOverTime = "";
let hostname = data[serviceName][data[serviceName].length-1].hostname
let minResponseTime = null;
let maxResponseTime = null;
let newestTimestamp = null;
let oldestTimestamp = null;
for (let key in data[serviceName]) {
let serviceResult = data[serviceName][key];
serviceStatusOverTime = createStatusBadge(serviceName, key, serviceResult.success) + serviceStatusOverTime;
const responseTime = parseInt(serviceResult.duration/1000000);
if (minResponseTime == null || minResponseTime > responseTime) {
minResponseTime = responseTime;
}
if (maxResponseTime == null || maxResponseTime < responseTime) {
maxResponseTime = responseTime;
}
const timestamp = new Date(serviceResult.timestamp);
if (newestTimestamp == null || newestTimestamp > timestamp) {
newestTimestamp = timestamp;
}
if (oldestTimestamp == null || oldestTimestamp < timestamp) {
oldestTimestamp = timestamp;
}
}
output += ""
+ "<div class='container py-3 border-left border-right border-top border-black'>"
+ " <div class='row mb-2'>"
+ " <div class='col-10'>"
+ " <span class='font-weight-bold'>" + serviceName + "</span> <span class='text-secondary font-weight-lighter'>- " + hostname + "</span>"
+ " </div>"
+ " <div class='col-2 text-right'>"
+ " <span class='font-weight-lighter'>" + (minResponseTime === maxResponseTime ? minResponseTime : (minResponseTime + "-" + maxResponseTime)) + "ms</span>"
+ " </div>"
+ " </div>"
+ " <div class='row'>"
+ " <div class='col-12 d-flex flex-row-reverse status-over-time'>"
+ " " + serviceStatusOverTime
+ " </div>"
+ " </div>"
+ " <div class='row status-time-ago'>"
+ " <div class='col-6'>"
+ " " + generatePrettyTimeAgo(newestTimestamp)
+ " </div>"
+ " <div class='col-6 text-right'>"
+ " " + generatePrettyTimeAgo(oldestTimestamp)
+ " </div>"
+ " </div>"
+ "</div>";
// Update the table only if there's a change
if (JSON.stringify(serviceStatuses) !== JSON.stringify(data)) {
serviceStatuses = data;
buildTable();
}
$("#results").html(output);
});
}
function buildTable() {
let output = "";
for (let serviceName in serviceStatuses) {
let serviceStatusOverTime = "";
let hostname = serviceStatuses[serviceName][serviceStatuses[serviceName].length-1].hostname
let minResponseTime = null;
let maxResponseTime = null;
let newestTimestamp = null;
let oldestTimestamp = null;
for (let key in serviceStatuses[serviceName]) {
let serviceResult = serviceStatuses[serviceName][key];
serviceStatusOverTime = createStatusBadge(serviceName, key, serviceResult.success) + serviceStatusOverTime;
const responseTime = parseInt(serviceResult.duration/1000000);
if (minResponseTime == null || minResponseTime > responseTime) {
minResponseTime = responseTime;
}
if (maxResponseTime == null || maxResponseTime < responseTime) {
maxResponseTime = responseTime;
}
const timestamp = new Date(serviceResult.timestamp);
if (newestTimestamp == null || newestTimestamp < timestamp) {
newestTimestamp = timestamp;
}
if (oldestTimestamp == null || oldestTimestamp > timestamp) {
oldestTimestamp = timestamp;
}
}
output += ""
+ "<div class='container py-3 border-left border-right border-top border-black'>"
+ " <div class='row mb-2'>"
+ " <div class='col-md-10'>"
+ " <span class='font-weight-bold'>" + serviceName + "</span> <span class='text-secondary font-weight-lighter'>- " + hostname + "</span>"
+ " </div>"
+ " <div class='col-md-2 text-right'>"
+ " <span class='font-weight-lighter status-min-max-ms'>" + (minResponseTime === maxResponseTime ? minResponseTime : (minResponseTime + "-" + maxResponseTime)) + "ms</span>"
+ " </div>"
+ " </div>"
+ " <div class='row'>"
+ " <div class='col-12 d-flex flex-row-reverse status-over-time'>"
+ " " + serviceStatusOverTime
+ " </div>"
+ " </div>"
+ " <div class='row status-time-ago'>"
+ " <div class='col-6'>"
+ " " + generatePrettyTimeAgo(oldestTimestamp)
+ " </div>"
+ " <div class='col-6 text-right'>"
+ " " + generatePrettyTimeAgo(newestTimestamp)
+ " </div>"
+ " </div>"
+ "</div>";
}
$("#results").html(output);
}
function prettifyTimestamp(timestamp) {
let date = new Date(timestamp);
let YYYY = date.getFullYear();
@@ -224,7 +254,7 @@
let hh = ((date.getHours())<10?"0":"")+""+(date.getHours());
let mm = ((date.getMinutes())<10?"0":"")+""+(date.getMinutes());
let ss = ((date.getSeconds())<10?"0":"")+""+(date.getSeconds());
return YYYY+"-"+MM+"-"+DD+" "+hh+":"+mm+":"+ss;
return YYYY + "-" + MM + "-" + DD + " " + hh + ":" + mm + ":" + ss;
}
function generatePrettyTimeAgo(t) {

BIN  static/logo-256px.png  (new binary file, 20 KiB)

BIN  static/logo-candidate.png  (new binary file, 59 KiB)

BIN  (filename not shown; new binary file, 83 KiB)

BIN  static/logo-with-name.png  (new binary file, 27 KiB)

BIN  static/logo.png  (new binary file, 51 KiB)


@@ -15,10 +15,12 @@ var (
rwLock sync.RWMutex
)
// GetServiceResults returns a list of the last 20 results for each service
func GetServiceResults() *map[string][]*core.Result {
return &serviceResults
}
// Monitor loops over each service and starts a goroutine to monitor each service separately
func Monitor(cfg *config.Config) {
for _, service := range cfg.Services {
go monitor(service)
@@ -27,12 +29,13 @@ func Monitor(cfg *config.Config) {
}
}
// monitor monitors a single service in a loop
func monitor(service *core.Service) {
for {
// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
// could cause performance issues and return inaccurate results
rwLock.Lock()
log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name)
log.Printf("[watchdog][monitor] Monitoring serviceName=%s", service.Name)
result := service.EvaluateConditions()
metric.PublishMetricsForService(service, result)
serviceResults[service.Name] = append(serviceResults[service.Name], result)
@@ -45,13 +48,52 @@ func monitor(service *core.Service) {
extra = fmt.Sprintf("responseBody=%s", result.Body)
}
log.Printf(
"[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s; %s",
"[watchdog][monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s; %s",
service.Name,
len(result.Errors),
result.Duration.Round(time.Millisecond),
extra,
)
log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
cfg := config.Get()
if cfg.Alerting != nil {
for _, alertTriggered := range service.GetAlertsTriggered() {
var alertProvider *core.CustomAlertProvider
if alertTriggered.Type == core.SlackAlert {
if len(cfg.Alerting.Slack) > 0 {
log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description)
alertProvider = &core.CustomAlertProvider{
Url: cfg.Alerting.Slack,
Method: "POST",
Body: fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description),
Headers: map[string]string{"Content-Type": "application/json"},
}
} else {
log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
}
} else if alertTriggered.Type == core.CustomAlert {
if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 {
log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description)
alertProvider = &core.CustomAlertProvider{
Url: cfg.Alerting.Custom.Url,
Method: cfg.Alerting.Custom.Method,
Body: cfg.Alerting.Custom.Body,
Headers: cfg.Alerting.Custom.Headers,
}
} else {
log.Printf("[watchdog][monitor] Not sending custom alert despite being triggered, because there is no custom url configured")
}
}
if alertProvider != nil {
err := alertProvider.Send(service.Name, alertTriggered.Description)
if err != nil {
log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error())
}
}
}
}
log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
time.Sleep(service.Interval)
}
}