feat(suite): Implement Suites (#1239)

* feat(suite): Implement Suites

Fixes #1230

* Update docs

* Fix variable alignment

* Prevent always-run endpoint from running if a context placeholder fails to resolve in the URL

* Return errors when a context placeholder path fails to resolve

* Add a couple of unit tests

* Add a couple of unit tests

* fix(ui): Update group count properly

Fixes #1233

* refactor: Pass down entire config instead of several sub-configs

* fix: Change default suite interval and timeout

* fix: Deprecate disable-monitoring-lock in favor of concurrency

* fix: Make sure there are no duplicate keys

* Refactor some code

* Update watchdog/watchdog.go

* Update web/app/src/components/StepDetailsModal.vue

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* chore: Remove useless log

* fix: Set default concurrency to 3 instead of 5

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
TwiN
2025-09-05 15:39:12 -04:00
committed by GitHub
parent 10cabb9dde
commit d668a14703
74 changed files with 7513 additions and 652 deletions

55
config/suite/result.go Normal file
View File

@@ -0,0 +1,55 @@
package suite
import (
"time"
"github.com/TwiN/gatus/v5/config/endpoint"
)
// Result represents the result of a single suite execution.
//
// Execute appends one entry to EndpointResults for every endpoint it actually
// runs, and CalculateSuccess derives Success from those entries and from Errors.
type Result struct {
// Name of the suite
Name string `json:"name,omitempty"`
// Group of the suite
Group string `json:"group,omitempty"`
// Success indicates whether all executed endpoints succeeded and no suite-level error was recorded
Success bool `json:"success"`
// Timestamp is when the suite execution started
Timestamp time.Time `json:"timestamp"`
// Duration is how long the entire suite execution took
Duration time.Duration `json:"duration"`
// EndpointResults contains the results of each endpoint execution, in execution order
EndpointResults []*endpoint.Result `json:"endpointResults"`
// Context is the final state of the context after all endpoints executed.
// It is excluded from JSON serialization.
Context map[string]interface{} `json:"-"`
// Errors contains any suite-level errors (as opposed to errors attached to an individual endpoint result)
Errors []string `json:"errors,omitempty"`
}
// AddError records a suite-level error message on the result.
// It does not update Success; call CalculateSuccess to refresh it.
func (r *Result) AddError(err string) {
	withNew := append(r.Errors, err)
	r.Errors = withNew
}
// CalculateSuccess recomputes Success from the recorded endpoint results and
// suite-level errors.
//
// The suite is successful only if every entry in EndpointResults succeeded
// and Errors is empty.
func (r *Result) CalculateSuccess() {
	// Every endpoint is required, so the first failure settles the outcome
	everyEndpointPassed := true
	for i := range r.EndpointResults {
		if !r.EndpointResults[i].Success {
			everyEndpointPassed = false
			break
		}
	}
	// A suite-level error fails the suite regardless of the endpoint outcomes
	r.Success = everyEndpointPassed && len(r.Errors) == 0
}

214
config/suite/suite.go Normal file
View File

@@ -0,0 +1,214 @@
package suite
import (
"errors"
"fmt"
"strconv"
"time"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/gatus/v5/config/gontext"
"github.com/TwiN/gatus/v5/config/key"
)
var (
// ErrSuiteWithNoName is the error returned when a suite has no name
ErrSuiteWithNoName = errors.New("suite must have a name")
// ErrSuiteWithNoEndpoints is the error returned when a suite has no endpoints
ErrSuiteWithNoEndpoints = errors.New("suite must have at least one endpoint")
// ErrSuiteWithDuplicateEndpointNames is the error returned when a suite has duplicate endpoint names.
// ValidateAndSetDefaults wraps it with %w to include the offending name, so match it with errors.Is.
ErrSuiteWithDuplicateEndpointNames = errors.New("suite cannot have duplicate endpoint names")
// ErrSuiteWithInvalidTimeout is the error returned when a suite has a negative timeout
// (a zero timeout is not an error: ValidateAndSetDefaults replaces it with DefaultTimeout)
ErrSuiteWithInvalidTimeout = errors.New("suite timeout must be positive")
// DefaultInterval is the interval applied by ValidateAndSetDefaults when a suite's interval is zero
DefaultInterval = 10 * time.Minute
// DefaultTimeout is the timeout applied by ValidateAndSetDefaults when a suite's timeout is zero
DefaultTimeout = 5 * time.Minute
)
// Suite is a collection of endpoints that are executed sequentially with shared context.
//
// Call ValidateAndSetDefaults before Execute so that Interval, Timeout and
// InitialContext are populated and every endpoint has been validated.
type Suite struct {
// Name of the suite. Must be unique.
Name string `yaml:"name"`
// Group the suite belongs to. Used for grouping multiple suites together.
// ValidateAndSetDefaults copies this value onto every endpoint of the suite.
Group string `yaml:"group,omitempty"`
// Enabled defines whether the suite is enabled. A nil value is treated as enabled (see IsEnabled).
Enabled *bool `yaml:"enabled,omitempty"`
// Interval is the duration to wait between suite executions. Zero is replaced by DefaultInterval.
Interval time.Duration `yaml:"interval,omitempty"`
// Timeout is the maximum duration for the entire suite execution. Zero is replaced by DefaultTimeout;
// negative values are rejected by ValidateAndSetDefaults.
Timeout time.Duration `yaml:"timeout,omitempty"`
// InitialContext holds initial values that can be referenced by endpoints.
// It is read from the "context" key of the YAML configuration.
InitialContext map[string]interface{} `yaml:"context,omitempty"`
// Endpoints in the suite (executed sequentially). Names must be unique within the suite.
Endpoints []*endpoint.Endpoint `yaml:"endpoints"`
}
// IsEnabled reports whether the suite is enabled.
// A suite with no explicit Enabled value is considered enabled.
func (s *Suite) IsEnabled() bool {
	return s.Enabled == nil || *s.Enabled
}
// Key returns the suite's unique key, derived from its group and its name
func (s *Suite) Key() string {
	group, name := s.Group, s.Name
	return key.ConvertGroupAndNameToKey(group, name)
}
// ValidateAndSetDefaults checks that the suite is well-formed and fills in unset fields.
//
// It returns an error if the suite has no name, has no endpoints, contains two
// endpoints with the same name, contains an endpoint that fails its own
// validation, or has a negative timeout.
//
// As side effects, every endpoint visited is assigned the suite's group, a
// zero Interval or Timeout is replaced by DefaultInterval or DefaultTimeout,
// and a nil InitialContext is replaced by an empty map.
func (s *Suite) ValidateAndSetDefaults() error {
	switch {
	case len(s.Name) == 0:
		return ErrSuiteWithNoName
	case len(s.Endpoints) == 0:
		return ErrSuiteWithNoEndpoints
	}
	// Endpoint names must be unique within the suite
	seen := make(map[string]struct{})
	for _, ep := range s.Endpoints {
		if _, duplicate := seen[ep.Name]; duplicate {
			return fmt.Errorf("%w: duplicate endpoint name '%s'", ErrSuiteWithDuplicateEndpointNames, ep.Name)
		}
		seen[ep.Name] = struct{}{}
		// The suite's group is applied before the endpoint validates itself
		ep.Group = s.Group
		if err := ep.ValidateAndSetDefaults(); err != nil {
			return fmt.Errorf("invalid endpoint '%s': %w", ep.Name, err)
		}
	}
	if s.Interval == 0 {
		s.Interval = DefaultInterval
	}
	if s.Timeout == 0 {
		s.Timeout = DefaultTimeout
	}
	// Zero was replaced by the default above, so only a negative timeout is rejected here
	if s.Timeout < 0 {
		return ErrSuiteWithInvalidTimeout
	}
	if s.InitialContext == nil {
		s.InitialContext = make(map[string]interface{})
	}
	return nil
}
// Execute executes all endpoints in the suite sequentially with context sharing.
//
// A context is created from InitialContext and passed to every endpoint; values
// listed in an endpoint's Store mapping are written back into that context so
// later endpoints can reference them. Once an endpoint has failed, the
// remaining endpoints are skipped unless they are flagged AlwaysRun.
//
// The suite timeout is checked before each endpoint is started. When it has
// elapsed, a suite-level error is recorded and no further endpoint (AlwaysRun
// or not) is started. An endpoint that is already running is not interrupted
// by this check.
//
// The returned Result always has Context, Duration and Success populated.
func (s *Suite) Execute() *Result {
	start := time.Now()
	// Initialize context from suite configuration
	ctx := gontext.New(s.InitialContext)
	// Create suite result
	result := &Result{
		Name:            s.Name,
		Group:           s.Group,
		Success:         true,
		Timestamp:       start,
		EndpointResults: make([]*endpoint.Result, 0, len(s.Endpoints)),
	}
	// Set up timeout for the entire suite execution
	timeoutChan := time.After(s.Timeout)
	// Execute each endpoint sequentially
	suiteHasFailed := false
endpointLoop:
	for _, ep := range s.Endpoints {
		// Skip non-always-run endpoints if suite has already failed
		if suiteHasFailed && !ep.AlwaysRun {
			continue
		}
		// Check timeout
		select {
		case <-timeoutChan:
			result.AddError(fmt.Sprintf("suite execution timed out after %v", s.Timeout))
			result.Success = false
			// An unlabeled break would only leave the select statement and the
			// loop would keep executing endpoints; the label exits the loop.
			break endpointLoop
		default:
		}
		// Execute endpoint with context
		epStartTime := time.Now()
		epResult := ep.EvaluateHealthWithContext(ctx)
		epDuration := time.Since(epStartTime)
		// Set endpoint name, timestamp, and duration on the result
		epResult.Name = ep.Name
		epResult.Timestamp = epStartTime
		epResult.Duration = epDuration
		// Store values from the endpoint result if configured (always store, even on failure)
		if ep.Store != nil {
			_, err := StoreResultValues(ctx, ep.Store, epResult)
			if err != nil {
				epResult.AddError(fmt.Sprintf("failed to store values: %v", err))
			}
		}
		result.EndpointResults = append(result.EndpointResults, epResult)
		// Mark suite as failed on any endpoint failure
		if !epResult.Success {
			result.Success = false
			suiteHasFailed = true
		}
	}
	result.Context = ctx.GetAll()
	result.Duration = time.Since(start)
	// Recompute Success from the endpoint results and the suite-level errors
	result.CalculateSuccess()
	return result
}
// StoreResultValues resolves each placeholder in mappings against result and
// stores the resolved value in ctx under the corresponding mapping key.
//
// The returned map reports what was recorded for each key: the stored value,
// or an "ERROR: ..." string when the placeholder could not be extracted (such
// keys are not written to ctx and do not cause an error to be returned). If
// ctx rejects a value, processing stops and the values recorded so far are
// returned along with a wrapped error. Empty or nil mappings yield nil, nil.
func StoreResultValues(ctx *gontext.Gontext, mappings map[string]string, result *endpoint.Result) (map[string]interface{}, error) {
	if len(mappings) == 0 {
		return nil, nil
	}
	recorded := make(map[string]interface{})
	for name, placeholder := range mappings {
		extracted, extractErr := extractValueForStorage(placeholder, result)
		if extractErr != nil {
			// Best effort: note the failure and keep going with the other mappings
			recorded[name] = fmt.Sprintf("ERROR: %v", extractErr)
			continue
		}
		if setErr := ctx.Set(name, extracted); setErr != nil {
			return recorded, fmt.Errorf("failed to store %s: %w", name, setErr)
		}
		recorded[name] = extracted
	}
	return recorded, nil
}
// extractValueForStorage resolves placeholder against an endpoint result and
// converts the resolved text to the most specific type it parses as.
//
// The conversions are attempted in order: int64 (base 10), float64, bool. If
// none of them succeeds, the resolved string is returned unchanged. An error
// is returned only when the placeholder itself fails to resolve.
func extractValueForStorage(placeholder string, result *endpoint.Result) (interface{}, error) {
	// nil is passed for the context argument because extraction does not need one
	text, err := endpoint.ResolvePlaceholder(placeholder, result, nil)
	if err != nil {
		return nil, err
	}
	// Integers are tried before floats so that whole numbers are stored as int64
	if asInt, parseErr := strconv.ParseInt(text, 10, 64); parseErr == nil {
		return asInt, nil
	}
	if asFloat, parseErr := strconv.ParseFloat(text, 64); parseErr == nil {
		return asFloat, nil
	}
	if asBool, parseErr := strconv.ParseBool(text); parseErr == nil {
		return asBool, nil
	}
	// Nothing more specific matched; keep the value as a string
	return text, nil
}

View File

@@ -0,0 +1,26 @@
package suite
// Status represents the status of a suite: its identity plus the list of its execution results
type Status struct {
// Name of the suite
Name string `json:"name,omitempty"`
// Group the suite is a part of. Used for grouping multiple suites together on the front end.
Group string `json:"group,omitempty"`
// Key of the Suite, as returned by Suite.Key
Key string `json:"key"`
// Results is the list of suite execution results
Results []*Result `json:"results"`
}
// NewStatus builds the Status of a Suite, copying its name, group and key.
// Results starts out as an empty, non-nil slice.
func NewStatus(s *Suite) *Status {
	status := &Status{Results: make([]*Result, 0)}
	status.Name = s.Name
	status.Group = s.Group
	status.Key = s.Key()
	return status
}

449
config/suite/suite_test.go Normal file
View File

@@ -0,0 +1,449 @@
package suite
import (
"testing"
"time"
"github.com/TwiN/gatus/v5/config/endpoint"
"github.com/TwiN/gatus/v5/config/gontext"
)
func TestSuite_ValidateAndSetDefaults(t *testing.T) {
tests := []struct {
name string
suite *Suite
wantErr bool
}{
{
name: "valid-suite",
suite: &Suite{
Name: "test-suite",
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.org",
Conditions: []endpoint.Condition{
endpoint.Condition("[STATUS] == 200"),
},
},
},
},
wantErr: false,
},
{
name: "suite-without-name",
suite: &Suite{
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.org",
Conditions: []endpoint.Condition{
endpoint.Condition("[STATUS] == 200"),
},
},
},
},
wantErr: true,
},
{
name: "suite-without-endpoints",
suite: &Suite{
Name: "test-suite",
Endpoints: []*endpoint.Endpoint{},
},
wantErr: true,
},
{
name: "suite-with-duplicate-endpoint-names",
suite: &Suite{
Name: "test-suite",
Endpoints: []*endpoint.Endpoint{
{
Name: "duplicate",
URL: "https://example.org",
Conditions: []endpoint.Condition{
endpoint.Condition("[STATUS] == 200"),
},
},
{
Name: "duplicate",
URL: "https://example.com",
Conditions: []endpoint.Condition{
endpoint.Condition("[STATUS] == 200"),
},
},
},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.suite.ValidateAndSetDefaults()
if (err != nil) != tt.wantErr {
t.Errorf("Suite.ValidateAndSetDefaults() error = %v, wantErr %v", err, tt.wantErr)
}
// Check defaults were set
if err == nil {
if tt.suite.Interval == 0 {
t.Errorf("Expected Interval to be set to default, got 0")
}
if tt.suite.Timeout == 0 {
t.Errorf("Expected Timeout to be set to default, got 0")
}
}
})
}
}
// TestSuite_IsEnabled checks that a suite is enabled unless Enabled is explicitly false
func TestSuite_IsEnabled(t *testing.T) {
	cases := []struct {
		name    string
		enabled *bool
		want    bool
	}{
		{name: "nil-defaults-to-true", enabled: nil, want: true},
		{name: "explicitly-enabled", enabled: boolPtr(true), want: true},
		{name: "explicitly-disabled", enabled: boolPtr(false), want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := (&Suite{Enabled: tc.enabled}).IsEnabled()
			if got != tc.want {
				t.Errorf("Suite.IsEnabled() = %v, want %v", got, tc.want)
			}
		})
	}
}
// TestSuite_Key checks the key generated for a suite with and without a group
func TestSuite_Key(t *testing.T) {
	cases := []struct {
		name  string
		suite *Suite
		want  string
	}{
		{
			name:  "with-group",
			suite: &Suite{Name: "test-suite", Group: "test-group"},
			want:  "test-group_test-suite",
		},
		{
			name:  "without-group",
			suite: &Suite{Name: "test-suite", Group: ""},
			want:  "_test-suite",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.suite.Key()
			if got != tc.want {
				t.Errorf("Suite.Key() = %v, want %v", got, tc.want)
			}
		})
	}
}
func TestSuite_DefaultValues(t *testing.T) {
s := &Suite{
Name: "test",
Endpoints: []*endpoint.Endpoint{
{
Name: "endpoint1",
URL: "https://example.org",
Conditions: []endpoint.Condition{
endpoint.Condition("[STATUS] == 200"),
},
},
},
}
err := s.ValidateAndSetDefaults()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if s.Interval != DefaultInterval {
t.Errorf("Expected Interval to be %v, got %v", DefaultInterval, s.Interval)
}
if s.Timeout != DefaultTimeout {
t.Errorf("Expected Timeout to be %v, got %v", DefaultTimeout, s.Timeout)
}
if s.InitialContext == nil {
t.Error("Expected InitialContext to be initialized, got nil")
}
}
// boolPtr returns a pointer to a fresh copy of b, for filling optional *bool fields in tests
func boolPtr(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// TestStoreResultValues checks that placeholders are extracted from an
// endpoint result with the expected types and that the values are written
// to the context.
func TestStoreResultValues(t *testing.T) {
	ctx := gontext.New(nil)
	// Hand-built endpoint result to extract values from
	epResult := &endpoint.Result{
		HTTPStatus: 200,
		IP:         "192.168.1.1",
		Duration:   100 * time.Millisecond,
		Body:       []byte(`{"status": "OK", "value": 42}`),
		Connected:  true,
	}
	// Context key -> placeholder to resolve against the result
	storeMappings := map[string]string{
		"response_code": "[STATUS]",
		"server_ip":     "[IP]",
		"response_time": "[RESPONSE_TIME]",
		"status":        "[BODY].status",
		"value":         "[BODY].value",
		"connected":     "[CONNECTED]",
	}
	stored, err := StoreResultValues(ctx, storeMappings, epResult)
	if err != nil {
		t.Fatalf("Unexpected error storing values: %v", err)
	}
	// Whole numbers are expected as int64 and booleans as bool
	expectations := []struct {
		key  string
		want interface{}
	}{
		{key: "response_code", want: int64(200)},
		{key: "server_ip", want: "192.168.1.1"},
		{key: "status", want: "OK"},
		{key: "value", want: int64(42)},
		{key: "connected", want: true},
	}
	for _, expectation := range expectations {
		if stored[expectation.key] != expectation.want {
			t.Errorf("Expected %s=%v, got %v", expectation.key, expectation.want, stored[expectation.key])
		}
	}
	// The values must also have been written to the context itself
	val, err := ctx.Get("status")
	if err != nil || val != "OK" {
		t.Errorf("Expected status=OK in context, got %v, err=%v", val, err)
	}
}
// TestSuite_ExecuteWithAlwaysRunEndpoints verifies that an endpoint flagged
// AlwaysRun is still executed after an earlier endpoint of the suite failed.
//
// NOTE(review): every endpoint targets https://example.org, so this test
// presumably needs network access to pass — confirm.
func TestSuite_ExecuteWithAlwaysRunEndpoints(t *testing.T) {
	suite := &Suite{
		Name: "test-suite",
		Endpoints: []*endpoint.Endpoint{
			{
				Name: "create-resource",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] == 200"),
				},
				Store: map[string]string{
					"created_id": "[BODY]",
				},
			},
			{
				Name: "failing-endpoint",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] != 200"), // This will fail
				},
			},
			{
				Name: "cleanup-resource",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] == 200"),
				},
				AlwaysRun: true,
			},
		},
	}
	if err := suite.ValidateAndSetDefaults(); err != nil {
		t.Fatalf("suite validation failed: %v", err)
	}
	result := suite.Execute()
	if result.Success {
		t.Error("expected suite to fail due to middle endpoint failure")
	}
	// Fatalf rather than Errorf: the assertions below index into the slice and
	// would panic instead of reporting a failure if it were shorter
	if len(result.EndpointResults) != 3 {
		t.Fatalf("expected 3 endpoint results, got %d", len(result.EndpointResults))
	}
	if result.EndpointResults[0].Name != "create-resource" {
		t.Errorf("expected first endpoint to be 'create-resource', got '%s'", result.EndpointResults[0].Name)
	}
	if result.EndpointResults[1].Name != "failing-endpoint" {
		t.Errorf("expected second endpoint to be 'failing-endpoint', got '%s'", result.EndpointResults[1].Name)
	}
	if result.EndpointResults[1].Success {
		t.Error("expected failing-endpoint to fail")
	}
	if result.EndpointResults[2].Name != "cleanup-resource" {
		t.Errorf("expected third endpoint to be 'cleanup-resource', got '%s'", result.EndpointResults[2].Name)
	}
	if !result.EndpointResults[2].Success {
		t.Error("expected cleanup endpoint to succeed")
	}
}
// TestSuite_ExecuteWithoutAlwaysRunEndpoints verifies that endpoints which are
// not flagged AlwaysRun are skipped once an earlier endpoint of the suite failed.
//
// NOTE(review): every endpoint targets https://example.org, so this test
// presumably needs network access to pass — confirm.
func TestSuite_ExecuteWithoutAlwaysRunEndpoints(t *testing.T) {
	suite := &Suite{
		Name: "test-suite",
		Endpoints: []*endpoint.Endpoint{
			{
				Name: "create-resource",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] == 200"),
				},
			},
			{
				Name: "failing-endpoint",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] != 200"), // This will fail
				},
			},
			{
				Name: "skipped-endpoint",
				URL:  "https://example.org",
				Conditions: []endpoint.Condition{
					endpoint.Condition("[STATUS] == 200"),
				},
			},
		},
	}
	if err := suite.ValidateAndSetDefaults(); err != nil {
		t.Fatalf("suite validation failed: %v", err)
	}
	result := suite.Execute()
	if result.Success {
		t.Error("expected suite to fail due to middle endpoint failure")
	}
	// Fatalf rather than Errorf: the assertions below index into the slice and
	// would panic instead of reporting a failure if it were shorter
	if len(result.EndpointResults) != 2 {
		t.Fatalf("expected 2 endpoint results (execution should stop after failure), got %d", len(result.EndpointResults))
	}
	if result.EndpointResults[0].Name != "create-resource" {
		t.Errorf("expected first endpoint to be 'create-resource', got '%s'", result.EndpointResults[0].Name)
	}
	if result.EndpointResults[1].Name != "failing-endpoint" {
		t.Errorf("expected second endpoint to be 'failing-endpoint', got '%s'", result.EndpointResults[1].Name)
	}
}
// TestResult_AddError verifies that AddError appends messages to Errors in call order
func TestResult_AddError(t *testing.T) {
	result := &Result{
		Name:      "test-suite",
		Timestamp: time.Now(),
	}
	if len(result.Errors) != 0 {
		t.Errorf("Expected 0 errors initially, got %d", len(result.Errors))
	}
	result.AddError("first error")
	// Fatalf rather than Errorf: the next assertion indexes into Errors and
	// would panic instead of reporting a failure if the length were wrong
	if len(result.Errors) != 1 {
		t.Fatalf("Expected 1 error after AddError, got %d", len(result.Errors))
	}
	if result.Errors[0] != "first error" {
		t.Errorf("Expected 'first error', got '%s'", result.Errors[0])
	}
	result.AddError("second error")
	if len(result.Errors) != 2 {
		t.Fatalf("Expected 2 errors after second AddError, got %d", len(result.Errors))
	}
	if result.Errors[1] != "second error" {
		t.Errorf("Expected 'second error', got '%s'", result.Errors[1])
	}
}
// TestResult_CalculateSuccess checks that a result is successful only when
// every endpoint result succeeded and no suite-level error was recorded.
func TestResult_CalculateSuccess(t *testing.T) {
	// outcomes builds one endpoint result per success flag, in order
	outcomes := func(flags ...bool) []*endpoint.Result {
		built := make([]*endpoint.Result, 0, len(flags))
		for _, succeeded := range flags {
			built = append(built, &endpoint.Result{Success: succeeded})
		}
		return built
	}
	scenarios := []struct {
		name            string
		endpointResults []*endpoint.Result
		errors          []string
		expectedSuccess bool
	}{
		{
			name:            "no-endpoints-no-errors",
			endpointResults: outcomes(),
			errors:          []string{},
			expectedSuccess: true,
		},
		{
			name:            "all-endpoints-successful-no-errors",
			endpointResults: outcomes(true, true),
			errors:          []string{},
			expectedSuccess: true,
		},
		{
			name:            "second-endpoint-failed-no-errors",
			endpointResults: outcomes(true, false),
			errors:          []string{},
			expectedSuccess: false,
		},
		{
			name:            "first-endpoint-failed-no-errors",
			endpointResults: outcomes(false, true),
			errors:          []string{},
			expectedSuccess: false,
		},
		{
			name:            "all-endpoints-successful-with-errors",
			endpointResults: outcomes(true, true),
			errors:          []string{"suite level error"},
			expectedSuccess: false,
		},
		{
			name:            "endpoint-failed-and-errors",
			endpointResults: outcomes(true, false),
			errors:          []string{"suite level error"},
			expectedSuccess: false,
		},
		{
			name:            "no-endpoints-with-errors",
			endpointResults: outcomes(),
			errors:          []string{"configuration error"},
			expectedSuccess: false,
		},
	}
	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			result := &Result{
				Name:            "test-suite",
				Timestamp:       time.Now(),
				EndpointResults: scenario.endpointResults,
				Errors:          scenario.errors,
			}
			result.CalculateSuccess()
			if got := result.Success; got != scenario.expectedSuccess {
				t.Errorf("Expected success=%v, got %v", scenario.expectedSuccess, got)
			}
		})
	}
}