fix(tunneling): Add exponential backoff retry (#1303)
This commit is contained in:
@@ -54,7 +54,6 @@ func New(config *Config) *SSHTunnel {
|
|||||||
tunnel := &SSHTunnel{
|
tunnel := &SSHTunnel{
|
||||||
config: config,
|
config: config,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse authentication methods once during initialization to avoid
|
// Parse authentication methods once during initialization to avoid
|
||||||
// expensive cryptographic operations on every connection attempt
|
// expensive cryptographic operations on every connection attempt
|
||||||
if config.PrivateKey != "" {
|
if config.PrivateKey != "" {
|
||||||
@@ -66,7 +65,6 @@ func New(config *Config) *SSHTunnel {
|
|||||||
} else if config.Password != "" {
|
} else if config.Password != "" {
|
||||||
tunnel.authMethods = []ssh.AuthMethod{ssh.Password(config.Password)}
|
tunnel.authMethods = []ssh.AuthMethod{ssh.Password(config.Password)}
|
||||||
}
|
}
|
||||||
|
|
||||||
return tunnel
|
return tunnel
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,27 +129,34 @@ func (t *SSHTunnel) Dial(network, addr string) (net.Conn, error) {
|
|||||||
client = t.client
|
client = t.client
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
}
|
}
|
||||||
// Create connection through SSH tunnel
|
// Attempt dial with exponential backoff retry
|
||||||
conn, err := client.Dial(network, addr)
|
const maxRetries = 3
|
||||||
if err != nil {
|
const baseDelay = time.Second
|
||||||
// Close stale connection before retry to prevent leak
|
var lastErr error
|
||||||
|
for attempt := 0; attempt < maxRetries; attempt++ {
|
||||||
|
if attempt > 0 {
|
||||||
|
// Exponential backoff before each retry: 1s, then 2s (maxRetries = 3 allows two retries)
|
||||||
|
delay := baseDelay << (attempt - 1)
|
||||||
|
time.Sleep(delay)
|
||||||
|
// Close stale connection and reconnect
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
if t.client != nil {
|
if t.client != nil {
|
||||||
t.client.Close()
|
_ = t.client.Close()
|
||||||
t.client = nil
|
t.client = nil
|
||||||
}
|
}
|
||||||
|
if err := t.connectUnsafe(); err != nil {
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
// Retry once - connection might be stale
|
lastErr = fmt.Errorf("reconnect attempt %d failed: %w", attempt, err)
|
||||||
if connErr := t.Connect(); connErr != nil {
|
continue
|
||||||
return nil, fmt.Errorf("SSH tunnel dial failed: %w (retry failed: %v)", err, connErr)
|
|
||||||
}
|
}
|
||||||
t.mu.RLock()
|
|
||||||
client = t.client
|
client = t.client
|
||||||
t.mu.RUnlock()
|
t.mu.Unlock()
|
||||||
conn, err = client.Dial(network, addr)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("SSH tunnel dial failed after retry: %w", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
conn, err := client.Dial(network, addr)
|
||||||
|
if err == nil {
|
||||||
return conn, nil
|
return conn, nil
|
||||||
|
}
|
||||||
|
lastErr = err
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("SSH tunnel dial failed after %d attempts: %w", maxRetries, lastErr)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user