Files
common/database/metrics.go
Ask Bjørn Hansen 94b718a925 fix(database): correct metrics and improve error handling
- Fix metrics double-counting: track deltas for WaitCount/WaitDuration
  instead of adding cumulative values each tick
- Replace fmt.Printf with structured logging in pool monitor
- Add PoolOptions validation (MaxConns > 0, MinConns >= 0)
- Warn when DATABASE_URI overrides non-default PoolOptions
- Improve findAndParseConfig to report all tried files and errors
- Remove dead code in pgdb/config.go (unreachable host default)
- Fix errcheck lint issues for file.Close() calls
- Add context parameter to OpenDBMonitor() (breaking change)
2025-11-29 12:56:49 -08:00

116 lines
3.6 KiB
Go

package database
import (
"context"
"database/sql"
"time"
"github.com/prometheus/client_golang/prometheus"
"go.ntppool.org/common/logger"
)
// DatabaseMetrics holds the Prometheus metrics for database connection pool monitoring.
//
// The exported fields are the collectors themselves. The unexported fields are
// bookkeeping used by monitorConnectionPool to convert the cumulative counters
// reported by sql.DBStats into per-sample deltas.
type DatabaseMetrics struct {
	// ConnectionsOpen is set to sql.DBStats.OpenConnections on every sample.
	ConnectionsOpen prometheus.Gauge
	// ConnectionsIdle is set to sql.DBStats.Idle on every sample.
	ConnectionsIdle prometheus.Gauge
	// ConnectionsInUse is set to sql.DBStats.InUse on every sample.
	ConnectionsInUse prometheus.Gauge
	// ConnectionsWaitCount is increased by the growth of sql.DBStats.WaitCount
	// since the previously recorded value.
	ConnectionsWaitCount prometheus.Counter
	// ConnectionsWaitDuration receives one observation (in seconds) per sample
	// in which sql.DBStats.WaitDuration grew; the observed value is that growth.
	ConnectionsWaitDuration prometheus.Histogram
	// Track last values for delta calculation (cumulative stats from sql.DBStats)
	// lastWaitCount is the WaitCount value most recently added to the counter.
	lastWaitCount int64
	// lastWaitDuration is the WaitDuration value most recently observed into the histogram.
	lastWaitDuration time.Duration
}
// NewDatabaseMetrics builds the full set of connection pool collectors and,
// when registerer is non-nil, registers every one of them with it.
//
// A nil registerer is accepted: the collectors are still created and returned,
// they are simply not registered anywhere.
//
// NOTE(review): registration uses MustRegister, which presumably panics when a
// collector cannot be registered (for example on a duplicate metric name) —
// confirm that callers build at most one set per registerer.
func NewDatabaseMetrics(registerer prometheus.Registerer) *DatabaseMetrics {
	// Gauges: point-in-time view of the pool.
	open := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "database_connections_open",
		Help: "Number of open database connections",
	})
	idle := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "database_connections_idle",
		Help: "Number of idle database connections",
	})
	inUse := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "database_connections_in_use",
		Help: "Number of database connections in use",
	})

	// Wait statistics: how often and how long callers waited for a connection.
	waitCount := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "database_connections_wait_count_total",
		Help: "Total number of times a connection had to wait",
	})
	waitDuration := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "database_connections_wait_duration_seconds",
		Help:    "Time spent waiting for a database connection",
		Buckets: prometheus.DefBuckets,
	})

	m := &DatabaseMetrics{
		ConnectionsOpen:         open,
		ConnectionsIdle:         idle,
		ConnectionsInUse:        inUse,
		ConnectionsWaitCount:    waitCount,
		ConnectionsWaitDuration: waitDuration,
	}

	if registerer == nil {
		return m
	}

	registerer.MustRegister(open, idle, inUse, waitCount, waitDuration)
	return m
}
// monitorConnectionPool periodically samples db.Stats() and publishes the
// result through a freshly created DatabaseMetrics registered on registerer.
//
// The function blocks, sampling once per tick, until ctx is done; it is meant
// to be run in its own goroutine. When registerer is nil it returns immediately
// without creating any metrics.
//
// Gauges are overwritten with the current values on every sample. WaitCount and
// WaitDuration are cumulative in sql.DBStats, so only their growth since the
// last recorded value is pushed into the counter and the histogram.
func monitorConnectionPool(ctx context.Context, db *sql.DB, registerer prometheus.Registerer) {
	const (
		// sampleInterval is the time between two pool samples.
		sampleInterval = 30 * time.Second
		// openConnsWarnAbove is the open-connection count above which a
		// warning is logged.
		openConnsWarnAbove = 20
	)

	// No metrics collection if no registerer provided.
	if registerer == nil {
		return
	}

	m := NewDatabaseMetrics(registerer)

	tick := time.NewTicker(sampleInterval)
	defer tick.Stop()

	log := logger.FromContext(ctx)

	for {
		select {
		case <-ctx.Done():
			log.InfoContext(ctx, "database connection pool monitor stopped")
			return
		case <-tick.C:
			// Fall through to the sampling code below.
		}

		snap := db.Stats()

		// Current state of the pool.
		m.ConnectionsOpen.Set(float64(snap.OpenConnections))
		m.ConnectionsIdle.Set(float64(snap.Idle))
		m.ConnectionsInUse.Set(float64(snap.InUse))

		// Number of new waits since the last recorded value.
		newWaits := snap.WaitCount - m.lastWaitCount
		if newWaits > 0 {
			m.lastWaitCount = snap.WaitCount
			m.ConnectionsWaitCount.Add(float64(newWaits))
		}

		// Additional time spent waiting since the last recorded value.
		newWaitTime := snap.WaitDuration - m.lastWaitDuration
		if newWaitTime > 0 {
			m.lastWaitDuration = snap.WaitDuration
			m.ConnectionsWaitDuration.Observe(newWaitTime.Seconds())
		}

		// Only log when the pool is large or somebody had to wait.
		if snap.OpenConnections <= openConnsWarnAbove && newWaits <= 0 {
			continue
		}
		log.WarnContext(ctx, "high database connection usage",
			"open", snap.OpenConnections,
			"idle", snap.Idle,
			"in_use", snap.InUse,
			"wait_count", snap.WaitCount,
			"wait_duration", snap.WaitDuration,
		)
	}
}