diff --git a/metricsserver/metrics.go b/metricsserver/metrics.go index bee1541..622c1c1 100644 --- a/metricsserver/metrics.go +++ b/metricsserver/metrics.go @@ -1,3 +1,8 @@ +// Package metricsserver provides a standalone HTTP server for exposing Prometheus metrics. +// +// This package implements a dedicated metrics server that exposes application metrics +// via HTTP. It uses a custom Prometheus registry to avoid conflicts with other metric +// collectors and provides graceful shutdown capabilities. package metricsserver import ( @@ -13,10 +18,13 @@ import ( "go.ntppool.org/common/logger" ) +// Metrics provides a custom Prometheus registry and HTTP handlers for metrics exposure. +// It isolates application metrics from the default global registry. type Metrics struct { r *prometheus.Registry } +// New creates a new Metrics instance with a custom Prometheus registry. func New() *Metrics { r := prometheus.NewRegistry() @@ -27,10 +35,13 @@ func New() *Metrics { return m } +// Registry returns the custom Prometheus registry. +// Use this to register your application's metrics collectors. func (m *Metrics) Registry() *prometheus.Registry { return m.r } +// Handler returns an HTTP handler for the /metrics endpoint with OpenMetrics support. func (m *Metrics) Handler() http.Handler { log := logger.NewStdLog("prom http", false, nil) @@ -41,9 +52,8 @@ func (m *Metrics) Handler() http.Handler { }) } -// ListenAndServe starts a goroutine with a server running on -// the specified port. The server will shutdown and return when -// the provided context is done +// ListenAndServe starts a metrics server on the specified port and blocks until ctx is done. +// The server exposes the metrics handler and shuts down gracefully when the context is cancelled. 
func (m *Metrics) ListenAndServe(ctx context.Context, port int) error { log := logger.Setup() diff --git a/metricsserver/metrics_test.go b/metricsserver/metrics_test.go new file mode 100644 index 0000000..9d1aced --- /dev/null +++ b/metricsserver/metrics_test.go @@ -0,0 +1,242 @@ +package metricsserver + +import ( + "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestNew(t *testing.T) { + metrics := New() + + if metrics == nil { + t.Fatal("New returned nil") + } + + if metrics.r == nil { + t.Error("metrics registry is nil") + } +} + +func TestRegistry(t *testing.T) { + metrics := New() + registry := metrics.Registry() + + if registry == nil { + t.Fatal("Registry() returned nil") + } + + if registry != metrics.r { + t.Error("Registry() did not return the metrics registry") + } + + // Test that we can register a metric + counter := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "test_counter", + Help: "A test counter", + }) + + err := registry.Register(counter) + if err != nil { + t.Errorf("failed to register metric: %v", err) + } + + // Test that the metric is registered + metricFamilies, err := registry.Gather() + if err != nil { + t.Errorf("failed to gather metrics: %v", err) + } + + found := false + for _, mf := range metricFamilies { + if mf.GetName() == "test_counter" { + found = true + break + } + } + + if !found { + t.Error("registered metric not found in registry") + } +} + +func TestHandler(t *testing.T) { + metrics := New() + + // Register a test metric + counter := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "test_requests_total", + Help: "Total number of test requests", + }, + []string{"method"}, + ) + metrics.Registry().MustRegister(counter) + counter.WithLabelValues("GET").Inc() + + // Test the handler + handler := metrics.Handler() + if handler == nil { + t.Fatal("Handler() returned nil") + } + + // Create a test request 
+ req := httptest.NewRequest("GET", "/metrics", nil) + recorder := httptest.NewRecorder() + + // Call the handler + handler.ServeHTTP(recorder, req) + + // Check response + resp := recorder.Result() + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("expected status 200, got %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("failed to read response body: %v", err) + } + + bodyStr := string(body) + + // Check for our test metric + if !strings.Contains(bodyStr, "test_requests_total") { + t.Error("test metric not found in metrics output") + } + + // Check for TYPE metadata lines (emitted by both the Prometheus text and OpenMetrics formats) + if !strings.Contains(bodyStr, "# TYPE") { + t.Error("metrics output missing TYPE comments") + } +} + +func TestListenAndServe(t *testing.T) { + metrics := New() + + // Register a test metric + counter := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "test_requests_total", + Help: "Total number of test requests", + }, + []string{"method"}, + ) + metrics.Registry().MustRegister(counter) + counter.WithLabelValues("GET").Inc() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start server in a goroutine + errCh := make(chan error, 1) + go func() { + // Use a high port number to avoid conflicts + errCh <- metrics.ListenAndServe(ctx, 9999) + }() + + // Give the server a moment to start + time.Sleep(100 * time.Millisecond) + + // Test metrics endpoint + resp, err := http.Get("http://localhost:9999/metrics") + if err != nil { + t.Fatalf("failed to GET /metrics: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("expected status 200, got %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("failed to read response body: %v", err) + } + + bodyStr := string(body) + + // Check for our test metric + if !strings.Contains(bodyStr, "test_requests_total") { + t.Error("test metric not found in
metrics output") + } + + // Cancel context to stop server + cancel() + + // Wait for server to stop + select { + case err := <-errCh: + if err != nil { + t.Errorf("server returned error: %v", err) + } + case <-time.After(5 * time.Second): + t.Error("server did not stop within timeout") + } +} + +func TestListenAndServeContextCancellation(t *testing.T) { + metrics := New() + + ctx, cancel := context.WithCancel(context.Background()) + + // Start server + errCh := make(chan error, 1) + go func() { + errCh <- metrics.ListenAndServe(ctx, 9998) + }() + + // Give server time to start + time.Sleep(100 * time.Millisecond) + + // Cancel context + cancel() + + // Server should stop gracefully + select { + case err := <-errCh: + if err != nil { + t.Errorf("server returned error on graceful shutdown: %v", err) + } + case <-time.After(5 * time.Second): + t.Error("server did not stop within timeout after context cancellation") + } +} + +// Benchmark the metrics handler response time +func BenchmarkMetricsHandler(b *testing.B) { + metrics := New() + + // Register some test metrics + for i := 0; i < 10; i++ { + counter := prometheus.NewCounter(prometheus.CounterOpts{ + Name: fmt.Sprintf("bench_counter_%d", i), + Help: "A benchmark counter", + }) + metrics.Registry().MustRegister(counter) + counter.Add(float64(i * 100)) + } + + handler := metrics.Handler() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + req := httptest.NewRequest("GET", "/metrics", nil) + recorder := httptest.NewRecorder() + handler.ServeHTTP(recorder, req) + + if recorder.Code != http.StatusOK { + b.Fatalf("unexpected status code: %d", recorder.Code) + } + } +} \ No newline at end of file