From 10864363e29420ea618d509f1aad7a5546512924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ask=20Bj=C3=B8rn=20Hansen?= Date: Sun, 21 Sep 2025 10:52:29 -0700 Subject: [PATCH] feat(health): enhance server with probe-specific handlers - Add separate handlers for liveness (/healthz), readiness (/readyz), and startup (/startupz) probes - Implement WithLivenessHandler, WithReadinessHandler, WithStartupHandler, and WithServiceName options - Add probe-specific JSON response formats - Add comprehensive package documentation with usage examples - Maintain backward compatibility for /__health and / endpoints - Add tests for all probe types and fallback scenarios Enables proper Kubernetes health monitoring with different probe types. --- health/health_server.go | 268 +++++++++++++++++++++++++++++++++++++--- health/health_test.go | 129 ++++++++++++++++++- 2 files changed, 381 insertions(+), 16 deletions(-) diff --git a/health/health_server.go b/health/health_server.go index f1cf22c..af75cd2 100644 --- a/health/health_server.go +++ b/health/health_server.go @@ -1,13 +1,71 @@ // Package health provides a standalone HTTP server for health checks. // -// This package implements a simple health check server that can be used -// to expose health status endpoints for monitoring and load balancing. -// It supports custom health check handlers and provides structured logging -// with graceful shutdown capabilities. +// This package implements a flexible health check server that supports +// different handlers for Kubernetes probe types (liveness, readiness, startup). +// It provides structured logging, graceful shutdown, and standard HTTP endpoints +// for monitoring and load balancing. +// +// # Kubernetes Probe Types +// +// Liveness Probe: Detects when a container is "dead" and needs restarting. +// Should be a lightweight check that verifies the process is still running +// and not in an unrecoverable state. +// +// Readiness Probe: Determines when a container is ready to accept traffic. +// Controls which Pods are used as backends for Services. Should verify +// the application can handle requests properly. +// +// Startup Probe: Verifies when a container application has successfully started. +// Delays liveness and readiness probes until startup succeeds. Useful for +// slow-starting applications. +// +// # Usage Examples +// +// Basic usage with a single handler for all probes: +// +// srv := health.NewServer(myHealthHandler) +// srv.Listen(ctx, 9091) +// +// Advanced usage with separate handlers for each probe type: +// +// srv := health.NewServer(nil, +// health.WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) { +// // Simple alive check +// w.WriteHeader(http.StatusOK) +// }), +// health.WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) { +// // Check if ready to serve traffic +// if err := checkDatabase(); err != nil { +// w.WriteHeader(http.StatusServiceUnavailable) +// return +// } +// w.WriteHeader(http.StatusOK) +// }), +// health.WithStartupHandler(func(w http.ResponseWriter, r *http.Request) { +// // Check if startup is complete +// if !applicationReady() { +// w.WriteHeader(http.StatusServiceUnavailable) +// return +// } +// w.WriteHeader(http.StatusOK) +// }), +// health.WithServiceName("my-service"), +// ) +// srv.Listen(ctx, 9091) +// +// # Standard Endpoints +// +// The server exposes these endpoints: +// - /healthz - liveness probe (or general health if no specific handler) +// - /readyz - readiness probe (or general health if no specific handler) +// - /startupz - startup probe (or general health if no specific handler) +// - /__health - general health endpoint (backward compatibility) +// - / - general health endpoint (root path) package health import ( "context" + "encoding/json" "log/slog" "net/http" "strconv" @@ -21,23 +79,74 @@ import ( // It runs separately from the main application server to ensure health // checks remain available even if the main server is experiencing issues. // -// The server includes built-in timeouts, graceful shutdown, and structured -// logging for monitoring and debugging health check behavior. +// The server supports separate handlers for different Kubernetes probe types +// (liveness, readiness, startup) and includes built-in timeouts, graceful +// shutdown, and structured logging. type Server struct { - log *slog.Logger - healthFn http.HandlerFunc + log *slog.Logger + livenessHandler http.HandlerFunc + readinessHandler http.HandlerFunc + startupHandler http.HandlerFunc + generalHandler http.HandlerFunc // fallback for /__health and / paths + serviceName string } -// NewServer creates a new health check server with the specified health handler. -// If healthFn is nil, a default handler that returns HTTP 200 "ok" is used. -func NewServer(healthFn http.HandlerFunc) *Server { +// Option represents a configuration option for the health server. +type Option func(*Server) + +// WithLivenessHandler sets a specific handler for the /healthz endpoint. +// Liveness probes determine if a container should be restarted. +func WithLivenessHandler(handler http.HandlerFunc) Option { + return func(s *Server) { + s.livenessHandler = handler + } +} + +// WithReadinessHandler sets a specific handler for the /readyz endpoint. +// Readiness probes determine if a container can receive traffic. +func WithReadinessHandler(handler http.HandlerFunc) Option { + return func(s *Server) { + s.readinessHandler = handler + } +} + +// WithStartupHandler sets a specific handler for the /startupz endpoint. +// Startup probes determine if a container has finished initializing. +func WithStartupHandler(handler http.HandlerFunc) Option { + return func(s *Server) { + s.startupHandler = handler + } +} + +// WithServiceName sets the service name for JSON responses and logging. +func WithServiceName(serviceName string) Option { + return func(s *Server) { + s.serviceName = serviceName + } +} + + +// NewServer creates a new health check server with optional probe-specific handlers. +// +// If healthFn is provided, it will be used as a fallback for any probe endpoints +// that don't have specific handlers configured. If healthFn is nil, a default +// handler that returns HTTP 200 "ok" is used as the fallback. +// +// Use the With* option functions to configure specific handlers for different +// probe types (liveness, readiness, startup). +func NewServer(healthFn http.HandlerFunc, opts ...Option) *Server { if healthFn == nil { healthFn = basicHealth } srv := &Server{ - log: logger.Setup(), - healthFn: healthFn, + log: logger.Setup(), + generalHandler: healthFn, } + + for _, opt := range opts { + opt(srv) + } + return srv } @@ -47,13 +156,27 @@ func (srv *Server) SetLogger(log *slog.Logger) { } // Listen starts the health server on the specified port and blocks until ctx is cancelled. -// The server exposes the health handler at "/__health" with graceful shutdown support. +// The server exposes health check endpoints with graceful shutdown support. +// +// Standard endpoints exposed: +// - /healthz - liveness probe (uses livenessHandler or falls back to generalHandler) +// - /readyz - readiness probe (uses readinessHandler or falls back to generalHandler) +// - /startupz - startup probe (uses startupHandler or falls back to generalHandler) +// - /__health - general health endpoint (uses generalHandler) +// - / - root health endpoint (uses generalHandler) func (srv *Server) Listen(ctx context.Context, port int) error { srv.log.Info("starting health listener", "port", port) serveMux := http.NewServeMux() - serveMux.HandleFunc("/__health", srv.healthFn) + // Register probe-specific handlers + serveMux.HandleFunc("/healthz", srv.createProbeHandler("liveness")) + serveMux.HandleFunc("/readyz", srv.createProbeHandler("readiness")) + serveMux.HandleFunc("/startupz", srv.createProbeHandler("startup")) + + // Register general health endpoints for backward compatibility + serveMux.HandleFunc("/__health", srv.createGeneralHandler()) + serveMux.HandleFunc("/", srv.createGeneralHandler()) hsrv := &http.Server{ Addr: ":" + strconv.Itoa(port), @@ -89,6 +212,121 @@ func (srv *Server) Listen(ctx context.Context, port int) error { return g.Wait() } +// createProbeHandler creates a handler for a specific probe type that provides +// appropriate JSON responses and falls back to the general handler if no specific +// handler is configured. +func (srv *Server) createProbeHandler(probeType string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var handler http.HandlerFunc + + // Select the appropriate handler + switch probeType { + case "liveness": + handler = srv.livenessHandler + case "readiness": + handler = srv.readinessHandler + case "startup": + handler = srv.startupHandler + } + + // Fall back to general handler if no specific handler is configured + if handler == nil { + handler = srv.generalHandler + } + + // Create a response recorder to capture the handler's status code + recorder := &statusRecorder{ResponseWriter: w, statusCode: 200} + handler(recorder, r) + + // If the handler already wrote a response, we're done + if recorder.written { + return + } + + // Otherwise, provide a standard JSON response based on the status code + w.Header().Set("Content-Type", "application/json") + + if recorder.statusCode >= 400 { + // Handler indicated unhealthy + switch probeType { + case "liveness": + json.NewEncoder(w).Encode(map[string]string{"status": "unhealthy"}) + case "readiness": + json.NewEncoder(w).Encode(map[string]bool{"ready": false}) + case "startup": + json.NewEncoder(w).Encode(map[string]bool{"started": false}) + } + } else { + // Handler indicated healthy + switch probeType { + case "liveness": + json.NewEncoder(w).Encode(map[string]string{"status": "alive"}) + case "readiness": + json.NewEncoder(w).Encode(map[string]bool{"ready": true}) + case "startup": + json.NewEncoder(w).Encode(map[string]bool{"started": true}) + } + } + } +} + +// createGeneralHandler creates a handler for general health endpoints that provides +// comprehensive health information. +func (srv *Server) createGeneralHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Create a response recorder to capture the handler's status code + // Use a buffer to prevent the handler from writing to the actual response + recorder := &statusRecorder{ResponseWriter: &discardWriter{}, statusCode: 200} + srv.generalHandler(recorder, r) + + // Always provide a comprehensive JSON response for general endpoints + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(recorder.statusCode) + + response := map[string]interface{}{ + "status": map[bool]string{true: "healthy", false: "unhealthy"}[recorder.statusCode < 400], + } + + if srv.serviceName != "" { + response["service"] = srv.serviceName + } + + json.NewEncoder(w).Encode(response) + } +} + +// statusRecorder captures the response status code from handlers while allowing +// them to write their own response content if needed. +type statusRecorder struct { + http.ResponseWriter + statusCode int + written bool +} + +func (r *statusRecorder) WriteHeader(code int) { + r.statusCode = code + r.ResponseWriter.WriteHeader(code) +} + +func (r *statusRecorder) Write(data []byte) (int, error) { + r.written = true + return r.ResponseWriter.Write(data) +} + +// discardWriter implements http.ResponseWriter but discards all writes. +// Used to capture status codes without writing response content. +type discardWriter struct{} + +func (d *discardWriter) Header() http.Header { + return make(http.Header) +} + +func (d *discardWriter) Write([]byte) (int, error) { + return 0, nil +} + +func (d *discardWriter) WriteHeader(int) {} + // HealthCheckListener runs a simple HTTP server on the specified port for health check probes. func HealthCheckListener(ctx context.Context, port int, log *slog.Logger) error { srv := NewServer(nil) diff --git a/health/health_test.go b/health/health_test.go index 1094034..31f92fe 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -1,13 +1,14 @@ package health import ( + "fmt" "io" "net/http" "net/http/httptest" "testing" ) -func TestHealthHandler(t *testing.T) { +func TestBasicHealthHandler(t *testing.T) { req := httptest.NewRequest(http.MethodGet, "/__health", nil) w := httptest.NewRecorder() @@ -24,3 +25,129 @@ func TestHealthHandler(t *testing.T) { t.Errorf("expected ok got %q", string(data)) } } + +func TestProbeHandlers(t *testing.T) { + // Test with separate handlers for each probe type + srv := NewServer(nil, + WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }), + WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }), + WithStartupHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }), + WithServiceName("test-service"), + ) + + tests := []struct { + handler http.HandlerFunc + expectedStatus int + expectedBody string + }{ + {srv.createProbeHandler("liveness"), 200, `{"status":"alive"}`}, + {srv.createProbeHandler("readiness"), 200, `{"ready":true}`}, + {srv.createProbeHandler("startup"), 200, `{"started":true}`}, + {srv.createGeneralHandler(), 200, `{"service":"test-service","status":"healthy"}`}, + } + + for i, tt := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + w := httptest.NewRecorder() + + tt.handler(w, req) + + if w.Code != tt.expectedStatus { + t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code) + } + + body := w.Body.String() + if body != tt.expectedBody+"\n" { // json.Encoder adds newline + t.Errorf("expected body %q, got %q", tt.expectedBody, body) + } + }) + } +} + +func TestProbeHandlerFallback(t *testing.T) { + // Test fallback to general handler when no specific handler is configured + generalHandler := func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + } + + srv := NewServer(generalHandler, WithServiceName("test-service")) + + tests := []struct { + handler http.HandlerFunc + expectedStatus int + expectedBody string + }{ + {srv.createProbeHandler("liveness"), 200, `{"status":"alive"}`}, + {srv.createProbeHandler("readiness"), 200, `{"ready":true}`}, + {srv.createProbeHandler("startup"), 200, `{"started":true}`}, + } + + for i, tt := range tests { + t.Run(fmt.Sprintf("fallback_%d", i), func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + w := httptest.NewRecorder() + + tt.handler(w, req) + + if w.Code != tt.expectedStatus { + t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code) + } + + body := w.Body.String() + if body != tt.expectedBody+"\n" { // json.Encoder adds newline + t.Errorf("expected body %q, got %q", tt.expectedBody, body) + } + }) + } +} + +func TestUnhealthyProbeHandlers(t *testing.T) { + // Test with handlers that return unhealthy status + srv := NewServer(nil, + WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }), + WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }), + WithStartupHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }), + WithServiceName("test-service"), + ) + + tests := []struct { + handler http.HandlerFunc + expectedStatus int + expectedBody string + }{ + {srv.createProbeHandler("liveness"), 503, `{"status":"unhealthy"}`}, + {srv.createProbeHandler("readiness"), 503, `{"ready":false}`}, + {srv.createProbeHandler("startup"), 503, `{"started":false}`}, + } + + for i, tt := range tests { + t.Run(fmt.Sprintf("unhealthy_%d", i), func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + w := httptest.NewRecorder() + + tt.handler(w, req) + + if w.Code != tt.expectedStatus { + t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code) + } + + body := w.Body.String() + if body != tt.expectedBody+"\n" { // json.Encoder adds newline + t.Errorf("expected body %q, got %q", tt.expectedBody, body) + } + }) + } +}