// Package health provides a standalone HTTP server for health checks. // // This package implements a flexible health check server that supports // different handlers for Kubernetes probe types (liveness, readiness, startup). // It provides structured logging, graceful shutdown, and standard HTTP endpoints // for monitoring and load balancing. // // # Kubernetes Probe Types // // Liveness Probe: Detects when a container is "dead" and needs restarting. // Should be a lightweight check that verifies the process is still running // and not in an unrecoverable state. // // Readiness Probe: Determines when a container is ready to accept traffic. // Controls which Pods are used as backends for Services. Should verify // the application can handle requests properly. // // Startup Probe: Verifies when a container application has successfully started. // Delays liveness and readiness probes until startup succeeds. Useful for // slow-starting applications. // // # Usage Examples // // Basic usage with a single handler for all probes: // // srv := health.NewServer(myHealthHandler) // srv.Listen(ctx, 9091) // // Advanced usage with separate handlers for each probe type: // // srv := health.NewServer(nil, // health.WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) { // // Simple alive check // w.WriteHeader(http.StatusOK) // }), // health.WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) { // // Check if ready to serve traffic // if err := checkDatabase(); err != nil { // w.WriteHeader(http.StatusServiceUnavailable) // return // } // w.WriteHeader(http.StatusOK) // }), // health.WithStartupHandler(func(w http.ResponseWriter, r *http.Request) { // // Check if startup is complete // if !applicationReady() { // w.WriteHeader(http.StatusServiceUnavailable) // return // } // w.WriteHeader(http.StatusOK) // }), // health.WithServiceName("my-service"), // ) // srv.Listen(ctx, 9091) // // # Standard Endpoints // // The server exposes these endpoints: // 
//   - /healthz - liveness probe (or general health if no specific handler)
//   - /readyz - readiness probe (or general health if no specific handler)
//   - /startupz - startup probe (or general health if no specific handler)
//   - /__health - general health endpoint (backward compatibility)
//   - / - general health endpoint (root path)
package health

import (
	"context"
	"encoding/json"
	"log/slog"
	"net/http"
	"strconv"
	"time"

	"go.ntppool.org/common/logger"
	"golang.org/x/sync/errgroup"
)

// Server is a standalone HTTP server dedicated to health checks.
// It runs separately from the main application server to ensure health
// checks remain available even if the main server is experiencing issues.
//
// The server supports separate handlers for different Kubernetes probe types
// (liveness, readiness, startup) and includes built-in timeouts, graceful
// shutdown, and structured logging.
//
// Construct a Server with NewServer; the handler fields are filled in from
// its arguments and options.
type Server struct {
	// log receives the listener's start, stop and failure messages.
	// NewServer installs a default; SetLogger replaces it.
	log *slog.Logger

	// Probe-specific handlers. A nil entry means the matching endpoint
	// falls back to generalHandler.
	livenessHandler  http.HandlerFunc // serves /healthz
	readinessHandler http.HandlerFunc // serves /readyz
	startupHandler   http.HandlerFunc // serves /startupz

	// generalHandler decides the status for /__health and / and is the
	// fallback for any probe endpoint without its own handler.
	generalHandler http.HandlerFunc // fallback for /__health and / paths

	// serviceName, when non-empty, is reported as "service" in the JSON
	// body of the general health endpoints.
	serviceName string
}

// Option represents a configuration option for the health server.
// Options are applied by NewServer, in the order they are passed.
type Option func(*Server)

// WithLivenessHandler sets a specific handler for the /healthz endpoint.
// Liveness probes determine if a container should be restarted.
func WithLivenessHandler(handler http.HandlerFunc) Option {
	return func(s *Server) {
		s.livenessHandler = handler
	}
}

// WithReadinessHandler sets a specific handler for the /readyz endpoint.
// Readiness probes determine if a container can receive traffic.
func WithReadinessHandler(handler http.HandlerFunc) Option {
	return func(s *Server) {
		s.readinessHandler = handler
	}
}

// WithStartupHandler sets a specific handler for the /startupz endpoint.
// Startup probes determine if a container has finished initializing.
func WithStartupHandler(handler http.HandlerFunc) Option {
	return func(s *Server) {
		s.startupHandler = handler
	}
}

// WithServiceName sets the service name that is reported as "service" in the
// JSON body of the general health endpoints. An empty name omits the field.
func WithServiceName(serviceName string) Option {
	return func(s *Server) {
		s.serviceName = serviceName
	}
}

// NewServer creates a new health check server with optional probe-specific handlers.
//
// If healthFn is provided, it will be used as a fallback for any probe endpoints
// that don't have specific handlers configured. If healthFn is nil, a default
// handler that returns HTTP 200 "ok" is used as the fallback.
//
// Use the With* option functions to configure specific handlers for different
// probe types (liveness, readiness, startup). Options are applied in order, so
// a later option overrides an earlier one that sets the same field. Nil options
// are skipped.
func NewServer(healthFn http.HandlerFunc, opts ...Option) *Server {
	if healthFn == nil {
		healthFn = basicHealth
	}

	srv := &Server{
		log:            logger.Setup(),
		generalHandler: healthFn,
	}

	for _, opt := range opts {
		// A nil Option (e.g. from a conditionally built slice) would panic
		// when called; treat it as "no change" instead.
		if opt == nil {
			continue
		}
		opt(srv)
	}

	return srv
}

// SetLogger replaces the default logger with a custom one.
// A nil logger is ignored and the current logger is kept, because the server
// logs unconditionally and a nil *slog.Logger would panic on first use.
func (srv *Server) SetLogger(log *slog.Logger) {
	if log == nil {
		return
	}
	srv.log = log
}

// Listen starts the health server on the specified port and blocks until ctx is cancelled.
// The server exposes health check endpoints with graceful shutdown support.
// // Standard endpoints exposed: // - /healthz - liveness probe (uses livenessHandler or falls back to generalHandler) // - /readyz - readiness probe (uses readinessHandler or falls back to generalHandler) // - /startupz - startup probe (uses startupHandler or falls back to generalHandler) // - /__health - general health endpoint (uses generalHandler) // - / - root health endpoint (uses generalHandler) func (srv *Server) Listen(ctx context.Context, port int) error { srv.log.Info("starting health listener", "port", port) serveMux := http.NewServeMux() // Register probe-specific handlers serveMux.HandleFunc("/healthz", srv.createProbeHandler("liveness")) serveMux.HandleFunc("/readyz", srv.createProbeHandler("readiness")) serveMux.HandleFunc("/startupz", srv.createProbeHandler("startup")) // Register general health endpoints for backward compatibility serveMux.HandleFunc("/__health", srv.createGeneralHandler()) serveMux.HandleFunc("/", srv.createGeneralHandler()) hsrv := &http.Server{ Addr: ":" + strconv.Itoa(port), ReadTimeout: 10 * time.Second, WriteTimeout: 20 * time.Second, IdleTimeout: 120 * time.Second, Handler: serveMux, } g, ctx := errgroup.WithContext(ctx) g.Go(func() error { err := hsrv.ListenAndServe() if err != http.ErrServerClosed { srv.log.Warn("health check server done listening", "err", err) return err } return nil }) <-ctx.Done() g.Go(func() error { shCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() if err := hsrv.Shutdown(shCtx); err != nil { srv.log.Error("health check server shutdown failed", "err", err) return err } return nil }) return g.Wait() } // createProbeHandler creates a handler for a specific probe type that provides // appropriate JSON responses and falls back to the general handler if no specific // handler is configured. 
func (srv *Server) createProbeHandler(probeType string) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { var handler http.HandlerFunc // Select the appropriate handler switch probeType { case "liveness": handler = srv.livenessHandler case "readiness": handler = srv.readinessHandler case "startup": handler = srv.startupHandler } // Fall back to general handler if no specific handler is configured if handler == nil { handler = srv.generalHandler } // Create a response recorder to capture the handler's status code recorder := &statusRecorder{ResponseWriter: w, statusCode: 200} handler(recorder, r) // If the handler already wrote a response, we're done if recorder.written { return } // Otherwise, provide a standard JSON response based on the status code w.Header().Set("Content-Type", "application/json") if recorder.statusCode >= 400 { // Handler indicated unhealthy switch probeType { case "liveness": json.NewEncoder(w).Encode(map[string]string{"status": "unhealthy"}) case "readiness": json.NewEncoder(w).Encode(map[string]bool{"ready": false}) case "startup": json.NewEncoder(w).Encode(map[string]bool{"started": false}) } } else { // Handler indicated healthy switch probeType { case "liveness": json.NewEncoder(w).Encode(map[string]string{"status": "alive"}) case "readiness": json.NewEncoder(w).Encode(map[string]bool{"ready": true}) case "startup": json.NewEncoder(w).Encode(map[string]bool{"started": true}) } } } } // createGeneralHandler creates a handler for general health endpoints that provides // comprehensive health information. 
func (srv *Server) createGeneralHandler() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { // Create a response recorder to capture the handler's status code // Use a buffer to prevent the handler from writing to the actual response recorder := &statusRecorder{ResponseWriter: &discardWriter{}, statusCode: 200} srv.generalHandler(recorder, r) // Always provide a comprehensive JSON response for general endpoints w.Header().Set("Content-Type", "application/json") w.WriteHeader(recorder.statusCode) response := map[string]interface{}{ "status": map[bool]string{true: "healthy", false: "unhealthy"}[recorder.statusCode < 400], } if srv.serviceName != "" { response["service"] = srv.serviceName } json.NewEncoder(w).Encode(response) } } // statusRecorder captures the response status code from handlers while allowing // them to write their own response content if needed. type statusRecorder struct { http.ResponseWriter statusCode int written bool } func (r *statusRecorder) WriteHeader(code int) { r.statusCode = code r.ResponseWriter.WriteHeader(code) } func (r *statusRecorder) Write(data []byte) (int, error) { r.written = true return r.ResponseWriter.Write(data) } // discardWriter implements http.ResponseWriter but discards all writes. // Used to capture status codes without writing response content. type discardWriter struct{} func (d *discardWriter) Header() http.Header { return make(http.Header) } func (d *discardWriter) Write([]byte) (int, error) { return 0, nil } func (d *discardWriter) WriteHeader(int) {} // HealthCheckListener runs a simple HTTP server on the specified port for health check probes. func HealthCheckListener(ctx context.Context, port int, log *slog.Logger) error { srv := NewServer(nil) srv.SetLogger(log) return srv.Listen(ctx, port) } func basicHealth(w http.ResponseWriter, r *http.Request) { w.WriteHeader(200) w.Write([]byte("ok")) }