feat(health): enhance server with probe-specific handlers

- Add separate handlers for liveness (/healthz), readiness (/readyz),
  and startup (/startupz) probes
- Implement WithLivenessHandler, WithReadinessHandler, WithStartupHandler,
  and WithServiceName options
- Add probe-specific JSON response formats
- Add comprehensive package documentation with usage examples
- Maintain backward compatibility for /__health and / endpoints
- Add tests for all probe types and fallback scenarios

Enables proper Kubernetes health monitoring with different probe types.
This commit is contained in:
2025-09-21 10:52:29 -07:00
parent 66b51df2af
commit 10864363e2
2 changed files with 381 additions and 16 deletions

View File

@@ -1,13 +1,71 @@
// Package health provides a standalone HTTP server for health checks.
//
// This package implements a simple health check server that can be used
// to expose health status endpoints for monitoring and load balancing.
// It supports custom health check handlers and provides structured logging
// with graceful shutdown capabilities.
// This package implements a flexible health check server that supports
// different handlers for Kubernetes probe types (liveness, readiness, startup).
// It provides structured logging, graceful shutdown, and standard HTTP endpoints
// for monitoring and load balancing.
//
// # Kubernetes Probe Types
//
// Liveness Probe: Detects when a container is "dead" and needs restarting.
// Should be a lightweight check that verifies the process is still running
// and not in an unrecoverable state.
//
// Readiness Probe: Determines when a container is ready to accept traffic.
// Controls which Pods are used as backends for Services. Should verify
// the application can handle requests properly.
//
// Startup Probe: Verifies when a container application has successfully started.
// Delays liveness and readiness probes until startup succeeds. Useful for
// slow-starting applications.
//
// # Usage Examples
//
// Basic usage with a single handler for all probes:
//
//	srv := health.NewServer(myHealthHandler)
//	srv.Listen(ctx, 9091)
//
// Advanced usage with separate handlers for each probe type:
//
//	srv := health.NewServer(nil,
//		health.WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) {
//			// Simple alive check
//			w.WriteHeader(http.StatusOK)
//		}),
//		health.WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) {
//			// Check if ready to serve traffic
//			if err := checkDatabase(); err != nil {
//				w.WriteHeader(http.StatusServiceUnavailable)
//				return
//			}
//			w.WriteHeader(http.StatusOK)
//		}),
//		health.WithStartupHandler(func(w http.ResponseWriter, r *http.Request) {
//			// Check if startup is complete
//			if !applicationReady() {
//				w.WriteHeader(http.StatusServiceUnavailable)
//				return
//			}
//			w.WriteHeader(http.StatusOK)
//		}),
//		health.WithServiceName("my-service"),
//	)
//	srv.Listen(ctx, 9091)
//
// # Standard Endpoints
//
// The server exposes these endpoints:
// - /healthz - liveness probe (or general health if no specific handler)
// - /readyz - readiness probe (or general health if no specific handler)
// - /startupz - startup probe (or general health if no specific handler)
// - /__health - general health endpoint (backward compatibility)
// - / - general health endpoint (root path)
package health
import (
"context"
"encoding/json"
"log/slog"
"net/http"
"strconv"
@@ -21,23 +79,74 @@ import (
// It runs separately from the main application server to ensure health
// checks remain available even if the main server is experiencing issues.
//
// The server includes built-in timeouts, graceful shutdown, and structured
// logging for monitoring and debugging health check behavior.
// The server supports separate handlers for different Kubernetes probe types
// (liveness, readiness, startup) and includes built-in timeouts, graceful
// shutdown, and structured logging.
type Server struct {
log *slog.Logger
healthFn http.HandlerFunc
// NOTE(review): the two fields above look like the pre-change lines of a
// diff rendered without +/- markers, and the fields below like their
// replacement — confirm against the real file before relying on either set.
log *slog.Logger
// Probe-specific handlers. When one of these is nil, the matching probe
// endpoint is served by generalHandler instead.
livenessHandler http.HandlerFunc
readinessHandler http.HandlerFunc
startupHandler http.HandlerFunc
generalHandler http.HandlerFunc // fallback for /__health and / paths
// serviceName is emitted as the "service" field of the general health JSON
// response when it is non-empty.
serviceName string
}
// NewServer creates a new health check server with the specified health handler.
// If healthFn is nil, a default handler that returns HTTP 200 "ok" is used.
func NewServer(healthFn http.HandlerFunc) *Server {
// Option represents a configuration option for the health server.
// An Option receives the Server under construction and mutates it in place;
// NewServer applies the supplied options in order after setting its defaults.
type Option func(*Server)
// WithLivenessHandler returns an Option that installs handler as the liveness
// probe handler, i.e. the one consulted for the /healthz endpoint.
// Liveness probes tell the orchestrator whether the container must be restarted.
// Passing a nil handler leaves the endpoint on the general fallback handler.
func WithLivenessHandler(handler http.HandlerFunc) Option {
	apply := func(srv *Server) {
		srv.livenessHandler = handler
	}
	return apply
}
// WithReadinessHandler returns an Option that installs handler as the readiness
// probe handler, i.e. the one consulted for the /readyz endpoint.
// Readiness probes tell the orchestrator whether the container may receive traffic.
// Passing a nil handler leaves the endpoint on the general fallback handler.
func WithReadinessHandler(handler http.HandlerFunc) Option {
	apply := func(srv *Server) {
		srv.readinessHandler = handler
	}
	return apply
}
// WithStartupHandler returns an Option that installs handler as the startup
// probe handler, i.e. the one consulted for the /startupz endpoint.
// Startup probes tell the orchestrator whether the container finished initializing.
// Passing a nil handler leaves the endpoint on the general fallback handler.
func WithStartupHandler(handler http.HandlerFunc) Option {
	apply := func(srv *Server) {
		srv.startupHandler = handler
	}
	return apply
}
// WithServiceName returns an Option that records serviceName on the server.
// A non-empty name is reported as the "service" field of the JSON document
// produced by the general health endpoints.
func WithServiceName(serviceName string) Option {
	apply := func(srv *Server) {
		srv.serviceName = serviceName
	}
	return apply
}
// NewServer creates a new health check server with optional probe-specific handlers.
//
// If healthFn is provided, it will be used as a fallback for any probe endpoints
// that don't have specific handlers configured. If healthFn is nil, a default
// handler that returns HTTP 200 "ok" is used as the fallback.
//
// Use the With* option functions to configure specific handlers for different
// probe types (liveness, readiness, startup).
//
// NOTE(review): the struct literal below carries both a `healthFn:`/`log:` pair
// and a `log:`/`generalHandler:` pair; this looks like old and new diff lines
// shown together without +/- markers — confirm against the real file.
func NewServer(healthFn http.HandlerFunc, opts ...Option) *Server {
// Guarantee a usable fallback handler even when the caller passes nil.
if healthFn == nil {
healthFn = basicHealth
}
srv := &Server{
log: logger.Setup(),
healthFn: healthFn,
log: logger.Setup(),
generalHandler: healthFn,
}
// Options run after the defaults are in place, in the order supplied, so a
// later option overrides an earlier one that sets the same field.
for _, opt := range opts {
opt(srv)
}
return srv
}
@@ -47,13 +156,27 @@ func (srv *Server) SetLogger(log *slog.Logger) {
}
// Listen starts the health server on the specified port and blocks until ctx is cancelled.
// The server exposes the health handler at "/__health" with graceful shutdown support.
// The server exposes health check endpoints with graceful shutdown support.
//
// Standard endpoints exposed:
// - /healthz - liveness probe (uses livenessHandler or falls back to generalHandler)
// - /readyz - readiness probe (uses readinessHandler or falls back to generalHandler)
// - /startupz - startup probe (uses startupHandler or falls back to generalHandler)
// - /__health - general health endpoint (uses generalHandler)
// - / - root health endpoint (uses generalHandler)
func (srv *Server) Listen(ctx context.Context, port int) error {
srv.log.Info("starting health listener", "port", port)
serveMux := http.NewServeMux()
serveMux.HandleFunc("/__health", srv.healthFn)
// Register probe-specific handlers
serveMux.HandleFunc("/healthz", srv.createProbeHandler("liveness"))
serveMux.HandleFunc("/readyz", srv.createProbeHandler("readiness"))
serveMux.HandleFunc("/startupz", srv.createProbeHandler("startup"))
// Register general health endpoints for backward compatibility
serveMux.HandleFunc("/__health", srv.createGeneralHandler())
serveMux.HandleFunc("/", srv.createGeneralHandler())
hsrv := &http.Server{
Addr: ":" + strconv.Itoa(port),
@@ -89,6 +212,121 @@ func (srv *Server) Listen(ctx context.Context, port int) error {
return g.Wait()
}
// createProbeHandler returns the HTTP handler for one probe endpoint.
//
// probeType selects the configured handler: "liveness", "readiness" or
// "startup". When the matching handler is nil (or probeType is anything else)
// the general handler is used instead.
//
// The selected handler runs against a recorder that holds back the status
// line. If the handler writes a body, its status and body are passed through
// untouched. If it only sets a status (or does nothing), a small JSON document
// is sent with that status:
//
//	liveness:  {"status":"alive"} / {"status":"unhealthy"}
//	readiness: {"ready":true}     / {"ready":false}
//	startup:   {"started":true}   / {"started":false}
//
// Status codes >= 400 are treated as unhealthy.
//
// Holding back the status line matters: net/http ignores header changes made
// after WriteHeader, so forwarding the handler's WriteHeader immediately would
// make the Content-Type set below ineffective for every handler that calls
// WriteHeader explicitly.
func (srv *Server) createProbeHandler(probeType string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Resolve the handler per request so the current field values are used.
		var handler http.HandlerFunc
		switch probeType {
		case "liveness":
			handler = srv.livenessHandler
		case "readiness":
			handler = srv.readinessHandler
		case "startup":
			handler = srv.startupHandler
		}
		if handler == nil {
			handler = srv.generalHandler
		}

		rec := &probeRecorder{ResponseWriter: w, statusCode: http.StatusOK}
		handler(rec, r)

		// The handler produced its own body; status and body are already sent.
		if rec.wroteBody {
			return
		}

		healthy := rec.statusCode < 400

		var body any
		switch probeType {
		case "liveness":
			status := "alive"
			if !healthy {
				status = "unhealthy"
			}
			body = map[string]string{"status": status}
		case "readiness":
			body = map[string]bool{"ready": healthy}
		case "startup":
			body = map[string]bool{"started": healthy}
		}

		// Headers must be final before the status line is written.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(rec.statusCode)

		if body == nil {
			return
		}
		if err := json.NewEncoder(w).Encode(body); err != nil {
			srv.log.Warn("writing probe response", "probe", probeType, "error", err)
		}
	}
}

// probeRecorder wraps the real ResponseWriter for createProbeHandler. It
// remembers the status code chosen by the wrapped handler but only sends it
// once the handler writes a body, leaving the caller free to add headers and a
// default body when the handler wrote nothing.
type probeRecorder struct {
	http.ResponseWriter
	statusCode int  // status to send; defaults to 200 until WriteHeader is called
	headerSet  bool // a final (non-1xx) status has been recorded
	wroteBody  bool // Write was called at least once
}

// WriteHeader records the first final status code instead of sending it.
// Informational (1xx) codes other than 101 are forwarded immediately because
// they do not end the header phase.
func (p *probeRecorder) WriteHeader(code int) {
	if code >= 100 && code < 200 && code != http.StatusSwitchingProtocols {
		p.ResponseWriter.WriteHeader(code)
		return
	}
	if p.headerSet {
		// Mirror net/http: only the first final status counts.
		return
	}
	p.headerSet = true
	p.statusCode = code
}

// Write sends the recorded status before the first chunk of body data and then
// forwards data to the underlying writer.
func (p *probeRecorder) Write(data []byte) (int, error) {
	if !p.wroteBody {
		p.wroteBody = true
		p.ResponseWriter.WriteHeader(p.statusCode)
	}
	return p.ResponseWriter.Write(data)
}
// createGeneralHandler builds the handler used for the general health
// endpoints. It runs the general handler purely to learn its status code:
// anything that handler writes (body or headers) goes to a discarding writer
// and never reaches the client. The client always receives a JSON document
// with that status code, a "status" field ("healthy" below 400, otherwise
// "unhealthy") and, when a service name is configured, a "service" field.
func (srv *Server) createGeneralHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Run the wrapped handler against a sink so only its status code survives.
		probe := &statusRecorder{ResponseWriter: &discardWriter{}, statusCode: 200}
		srv.generalHandler(probe, r)

		state := "unhealthy"
		if probe.statusCode < 400 {
			state = "healthy"
		}

		payload := map[string]any{"status": state}
		if srv.serviceName != "" {
			payload["service"] = srv.serviceName
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(probe.statusCode)
		json.NewEncoder(w).Encode(payload)
	}
}
// statusRecorder decorates an http.ResponseWriter so callers can inspect what
// a handler did: statusCode holds the last code passed to WriteHeader and
// written reports whether Write was ever called. Every call is still forwarded
// to the wrapped writer unchanged.
type statusRecorder struct {
	http.ResponseWriter
	statusCode int
	written    bool
}

// WriteHeader stores code and passes it straight on to the wrapped writer.
func (rec *statusRecorder) WriteHeader(code int) {
	rec.statusCode = code

	inner := rec.ResponseWriter
	inner.WriteHeader(code)
}

// Write flags that body data was produced and passes data on to the wrapped
// writer, returning that writer's result.
func (rec *statusRecorder) Write(data []byte) (int, error) {
	rec.written = true

	inner := rec.ResponseWriter
	return inner.Write(data)
}
// discardWriter implements http.ResponseWriter but discards all writes.
// Used to capture status codes without writing response content.
//
// Header returns a fresh, empty header map on every call, so header changes
// made by a handler are dropped as well.
type discardWriter struct{}

// Compile-time check that discardWriter satisfies http.ResponseWriter.
var _ http.ResponseWriter = (*discardWriter)(nil)

// Header returns a new empty header map; nothing stored in it is retained.
func (d *discardWriter) Header() http.Header {
	return make(http.Header)
}

// Write drops p and reports it as fully written. Reporting len(p) rather than
// 0 honours the io.Writer contract (a short count requires a non-nil error),
// so helpers such as io.Copy do not fail with io.ErrShortWrite.
func (d *discardWriter) Write(p []byte) (int, error) {
	return len(p), nil
}

// WriteHeader ignores the status code.
func (d *discardWriter) WriteHeader(int) {}
// HealthCheckListener runs a simple HTTP server on the specified port for health check probes.
func HealthCheckListener(ctx context.Context, port int, log *slog.Logger) error {
srv := NewServer(nil)