feat(health): enhance server with probe-specific handlers
- Add separate handlers for liveness (/healthz), readiness (/readyz), and startup (/startupz) probes
- Implement WithLivenessHandler, WithReadinessHandler, WithStartupHandler, and WithServiceName options
- Add probe-specific JSON response formats
- Add comprehensive package documentation with usage examples
- Maintain backward compatibility for /__health and / endpoints
- Add tests for all probe types and fallback scenarios

Enables proper Kubernetes health monitoring with different probe types.
This commit is contained in:
@@ -1,13 +1,71 @@
|
||||
// Package health provides a standalone HTTP server for health checks.
|
||||
//
|
||||
// This package implements a simple health check server that can be used
|
||||
// to expose health status endpoints for monitoring and load balancing.
|
||||
// It supports custom health check handlers and provides structured logging
|
||||
// with graceful shutdown capabilities.
|
||||
// This package implements a flexible health check server that supports
|
||||
// different handlers for Kubernetes probe types (liveness, readiness, startup).
|
||||
// It provides structured logging, graceful shutdown, and standard HTTP endpoints
|
||||
// for monitoring and load balancing.
|
||||
//
|
||||
// # Kubernetes Probe Types
|
||||
//
|
||||
// Liveness Probe: Detects when a container is "dead" and needs restarting.
|
||||
// Should be a lightweight check that verifies the process is still running
|
||||
// and not in an unrecoverable state.
|
||||
//
|
||||
// Readiness Probe: Determines when a container is ready to accept traffic.
|
||||
// Controls which Pods are used as backends for Services. Should verify
|
||||
// the application can handle requests properly.
|
||||
//
|
||||
// Startup Probe: Verifies when a container application has successfully started.
|
||||
// Delays liveness and readiness probes until startup succeeds. Useful for
|
||||
// slow-starting applications.
|
||||
//
|
||||
// # Usage Examples
|
||||
//
|
||||
// Basic usage with a single handler for all probes:
|
||||
//
|
||||
// srv := health.NewServer(myHealthHandler)
|
||||
// srv.Listen(ctx, 9091)
|
||||
//
|
||||
// Advanced usage with separate handlers for each probe type:
|
||||
//
|
||||
// srv := health.NewServer(nil,
|
||||
// health.WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
// // Simple alive check
|
||||
// w.WriteHeader(http.StatusOK)
|
||||
// }),
|
||||
// health.WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
// // Check if ready to serve traffic
|
||||
// if err := checkDatabase(); err != nil {
|
||||
// w.WriteHeader(http.StatusServiceUnavailable)
|
||||
// return
|
||||
// }
|
||||
// w.WriteHeader(http.StatusOK)
|
||||
// }),
|
||||
// health.WithStartupHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
// // Check if startup is complete
|
||||
// if !applicationReady() {
|
||||
// w.WriteHeader(http.StatusServiceUnavailable)
|
||||
// return
|
||||
// }
|
||||
// w.WriteHeader(http.StatusOK)
|
||||
// }),
|
||||
// health.WithServiceName("my-service"),
|
||||
// )
|
||||
// srv.Listen(ctx, 9091)
|
||||
//
|
||||
// # Standard Endpoints
|
||||
//
|
||||
// The server exposes these endpoints:
|
||||
// - /healthz - liveness probe (or general health if no specific handler)
|
||||
// - /readyz - readiness probe (or general health if no specific handler)
|
||||
// - /startupz - startup probe (or general health if no specific handler)
|
||||
// - /__health - general health endpoint (backward compatibility)
|
||||
// - / - general health endpoint (root path)
|
||||
package health
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strconv"
|
||||
@@ -21,23 +79,74 @@ import (
|
||||
// It runs separately from the main application server to ensure health
|
||||
// checks remain available even if the main server is experiencing issues.
|
||||
//
|
||||
// The server includes built-in timeouts, graceful shutdown, and structured
|
||||
// logging for monitoring and debugging health check behavior.
|
||||
// The server supports separate handlers for different Kubernetes probe types
|
||||
// (liveness, readiness, startup) and includes built-in timeouts, graceful
|
||||
// shutdown, and structured logging.
|
||||
type Server struct {
	// log receives the server's structured log output.
	log *slog.Logger

	// Probe-specific handlers. A nil entry makes the corresponding endpoint
	// fall back to generalHandler.
	livenessHandler  http.HandlerFunc // selected for the "liveness" probe (/healthz)
	readinessHandler http.HandlerFunc // selected for the "readiness" probe (/readyz)
	startupHandler   http.HandlerFunc // selected for the "startup" probe (/startupz)

	// generalHandler backs /__health and /, and is the fallback for any probe
	// without a dedicated handler.
	generalHandler http.HandlerFunc

	// serviceName, when non-empty, is reported as "service" in the general
	// JSON response.
	serviceName string
}
|
||||
|
||||
// NewServer creates a new health check server with the specified health handler.
|
||||
// If healthFn is nil, a default handler that returns HTTP 200 "ok" is used.
|
||||
func NewServer(healthFn http.HandlerFunc) *Server {
|
||||
// Option configures a Server. Options are passed to NewServer, which applies
// each one to the server it is constructing.
|
||||
type Option func(*Server)
|
||||
|
||||
// WithLivenessHandler sets a specific handler for the /healthz endpoint.
|
||||
// Liveness probes determine if a container should be restarted.
|
||||
func WithLivenessHandler(handler http.HandlerFunc) Option {
|
||||
return func(s *Server) {
|
||||
s.livenessHandler = handler
|
||||
}
|
||||
}
|
||||
|
||||
// WithReadinessHandler sets a specific handler for the /readyz endpoint.
|
||||
// Readiness probes determine if a container can receive traffic.
|
||||
func WithReadinessHandler(handler http.HandlerFunc) Option {
|
||||
return func(s *Server) {
|
||||
s.readinessHandler = handler
|
||||
}
|
||||
}
|
||||
|
||||
// WithStartupHandler sets a specific handler for the /startupz endpoint.
|
||||
// Startup probes determine if a container has finished initializing.
|
||||
func WithStartupHandler(handler http.HandlerFunc) Option {
|
||||
return func(s *Server) {
|
||||
s.startupHandler = handler
|
||||
}
|
||||
}
|
||||
|
||||
// WithServiceName sets the service name for JSON responses and logging.
|
||||
func WithServiceName(serviceName string) Option {
|
||||
return func(s *Server) {
|
||||
s.serviceName = serviceName
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// NewServer creates a new health check server with optional probe-specific handlers.
|
||||
//
|
||||
// If healthFn is provided, it will be used as a fallback for any probe endpoints
|
||||
// that don't have specific handlers configured. If healthFn is nil, a default
|
||||
// handler that returns HTTP 200 "ok" is used as the fallback.
|
||||
//
|
||||
// Use the With* option functions to configure specific handlers for different
|
||||
// probe types (liveness, readiness, startup).
|
||||
func NewServer(healthFn http.HandlerFunc, opts ...Option) *Server {
|
||||
if healthFn == nil {
|
||||
healthFn = basicHealth
|
||||
}
|
||||
srv := &Server{
|
||||
log: logger.Setup(),
|
||||
healthFn: healthFn,
|
||||
log: logger.Setup(),
|
||||
generalHandler: healthFn,
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
opt(srv)
|
||||
}
|
||||
|
||||
return srv
|
||||
}
|
||||
|
||||
@@ -47,13 +156,27 @@ func (srv *Server) SetLogger(log *slog.Logger) {
|
||||
}
|
||||
|
||||
// Listen starts the health server on the specified port and blocks until ctx is cancelled.
|
||||
// The server exposes the health handler at "/__health" with graceful shutdown support.
|
||||
// The server exposes health check endpoints with graceful shutdown support.
|
||||
//
|
||||
// Standard endpoints exposed:
|
||||
// - /healthz - liveness probe (uses livenessHandler or falls back to generalHandler)
|
||||
// - /readyz - readiness probe (uses readinessHandler or falls back to generalHandler)
|
||||
// - /startupz - startup probe (uses startupHandler or falls back to generalHandler)
|
||||
// - /__health - general health endpoint (uses generalHandler)
|
||||
// - / - root health endpoint (uses generalHandler)
|
||||
func (srv *Server) Listen(ctx context.Context, port int) error {
|
||||
srv.log.Info("starting health listener", "port", port)
|
||||
|
||||
serveMux := http.NewServeMux()
|
||||
|
||||
serveMux.HandleFunc("/__health", srv.healthFn)
|
||||
// Register probe-specific handlers
|
||||
serveMux.HandleFunc("/healthz", srv.createProbeHandler("liveness"))
|
||||
serveMux.HandleFunc("/readyz", srv.createProbeHandler("readiness"))
|
||||
serveMux.HandleFunc("/startupz", srv.createProbeHandler("startup"))
|
||||
|
||||
// Register general health endpoints for backward compatibility
|
||||
serveMux.HandleFunc("/__health", srv.createGeneralHandler())
|
||||
serveMux.HandleFunc("/", srv.createGeneralHandler())
|
||||
|
||||
hsrv := &http.Server{
|
||||
Addr: ":" + strconv.Itoa(port),
|
||||
@@ -89,6 +212,121 @@ func (srv *Server) Listen(ctx context.Context, port int) error {
|
||||
return g.Wait()
|
||||
}
|
||||
|
||||
// createProbeHandler creates a handler for a specific probe type that provides
|
||||
// appropriate JSON responses and falls back to the general handler if no specific
|
||||
// handler is configured.
|
||||
func (srv *Server) createProbeHandler(probeType string) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var handler http.HandlerFunc
|
||||
|
||||
// Select the appropriate handler
|
||||
switch probeType {
|
||||
case "liveness":
|
||||
handler = srv.livenessHandler
|
||||
case "readiness":
|
||||
handler = srv.readinessHandler
|
||||
case "startup":
|
||||
handler = srv.startupHandler
|
||||
}
|
||||
|
||||
// Fall back to general handler if no specific handler is configured
|
||||
if handler == nil {
|
||||
handler = srv.generalHandler
|
||||
}
|
||||
|
||||
// Create a response recorder to capture the handler's status code
|
||||
recorder := &statusRecorder{ResponseWriter: w, statusCode: 200}
|
||||
handler(recorder, r)
|
||||
|
||||
// If the handler already wrote a response, we're done
|
||||
if recorder.written {
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, provide a standard JSON response based on the status code
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
if recorder.statusCode >= 400 {
|
||||
// Handler indicated unhealthy
|
||||
switch probeType {
|
||||
case "liveness":
|
||||
json.NewEncoder(w).Encode(map[string]string{"status": "unhealthy"})
|
||||
case "readiness":
|
||||
json.NewEncoder(w).Encode(map[string]bool{"ready": false})
|
||||
case "startup":
|
||||
json.NewEncoder(w).Encode(map[string]bool{"started": false})
|
||||
}
|
||||
} else {
|
||||
// Handler indicated healthy
|
||||
switch probeType {
|
||||
case "liveness":
|
||||
json.NewEncoder(w).Encode(map[string]string{"status": "alive"})
|
||||
case "readiness":
|
||||
json.NewEncoder(w).Encode(map[string]bool{"ready": true})
|
||||
case "startup":
|
||||
json.NewEncoder(w).Encode(map[string]bool{"started": true})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createGeneralHandler creates a handler for general health endpoints that provides
|
||||
// comprehensive health information.
|
||||
func (srv *Server) createGeneralHandler() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
// Create a response recorder to capture the handler's status code
|
||||
// Use a buffer to prevent the handler from writing to the actual response
|
||||
recorder := &statusRecorder{ResponseWriter: &discardWriter{}, statusCode: 200}
|
||||
srv.generalHandler(recorder, r)
|
||||
|
||||
// Always provide a comprehensive JSON response for general endpoints
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(recorder.statusCode)
|
||||
|
||||
response := map[string]interface{}{
|
||||
"status": map[bool]string{true: "healthy", false: "unhealthy"}[recorder.statusCode < 400],
|
||||
}
|
||||
|
||||
if srv.serviceName != "" {
|
||||
response["service"] = srv.serviceName
|
||||
}
|
||||
|
||||
json.NewEncoder(w).Encode(response)
|
||||
}
|
||||
}
|
||||
|
||||
// statusRecorder wraps an http.ResponseWriter and records what a handler did
// with it: the status code it chose and whether it wrote any body. All calls
// are still forwarded to the wrapped writer.
//
// statusCode should be preset by the creator to the status that applies when
// the handler never calls WriteHeader.
type statusRecorder struct {
	http.ResponseWriter
	statusCode int  // status chosen by the handler, or the preset value
	written    bool // true once Write has been called
	committed  bool // true once the final status is fixed
}

// WriteHeader records code as the response status and forwards the call.
// Once a final status is fixed, by an earlier WriteHeader or by a Write, later
// calls no longer change the recorded value, because only the first status
// reaches the client.
func (r *statusRecorder) WriteHeader(code int) {
	if !r.committed {
		r.statusCode = code
		// 1xx responses other than 101 are interim: a final status may follow.
		r.committed = code >= 200 || code == http.StatusSwitchingProtocols
	}
	r.ResponseWriter.WriteHeader(code)
}

// Write marks the response as written, fixes the status, and forwards data to
// the wrapped writer.
func (r *statusRecorder) Write(data []byte) (int, error) {
	r.written = true
	r.committed = true
	return r.ResponseWriter.Write(data)
}
|
||||
|
||||
// discardWriter is an http.ResponseWriter that sends nothing anywhere. It lets
// a handler run to completion when only its status code is of interest.
type discardWriter struct {
	header http.Header // created on first use; never transmitted
}

// Header returns the writer's header map. The same map is returned on every
// call, so values a handler sets remain readable by that handler.
func (d *discardWriter) Header() http.Header {
	if d.header == nil {
		d.header = make(http.Header)
	}
	return d.header
}

// Write drops p and reports it as fully written. Returning fewer bytes than
// len(p) without an error would break the io.Writer contract and surface as
// a short write in callers such as io.Copy.
func (d *discardWriter) Write(p []byte) (int, error) {
	return len(p), nil
}

// WriteHeader does nothing.
func (d *discardWriter) WriteHeader(int) {}
|
||||
|
||||
// HealthCheckListener runs a simple HTTP server on the specified port for health check probes.
|
||||
func HealthCheckListener(ctx context.Context, port int, log *slog.Logger) error {
|
||||
srv := NewServer(nil)
|
||||
|
@@ -1,13 +1,14 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHealthHandler(t *testing.T) {
|
||||
func TestBasicHealthHandler(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/__health", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -24,3 +25,129 @@ func TestHealthHandler(t *testing.T) {
|
||||
t.Errorf("expected ok got %q", string(data))
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeHandlers(t *testing.T) {
|
||||
// Test with separate handlers for each probe type
|
||||
srv := NewServer(nil,
|
||||
WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
WithStartupHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
WithServiceName("test-service"),
|
||||
)
|
||||
|
||||
tests := []struct {
|
||||
handler http.HandlerFunc
|
||||
expectedStatus int
|
||||
expectedBody string
|
||||
}{
|
||||
{srv.createProbeHandler("liveness"), 200, `{"status":"alive"}`},
|
||||
{srv.createProbeHandler("readiness"), 200, `{"ready":true}`},
|
||||
{srv.createProbeHandler("startup"), 200, `{"started":true}`},
|
||||
{srv.createGeneralHandler(), 200, `{"service":"test-service","status":"healthy"}`},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
tt.handler(w, req)
|
||||
|
||||
if w.Code != tt.expectedStatus {
|
||||
t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code)
|
||||
}
|
||||
|
||||
body := w.Body.String()
|
||||
if body != tt.expectedBody+"\n" { // json.Encoder adds newline
|
||||
t.Errorf("expected body %q, got %q", tt.expectedBody, body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeHandlerFallback(t *testing.T) {
|
||||
// Test fallback to general handler when no specific handler is configured
|
||||
generalHandler := func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}
|
||||
|
||||
srv := NewServer(generalHandler, WithServiceName("test-service"))
|
||||
|
||||
tests := []struct {
|
||||
handler http.HandlerFunc
|
||||
expectedStatus int
|
||||
expectedBody string
|
||||
}{
|
||||
{srv.createProbeHandler("liveness"), 200, `{"status":"alive"}`},
|
||||
{srv.createProbeHandler("readiness"), 200, `{"ready":true}`},
|
||||
{srv.createProbeHandler("startup"), 200, `{"started":true}`},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
t.Run(fmt.Sprintf("fallback_%d", i), func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
tt.handler(w, req)
|
||||
|
||||
if w.Code != tt.expectedStatus {
|
||||
t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code)
|
||||
}
|
||||
|
||||
body := w.Body.String()
|
||||
if body != tt.expectedBody+"\n" { // json.Encoder adds newline
|
||||
t.Errorf("expected body %q, got %q", tt.expectedBody, body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnhealthyProbeHandlers(t *testing.T) {
|
||||
// Test with handlers that return unhealthy status
|
||||
srv := NewServer(nil,
|
||||
WithLivenessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}),
|
||||
WithReadinessHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}),
|
||||
WithStartupHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}),
|
||||
WithServiceName("test-service"),
|
||||
)
|
||||
|
||||
tests := []struct {
|
||||
handler http.HandlerFunc
|
||||
expectedStatus int
|
||||
expectedBody string
|
||||
}{
|
||||
{srv.createProbeHandler("liveness"), 503, `{"status":"unhealthy"}`},
|
||||
{srv.createProbeHandler("readiness"), 503, `{"ready":false}`},
|
||||
{srv.createProbeHandler("startup"), 503, `{"started":false}`},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
t.Run(fmt.Sprintf("unhealthy_%d", i), func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
tt.handler(w, req)
|
||||
|
||||
if w.Code != tt.expectedStatus {
|
||||
t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code)
|
||||
}
|
||||
|
||||
body := w.Body.String()
|
||||
if body != tt.expectedBody+"\n" { // json.Encoder adds newline
|
||||
t.Errorf("expected body %q, got %q", tt.expectedBody, body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user