common/tracing/tracing.go
Ask Bjørn Hansen 694f8ba1d3 Add comprehensive godoc documentation to all packages
- Add package-level documentation with usage examples and architecture details
- Document all public types, functions, and methods following godoc conventions
- Remove unused logger.Error type and NewError function
- Apply consistent documentation style across all packages

Packages updated:
- apitls: TLS certificate management with automatic renewal
- config: Environment-based configuration system
- config/depenv: Deployment environment handling
- ekko: Enhanced Echo web framework wrapper
- kafka: Kafka client wrapper with TLS support
- logger: Structured logging with OpenTelemetry integration
- tracing: OpenTelemetry distributed tracing setup
- types: Shared data structures for NTP Pool project
- xff/fastlyxff: Fastly CDN IP range management

All tests pass after documentation changes.
2025-06-19 23:52:03 -07:00

349 lines
12 KiB
Go

// Package tracing provides OpenTelemetry distributed tracing setup with OTLP export support.
//
// This package handles the complete OpenTelemetry SDK initialization including:
// - Trace provider configuration with batching and resource detection
// - Log provider setup for structured log export via OTLP
// - Automatic resource discovery (service name, version, host, container, process info)
// - Support for both gRPC and HTTP OTLP exporters with TLS configuration
// - Propagation context setup for distributed tracing across services
// - Graceful shutdown handling for all telemetry components
//
// The package supports various deployment scenarios:
// - Development: Local OTLP collectors or observability backends
// - Production: Secure OTLP export with mutual TLS authentication
// - Container environments: Automatic container and Kubernetes resource detection
//
// Configuration is primarily handled via standard OpenTelemetry environment variables:
// - OTEL_SERVICE_NAME: Service identification
// - OTEL_EXPORTER_OTLP_PROTOCOL: Protocol selection (grpc, http/protobuf)
// - OTEL_TRACES_EXPORTER: Exporter selection (unset or "otlp" uses the built-in OTLP exporter; any other value is handled by autoexport)
// - OTEL_RESOURCE_ATTRIBUTES: Additional resource attributes
//
// Example usage:
//
//	cfg := &tracing.TracerConfig{
//		ServiceName: "my-service",
//		Environment: "production",
//		Endpoint:    "otlp.example.com:4317",
//	}
//	shutdown, err := tracing.InitTracer(ctx, cfg)
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer shutdown(ctx)
package tracing
// todo, review:
// https://github.com/ttys3/tracing-go/blob/main/tracing.go#L136
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"os"
"slices"
"time"
"go.ntppool.org/common/logger"
"go.ntppool.org/common/version"
"google.golang.org/grpc/credentials"
"go.opentelemetry.io/contrib/exporters/autoexport"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
logglobal "go.opentelemetry.io/otel/log/global"
"go.opentelemetry.io/otel/propagation"
sdklog "go.opentelemetry.io/otel/sdk/log"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace"
)
const (
// svcNameKey is the environment variable name that Service Name information will be read from.
// SetupSDK also writes the configured service name to it when the variable is empty.
svcNameKey = "OTEL_SERVICE_NAME"
// otelExporterOTLPProtoEnvKey is the general OTLP protocol environment variable.
// It is consulted only when the traces-specific variable below is empty.
otelExporterOTLPProtoEnvKey = "OTEL_EXPORTER_OTLP_PROTOCOL"
// otelExporterOTLPTracesProtoEnvKey is the traces-specific OTLP protocol environment
// variable; it is checked first when choosing the trace exporter protocol.
otelExporterOTLPTracesProtoEnvKey = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
)
// errInvalidOTLPProtocol is returned by newOLTPExporter when the protocol read
// from the environment is not one of the values it handles.
var errInvalidOTLPProtocol = errors.New("invalid OTLP protocol - should be one of ['grpc', 'http/protobuf']")
// Reference example for the OTLP HTTP trace exporter:
// https://github.com/open-telemetry/opentelemetry-go/blob/main/exporters/otlp/otlptrace/otlptracehttp/example_test.go

// TpShutdownFunc represents a function that gracefully shuts down telemetry providers.
// It should be called during application shutdown to ensure all telemetry data is flushed
// and exporters are properly closed. The context is passed on to every component's
// shutdown call, so it can be used to set shutdown timeouts.
type TpShutdownFunc func(ctx context.Context) error
// Tracer returns the tracer named "ntppool-tracer" from the global OpenTelemetry
// tracer provider. The global provider is the one installed by SetupSDK (and
// therefore by InitTracer); the lookup happens on every call, so a provider
// installed later is picked up by subsequent calls.
func Tracer() trace.Tracer {
	return otel.GetTracerProvider().Tracer("ntppool-tracer")
}
// Start begins a new span named spanName on the tracer returned by Tracer,
// forwarding any span start options unchanged.
//
// It returns the derived context carrying the new span together with the span
// itself. Pass the returned context to downstream calls so their spans are
// correlated with this one.
func Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
	tracer := Tracer()
	spanCtx, span := tracer.Start(ctx, spanName, opts...)
	return spanCtx, span
}
// GetClientCertificate defines a function type for providing client certificates for mutual TLS.
// This is used when exporting telemetry data to secured OTLP endpoints that require
// client certificate authentication. The value is assigned to the GetClientCertificate
// field of the tls.Config used by the OTLP exporter, so it has the same signature.
type GetClientCertificate func(*tls.CertificateRequestInfo) (*tls.Certificate, error)
// TracerConfig carries the settings SetupSDK uses in addition to the standard
// OpenTelemetry environment variables, including the TLS material for secure
// OTLP export. Every field is optional; empty fields are skipped.
type TracerConfig struct {
	// ServiceName identifies the service in exported telemetry. It is written
	// to OTEL_SERVICE_NAME only when that variable is not already set, so a
	// value present in the environment takes precedence.
	ServiceName string

	// Environment is the deployment environment (development, staging,
	// production). When non-empty it is attached as the "environment"
	// resource attribute.
	Environment string

	// Endpoint is the OTLP endpoint as host and port without a scheme
	// (e.g., "otlp.example.com:4317").
	Endpoint string

	// EndpointURL is the complete OTLP endpoint URL
	// (e.g., "https://otlp.example.com:4317/v1/traces").
	EndpointURL string

	// CertificateProvider supplies the client certificate for mutual TLS.
	CertificateProvider GetClientCertificate

	// RootCAs is the CA certificate pool used to verify the server when a
	// custom TLS configuration is in effect.
	RootCAs *x509.CertPool
}
// InitTracer is the main entry point for enabling tracing in an application.
// It currently delegates directly to SetupSDK, which configures the trace and
// log providers and their exporters.
//
// It returns a shutdown function and an error. Call the shutdown function
// during application termination with a context whose timeout is suitable for
// flushing outstanding telemetry.
func InitTracer(ctx context.Context, cfg *TracerConfig) (TpShutdownFunc, error) {
	// todo: setup environment from cfg
	shutdown, err := SetupSDK(ctx, cfg)
	return shutdown, err
}
// SetupSDK performs the complete OpenTelemetry SDK initialization: resource
// discovery, exporter configuration, provider setup, and creation of the
// shutdown function.
//
// A nil cfg is treated as an empty configuration. If OTEL_SERVICE_NAME is not
// set and cfg.ServiceName is, the configured name is exported through that
// environment variable so resource detection picks it up.
//
// The span exporter is chosen from OTEL_TRACES_EXPORTER: when it is unset or
// "otlp" the package's own OTLP exporter (honouring cfg's endpoint and TLS
// settings) is used; any other value is delegated to autoexport. The log
// exporter always comes from autoexport. The text map propagator, tracer
// provider and logger provider are installed globally.
//
// The returned shutdown function stops all components in the reverse order of
// their initialization and joins their errors. If setup fails part-way, the
// components created so far are shut down and the returned error combines the
// setup error with any shutdown errors. If resource detection fails fatally,
// the returned shutdown function is nil.
func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, err error) {
	if cfg == nil {
		cfg = &TracerConfig{}
	}

	log := logger.Setup()

	// resource.WithFromEnv reads the service name from OTEL_SERVICE_NAME, so
	// the configured name is handed over via the environment. A value already
	// present in the environment wins over cfg.ServiceName.
	if os.Getenv(svcNameKey) == "" && cfg.ServiceName != "" {
		if setErr := os.Setenv(svcNameKey, cfg.ServiceName); setErr != nil {
			log.Warn("setting service name environment variable", "key", svcNameKey, "err", setErr)
		}
	}

	resources := []resource.Option{
		resource.WithFromEnv(),      // Attributes from OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME.
		resource.WithTelemetrySDK(), // Information about the OpenTelemetry SDK used.
		resource.WithProcess(),      // Process information.
		resource.WithOS(),           // OS information.
		resource.WithContainer(),    // Container information.
		resource.WithHost(),         // Host information.

		// The service name is not added here; it is set above via os.Setenv()
		// for WithFromEnv to find.
		resource.WithAttributes(semconv.ServiceVersionKey.String(version.Version())),
	}
	if cfg.Environment != "" {
		resources = append(resources,
			resource.WithAttributes(attribute.String("environment", cfg.Environment)),
		)
	}

	// Use the caller's context so resource detection honours its
	// cancellation and deadline like the rest of the setup does.
	res, err := resource.New(ctx, resources...)
	if errors.Is(err, resource.ErrPartialResource) || errors.Is(err, resource.ErrSchemaURLConflict) {
		log.Warn("otel resource setup", "err", err) // Non-fatal: a usable resource was still returned.
	} else if err != nil {
		log.Error("otel resource setup", "err", err)
		return nil, err
	}

	var shutdownFuncs []func(context.Context) error
	shutdown = func(ctx context.Context) error {
		var err error

		// Providers must be shut down before the exporters they write to,
		// which is the opposite of the order they were set up in.
		slices.Reverse(shutdownFuncs)
		for _, fn := range shutdownFuncs {
			err = errors.Join(err, fn(ctx))
		}
		// Clearing the list makes repeated calls a no-op.
		shutdownFuncs = nil

		if err != nil {
			log.Warn("shutdown returned errors", "err", err)
		}
		return err
	}

	// fail tears down whatever has been initialized so far and reports the
	// setup error together with any errors from that cleanup.
	fail := func(cause error) (TpShutdownFunc, error) {
		return shutdown, errors.Join(cause, shutdown(ctx))
	}

	otel.SetTextMapPropagator(newPropagator())

	var spanExporter sdktrace.SpanExporter
	switch os.Getenv("OTEL_TRACES_EXPORTER") {
	case "", "otlp":
		spanExporter, err = newOLTPExporter(ctx, cfg)
	default:
		spanExporter, err = autoexport.NewSpanExporter(ctx)
	}
	if err != nil {
		return fail(err)
	}
	shutdownFuncs = append(shutdownFuncs, spanExporter.Shutdown)

	logExporter, err := autoexport.NewLogExporter(ctx)
	if err != nil {
		return fail(err)
	}
	shutdownFuncs = append(shutdownFuncs, logExporter.Shutdown)

	// Set up trace provider.
	tracerProvider, err := newTraceProvider(spanExporter, res)
	if err != nil {
		return fail(err)
	}
	shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
	otel.SetTracerProvider(tracerProvider)

	// Set up log provider.
	logProvider := sdklog.NewLoggerProvider(
		sdklog.WithResource(res),
		sdklog.WithProcessor(
			sdklog.NewBatchProcessor(logExporter, sdklog.WithExportBufferSize(10)),
		),
	)
	logglobal.SetLoggerProvider(logProvider)
	shutdownFuncs = append(shutdownFuncs, func(ctx context.Context) error {
		// Report a failed flush instead of dropping it, but still shut the
		// provider down afterwards.
		flushErr := logProvider.ForceFlush(ctx)
		return errors.Join(flushErr, logProvider.Shutdown(ctx))
	})

	return shutdown, nil
}
// newOLTPExporter builds and starts an OTLP span exporter from cfg and the
// environment.
//
// The protocol is read from OTEL_EXPORTER_OTLP_TRACES_PROTOCOL, falling back to
// OTEL_EXPORTER_OTLP_PROTOCOL and then to "http/protobuf". "grpc" selects the
// gRPC client; "http/protobuf" and "http/json" both select the HTTP client.
// Any other value yields errInvalidOTLPProtocol. Both clients use gzip
// compression.
//
// A custom TLS configuration is applied when cfg provides a client certificate
// provider or a root CA pool. cfg.Endpoint and cfg.EndpointURL are passed to
// the client only when non-empty; when both are set, both options are applied,
// in that order.
//
// cfg must not be nil. On failure the returned exporter is a nil interface
// value.
func newOLTPExporter(ctx context.Context, cfg *TracerConfig) (sdktrace.SpanExporter, error) {
	log := logger.Setup()

	// Build a TLS config when either piece of TLS material is supplied, so a
	// custom CA pool is honoured even without client certificates.
	var tlsConfig *tls.Config
	if cfg.CertificateProvider != nil || cfg.RootCAs != nil {
		tlsConfig = &tls.Config{
			GetClientCertificate: cfg.CertificateProvider,
			RootCAs:              cfg.RootCAs,
		}
	}

	proto := os.Getenv(otelExporterOTLPTracesProtoEnvKey)
	if proto == "" {
		proto = os.Getenv(otelExporterOTLPProtoEnvKey)
	}
	// Fallback to default, http/protobuf.
	if proto == "" {
		proto = "http/protobuf"
	}

	var client otlptrace.Client

	switch proto {
	case "grpc":
		opts := []otlptracegrpc.Option{
			otlptracegrpc.WithCompressor("gzip"),
		}
		if tlsConfig != nil {
			opts = append(opts, otlptracegrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
		}
		if len(cfg.Endpoint) > 0 {
			log.Info("adding option", "Endpoint", cfg.Endpoint)
			opts = append(opts, otlptracegrpc.WithEndpoint(cfg.Endpoint))
		}
		if len(cfg.EndpointURL) > 0 {
			log.Info("adding option", "EndpointURL", cfg.EndpointURL)
			opts = append(opts, otlptracegrpc.WithEndpointURL(cfg.EndpointURL))
		}
		client = otlptracegrpc.NewClient(opts...)

	// NOTE(review): "http/json" is accepted here although the message of
	// errInvalidOTLPProtocol lists only grpc and http/protobuf — confirm
	// which of the two is intended.
	case "http/protobuf", "http/json":
		opts := []otlptracehttp.Option{
			otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
		}
		if tlsConfig != nil {
			opts = append(opts, otlptracehttp.WithTLSClientConfig(tlsConfig))
		}
		if len(cfg.Endpoint) > 0 {
			opts = append(opts, otlptracehttp.WithEndpoint(cfg.Endpoint))
		}
		if len(cfg.EndpointURL) > 0 {
			opts = append(opts, otlptracehttp.WithEndpointURL(cfg.EndpointURL))
		}
		client = otlptracehttp.NewClient(opts...)

	default:
		return nil, errInvalidOTLPProtocol
	}

	exporter, err := otlptrace.New(ctx, client)
	if err != nil {
		log.ErrorContext(ctx, "creating OTLP trace exporter", "err", err)
		// Return a literal nil: wrapping the nil *otlptrace.Exporter in the
		// interface would give callers a non-nil SpanExporter.
		return nil, err
	}
	return exporter, nil
}
// newTraceProvider creates a tracer provider that tags spans with res and
// sends them to traceExporter through a batching processor with a
// three-second batch timeout. The returned error is always nil.
func newTraceProvider(traceExporter sdktrace.SpanExporter, res *resource.Resource) (*sdktrace.TracerProvider, error) {
	withResource := sdktrace.WithResource(res)
	withBatcher := sdktrace.WithBatcher(
		traceExporter,
		sdktrace.WithBatchTimeout(3*time.Second),
	)

	provider := sdktrace.NewTracerProvider(withResource, withBatcher)
	return provider, nil
}
// newPropagator returns the composite text map propagator used for
// cross-service context propagation: trace context first, then baggage.
func newPropagator() propagation.TextMapPropagator {
	var (
		traceContext propagation.TraceContext
		baggage      propagation.Baggage
	)
	return propagation.NewCompositeTextMapPropagator(traceContext, baggage)
}