// Package tracing provides OpenTelemetry distributed tracing setup with OTLP export support. // // This package handles the complete OpenTelemetry SDK initialization including: // - Trace provider configuration with batching and resource detection // - Log provider setup for structured log export via OTLP // - Automatic resource discovery (service name, version, host, container, process info) // - Support for both gRPC and HTTP OTLP exporters with TLS configuration // - Propagation context setup for distributed tracing across services // - Graceful shutdown handling for all telemetry components // // The package supports various deployment scenarios: // - Development: Local OTLP collectors or observability backends // - Production: Secure OTLP export with mutual TLS authentication // - Container environments: Automatic container and Kubernetes resource detection // // Configuration is primarily handled via standard OpenTelemetry environment variables: // - OTEL_SERVICE_NAME: Service identification // - OTEL_EXPORTER_OTLP_PROTOCOL: Protocol selection (grpc, http/protobuf) // - OTEL_TRACES_EXPORTER: Exporter type (otlp, autoexport) // - OTEL_RESOURCE_ATTRIBUTES: Additional resource attributes // // Example usage: // // cfg := &tracing.TracerConfig{ // ServiceName: "my-service", // Environment: "production", // Endpoint: "https://otlp.example.com:4317", // } // shutdown, err := tracing.InitTracer(ctx, cfg) // if err != nil { // log.Fatal(err) // } // defer shutdown(ctx) package tracing // todo, review: // https://github.com/ttys3/tracing-go/blob/main/tracing.go#L136 import ( "context" "crypto/x509" "errors" "log/slog" "os" "slices" "time" "go.ntppool.org/common/internal/tracerconfig" "go.ntppool.org/common/version" "go.opentelemetry.io/contrib/exporters/autoexport" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/log/global" "go.opentelemetry.io/otel/propagation" sdklog "go.opentelemetry.io/otel/sdk/log" sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" "go.opentelemetry.io/otel/trace" ) const ( // svcNameKey is the environment variable name that Service Name information will be read from. svcNameKey = "OTEL_SERVICE_NAME" ) // createOTLPLogExporter creates an OTLP log exporter using the provided configuration. // This function is used as the LogExporterFactory for the tracerconfig bridge. func createOTLPLogExporter(ctx context.Context, cfg *tracerconfig.Config) (sdklog.Exporter, error) { return tracerconfig.CreateOTLPLogExporter(ctx, cfg) } // createOTLPMetricExporter creates an OTLP metric exporter using the provided configuration. // This function is used as the MetricExporterFactory for the tracerconfig bridge. func createOTLPMetricExporter(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) { return tracerconfig.CreateOTLPMetricExporter(ctx, cfg) } // createOTLPTraceExporter creates an OTLP trace exporter using the provided configuration. // This function is used as the TraceExporterFactory for the tracerconfig bridge. func createOTLPTraceExporter(ctx context.Context, cfg *tracerconfig.Config) (sdktrace.SpanExporter, error) { return tracerconfig.CreateOTLPTraceExporter(ctx, cfg) } // https://github.com/open-telemetry/opentelemetry-go/blob/main/exporters/otlp/otlptrace/otlptracehttp/example_test.go // TpShutdownFunc represents a function that gracefully shuts down telemetry providers. // It should be called during application shutdown to ensure all telemetry data is flushed // and exporters are properly closed. The context can be used to set shutdown timeouts. type TpShutdownFunc func(ctx context.Context) error // Tracer returns the configured OpenTelemetry tracer for the NTP Pool project. // This tracer should be used for creating spans and distributed tracing throughout // the application. It uses the global tracer provider set up by InitTracer/SetupSDK. func Tracer() trace.Tracer { traceProvider := otel.GetTracerProvider() return traceProvider.Tracer("ntppool-tracer") } // Start creates a new span with the given name and options using the configured tracer. // This is a convenience function that wraps the standard OpenTelemetry span creation. // It returns a new context containing the span and the span itself for further configuration. // // The returned context should be used for downstream operations to maintain trace correlation. func Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { return Tracer().Start(ctx, spanName, opts...) } // GetClientCertificate is an alias for the type defined in tracerconfig. // This maintains backward compatibility for existing code. type GetClientCertificate = tracerconfig.GetClientCertificate // TracerConfig provides configuration options for OpenTelemetry tracing setup. // It supplements standard OpenTelemetry environment variables with additional // NTP Pool-specific configuration including TLS settings for secure OTLP export. type TracerConfig struct { ServiceName string // Service name for resource identification (overrides OTEL_SERVICE_NAME) Environment string // Deployment environment (development, staging, production) Endpoint string // OTLP endpoint hostname/port (e.g., "otlp.example.com:4317") EndpointURL string // Complete OTLP endpoint URL (e.g., "https://otlp.example.com:4317/v1/traces") CertificateProvider GetClientCertificate // Client certificate provider for mutual TLS RootCAs *x509.CertPool // CA certificate pool for server verification } // InitTracer initializes the OpenTelemetry SDK with the provided configuration. // This is the main entry point for setting up distributed tracing in applications. // // The function configures trace and log providers, sets up OTLP exporters, // and returns a shutdown function that must be called during application termination. // // Returns a shutdown function and an error. The shutdown function should be called // with a context that has an appropriate timeout for graceful shutdown. func InitTracer(ctx context.Context, cfg *TracerConfig) (TpShutdownFunc, error) { // todo: setup environment from cfg return SetupSDK(ctx, cfg) } // SetupSDK performs the complete OpenTelemetry SDK initialization including resource // discovery, exporter configuration, provider setup, and shutdown function creation. // // The function automatically discovers system resources (service info, host, container, // process details) and configures both trace and log exporters. It supports multiple // OTLP protocols (gRPC, HTTP) and handles TLS configuration for secure deployments. // // The returned shutdown function coordinates graceful shutdown of all telemetry // components in the reverse order of their initialization. func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, err error) { if cfg == nil { cfg = &TracerConfig{} } // Store configuration for use by logger and metrics packages via bridge bridgeConfig := &tracerconfig.Config{ ServiceName: cfg.ServiceName, Environment: cfg.Environment, Endpoint: cfg.Endpoint, EndpointURL: cfg.EndpointURL, CertificateProvider: cfg.CertificateProvider, RootCAs: cfg.RootCAs, } tracerconfig.Store(ctx, bridgeConfig, createOTLPLogExporter, createOTLPMetricExporter, createOTLPTraceExporter) log := slog.Default() if serviceName := os.Getenv(svcNameKey); len(serviceName) == 0 { if len(cfg.ServiceName) > 0 { os.Setenv(svcNameKey, cfg.ServiceName) } } resources := []resource.Option{ resource.WithFromEnv(), // Discover and provide attributes from OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME environment variables. resource.WithTelemetrySDK(), // Discover and provide information about the OpenTelemetry SDK used. resource.WithProcess(), // Discover and provide process information. resource.WithOS(), // Discover and provide OS information. resource.WithContainer(), // Discover and provide container information. resource.WithHost(), // Discover and provide host information. // set above via os.Setenv() for WithFromEnv to find // resource.WithAttributes(semconv.ServiceNameKey.String(cfg.ServiceName)), resource.WithAttributes(semconv.ServiceVersionKey.String(version.Version())), } if len(cfg.Environment) > 0 { resources = append(resources, resource.WithAttributes(attribute.String("environment", cfg.Environment)), ) } res, err := resource.New( context.Background(), resources..., ) if errors.Is(err, resource.ErrPartialResource) || errors.Is(err, resource.ErrSchemaURLConflict) { log.Warn("otel resource setup", "err", err) // Log non-fatal issues. } else if err != nil { log.Error("otel resource setup", "err", err) return } var shutdownFuncs []func(context.Context) error shutdown = func(ctx context.Context) error { // Force flush the global logger provider before shutting down anything else if loggerProvider := global.GetLoggerProvider(); loggerProvider != nil { if sdkProvider, ok := loggerProvider.(*sdklog.LoggerProvider); ok { if flushErr := sdkProvider.ForceFlush(ctx); flushErr != nil { log.Warn("logger provider force flush failed", "err", flushErr) } } } var err error // need to shutdown the providers first, // exporters after which is the opposite // order they are setup. slices.Reverse(shutdownFuncs) for _, fn := range shutdownFuncs { err = errors.Join(err, fn(ctx)) } shutdownFuncs = nil if err != nil { log.Warn("shutdown returned errors", "err", err) } return err } // handleErr calls shutdown for cleanup and makes sure that all errors are returned. handleErr := func(inErr error) { err = errors.Join(inErr, shutdown(ctx)) } prop := newPropagator() otel.SetTextMapPropagator(prop) var spanExporter sdktrace.SpanExporter switch os.Getenv("OTEL_TRACES_EXPORTER") { case "": spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig) case "otlp": spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig) default: // log.Debug("OTEL_TRACES_EXPORTER", "fallback", os.Getenv("OTEL_TRACES_EXPORTER")) spanExporter, err = autoexport.NewSpanExporter(ctx) } if err != nil { handleErr(err) return } shutdownFuncs = append(shutdownFuncs, spanExporter.Shutdown) // Set up trace provider. tracerProvider, err := newTraceProvider(spanExporter, res) if err != nil { handleErr(err) return } shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown) otel.SetTracerProvider(tracerProvider) if err != nil { handleErr(err) return } return } func newTraceProvider(traceExporter sdktrace.SpanExporter, res *resource.Resource) (*sdktrace.TracerProvider, error) { traceProvider := sdktrace.NewTracerProvider( sdktrace.WithResource(res), sdktrace.WithBatcher(traceExporter, sdktrace.WithBatchTimeout(time.Second*3), ), ) return traceProvider, nil } func newPropagator() propagation.TextMapPropagator { return propagation.NewCompositeTextMapPropagator( propagation.TraceContext{}, propagation.Baggage{}, ) }