Add BearerTokenFunc to support dynamic bearer token authentication for OTLP exporters. Tokens are injected per-request via gRPC PerRPCCredentials and HTTP custom RoundTripper. - Add BearerTokenFunc type and Config field in tracerconfig - Implement bearerCredentials (gRPC) and bearerRoundTripper (HTTP) - Wire bearer auth into all exporter creation functions - Add getHTTPClient helper for DRY HTTP client configuration - Upgrade OpenTelemetry SDK to v1.39.0 for WithHTTPClient support
295 lines
11 KiB
Go
295 lines
11 KiB
Go
// Package tracing provides OpenTelemetry distributed tracing setup with OTLP export support.
|
|
//
|
|
// This package handles the complete OpenTelemetry SDK initialization including:
|
|
// - Trace provider configuration with batching and resource detection
|
|
// - Log provider setup for structured log export via OTLP
|
|
// - Automatic resource discovery (service name, version, host, container, process info)
|
|
// - Support for both gRPC and HTTP OTLP exporters with TLS configuration
|
|
// - Propagation context setup for distributed tracing across services
|
|
// - Graceful shutdown handling for all telemetry components
|
|
//
|
|
// The package supports various deployment scenarios:
|
|
// - Development: Local OTLP collectors or observability backends
|
|
// - Production: Secure OTLP export with mutual TLS authentication
|
|
// - Container environments: Automatic container and Kubernetes resource detection
|
|
//
|
|
// Configuration is primarily handled via standard OpenTelemetry environment variables:
|
|
// - OTEL_SERVICE_NAME: Service identification
|
|
// - OTEL_EXPORTER_OTLP_PROTOCOL: Protocol selection (grpc, http/protobuf)
|
|
// - OTEL_TRACES_EXPORTER: Exporter type (otlp, autoexport)
|
|
// - OTEL_RESOURCE_ATTRIBUTES: Additional resource attributes
|
|
//
|
|
// Example usage:
|
|
//
|
|
// cfg := &tracing.TracerConfig{
|
|
// ServiceName: "my-service",
|
|
// Environment: "production",
|
|
// Endpoint: "otlp.example.com:4317",
|
|
// }
|
|
// shutdown, err := tracing.InitTracer(ctx, cfg)
|
|
// if err != nil {
|
|
// log.Fatal(err)
|
|
// }
|
|
// defer shutdown(ctx)
|
|
package tracing
|
|
|
|
// todo, review:
|
|
// https://github.com/ttys3/tracing-go/blob/main/tracing.go#L136
|
|
|
|
import (
|
|
"context"
|
|
"crypto/x509"
|
|
"errors"
|
|
"log/slog"
|
|
"os"
|
|
"slices"
|
|
"time"
|
|
|
|
"go.ntppool.org/common/internal/tracerconfig"
|
|
"go.ntppool.org/common/version"
|
|
|
|
"go.opentelemetry.io/contrib/exporters/autoexport"
|
|
"go.opentelemetry.io/otel"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/log/global"
|
|
"go.opentelemetry.io/otel/propagation"
|
|
sdklog "go.opentelemetry.io/otel/sdk/log"
|
|
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
|
"go.opentelemetry.io/otel/sdk/resource"
|
|
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
|
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
|
"go.opentelemetry.io/otel/trace"
|
|
)
|
|
|
|
// svcNameKey names the environment variable from which the service name is read.
const svcNameKey = "OTEL_SERVICE_NAME"
|
|
|
|
// createOTLPLogExporter creates an OTLP log exporter using the provided configuration.
|
|
// This function is used as the LogExporterFactory for the tracerconfig bridge.
|
|
func createOTLPLogExporter(ctx context.Context, cfg *tracerconfig.Config) (sdklog.Exporter, error) {
|
|
return tracerconfig.CreateOTLPLogExporter(ctx, cfg)
|
|
}
|
|
|
|
// createOTLPMetricExporter creates an OTLP metric exporter using the provided configuration.
|
|
// This function is used as the MetricExporterFactory for the tracerconfig bridge.
|
|
func createOTLPMetricExporter(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) {
|
|
return tracerconfig.CreateOTLPMetricExporter(ctx, cfg)
|
|
}
|
|
|
|
// createOTLPTraceExporter creates an OTLP trace exporter using the provided configuration.
|
|
// This function is used as the TraceExporterFactory for the tracerconfig bridge.
|
|
func createOTLPTraceExporter(ctx context.Context, cfg *tracerconfig.Config) (sdktrace.SpanExporter, error) {
|
|
return tracerconfig.CreateOTLPTraceExporter(ctx, cfg)
|
|
}
|
|
|
|
// https://github.com/open-telemetry/opentelemetry-go/blob/main/exporters/otlp/otlptrace/otlptracehttp/example_test.go
|
|
|
|
// TpShutdownFunc is the signature of the function returned by InitTracer and
// SetupSDK for tearing the telemetry providers down again.
//
// Call it while the application is shutting down so that buffered telemetry
// is flushed and the exporters are closed. The context bounds how long the
// shutdown may take.
type TpShutdownFunc func(ctx context.Context) error
|
|
|
|
// Tracer returns the configured OpenTelemetry tracer for the NTP Pool project.
|
|
// This tracer should be used for creating spans and distributed tracing throughout
|
|
// the application. It uses the global tracer provider set up by InitTracer/SetupSDK.
|
|
func Tracer() trace.Tracer {
|
|
traceProvider := otel.GetTracerProvider()
|
|
return traceProvider.Tracer("ntppool-tracer")
|
|
}
|
|
|
|
// Start creates a new span with the given name and options using the configured tracer.
|
|
// This is a convenience function that wraps the standard OpenTelemetry span creation.
|
|
// It returns a new context containing the span and the span itself for further configuration.
|
|
//
|
|
// The returned context should be used for downstream operations to maintain trace correlation.
|
|
func Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
|
|
return Tracer().Start(ctx, spanName, opts...)
|
|
}
|
|
|
|
// GetClientCertificate is an alias for the type defined in tracerconfig.
|
|
// This maintains backward compatibility for existing code.
|
|
type GetClientCertificate = tracerconfig.GetClientCertificate
|
|
|
|
// BearerTokenFunc is an alias for the type defined in tracerconfig.
|
|
// It retrieves a bearer token for OTLP authentication.
|
|
type BearerTokenFunc = tracerconfig.BearerTokenFunc
|
|
|
|
// TracerConfig provides configuration options for OpenTelemetry tracing setup.
|
|
// It supplements standard OpenTelemetry environment variables with additional
|
|
// NTP Pool-specific configuration including TLS settings for secure OTLP export.
|
|
type TracerConfig struct {
|
|
ServiceName string // Service name for resource identification (overrides OTEL_SERVICE_NAME)
|
|
Environment string // Deployment environment (development, staging, production)
|
|
Endpoint string // OTLP endpoint hostname/port (e.g., "otlp.example.com:4317")
|
|
EndpointURL string // Complete OTLP endpoint URL (e.g., "https://otlp.example.com:4317/v1/traces")
|
|
|
|
CertificateProvider GetClientCertificate // Client certificate provider for mutual TLS
|
|
RootCAs *x509.CertPool // CA certificate pool for server verification
|
|
BearerTokenFunc BearerTokenFunc // Token provider for bearer authentication
|
|
}
|
|
|
|
// InitTracer initializes the OpenTelemetry SDK with the provided configuration.
|
|
// This is the main entry point for setting up distributed tracing in applications.
|
|
//
|
|
// The function configures trace and log providers, sets up OTLP exporters,
|
|
// and returns a shutdown function that must be called during application termination.
|
|
//
|
|
// Returns a shutdown function and an error. The shutdown function should be called
|
|
// with a context that has an appropriate timeout for graceful shutdown.
|
|
func InitTracer(ctx context.Context, cfg *TracerConfig) (TpShutdownFunc, error) {
|
|
// todo: setup environment from cfg
|
|
return SetupSDK(ctx, cfg)
|
|
}
|
|
|
|
// SetupSDK performs the complete OpenTelemetry SDK initialization including resource
|
|
// discovery, exporter configuration, provider setup, and shutdown function creation.
|
|
//
|
|
// The function automatically discovers system resources (service info, host, container,
|
|
// process details) and configures both trace and log exporters. It supports multiple
|
|
// OTLP protocols (gRPC, HTTP) and handles TLS configuration for secure deployments.
|
|
//
|
|
// The returned shutdown function coordinates graceful shutdown of all telemetry
|
|
// components in the reverse order of their initialization.
|
|
func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, err error) {
|
|
if cfg == nil {
|
|
cfg = &TracerConfig{}
|
|
}
|
|
|
|
// Store configuration for use by logger and metrics packages via bridge
|
|
bridgeConfig := &tracerconfig.Config{
|
|
ServiceName: cfg.ServiceName,
|
|
Environment: cfg.Environment,
|
|
Endpoint: cfg.Endpoint,
|
|
EndpointURL: cfg.EndpointURL,
|
|
CertificateProvider: cfg.CertificateProvider,
|
|
RootCAs: cfg.RootCAs,
|
|
BearerTokenFunc: cfg.BearerTokenFunc,
|
|
}
|
|
tracerconfig.Store(ctx, bridgeConfig, createOTLPLogExporter, createOTLPMetricExporter, createOTLPTraceExporter)
|
|
|
|
log := slog.Default()
|
|
|
|
if serviceName := os.Getenv(svcNameKey); len(serviceName) == 0 {
|
|
if len(cfg.ServiceName) > 0 {
|
|
os.Setenv(svcNameKey, cfg.ServiceName)
|
|
}
|
|
}
|
|
|
|
resources := []resource.Option{
|
|
resource.WithFromEnv(), // Discover and provide attributes from OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME environment variables.
|
|
resource.WithTelemetrySDK(), // Discover and provide information about the OpenTelemetry SDK used.
|
|
resource.WithProcess(), // Discover and provide process information.
|
|
resource.WithOS(), // Discover and provide OS information.
|
|
resource.WithContainer(), // Discover and provide container information.
|
|
resource.WithHost(), // Discover and provide host information.
|
|
|
|
// set above via os.Setenv() for WithFromEnv to find
|
|
// resource.WithAttributes(semconv.ServiceNameKey.String(cfg.ServiceName)),
|
|
|
|
resource.WithAttributes(semconv.ServiceVersionKey.String(version.Version())),
|
|
}
|
|
|
|
if len(cfg.Environment) > 0 {
|
|
resources = append(resources,
|
|
resource.WithAttributes(attribute.String("environment", cfg.Environment)),
|
|
)
|
|
}
|
|
|
|
res, err := resource.New(
|
|
context.Background(),
|
|
resources...,
|
|
)
|
|
if errors.Is(err, resource.ErrPartialResource) || errors.Is(err, resource.ErrSchemaURLConflict) {
|
|
log.Warn("otel resource setup", "err", err) // Log non-fatal issues.
|
|
} else if err != nil {
|
|
log.Error("otel resource setup", "err", err)
|
|
return
|
|
}
|
|
|
|
var shutdownFuncs []func(context.Context) error
|
|
shutdown = func(ctx context.Context) error {
|
|
// Force flush the global logger provider before shutting down anything else
|
|
if loggerProvider := global.GetLoggerProvider(); loggerProvider != nil {
|
|
if sdkProvider, ok := loggerProvider.(*sdklog.LoggerProvider); ok {
|
|
if flushErr := sdkProvider.ForceFlush(ctx); flushErr != nil {
|
|
log.Warn("logger provider force flush failed", "err", flushErr)
|
|
}
|
|
}
|
|
}
|
|
|
|
var err error
|
|
// need to shutdown the providers first,
|
|
// exporters after which is the opposite
|
|
// order they are setup.
|
|
slices.Reverse(shutdownFuncs)
|
|
for _, fn := range shutdownFuncs {
|
|
err = errors.Join(err, fn(ctx))
|
|
}
|
|
shutdownFuncs = nil
|
|
if err != nil {
|
|
log.Warn("shutdown returned errors", "err", err)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// handleErr calls shutdown for cleanup and makes sure that all errors are returned.
|
|
handleErr := func(inErr error) {
|
|
err = errors.Join(inErr, shutdown(ctx))
|
|
}
|
|
|
|
prop := newPropagator()
|
|
otel.SetTextMapPropagator(prop)
|
|
|
|
var spanExporter sdktrace.SpanExporter
|
|
|
|
switch os.Getenv("OTEL_TRACES_EXPORTER") {
|
|
case "":
|
|
spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig)
|
|
case "otlp":
|
|
spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig)
|
|
default:
|
|
// log.Debug("OTEL_TRACES_EXPORTER", "fallback", os.Getenv("OTEL_TRACES_EXPORTER"))
|
|
spanExporter, err = autoexport.NewSpanExporter(ctx)
|
|
}
|
|
if err != nil {
|
|
handleErr(err)
|
|
return
|
|
}
|
|
shutdownFuncs = append(shutdownFuncs, spanExporter.Shutdown)
|
|
|
|
// Set up trace provider.
|
|
tracerProvider, err := newTraceProvider(spanExporter, res)
|
|
if err != nil {
|
|
handleErr(err)
|
|
return
|
|
}
|
|
shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
|
|
otel.SetTracerProvider(tracerProvider)
|
|
|
|
if err != nil {
|
|
handleErr(err)
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func newTraceProvider(traceExporter sdktrace.SpanExporter, res *resource.Resource) (*sdktrace.TracerProvider, error) {
|
|
traceProvider := sdktrace.NewTracerProvider(
|
|
sdktrace.WithResource(res),
|
|
sdktrace.WithBatcher(traceExporter,
|
|
sdktrace.WithBatchTimeout(time.Second*3),
|
|
),
|
|
)
|
|
return traceProvider, nil
|
|
}
|
|
|
|
func newPropagator() propagation.TextMapPropagator {
|
|
return propagation.NewCompositeTextMapPropagator(
|
|
propagation.TraceContext{},
|
|
propagation.Baggage{},
|
|
)
|
|
}
|