From c6230be91e7fc05d7d99f2b7078c9ac168d033a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ask=20Bj=C3=B8rn=20Hansen?=
Date: Sat, 2 Aug 2025 09:29:27 -0700
Subject: [PATCH] feat(metrics): add OTLP metrics support with centralized config

- Create new metrics/ package for OpenTelemetry-native metrics with OTLP export
- Refactor OTLP configuration to internal/tracerconfig/ to eliminate code duplication
- Add consistent retry configuration across all HTTP OTLP exporters
- Add configuration validation and improved error messages
- Include test coverage for all new functionality
- Make OpenTelemetry metrics dependencies explicit in go.mod

Designed for new applications requiring structured metrics export to
observability backends via OTLP protocol.
---
 go.mod                               |  12 +-
 internal/tracerconfig/config.go      | 330 ++++++++++++++++++-
 internal/tracerconfig/config_test.go | 474 +++++++++++++++++++++++++++
 metrics/metrics.go                   | 122 +++++++
 metrics/metrics_test.go              | 296 +++++++++++++++++
 metricsserver/metrics_test.go        |  82 ++---
 tracing/tracing.go                   | 182 ++--------
 7 files changed, 1274 insertions(+), 224 deletions(-)
 create mode 100644 internal/tracerconfig/config_test.go
 create mode 100644 metrics/metrics.go
 create mode 100644 metrics/metrics_test.go

diff --git a/go.mod b/go.mod
index 48d641d..0a66669 100644
--- a/go.mod
+++ b/go.mod
@@ -19,12 +19,18 @@ require (
 	go.opentelemetry.io/contrib/exporters/autoexport v0.58.0
 	go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.58.0
 	go.opentelemetry.io/otel v1.33.0
+	go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.9.0
+	go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.9.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.33.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.33.0
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0
 	go.opentelemetry.io/otel/log v0.9.0
+	go.opentelemetry.io/otel/metric v1.33.0
 	go.opentelemetry.io/otel/sdk v1.33.0
 	go.opentelemetry.io/otel/sdk/log v0.9.0
+	go.opentelemetry.io/otel/sdk/metric v1.33.0
 	go.opentelemetry.io/otel/trace v1.33.0
 	golang.org/x/mod v0.22.0
 	golang.org/x/net v0.33.0
@@ -59,16 +65,10 @@ (
 	github.com/valyala/fasttemplate v1.2.2 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/bridges/prometheus v0.58.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.9.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.9.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.33.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.33.0 // indirect
 	go.opentelemetry.io/otel/exporters/prometheus v0.55.0 // indirect
 	go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.9.0 // indirect
 	go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.33.0 // indirect
 	go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.33.0 // indirect
-	go.opentelemetry.io/otel/metric v1.33.0 // indirect
-	go.opentelemetry.io/otel/sdk/metric v1.33.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.4.0 // indirect
 	golang.org/x/crypto v0.31.0 // indirect
 	golang.org/x/sys v0.28.0 // indirect
diff --git a/internal/tracerconfig/config.go b/internal/tracerconfig/config.go
index 16fd5a0..4318f7d 100644
--- a/internal/tracerconfig/config.go
+++
b/internal/tracerconfig/config.go @@ -7,11 +7,103 @@ import ( "context" "crypto/tls" "crypto/x509" + "errors" + "fmt" + "net/url" + "os" + "strings" "sync" + "time" + "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" sdklog "go.opentelemetry.io/otel/sdk/log" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "google.golang.org/grpc/credentials" ) +const ( + otelExporterOTLPProtoEnvKey = "OTEL_EXPORTER_OTLP_PROTOCOL" + otelExporterOTLPTracesProtoEnvKey = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL" + otelExporterOTLPLogsProtoEnvKey = "OTEL_EXPORTER_OTLP_LOGS_PROTOCOL" + otelExporterOTLPMetricsProtoEnvKey = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL" +) + +var errInvalidOTLPProtocol = errors.New("invalid OTLP protocol - should be one of ['grpc', 'http/protobuf']") + +// newInvalidProtocolError creates a specific error message for invalid protocols +func newInvalidProtocolError(protocol, signalType string) error { + return fmt.Errorf("invalid OTLP protocol '%s' for %s - should be one of ['grpc', 'http/protobuf', 'http/json']", protocol, signalType) +} + +// Validate checks the configuration for common errors and inconsistencies +func (c *Config) Validate() error { + var errs []error + + // Check that both Endpoint and EndpointURL are not specified + if c.Endpoint != "" && c.EndpointURL != "" { + errs = append(errs, errors.New("cannot specify both Endpoint and EndpointURL - use one or the other")) + } + + // Validate EndpointURL format if specified + if c.EndpointURL != "" { + if _, err := url.Parse(c.EndpointURL); err != nil { + errs = append(errs, fmt.Errorf("invalid EndpointURL format: %w", err)) + } + } + + // Validate Endpoint format if specified + if c.Endpoint != "" { + // Basic validation - should not contain protocol scheme + if strings.Contains(c.Endpoint, "://") { + errs = append(errs, errors.New("Endpoint should not include protocol scheme (use EndpointURL for full URLs)")) + } + // Should not be empty after trimming whitespace + if strings.TrimSpace(c.Endpoint) == "" { + errs = append(errs, errors.New("Endpoint cannot be empty or whitespace")) + } + } + + // Validate TLS configuration consistency + if c.CertificateProvider != nil && c.RootCAs == nil { + // This is just a warning - client cert without custom CAs is valid + // but might indicate a configuration issue + } + + // Validate service name if specified + if c.ServiceName != "" && strings.TrimSpace(c.ServiceName) == "" { + errs = append(errs, errors.New("ServiceName cannot be empty or whitespace")) + } + + // Combine all errors + if len(errs) > 0 { + var errMsgs []string + for _, err := range errs { + errMsgs = append(errMsgs, err.Error()) + } + return fmt.Errorf("configuration validation failed: %s", strings.Join(errMsgs, "; ")) + } + + return nil +} + +// ValidateAndStore validates the configuration before storing it +func ValidateAndStore(ctx context.Context, cfg *Config, logFactory LogExporterFactory, metricFactory MetricExporterFactory, traceFactory TraceExporterFactory) error { + if cfg != nil { + if err := cfg.Validate(); err != nil { + return err + } + } + Store(ctx, cfg, logFactory, 
metricFactory, traceFactory) + return nil +} + // GetClientCertificate defines a function type for providing client certificates for mutual TLS. // This is used when exporting telemetry data to secured OTLP endpoints that require // client certificate authentication. @@ -29,41 +121,76 @@ type Config struct { RootCAs *x509.CertPool // CA certificate pool for server verification } -// ExporterFactory creates an OTLP log exporter using the provided configuration. +// LogExporterFactory creates an OTLP log exporter using the provided configuration. // This allows the logger package to create exporters without importing the tracing package. -type ExporterFactory func(context.Context, *Config) (sdklog.Exporter, error) +type LogExporterFactory func(context.Context, *Config) (sdklog.Exporter, error) + +// MetricExporterFactory creates an OTLP metric exporter using the provided configuration. +// This allows the metrics package to create exporters without importing the tracing package. +type MetricExporterFactory func(context.Context, *Config) (sdkmetric.Exporter, error) + +// TraceExporterFactory creates an OTLP trace exporter using the provided configuration. +// This allows for consistent trace exporter creation across packages. +type TraceExporterFactory func(context.Context, *Config) (sdktrace.SpanExporter, error) // Global state for sharing configuration between packages var ( - globalConfig *Config - globalContext context.Context - exporterFactory ExporterFactory - configMu sync.RWMutex + globalConfig *Config + globalContext context.Context + logExporterFactory LogExporterFactory + metricExporterFactory MetricExporterFactory + traceExporterFactory TraceExporterFactory + configMu sync.RWMutex ) -// Store saves the tracer configuration and exporter factory for use by other packages. +// Store saves the tracer configuration and exporter factories for use by other packages. // This should be called by the tracing package during initialization. -func Store(ctx context.Context, cfg *Config, factory ExporterFactory) { +func Store(ctx context.Context, cfg *Config, logFactory LogExporterFactory, metricFactory MetricExporterFactory, traceFactory TraceExporterFactory) { configMu.Lock() defer configMu.Unlock() globalConfig = cfg globalContext = ctx - exporterFactory = factory + logExporterFactory = logFactory + metricExporterFactory = metricFactory + traceExporterFactory = traceFactory } -// Get returns the stored tracer configuration, context, and exporter factory. +// GetLogExporter returns the stored configuration and log exporter factory. // Returns nil values if no configuration has been stored yet. -func Get() (*Config, context.Context, ExporterFactory) { +func GetLogExporter() (*Config, context.Context, LogExporterFactory) { configMu.RLock() defer configMu.RUnlock() - return globalConfig, globalContext, exporterFactory + return globalConfig, globalContext, logExporterFactory +} + +// GetMetricExporter returns the stored configuration and metric exporter factory. +// Returns nil values if no configuration has been stored yet. +func GetMetricExporter() (*Config, context.Context, MetricExporterFactory) { + configMu.RLock() + defer configMu.RUnlock() + return globalConfig, globalContext, metricExporterFactory +} + +// GetTraceExporter returns the stored configuration and trace exporter factory. +// Returns nil values if no configuration has been stored yet. 
+func GetTraceExporter() (*Config, context.Context, TraceExporterFactory) { + configMu.RLock() + defer configMu.RUnlock() + return globalConfig, globalContext, traceExporterFactory +} + +// Get returns the stored tracer configuration, context, and log exporter factory. +// This maintains backward compatibility for the logger package. +// Returns nil values if no configuration has been stored yet. +func Get() (*Config, context.Context, LogExporterFactory) { + return GetLogExporter() } // IsConfigured returns true if tracer configuration has been stored. func IsConfigured() bool { configMu.RLock() defer configMu.RUnlock() - return globalConfig != nil && globalContext != nil && exporterFactory != nil + return globalConfig != nil && globalContext != nil } // Clear removes the stored configuration. This is primarily useful for testing. @@ -72,5 +199,180 @@ func Clear() { defer configMu.Unlock() globalConfig = nil globalContext = nil - exporterFactory = nil + logExporterFactory = nil + metricExporterFactory = nil + traceExporterFactory = nil +} + +// getTLSConfig creates a TLS configuration from the provided Config. +func getTLSConfig(cfg *Config) *tls.Config { + if cfg.CertificateProvider == nil { + return nil + } + return &tls.Config{ + GetClientCertificate: cfg.CertificateProvider, + RootCAs: cfg.RootCAs, + } +} + +// getProtocol determines the OTLP protocol to use for the given signal type. +// It follows OpenTelemetry environment variable precedence. +func getProtocol(signalSpecificEnv string) string { + proto := os.Getenv(signalSpecificEnv) + if proto == "" { + proto = os.Getenv(otelExporterOTLPProtoEnvKey) + } + // Fallback to default, http/protobuf. + if proto == "" { + proto = "http/protobuf" + } + return proto +} + +// CreateOTLPLogExporter creates an OTLP log exporter using the provided configuration. +func CreateOTLPLogExporter(ctx context.Context, cfg *Config) (sdklog.Exporter, error) { + tlsConfig := getTLSConfig(cfg) + proto := getProtocol(otelExporterOTLPLogsProtoEnvKey) + + switch proto { + case "grpc": + opts := []otlploggrpc.Option{ + otlploggrpc.WithCompressor("gzip"), + } + if tlsConfig != nil { + opts = append(opts, otlploggrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig))) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlploggrpc.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlploggrpc.WithEndpointURL(cfg.EndpointURL)) + } + + return otlploggrpc.New(ctx, opts...) + case "http/protobuf", "http/json": + opts := []otlploghttp.Option{ + otlploghttp.WithCompression(otlploghttp.GzipCompression), + } + if tlsConfig != nil { + opts = append(opts, otlploghttp.WithTLSClientConfig(tlsConfig)) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlploghttp.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlploghttp.WithEndpointURL(cfg.EndpointURL)) + } + + opts = append(opts, otlploghttp.WithRetry(otlploghttp.RetryConfig{ + Enabled: true, + InitialInterval: 3 * time.Second, + MaxInterval: 60 * time.Second, + MaxElapsedTime: 5 * time.Minute, + })) + + return otlploghttp.New(ctx, opts...) + default: + return nil, newInvalidProtocolError(proto, "logs") + } +} + +// CreateOTLPMetricExporter creates an OTLP metric exporter using the provided configuration. 
+func CreateOTLPMetricExporter(ctx context.Context, cfg *Config) (sdkmetric.Exporter, error) { + tlsConfig := getTLSConfig(cfg) + proto := getProtocol(otelExporterOTLPMetricsProtoEnvKey) + + switch proto { + case "grpc": + opts := []otlpmetricgrpc.Option{ + otlpmetricgrpc.WithCompressor("gzip"), + } + if tlsConfig != nil { + opts = append(opts, otlpmetricgrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig))) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlpmetricgrpc.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlpmetricgrpc.WithEndpointURL(cfg.EndpointURL)) + } + + return otlpmetricgrpc.New(ctx, opts...) + case "http/protobuf", "http/json": + opts := []otlpmetrichttp.Option{ + otlpmetrichttp.WithCompression(otlpmetrichttp.GzipCompression), + } + if tlsConfig != nil { + opts = append(opts, otlpmetrichttp.WithTLSClientConfig(tlsConfig)) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlpmetrichttp.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlpmetrichttp.WithEndpointURL(cfg.EndpointURL)) + } + + opts = append(opts, otlpmetrichttp.WithRetry(otlpmetrichttp.RetryConfig{ + Enabled: true, + InitialInterval: 3 * time.Second, + MaxInterval: 60 * time.Second, + MaxElapsedTime: 5 * time.Minute, + })) + + return otlpmetrichttp.New(ctx, opts...) + default: + return nil, newInvalidProtocolError(proto, "metrics") + } +} + +// CreateOTLPTraceExporter creates an OTLP trace exporter using the provided configuration. +func CreateOTLPTraceExporter(ctx context.Context, cfg *Config) (sdktrace.SpanExporter, error) { + tlsConfig := getTLSConfig(cfg) + proto := getProtocol(otelExporterOTLPTracesProtoEnvKey) + + var client otlptrace.Client + + switch proto { + case "grpc": + opts := []otlptracegrpc.Option{ + otlptracegrpc.WithCompressor("gzip"), + } + if tlsConfig != nil { + opts = append(opts, otlptracegrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig))) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlptracegrpc.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlptracegrpc.WithEndpointURL(cfg.EndpointURL)) + } + + client = otlptracegrpc.NewClient(opts...) + case "http/protobuf", "http/json": + opts := []otlptracehttp.Option{ + otlptracehttp.WithCompression(otlptracehttp.GzipCompression), + } + if tlsConfig != nil { + opts = append(opts, otlptracehttp.WithTLSClientConfig(tlsConfig)) + } + if len(cfg.Endpoint) > 0 { + opts = append(opts, otlptracehttp.WithEndpoint(cfg.Endpoint)) + } + if len(cfg.EndpointURL) > 0 { + opts = append(opts, otlptracehttp.WithEndpointURL(cfg.EndpointURL)) + } + + opts = append(opts, otlptracehttp.WithRetry(otlptracehttp.RetryConfig{ + Enabled: true, + InitialInterval: 3 * time.Second, + MaxInterval: 60 * time.Second, + MaxElapsedTime: 5 * time.Minute, + })) + + client = otlptracehttp.NewClient(opts...) 
+ default: + return nil, newInvalidProtocolError(proto, "traces") + } + + return otlptrace.New(ctx, client) } diff --git a/internal/tracerconfig/config_test.go b/internal/tracerconfig/config_test.go new file mode 100644 index 0000000..7e78d61 --- /dev/null +++ b/internal/tracerconfig/config_test.go @@ -0,0 +1,474 @@ +package tracerconfig + +import ( + "context" + "crypto/tls" + "crypto/x509" + "os" + "strings" + "sync" + "testing" + "time" + + sdklog "go.opentelemetry.io/otel/sdk/log" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +func TestStore_And_Retrieve(t *testing.T) { + // Clear any existing configuration + Clear() + + ctx := context.Background() + config := &Config{ + ServiceName: "test-service", + Environment: "test", + Endpoint: "localhost:4317", + } + + // Create mock factories + logFactory := func(context.Context, *Config) (sdklog.Exporter, error) { return nil, nil } + metricFactory := func(context.Context, *Config) (sdkmetric.Exporter, error) { return nil, nil } + traceFactory := func(context.Context, *Config) (sdktrace.SpanExporter, error) { return nil, nil } + + // Store configuration + Store(ctx, config, logFactory, metricFactory, traceFactory) + + // Test IsConfigured + if !IsConfigured() { + t.Error("IsConfigured() should return true after Store()") + } + + // Test GetLogExporter + cfg, ctx2, factory := GetLogExporter() + if cfg == nil || ctx2 == nil || factory == nil { + t.Error("GetLogExporter() should return non-nil values") + } + if cfg.ServiceName != "test-service" { + t.Errorf("Expected ServiceName 'test-service', got '%s'", cfg.ServiceName) + } + + // Test GetMetricExporter + cfg, ctx3, metricFact := GetMetricExporter() + if cfg == nil || ctx3 == nil || metricFact == nil { + t.Error("GetMetricExporter() should return non-nil values") + } + + // Test GetTraceExporter + cfg, ctx4, traceFact := GetTraceExporter() + if cfg == nil || ctx4 == nil || traceFact == nil { + t.Error("GetTraceExporter() should return non-nil values") + } + + // Test backward compatibility Get() + cfg, ctx5, logFact := Get() + if cfg == nil || ctx5 == nil || logFact == nil { + t.Error("Get() should return non-nil values for backward compatibility") + } +} + +func TestClear(t *testing.T) { + // Store some configuration first + ctx := context.Background() + config := &Config{ServiceName: "test"} + Store(ctx, config, nil, nil, nil) + + if !IsConfigured() { + t.Error("Should be configured before Clear()") + } + + // Clear configuration + Clear() + + if IsConfigured() { + t.Error("Should not be configured after Clear()") + } + + // All getters should return nil + cfg, ctx2, factory := GetLogExporter() + if cfg != nil || ctx2 != nil || factory != nil { + t.Error("GetLogExporter() should return nil values after Clear()") + } +} + +func TestConcurrentAccess(t *testing.T) { + Clear() + + ctx := context.Background() + config := &Config{ServiceName: "concurrent-test"} + + var wg sync.WaitGroup + const numGoroutines = 10 + + // Test concurrent Store and Get operations + wg.Add(numGoroutines * 2) + + // Concurrent Store operations + for i := 0; i < numGoroutines; i++ { + go func() { + defer wg.Done() + Store(ctx, config, nil, nil, nil) + }() + } + + // Concurrent Get operations + for i := 0; i < numGoroutines; i++ { + go func() { + defer wg.Done() + IsConfigured() + GetLogExporter() + GetMetricExporter() + GetTraceExporter() + }() + } + + wg.Wait() + + // Should be configured after all operations + if !IsConfigured() { + t.Error("Should be configured 
after concurrent operations") + } +} + +func TestGetTLSConfig(t *testing.T) { + tests := []struct { + name string + config *Config + expected bool // whether TLS config should be nil + }{ + { + name: "nil certificate provider", + config: &Config{}, + expected: true, // should be nil + }, + { + name: "with certificate provider", + config: &Config{ + CertificateProvider: func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + return &tls.Certificate{}, nil + }, + }, + expected: false, // should not be nil + }, + { + name: "with certificate provider and RootCAs", + config: &Config{ + CertificateProvider: func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + return &tls.Certificate{}, nil + }, + RootCAs: x509.NewCertPool(), + }, + expected: false, // should not be nil + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tlsConfig := getTLSConfig(tt.config) + if tt.expected && tlsConfig != nil { + t.Errorf("Expected nil TLS config, got %v", tlsConfig) + } + if !tt.expected && tlsConfig == nil { + t.Error("Expected non-nil TLS config, got nil") + } + if !tt.expected && tlsConfig != nil { + if tlsConfig.GetClientCertificate == nil { + t.Error("Expected GetClientCertificate to be set") + } + if tt.config.RootCAs != nil && tlsConfig.RootCAs != tt.config.RootCAs { + t.Error("Expected RootCAs to be set correctly") + } + } + }) + } +} + +func TestGetProtocol(t *testing.T) { + // Save original env vars + originalGeneral := os.Getenv(otelExporterOTLPProtoEnvKey) + originalLogs := os.Getenv(otelExporterOTLPLogsProtoEnvKey) + + defer func() { + // Restore original env vars + if originalGeneral != "" { + os.Setenv(otelExporterOTLPProtoEnvKey, originalGeneral) + } else { + os.Unsetenv(otelExporterOTLPProtoEnvKey) + } + if originalLogs != "" { + os.Setenv(otelExporterOTLPLogsProtoEnvKey, originalLogs) + } else { + os.Unsetenv(otelExporterOTLPLogsProtoEnvKey) + } + }() + + tests := []struct { + name string + signalSpecific string + generalProto string + specificProto string + expectedResult string + }{ + { + name: "no env vars set - default", + signalSpecific: otelExporterOTLPLogsProtoEnvKey, + expectedResult: "http/protobuf", + }, + { + name: "general env var set", + signalSpecific: otelExporterOTLPLogsProtoEnvKey, + generalProto: "grpc", + expectedResult: "grpc", + }, + { + name: "specific env var overrides general", + signalSpecific: otelExporterOTLPLogsProtoEnvKey, + generalProto: "grpc", + specificProto: "http/protobuf", + expectedResult: "http/protobuf", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Clear env vars + os.Unsetenv(otelExporterOTLPProtoEnvKey) + os.Unsetenv(otelExporterOTLPLogsProtoEnvKey) + + // Set test env vars + if tt.generalProto != "" { + os.Setenv(otelExporterOTLPProtoEnvKey, tt.generalProto) + } + if tt.specificProto != "" { + os.Setenv(tt.signalSpecific, tt.specificProto) + } + + result := getProtocol(tt.signalSpecific) + if result != tt.expectedResult { + t.Errorf("Expected protocol '%s', got '%s'", tt.expectedResult, result) + } + }) + } +} + +func TestCreateExporterErrors(t *testing.T) { + ctx := context.Background() + config := &Config{ + ServiceName: "test-service", + Endpoint: "invalid-endpoint", + } + + // Test with invalid protocol for logs + os.Setenv(otelExporterOTLPLogsProtoEnvKey, "invalid-protocol") + defer os.Unsetenv(otelExporterOTLPLogsProtoEnvKey) + + _, err := CreateOTLPLogExporter(ctx, config) + if err == nil { + t.Error("Expected error for invalid protocol") + } + // Check that 
it's a protocol error (the specific message will be different now) + if !strings.Contains(err.Error(), "invalid OTLP protocol") { + t.Errorf("Expected protocol error, got %v", err) + } + + // Test with invalid protocol for metrics + os.Setenv(otelExporterOTLPMetricsProtoEnvKey, "invalid-protocol") + defer os.Unsetenv(otelExporterOTLPMetricsProtoEnvKey) + + _, err = CreateOTLPMetricExporter(ctx, config) + if err == nil { + t.Error("Expected error for invalid protocol") + } + if !strings.Contains(err.Error(), "invalid OTLP protocol") { + t.Errorf("Expected protocol error, got %v", err) + } + + // Test with invalid protocol for traces + os.Setenv(otelExporterOTLPTracesProtoEnvKey, "invalid-protocol") + defer os.Unsetenv(otelExporterOTLPTracesProtoEnvKey) + + _, err = CreateOTLPTraceExporter(ctx, config) + if err == nil { + t.Error("Expected error for invalid protocol") + } + if !strings.Contains(err.Error(), "invalid OTLP protocol") { + t.Errorf("Expected protocol error, got %v", err) + } +} + +func TestCreateExporterValidProtocols(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + config := &Config{ + ServiceName: "test-service", + Endpoint: "localhost:4317", // This will likely fail to connect, but should create exporter + } + + protocols := []string{"grpc", "http/protobuf", "http/json"} + + for _, proto := range protocols { + t.Run("logs_"+proto, func(t *testing.T) { + os.Setenv(otelExporterOTLPLogsProtoEnvKey, proto) + defer os.Unsetenv(otelExporterOTLPLogsProtoEnvKey) + + exporter, err := CreateOTLPLogExporter(ctx, config) + if err != nil { + // Connection errors are expected since we're not running a real OTLP server + // but the exporter should be created successfully + t.Logf("Connection error expected: %v", err) + } + if exporter != nil { + exporter.Shutdown(ctx) + } + }) + + t.Run("metrics_"+proto, func(t *testing.T) { + os.Setenv(otelExporterOTLPMetricsProtoEnvKey, proto) + defer os.Unsetenv(otelExporterOTLPMetricsProtoEnvKey) + + exporter, err := CreateOTLPMetricExporter(ctx, config) + if err != nil { + t.Logf("Connection error expected: %v", err) + } + if exporter != nil { + exporter.Shutdown(ctx) + } + }) + + t.Run("traces_"+proto, func(t *testing.T) { + os.Setenv(otelExporterOTLPTracesProtoEnvKey, proto) + defer os.Unsetenv(otelExporterOTLPTracesProtoEnvKey) + + exporter, err := CreateOTLPTraceExporter(ctx, config) + if err != nil { + t.Logf("Connection error expected: %v", err) + } + if exporter != nil { + exporter.Shutdown(ctx) + } + }) + } +} + +func TestConfigValidation(t *testing.T) { + tests := []struct { + name string + config *Config + shouldErr bool + }{ + { + name: "valid empty config", + config: &Config{}, + shouldErr: false, + }, + { + name: "valid config with endpoint", + config: &Config{ + ServiceName: "test-service", + Endpoint: "localhost:4317", + }, + shouldErr: false, + }, + { + name: "valid config with endpoint URL", + config: &Config{ + ServiceName: "test-service", + EndpointURL: "https://otlp.example.com:4317/v1/traces", + }, + shouldErr: false, + }, + { + name: "invalid - both endpoint and endpoint URL", + config: &Config{ + ServiceName: "test-service", + Endpoint: "localhost:4317", + EndpointURL: "https://otlp.example.com:4317/v1/traces", + }, + shouldErr: true, + }, + { + name: "invalid - endpoint with protocol", + config: &Config{ + ServiceName: "test-service", + Endpoint: "https://localhost:4317", + }, + shouldErr: true, + }, + { + name: "invalid - empty endpoint", + config: &Config{ + 
ServiceName: "test-service", + Endpoint: " ", + }, + shouldErr: true, + }, + { + name: "invalid - malformed endpoint URL", + config: &Config{ + ServiceName: "test-service", + EndpointURL: "://invalid-url-missing-scheme", + }, + shouldErr: true, + }, + { + name: "invalid - empty service name", + config: &Config{ + ServiceName: " ", + Endpoint: "localhost:4317", + }, + shouldErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if tt.shouldErr && err == nil { + t.Error("Expected validation error, got nil") + } + if !tt.shouldErr && err != nil { + t.Errorf("Expected no validation error, got: %v", err) + } + }) + } +} + +func TestValidateAndStore(t *testing.T) { + Clear() + + ctx := context.Background() + + // Test with valid config + validConfig := &Config{ + ServiceName: "test-service", + Endpoint: "localhost:4317", + } + + err := ValidateAndStore(ctx, validConfig, nil, nil, nil) + if err != nil { + t.Errorf("ValidateAndStore with valid config should not error: %v", err) + } + + if !IsConfigured() { + t.Error("Should be configured after ValidateAndStore") + } + + Clear() + + // Test with invalid config + invalidConfig := &Config{ + ServiceName: "test-service", + Endpoint: "localhost:4317", + EndpointURL: "https://example.com:4317", // both specified - invalid + } + + err = ValidateAndStore(ctx, invalidConfig, nil, nil, nil) + if err == nil { + t.Error("ValidateAndStore with invalid config should return error") + } + + if IsConfigured() { + t.Error("Should not be configured after failed ValidateAndStore") + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000..7a1222b --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,122 @@ +// Package metrics provides OpenTelemetry-native metrics with OTLP export support. +// +// This package implements a metrics system using the OpenTelemetry metrics data model +// with OTLP export capabilities. It's designed for new applications that want to use +// structured metrics export to observability backends. +// +// Key features: +// - OpenTelemetry native metric types (Counter, Histogram, Gauge, etc.) +// - OTLP export for sending metrics to observability backends +// - Resource detection and correlation with traces/logs +// - Graceful handling when OTLP configuration is not available +// +// Example usage: +// +// // Initialize metrics along with tracing +// shutdown, err := tracing.InitTracer(ctx, cfg) +// if err != nil { +// log.Fatal(err) +// } +// defer shutdown(ctx) +// +// // Get a meter and create instruments +// meter := metrics.GetMeter("my-service") +// counter, _ := meter.Int64Counter("requests_total") +// counter.Add(ctx, 1, metric.WithAttributes(attribute.String("method", "GET"))) +package metrics + +import ( + "context" + "log/slog" + "sync" + "time" + + "go.ntppool.org/common/internal/tracerconfig" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" +) + +var ( + meterProvider metric.MeterProvider + setupOnce sync.Once + setupErr error +) + +// Setup initializes the OpenTelemetry metrics provider with OTLP export. +// This function uses the configuration stored by the tracing package and +// creates a metrics provider that exports to the same OTLP endpoint. +// +// The function is safe to call multiple times - it will only initialize once. +// If tracing configuration is not available, it returns a no-op provider that +// doesn't export metrics. 
+//
+// Returns an error only if there's a configuration problem. Missing tracing
+// configuration is handled gracefully with a warning log.
+func Setup(ctx context.Context) error {
+	setupOnce.Do(func() {
+		setupErr = initializeMetrics(ctx)
+	})
+	return setupErr
+}
+
+// GetMeter returns a named meter for creating metric instruments.
+// The meter uses the configured metrics provider, or the global provider
+// if metrics haven't been set up yet.
+//
+// This is the primary entry point for creating metric instruments in your application.
+func GetMeter(name string, opts ...metric.MeterOption) metric.Meter {
+	if meterProvider == nil {
+		// Return the global provider as fallback (no-op if not configured)
+		return otel.GetMeterProvider().Meter(name, opts...)
+	}
+	return meterProvider.Meter(name, opts...)
+}
+
+// initializeMetrics sets up the OpenTelemetry metrics provider with OTLP export.
+func initializeMetrics(ctx context.Context) error {
+	log := slog.Default()
+
+	// Check if tracing configuration is available
+	cfg, configCtx, factory := tracerconfig.GetMetricExporter()
+	if cfg == nil || configCtx == nil || factory == nil {
+		log.Warn("metrics setup: tracing configuration not available, using no-op provider")
+		// Set the global provider as fallback - metrics just won't be exported
+		meterProvider = otel.GetMeterProvider()
+		return nil
+	}
+
+	// Create OTLP metrics exporter
+	exporter, err := factory(ctx, cfg)
+	if err != nil {
+		log.Error("metrics setup: failed to create OTLP exporter", "error", err)
+		// Fall back to global provider
+		meterProvider = otel.GetMeterProvider()
+		return nil
+	}
+
+	// Create metrics provider with the exporter
+	provider := sdkmetric.NewMeterProvider(
+		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(
+			exporter,
+			sdkmetric.WithInterval(15*time.Second),
+		)),
+	)
+
+	// Set the global provider
+	otel.SetMeterProvider(provider)
+	meterProvider = provider
+
+	log.Info("metrics setup: OTLP metrics provider initialized")
+	return nil
+}
+
+// Shutdown gracefully shuts down the metrics provider.
+// This should be called during application shutdown to ensure all metrics
+// are properly flushed and exported.
+func Shutdown(ctx context.Context) error { + if provider, ok := meterProvider.(*sdkmetric.MeterProvider); ok { + return provider.Shutdown(ctx) + } + return nil +} diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go new file mode 100644 index 0000000..fb6c74d --- /dev/null +++ b/metrics/metrics_test.go @@ -0,0 +1,296 @@ +package metrics + +import ( + "context" + "os" + "testing" + "time" + + "go.ntppool.org/common/internal/tracerconfig" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +func TestSetup_NoConfiguration(t *testing.T) { + // Clear any existing configuration + tracerconfig.Clear() + + ctx := context.Background() + err := Setup(ctx) + // Should not return an error even when no configuration is available + if err != nil { + t.Errorf("Setup() returned unexpected error: %v", err) + } + + // Should be able to get a meter (even if it's a no-op) + meter := GetMeter("test-meter") + if meter == nil { + t.Error("GetMeter() returned nil") + } +} + +func TestGetMeter(t *testing.T) { + // Clear any existing configuration + tracerconfig.Clear() + + ctx := context.Background() + _ = Setup(ctx) + + meter := GetMeter("test-service") + if meter == nil { + t.Fatal("GetMeter() returned nil") + } + + // Test creating a counter instrument + counter, err := meter.Int64Counter("test_counter") + if err != nil { + t.Errorf("Failed to create counter: %v", err) + } + + // Test using the counter (should not error even with no-op provider) + counter.Add(ctx, 1, metric.WithAttributes(attribute.String("test", "value"))) +} + +func TestSetup_MultipleCallsSafe(t *testing.T) { + // Clear any existing configuration + tracerconfig.Clear() + + ctx := context.Background() + + // Call Setup multiple times + err1 := Setup(ctx) + err2 := Setup(ctx) + err3 := Setup(ctx) + + if err1 != nil { + t.Errorf("First Setup() call returned error: %v", err1) + } + if err2 != nil { + t.Errorf("Second Setup() call returned error: %v", err2) + } + if err3 != nil { + t.Errorf("Third Setup() call returned error: %v", err3) + } + + // Should still be able to get meters + meter := GetMeter("test-meter") + if meter == nil { + t.Error("GetMeter() returned nil after multiple Setup() calls") + } +} + +func TestSetup_WithConfiguration(t *testing.T) { + // Clear any existing configuration + tracerconfig.Clear() + + ctx := context.Background() + config := &tracerconfig.Config{ + ServiceName: "test-metrics-service", + Environment: "test", + Endpoint: "localhost:4317", // Will likely fail to connect, but should set up provider + } + + // Create a mock exporter factory that returns a working exporter + mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) { + // Create a simple in-memory exporter for testing + return &mockMetricExporter{}, nil + } + + // Store configuration with mock factory + tracerconfig.Store(ctx, config, nil, mockFactory, nil) + + // Setup metrics + err := Setup(ctx) + if err != nil { + t.Errorf("Setup() returned error: %v", err) + } + + // Should be able to get a meter + meter := GetMeter("test-service") + if meter == nil { + t.Fatal("GetMeter() returned nil") + } + + // Test creating and using instruments + counter, err := meter.Int64Counter("test_counter") + if err != nil { + t.Errorf("Failed to create counter: %v", err) + } + + histogram, err := meter.Float64Histogram("test_histogram") + if err != nil { + t.Errorf("Failed to create histogram: 
%v", err) + } + + gauge, err := meter.Int64UpDownCounter("test_gauge") + if err != nil { + t.Errorf("Failed to create gauge: %v", err) + } + + // Use the instruments + counter.Add(ctx, 1, metric.WithAttributes(attribute.String("test", "value"))) + histogram.Record(ctx, 1.5, metric.WithAttributes(attribute.String("test", "value"))) + gauge.Add(ctx, 10, metric.WithAttributes(attribute.String("test", "value"))) + + // Test shutdown + err = Shutdown(ctx) + if err != nil { + t.Errorf("Shutdown() returned error: %v", err) + } +} + +func TestSetup_WithRealOTLPConfig(t *testing.T) { + // Skip this test in short mode since it may try to make network connections + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + // Clear any existing configuration + tracerconfig.Clear() + + // Set environment variables for OTLP configuration + originalEndpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") + originalProtocol := os.Getenv("OTEL_EXPORTER_OTLP_PROTOCOL") + + defer func() { + if originalEndpoint != "" { + os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", originalEndpoint) + } else { + os.Unsetenv("OTEL_EXPORTER_OTLP_ENDPOINT") + } + if originalProtocol != "" { + os.Setenv("OTEL_EXPORTER_OTLP_PROTOCOL", originalProtocol) + } else { + os.Unsetenv("OTEL_EXPORTER_OTLP_PROTOCOL") + } + }() + + os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") // HTTP endpoint + os.Setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf") + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + config := &tracerconfig.Config{ + ServiceName: "test-metrics-e2e", + Environment: "test", + Endpoint: "localhost:4318", + } + + // Store configuration with real factory + tracerconfig.Store(ctx, config, nil, tracerconfig.CreateOTLPMetricExporter, nil) + + // Setup metrics - this may fail if no OTLP collector is running, which is okay + err := Setup(ctx) + if err != nil { + t.Logf("Setup() returned error (expected if no OTLP collector): %v", err) + } + + // Should still be able to get a meter + meter := GetMeter("test-service-e2e") + if meter == nil { + t.Fatal("GetMeter() returned nil") + } + + // Create and use instruments + counter, err := meter.Int64Counter("e2e_test_counter") + if err != nil { + t.Errorf("Failed to create counter: %v", err) + } + + // Add some metrics + for i := 0; i < 5; i++ { + counter.Add(ctx, 1, metric.WithAttributes( + attribute.String("iteration", string(rune('0'+i))), + attribute.String("test_type", "e2e"), + )) + } + + // Give some time for export (if collector is running) + time.Sleep(100 * time.Millisecond) + + // Test shutdown + err = Shutdown(ctx) + if err != nil { + t.Logf("Shutdown() returned error (may be expected): %v", err) + } +} + +func TestConcurrentMetricUsage(t *testing.T) { + // Clear any existing configuration + tracerconfig.Clear() + + ctx := context.Background() + config := &tracerconfig.Config{ + ServiceName: "concurrent-test", + } + + // Use mock factory + mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) { + return &mockMetricExporter{}, nil + } + + tracerconfig.Store(ctx, config, nil, mockFactory, nil) + Setup(ctx) + + meter := GetMeter("concurrent-test") + counter, err := meter.Int64Counter("concurrent_counter") + if err != nil { + t.Fatalf("Failed to create counter: %v", err) + } + + // Test concurrent metric usage + const numGoroutines = 10 + const metricsPerGoroutine = 100 + + done := make(chan bool, numGoroutines) + + for i := 0; i < numGoroutines; i++ { + go 
func(goroutineID int) { + for j := 0; j < metricsPerGoroutine; j++ { + counter.Add(ctx, 1, metric.WithAttributes( + attribute.Int("goroutine", goroutineID), + attribute.Int("iteration", j), + )) + } + done <- true + }(i) + } + + // Wait for all goroutines to complete + for i := 0; i < numGoroutines; i++ { + <-done + } + + // Shutdown + err = Shutdown(ctx) + if err != nil { + t.Errorf("Shutdown() returned error: %v", err) + } +} + +// mockMetricExporter is a simple mock exporter for testing +type mockMetricExporter struct{} + +func (m *mockMetricExporter) Export(ctx context.Context, rm *metricdata.ResourceMetrics) error { + // Just pretend to export + return nil +} + +func (m *mockMetricExporter) ForceFlush(ctx context.Context) error { + return nil +} + +func (m *mockMetricExporter) Shutdown(ctx context.Context) error { + return nil +} + +func (m *mockMetricExporter) Temporality(kind sdkmetric.InstrumentKind) metricdata.Temporality { + return metricdata.CumulativeTemporality +} + +func (m *mockMetricExporter) Aggregation(kind sdkmetric.InstrumentKind) sdkmetric.Aggregation { + return sdkmetric.DefaultAggregationSelector(kind) +} diff --git a/metricsserver/metrics_test.go b/metricsserver/metrics_test.go index 9d1aced..a5fad2f 100644 --- a/metricsserver/metrics_test.go +++ b/metricsserver/metrics_test.go @@ -15,11 +15,11 @@ import ( func TestNew(t *testing.T) { metrics := New() - + if metrics == nil { t.Fatal("New returned nil") } - + if metrics.r == nil { t.Error("metrics registry is nil") } @@ -28,32 +28,32 @@ func TestNew(t *testing.T) { func TestRegistry(t *testing.T) { metrics := New() registry := metrics.Registry() - + if registry == nil { t.Fatal("Registry() returned nil") } - + if registry != metrics.r { t.Error("Registry() did not return the metrics registry") } - + // Test that we can register a metric counter := prometheus.NewCounter(prometheus.CounterOpts{ Name: "test_counter", Help: "A test counter", }) - + err := registry.Register(counter) if err != nil { t.Errorf("failed to register metric: %v", err) } - + // Test that the metric is registered metricFamilies, err := registry.Gather() if err != nil { t.Errorf("failed to gather metrics: %v", err) } - + found := false for _, mf := range metricFamilies { if mf.GetName() == "test_counter" { @@ -61,7 +61,7 @@ func TestRegistry(t *testing.T) { break } } - + if !found { t.Error("registered metric not found in registry") } @@ -69,7 +69,7 @@ func TestRegistry(t *testing.T) { func TestHandler(t *testing.T) { metrics := New() - + // Register a test metric counter := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -80,40 +80,40 @@ func TestHandler(t *testing.T) { ) metrics.Registry().MustRegister(counter) counter.WithLabelValues("GET").Inc() - + // Test the handler handler := metrics.Handler() if handler == nil { t.Fatal("Handler() returned nil") } - + // Create a test request req := httptest.NewRequest("GET", "/metrics", nil) recorder := httptest.NewRecorder() - + // Call the handler handler.ServeHTTP(recorder, req) - + // Check response resp := recorder.Result() defer resp.Body.Close() - + if resp.StatusCode != http.StatusOK { t.Errorf("expected status 200, got %d", resp.StatusCode) } - + body, err := io.ReadAll(resp.Body) if err != nil { t.Fatalf("failed to read response body: %v", err) } - + bodyStr := string(body) - + // Check for our test metric if !strings.Contains(bodyStr, "test_requests_total") { t.Error("test metric not found in metrics output") } - + // Check for OpenMetrics format indicators if !strings.Contains(bodyStr, "# 
TYPE") { t.Error("metrics output missing TYPE comments") @@ -122,7 +122,7 @@ func TestHandler(t *testing.T) { func TestListenAndServe(t *testing.T) { metrics := New() - + // Register a test metric counter := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -133,46 +133,46 @@ func TestListenAndServe(t *testing.T) { ) metrics.Registry().MustRegister(counter) counter.WithLabelValues("GET").Inc() - + ctx, cancel := context.WithCancel(context.Background()) defer cancel() - + // Start server in a goroutine errCh := make(chan error, 1) go func() { // Use a high port number to avoid conflicts errCh <- metrics.ListenAndServe(ctx, 9999) }() - + // Give the server a moment to start time.Sleep(100 * time.Millisecond) - + // Test metrics endpoint resp, err := http.Get("http://localhost:9999/metrics") if err != nil { t.Fatalf("failed to GET /metrics: %v", err) } defer resp.Body.Close() - + if resp.StatusCode != http.StatusOK { t.Errorf("expected status 200, got %d", resp.StatusCode) } - + body, err := io.ReadAll(resp.Body) if err != nil { t.Fatalf("failed to read response body: %v", err) } - + bodyStr := string(body) - + // Check for our test metric if !strings.Contains(bodyStr, "test_requests_total") { t.Error("test metric not found in metrics output") } - + // Cancel context to stop server cancel() - + // Wait for server to stop select { case err := <-errCh: @@ -186,21 +186,21 @@ func TestListenAndServe(t *testing.T) { func TestListenAndServeContextCancellation(t *testing.T) { metrics := New() - + ctx, cancel := context.WithCancel(context.Background()) - + // Start server errCh := make(chan error, 1) go func() { errCh <- metrics.ListenAndServe(ctx, 9998) }() - + // Give server time to start time.Sleep(100 * time.Millisecond) - + // Cancel context cancel() - + // Server should stop gracefully select { case err := <-errCh: @@ -215,7 +215,7 @@ func TestListenAndServeContextCancellation(t *testing.T) { // Benchmark the metrics handler response time func BenchmarkMetricsHandler(b *testing.B) { metrics := New() - + // Register some test metrics for i := 0; i < 10; i++ { counter := prometheus.NewCounter(prometheus.CounterOpts{ @@ -225,18 +225,18 @@ func BenchmarkMetricsHandler(b *testing.B) { metrics.Registry().MustRegister(counter) counter.Add(float64(i * 100)) } - + handler := metrics.Handler() - + b.ResetTimer() - + for i := 0; i < b.N; i++ { req := httptest.NewRequest("GET", "/metrics", nil) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) - + if recorder.Code != http.StatusOK { b.Fatalf("unexpected status code: %d", recorder.Code) } } -} \ No newline at end of file +} diff --git a/tracing/tracing.go b/tracing/tracing.go index e7c9c87..acf987d 100644 --- a/tracing/tracing.go +++ b/tracing/tracing.go @@ -38,7 +38,6 @@ package tracing import ( "context" - "crypto/tls" "crypto/x509" "errors" "log/slog" @@ -48,19 +47,14 @@ import ( "go.ntppool.org/common/internal/tracerconfig" "go.ntppool.org/common/version" - "google.golang.org/grpc/credentials" "go.opentelemetry.io/contrib/exporters/autoexport" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc" - "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" "go.opentelemetry.io/otel/log/global" "go.opentelemetry.io/otel/propagation" sdklog 
"go.opentelemetry.io/otel/sdk/log" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" @@ -70,27 +64,24 @@ import ( const ( // svcNameKey is the environment variable name that Service Name information will be read from. svcNameKey = "OTEL_SERVICE_NAME" - - otelExporterOTLPProtoEnvKey = "OTEL_EXPORTER_OTLP_PROTOCOL" - otelExporterOTLPTracesProtoEnvKey = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL" - otelExporterOTLPLogsProtoEnvKey = "OTEL_EXPORTER_OTLP_LOGS_PROTOCOL" ) -var errInvalidOTLPProtocol = errors.New("invalid OTLP protocol - should be one of ['grpc', 'http/protobuf']") - // createOTLPLogExporter creates an OTLP log exporter using the provided configuration. -// This function is used as the ExporterFactory for the tracerconfig bridge. +// This function is used as the LogExporterFactory for the tracerconfig bridge. func createOTLPLogExporter(ctx context.Context, cfg *tracerconfig.Config) (sdklog.Exporter, error) { - // Convert bridge config to local TracerConfig - tracerCfg := &TracerConfig{ - ServiceName: cfg.ServiceName, - Environment: cfg.Environment, - Endpoint: cfg.Endpoint, - EndpointURL: cfg.EndpointURL, - CertificateProvider: cfg.CertificateProvider, - RootCAs: cfg.RootCAs, - } - return newOTLPLogExporter(ctx, tracerCfg) + return tracerconfig.CreateOTLPLogExporter(ctx, cfg) +} + +// createOTLPMetricExporter creates an OTLP metric exporter using the provided configuration. +// This function is used as the MetricExporterFactory for the tracerconfig bridge. +func createOTLPMetricExporter(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) { + return tracerconfig.CreateOTLPMetricExporter(ctx, cfg) +} + +// createOTLPTraceExporter creates an OTLP trace exporter using the provided configuration. +// This function is used as the TraceExporterFactory for the tracerconfig bridge. 
+func createOTLPTraceExporter(ctx context.Context, cfg *tracerconfig.Config) (sdktrace.SpanExporter, error) { + return tracerconfig.CreateOTLPTraceExporter(ctx, cfg) } // https://github.com/open-telemetry/opentelemetry-go/blob/main/exporters/otlp/otlptrace/otlptracehttp/example_test.go @@ -161,7 +152,7 @@ func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, cfg = &TracerConfig{} } - // Store configuration for use by logger package via bridge + // Store configuration for use by logger and metrics packages via bridge bridgeConfig := &tracerconfig.Config{ ServiceName: cfg.ServiceName, Environment: cfg.Environment, @@ -170,7 +161,7 @@ func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, CertificateProvider: cfg.CertificateProvider, RootCAs: cfg.RootCAs, } - tracerconfig.Store(ctx, bridgeConfig, createOTLPLogExporter) + tracerconfig.Store(ctx, bridgeConfig, createOTLPLogExporter, createOTLPMetricExporter, createOTLPTraceExporter) log := slog.Default() @@ -249,9 +240,9 @@ func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, switch os.Getenv("OTEL_TRACES_EXPORTER") { case "": - spanExporter, err = newOLTPExporter(ctx, cfg) + spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig) case "otlp": - spanExporter, err = newOLTPExporter(ctx, cfg) + spanExporter, err = createOTLPTraceExporter(ctx, bridgeConfig) default: // log.Debug("OTEL_TRACES_EXPORTER", "fallback", os.Getenv("OTEL_TRACES_EXPORTER")) spanExporter, err = autoexport.NewSpanExporter(ctx) @@ -279,141 +270,6 @@ func SetupSDK(ctx context.Context, cfg *TracerConfig) (shutdown TpShutdownFunc, return } -func newOLTPExporter(ctx context.Context, cfg *TracerConfig) (sdktrace.SpanExporter, error) { - log := slog.Default() - - var tlsConfig *tls.Config - - if cfg.CertificateProvider != nil { - tlsConfig = &tls.Config{ - GetClientCertificate: cfg.CertificateProvider, - RootCAs: cfg.RootCAs, - } - } - - proto := os.Getenv(otelExporterOTLPTracesProtoEnvKey) - if proto == "" { - proto = os.Getenv(otelExporterOTLPProtoEnvKey) - } - - // Fallback to default, http/protobuf. - if proto == "" { - proto = "http/protobuf" - } - - var client otlptrace.Client - - switch proto { - case "grpc": - opts := []otlptracegrpc.Option{ - otlptracegrpc.WithCompressor("gzip"), - } - if tlsConfig != nil { - opts = append(opts, otlptracegrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig))) - } - if len(cfg.Endpoint) > 0 { - log.Info("adding option", "Endpoint", cfg.Endpoint) - opts = append(opts, otlptracegrpc.WithEndpoint(cfg.Endpoint)) - } - if len(cfg.EndpointURL) > 0 { - log.Info("adding option", "EndpointURL", cfg.EndpointURL) - opts = append(opts, otlptracegrpc.WithEndpointURL(cfg.EndpointURL)) - } - - client = otlptracegrpc.NewClient(opts...) - case "http/protobuf", "http/json": - opts := []otlptracehttp.Option{ - otlptracehttp.WithCompression(otlptracehttp.GzipCompression), - } - if tlsConfig != nil { - opts = append(opts, otlptracehttp.WithTLSClientConfig(tlsConfig)) - } - if len(cfg.Endpoint) > 0 { - opts = append(opts, otlptracehttp.WithEndpoint(cfg.Endpoint)) - } - if len(cfg.EndpointURL) > 0 { - opts = append(opts, otlptracehttp.WithEndpointURL(cfg.EndpointURL)) - } - - opts = append(opts, otlptracehttp.WithRetry(otlptracehttp.RetryConfig{ - Enabled: true, - InitialInterval: 3 * time.Second, - MaxInterval: 60 * time.Second, - MaxElapsedTime: 5 * time.Minute, - })) - - client = otlptracehttp.NewClient(opts...) 
- default: - return nil, errInvalidOTLPProtocol - } - - exporter, err := otlptrace.New(ctx, client) - if err != nil { - log.ErrorContext(ctx, "creating OTLP trace exporter", "err", err) - } - return exporter, err -} - -func newOTLPLogExporter(ctx context.Context, cfg *TracerConfig) (sdklog.Exporter, error) { - log := slog.Default() - - var tlsConfig *tls.Config - - if cfg.CertificateProvider != nil { - tlsConfig = &tls.Config{ - GetClientCertificate: cfg.CertificateProvider, - RootCAs: cfg.RootCAs, - } - } - - proto := os.Getenv(otelExporterOTLPLogsProtoEnvKey) - if proto == "" { - proto = os.Getenv(otelExporterOTLPProtoEnvKey) - } - - // Fallback to default, http/protobuf. - if proto == "" { - proto = "http/protobuf" - } - - switch proto { - case "grpc": - opts := []otlploggrpc.Option{ - otlploggrpc.WithCompressor("gzip"), - } - if tlsConfig != nil { - opts = append(opts, otlploggrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig))) - } - if len(cfg.Endpoint) > 0 { - log.Info("adding log option", "Endpoint", cfg.Endpoint) - opts = append(opts, otlploggrpc.WithEndpoint(cfg.Endpoint)) - } - if len(cfg.EndpointURL) > 0 { - log.Info("adding log option", "EndpointURL", cfg.EndpointURL) - opts = append(opts, otlploggrpc.WithEndpointURL(cfg.EndpointURL)) - } - - return otlploggrpc.New(ctx, opts...) - case "http/protobuf", "http/json": - opts := []otlploghttp.Option{ - otlploghttp.WithCompression(otlploghttp.GzipCompression), - } - if tlsConfig != nil { - opts = append(opts, otlploghttp.WithTLSClientConfig(tlsConfig)) - } - if len(cfg.Endpoint) > 0 { - opts = append(opts, otlploghttp.WithEndpoint(cfg.Endpoint)) - } - if len(cfg.EndpointURL) > 0 { - opts = append(opts, otlploghttp.WithEndpointURL(cfg.EndpointURL)) - } - - return otlploghttp.New(ctx, opts...) - default: - return nil, errInvalidOTLPProtocol - } -} - func newTraceProvider(traceExporter sdktrace.SpanExporter, res *resource.Resource) (*sdktrace.TracerProvider, error) { traceProvider := sdktrace.NewTracerProvider( sdktrace.WithResource(res),