Ask Bjørn Hansen c6230be91e feat(metrics): add OTLP metrics support with centralized config
- Create new metrics/ package for OpenTelemetry-native metrics with OTLP export
- Refactor OTLP configuration to internal/tracerconfig/ to eliminate code duplication
- Add consistent retry configuration across all HTTP OTLP exporters
- Add configuration validation and improved error messages
- Include test coverage for all new functionality
- Make OpenTelemetry metrics dependencies explicit in go.mod

Designed for new applications requiring structured metrics export to
observability backends via OTLP protocol.
2025-08-02 09:29:27 -07:00

379 lines
13 KiB
Go

// Package tracerconfig provides a bridge to eliminate circular dependencies between
// the logger and tracing packages. It stores tracer configuration and provides
// factory functions that can be used by the logger package without importing tracing.
package tracerconfig
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net/url"
"os"
"strings"
"sync"
"time"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
sdklog "go.opentelemetry.io/otel/sdk/log"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"google.golang.org/grpc/credentials"
)
const (
otelExporterOTLPProtoEnvKey = "OTEL_EXPORTER_OTLP_PROTOCOL"
otelExporterOTLPTracesProtoEnvKey = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
otelExporterOTLPLogsProtoEnvKey = "OTEL_EXPORTER_OTLP_LOGS_PROTOCOL"
otelExporterOTLPMetricsProtoEnvKey = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
)
var errInvalidOTLPProtocol = errors.New("invalid OTLP protocol - should be one of ['grpc', 'http/protobuf']")
// newInvalidProtocolError creates a specific error message for invalid protocols
func newInvalidProtocolError(protocol, signalType string) error {
return fmt.Errorf("invalid OTLP protocol '%s' for %s - should be one of ['grpc', 'http/protobuf', 'http/json']", protocol, signalType)
}
// Validate checks the configuration for common errors and inconsistencies
func (c *Config) Validate() error {
var errs []error
// Check that both Endpoint and EndpointURL are not specified
if c.Endpoint != "" && c.EndpointURL != "" {
errs = append(errs, errors.New("cannot specify both Endpoint and EndpointURL - use one or the other"))
}
// Validate EndpointURL format if specified
if c.EndpointURL != "" {
if _, err := url.Parse(c.EndpointURL); err != nil {
errs = append(errs, fmt.Errorf("invalid EndpointURL format: %w", err))
}
}
// Validate Endpoint format if specified
if c.Endpoint != "" {
// Basic validation - should not contain protocol scheme
if strings.Contains(c.Endpoint, "://") {
errs = append(errs, errors.New("Endpoint should not include protocol scheme (use EndpointURL for full URLs)"))
}
// Should not be empty after trimming whitespace
if strings.TrimSpace(c.Endpoint) == "" {
errs = append(errs, errors.New("Endpoint cannot be empty or whitespace"))
}
}
// Validate TLS configuration consistency
if c.CertificateProvider != nil && c.RootCAs == nil {
// This is just a warning - client cert without custom CAs is valid
// but might indicate a configuration issue
}
// Validate service name if specified
if c.ServiceName != "" && strings.TrimSpace(c.ServiceName) == "" {
errs = append(errs, errors.New("ServiceName cannot be empty or whitespace"))
}
// Combine all errors
if len(errs) > 0 {
var errMsgs []string
for _, err := range errs {
errMsgs = append(errMsgs, err.Error())
}
return fmt.Errorf("configuration validation failed: %s", strings.Join(errMsgs, "; "))
}
return nil
}
// ValidateAndStore validates the configuration before storing it
func ValidateAndStore(ctx context.Context, cfg *Config, logFactory LogExporterFactory, metricFactory MetricExporterFactory, traceFactory TraceExporterFactory) error {
if cfg != nil {
if err := cfg.Validate(); err != nil {
return err
}
}
Store(ctx, cfg, logFactory, metricFactory, traceFactory)
return nil
}
// GetClientCertificate defines a function type for providing client certificates for mutual TLS.
// This is used when exporting telemetry data to secured OTLP endpoints that require
// client certificate authentication.
type GetClientCertificate func(*tls.CertificateRequestInfo) (*tls.Certificate, error)
// Config provides configuration options for OpenTelemetry tracing setup.
// It supplements standard OpenTelemetry environment variables with additional
// NTP Pool-specific configuration including TLS settings for secure OTLP export.
type Config struct {
ServiceName string // Service name for resource identification (overrides OTEL_SERVICE_NAME)
Environment string // Deployment environment (development, staging, production)
Endpoint string // OTLP endpoint hostname/port (e.g., "otlp.example.com:4317")
EndpointURL string // Complete OTLP endpoint URL (e.g., "https://otlp.example.com:4317/v1/traces")
CertificateProvider GetClientCertificate // Client certificate provider for mutual TLS
RootCAs *x509.CertPool // CA certificate pool for server verification
}
// LogExporterFactory creates an OTLP log exporter using the provided configuration.
// This allows the logger package to create exporters without importing the tracing package.
type LogExporterFactory func(context.Context, *Config) (sdklog.Exporter, error)
// MetricExporterFactory creates an OTLP metric exporter using the provided configuration.
// This allows the metrics package to create exporters without importing the tracing package.
type MetricExporterFactory func(context.Context, *Config) (sdkmetric.Exporter, error)
// TraceExporterFactory creates an OTLP trace exporter using the provided configuration.
// This allows for consistent trace exporter creation across packages.
type TraceExporterFactory func(context.Context, *Config) (sdktrace.SpanExporter, error)
// Global state for sharing configuration between packages
var (
globalConfig *Config
globalContext context.Context
logExporterFactory LogExporterFactory
metricExporterFactory MetricExporterFactory
traceExporterFactory TraceExporterFactory
configMu sync.RWMutex
)
// Store saves the tracer configuration and exporter factories for use by other packages.
// This should be called by the tracing package during initialization.
func Store(ctx context.Context, cfg *Config, logFactory LogExporterFactory, metricFactory MetricExporterFactory, traceFactory TraceExporterFactory) {
configMu.Lock()
defer configMu.Unlock()
globalConfig = cfg
globalContext = ctx
logExporterFactory = logFactory
metricExporterFactory = metricFactory
traceExporterFactory = traceFactory
}
// GetLogExporter returns the stored configuration and log exporter factory.
// Returns nil values if no configuration has been stored yet.
func GetLogExporter() (*Config, context.Context, LogExporterFactory) {
configMu.RLock()
defer configMu.RUnlock()
return globalConfig, globalContext, logExporterFactory
}
// GetMetricExporter returns the stored configuration and metric exporter factory.
// Returns nil values if no configuration has been stored yet.
func GetMetricExporter() (*Config, context.Context, MetricExporterFactory) {
configMu.RLock()
defer configMu.RUnlock()
return globalConfig, globalContext, metricExporterFactory
}
// GetTraceExporter returns the stored configuration and trace exporter factory.
// Returns nil values if no configuration has been stored yet.
func GetTraceExporter() (*Config, context.Context, TraceExporterFactory) {
configMu.RLock()
defer configMu.RUnlock()
return globalConfig, globalContext, traceExporterFactory
}
// Get returns the stored tracer configuration, context, and log exporter factory.
// This maintains backward compatibility for the logger package.
// Returns nil values if no configuration has been stored yet.
func Get() (*Config, context.Context, LogExporterFactory) {
return GetLogExporter()
}
// IsConfigured returns true if tracer configuration has been stored.
func IsConfigured() bool {
configMu.RLock()
defer configMu.RUnlock()
return globalConfig != nil && globalContext != nil
}
// Clear removes the stored configuration. This is primarily useful for testing.
func Clear() {
configMu.Lock()
defer configMu.Unlock()
globalConfig = nil
globalContext = nil
logExporterFactory = nil
metricExporterFactory = nil
traceExporterFactory = nil
}
// getTLSConfig creates a TLS configuration from the provided Config.
func getTLSConfig(cfg *Config) *tls.Config {
if cfg.CertificateProvider == nil {
return nil
}
return &tls.Config{
GetClientCertificate: cfg.CertificateProvider,
RootCAs: cfg.RootCAs,
}
}
// getProtocol determines the OTLP protocol to use for the given signal type.
// It follows OpenTelemetry environment variable precedence.
func getProtocol(signalSpecificEnv string) string {
proto := os.Getenv(signalSpecificEnv)
if proto == "" {
proto = os.Getenv(otelExporterOTLPProtoEnvKey)
}
// Fallback to default, http/protobuf.
if proto == "" {
proto = "http/protobuf"
}
return proto
}
// CreateOTLPLogExporter creates an OTLP log exporter using the provided configuration.
func CreateOTLPLogExporter(ctx context.Context, cfg *Config) (sdklog.Exporter, error) {
tlsConfig := getTLSConfig(cfg)
proto := getProtocol(otelExporterOTLPLogsProtoEnvKey)
switch proto {
case "grpc":
opts := []otlploggrpc.Option{
otlploggrpc.WithCompressor("gzip"),
}
if tlsConfig != nil {
opts = append(opts, otlploggrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlploggrpc.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlploggrpc.WithEndpointURL(cfg.EndpointURL))
}
return otlploggrpc.New(ctx, opts...)
case "http/protobuf", "http/json":
opts := []otlploghttp.Option{
otlploghttp.WithCompression(otlploghttp.GzipCompression),
}
if tlsConfig != nil {
opts = append(opts, otlploghttp.WithTLSClientConfig(tlsConfig))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlploghttp.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlploghttp.WithEndpointURL(cfg.EndpointURL))
}
opts = append(opts, otlploghttp.WithRetry(otlploghttp.RetryConfig{
Enabled: true,
InitialInterval: 3 * time.Second,
MaxInterval: 60 * time.Second,
MaxElapsedTime: 5 * time.Minute,
}))
return otlploghttp.New(ctx, opts...)
default:
return nil, newInvalidProtocolError(proto, "logs")
}
}
// CreateOTLPMetricExporter creates an OTLP metric exporter using the provided configuration.
func CreateOTLPMetricExporter(ctx context.Context, cfg *Config) (sdkmetric.Exporter, error) {
tlsConfig := getTLSConfig(cfg)
proto := getProtocol(otelExporterOTLPMetricsProtoEnvKey)
switch proto {
case "grpc":
opts := []otlpmetricgrpc.Option{
otlpmetricgrpc.WithCompressor("gzip"),
}
if tlsConfig != nil {
opts = append(opts, otlpmetricgrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlpmetricgrpc.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlpmetricgrpc.WithEndpointURL(cfg.EndpointURL))
}
return otlpmetricgrpc.New(ctx, opts...)
case "http/protobuf", "http/json":
opts := []otlpmetrichttp.Option{
otlpmetrichttp.WithCompression(otlpmetrichttp.GzipCompression),
}
if tlsConfig != nil {
opts = append(opts, otlpmetrichttp.WithTLSClientConfig(tlsConfig))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlpmetrichttp.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlpmetrichttp.WithEndpointURL(cfg.EndpointURL))
}
opts = append(opts, otlpmetrichttp.WithRetry(otlpmetrichttp.RetryConfig{
Enabled: true,
InitialInterval: 3 * time.Second,
MaxInterval: 60 * time.Second,
MaxElapsedTime: 5 * time.Minute,
}))
return otlpmetrichttp.New(ctx, opts...)
default:
return nil, newInvalidProtocolError(proto, "metrics")
}
}
// CreateOTLPTraceExporter creates an OTLP trace exporter using the provided configuration.
func CreateOTLPTraceExporter(ctx context.Context, cfg *Config) (sdktrace.SpanExporter, error) {
tlsConfig := getTLSConfig(cfg)
proto := getProtocol(otelExporterOTLPTracesProtoEnvKey)
var client otlptrace.Client
switch proto {
case "grpc":
opts := []otlptracegrpc.Option{
otlptracegrpc.WithCompressor("gzip"),
}
if tlsConfig != nil {
opts = append(opts, otlptracegrpc.WithTLSCredentials(credentials.NewTLS(tlsConfig)))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlptracegrpc.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlptracegrpc.WithEndpointURL(cfg.EndpointURL))
}
client = otlptracegrpc.NewClient(opts...)
case "http/protobuf", "http/json":
opts := []otlptracehttp.Option{
otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
}
if tlsConfig != nil {
opts = append(opts, otlptracehttp.WithTLSClientConfig(tlsConfig))
}
if len(cfg.Endpoint) > 0 {
opts = append(opts, otlptracehttp.WithEndpoint(cfg.Endpoint))
}
if len(cfg.EndpointURL) > 0 {
opts = append(opts, otlptracehttp.WithEndpointURL(cfg.EndpointURL))
}
opts = append(opts, otlptracehttp.WithRetry(otlptracehttp.RetryConfig{
Enabled: true,
InitialInterval: 3 * time.Second,
MaxInterval: 60 * time.Second,
MaxElapsedTime: 5 * time.Minute,
}))
client = otlptracehttp.NewClient(opts...)
default:
return nil, newInvalidProtocolError(proto, "traces")
}
return otlptrace.New(ctx, client)
}