feat(metrics): add OTLP metrics support with centralized config

- Create new metrics/ package for OpenTelemetry-native metrics with OTLP export
- Refactor OTLP configuration to internal/tracerconfig/ to eliminate code duplication
- Add consistent retry configuration across all HTTP OTLP exporters
- Add configuration validation and improved error messages
- Include test coverage for all new functionality
- Make OpenTelemetry metrics dependencies explicit in go.mod

Designed for new applications requiring structured metrics export to
observability backends via OTLP protocol.
This commit is contained in:
2025-08-02 09:29:27 -07:00
parent 796b2a8412
commit c6230be91e
7 changed files with 1274 additions and 224 deletions

122
metrics/metrics.go Normal file
View File

@@ -0,0 +1,122 @@
// Package metrics provides OpenTelemetry-native metrics with OTLP export support.
//
// This package implements a metrics system using the OpenTelemetry metrics data model
// with OTLP export capabilities. It's designed for new applications that want to use
// structured metrics export to observability backends.
//
// Key features:
// - OpenTelemetry native metric types (Counter, Histogram, Gauge, etc.)
// - OTLP export for sending metrics to observability backends
// - Resource detection and correlation with traces/logs
// - Graceful handling when OTLP configuration is not available
//
// Example usage:
//
//	// Initialize metrics along with tracing
//	shutdown, err := tracing.InitTracer(ctx, cfg)
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer shutdown(ctx)
//
//	// Get a meter and create instruments
//	meter := metrics.GetMeter("my-service")
//	counter, _ := meter.Int64Counter("requests_total")
//	counter.Add(ctx, 1, metric.WithAttributes(attribute.String("method", "GET")))
package metrics
import (
"context"
"log/slog"
"sync"
"time"
"go.ntppool.org/common/internal/tracerconfig"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)
// Package-level state shared by Setup, GetMeter and Shutdown.
var (
	// meterProvider is the provider GetMeter hands meters out from. It stays
	// nil until initializeMetrics assigns it.
	meterProvider metric.MeterProvider
	// setupOnce ensures Setup runs initializeMetrics only once.
	setupOnce sync.Once
	// setupErr holds the result of that single initialization; every Setup
	// call returns it.
	setupErr error
)
// Setup initializes the OpenTelemetry metrics provider with OTLP export.
// This function uses the configuration stored by the tracing package and
// creates a metrics provider that exports to the same OTLP endpoint.
//
// The function is safe to call multiple times - it will only initialize once.
// If tracing configuration is not available, it returns a no-op provider that
// doesn't export metrics.
//
// Returns an error only if there's a configuration problem. Missing tracing
// configuration is handled gracefully with a warning log.
func Setup(ctx context.Context) error {
setupOnce.Do(func() {
setupErr = initializeMetrics(ctx)
})
return setupErr
}
// GetMeter hands out a named meter, the starting point for creating metric
// instruments.
//
// The meter comes from the provider installed by Setup. If Setup has not
// assigned one yet, the OpenTelemetry global provider is used instead (which
// is a no-op unless something else configured it).
func GetMeter(name string, opts ...metric.MeterOption) metric.Meter {
	provider := meterProvider
	if provider == nil {
		// Nothing installed by this package yet: defer to the global provider.
		provider = otel.GetMeterProvider()
	}

	return provider.Meter(name, opts...)
}
// initializeMetrics builds the metrics provider used by this package.
//
// It asks tracerconfig for the stored configuration, its context and the
// metric exporter factory. If any of the three is missing, or the factory
// returns an error, the package falls back to whatever provider is currently
// registered globally and no exporter is attached. Otherwise it creates an SDK
// provider whose periodic reader uses a 15-second interval, and installs it
// both as the OpenTelemetry global provider and as this package's provider.
//
// The returned error is always nil: failures are logged, not reported.
//
// NOTE(review): configCtx is only nil-checked; the exporter is created with
// the ctx passed in by the caller. Confirm that is intended.
func initializeMetrics(ctx context.Context) error {
	logger := slog.Default()

	// Without a complete stored configuration there is nothing to export to.
	cfg, configCtx, newExporter := tracerconfig.GetMetricExporter()
	configured := cfg != nil && configCtx != nil && newExporter != nil
	if !configured {
		logger.Warn("metrics setup: tracing configuration not available, using no-op provider")
		meterProvider = otel.GetMeterProvider()
		return nil
	}

	// Build the OTLP exporter; a failure degrades to the global provider.
	exporter, err := newExporter(ctx, cfg)
	if err != nil {
		logger.Error("metrics setup: failed to create OTLP exporter", "error", err)
		meterProvider = otel.GetMeterProvider()
		return nil
	}

	// Wire the exporter into an SDK provider through a periodic reader.
	reader := sdkmetric.NewPeriodicReader(exporter, sdkmetric.WithInterval(15*time.Second))
	sdkProvider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader))

	// Publish the provider globally and remember it for GetMeter/Shutdown.
	otel.SetMeterProvider(sdkProvider)
	meterProvider = sdkProvider

	logger.Info("metrics setup: OTLP metrics provider initialized")
	return nil
}
// Shutdown stops the metrics provider installed by this package.
//
// Only an SDK provider (*sdkmetric.MeterProvider) is shut down, and the result
// of its Shutdown call is returned. For any other provider, or when none has
// been set, Shutdown does nothing and returns nil. Call it during application
// shutdown.
func Shutdown(ctx context.Context) error {
	sdkProvider, isSDK := meterProvider.(*sdkmetric.MeterProvider)
	if !isSDK {
		return nil
	}

	return sdkProvider.Shutdown(ctx)
}

296
metrics/metrics_test.go Normal file
View File

@@ -0,0 +1,296 @@
package metrics
import (
"context"
"os"
"testing"
"time"
"go.ntppool.org/common/internal/tracerconfig"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
)
// TestSetup_NoConfiguration checks that Setup reports no error and GetMeter
// still returns a meter when no tracer configuration has been stored.
func TestSetup_NoConfiguration(t *testing.T) {
	// Clear any existing configuration.
	tracerconfig.Clear()

	// Missing configuration must not surface as an error.
	if err := Setup(context.Background()); err != nil {
		t.Errorf("Setup() returned unexpected error: %v", err)
	}

	// A meter must be returned regardless (a no-op one is acceptable).
	if GetMeter("test-meter") == nil {
		t.Error("GetMeter() returned nil")
	}
}
// TestGetMeter checks that a meter obtained from GetMeter can create and use
// a counter instrument when no tracer configuration has been stored.
func TestGetMeter(t *testing.T) {
	// Clear any existing configuration.
	tracerconfig.Clear()

	ctx := context.Background()
	if err := Setup(ctx); err != nil {
		t.Fatalf("Setup() returned unexpected error: %v", err)
	}

	meter := GetMeter("test-service")
	if meter == nil {
		t.Fatal("GetMeter() returned nil")
	}

	// Stop here on failure: the counter must not be used if creation failed.
	counter, err := meter.Int64Counter("test_counter")
	if err != nil {
		t.Fatalf("Failed to create counter: %v", err)
	}

	// Recording must work even when the provider is a no-op.
	counter.Add(ctx, 1, metric.WithAttributes(attribute.String("test", "value")))
}
// TestSetup_MultipleCallsSafe checks that calling Setup several times in a row
// never reports an error and leaves GetMeter usable.
func TestSetup_MultipleCallsSafe(t *testing.T) {
	// Clear any existing configuration.
	tracerconfig.Clear()

	ctx := context.Background()

	// Make all three calls first (evaluated left to right), then report.
	results := [...]error{Setup(ctx), Setup(ctx), Setup(ctx)}
	failureFormats := [...]string{
		"First Setup() call returned error: %v",
		"Second Setup() call returned error: %v",
		"Third Setup() call returned error: %v",
	}
	for idx, callErr := range results {
		if callErr != nil {
			t.Errorf(failureFormats[idx], callErr)
		}
	}

	// Meters must still be obtainable afterwards.
	if GetMeter("test-meter") == nil {
		t.Error("GetMeter() returned nil after multiple Setup() calls")
	}
}
// TestSetup_WithConfiguration stores a configuration with a mock exporter
// factory, runs Setup, creates and uses several instruments, then calls
// Shutdown.
//
// NOTE(review): Setup is guarded by the package-level setupOnce. If another
// test in this binary has already called Setup, this call does not re-run
// initialization and the mock factory stored below is never invoked.
func TestSetup_WithConfiguration(t *testing.T) {
	// Clear any existing configuration.
	tracerconfig.Clear()

	ctx := context.Background()
	config := &tracerconfig.Config{
		ServiceName: "test-metrics-service",
		Environment: "test",
		Endpoint:    "localhost:4317", // Never dialed: the mock factory below ignores it
	}

	// The mock factory returns an exporter that accepts and discards data.
	mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) {
		return &mockMetricExporter{}, nil
	}

	// Store configuration with the mock factory.
	tracerconfig.Store(ctx, config, nil, mockFactory, nil)

	if err := Setup(ctx); err != nil {
		t.Errorf("Setup() returned error: %v", err)
	}

	meter := GetMeter("test-service")
	if meter == nil {
		t.Fatal("GetMeter() returned nil")
	}

	// Creation failures are fatal so a failed instrument is never used below.
	counter, err := meter.Int64Counter("test_counter")
	if err != nil {
		t.Fatalf("Failed to create counter: %v", err)
	}
	histogram, err := meter.Float64Histogram("test_histogram")
	if err != nil {
		t.Fatalf("Failed to create histogram: %v", err)
	}
	// An up/down counter stands in for a gauge here.
	gauge, err := meter.Int64UpDownCounter("test_gauge")
	if err != nil {
		t.Fatalf("Failed to create gauge: %v", err)
	}

	// Use the instruments.
	attrs := metric.WithAttributes(attribute.String("test", "value"))
	counter.Add(ctx, 1, attrs)
	histogram.Record(ctx, 1.5, attrs)
	gauge.Add(ctx, 10, attrs)

	if err := Shutdown(ctx); err != nil {
		t.Errorf("Shutdown() returned error: %v", err)
	}
}
// TestSetup_WithRealOTLPConfig runs Setup against the real OTLP metric
// exporter factory pointed at localhost:4318. No collector is required: Setup
// and Shutdown errors are only logged. The test is skipped in short mode
// because it may attempt network connections.
func TestSetup_WithRealOTLPConfig(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}

	// Clear any existing configuration.
	tracerconfig.Clear()

	// restoreEnv snapshots one environment variable and returns a function
	// that puts it back. LookupEnv is used so that a variable that was set to
	// the empty string is restored as empty rather than being unset.
	restoreEnv := func(key string) func() {
		previous, wasSet := os.LookupEnv(key)
		return func() {
			var err error
			if wasSet {
				err = os.Setenv(key, previous)
			} else {
				err = os.Unsetenv(key)
			}
			if err != nil {
				t.Errorf("restoring %s: %v", key, err)
			}
		}
	}
	defer restoreEnv("OTEL_EXPORTER_OTLP_ENDPOINT")()
	defer restoreEnv("OTEL_EXPORTER_OTLP_PROTOCOL")()

	// Point the exporter at a local HTTP OTLP endpoint.
	if err := os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318"); err != nil {
		t.Fatalf("setting OTEL_EXPORTER_OTLP_ENDPOINT: %v", err)
	}
	if err := os.Setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf"); err != nil {
		t.Fatalf("setting OTEL_EXPORTER_OTLP_PROTOCOL: %v", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	config := &tracerconfig.Config{
		ServiceName: "test-metrics-e2e",
		Environment: "test",
		Endpoint:    "localhost:4318",
	}

	// Store configuration with the real factory.
	tracerconfig.Store(ctx, config, nil, tracerconfig.CreateOTLPMetricExporter, nil)

	// Setup may fail when no OTLP collector is running; that is acceptable.
	if err := Setup(ctx); err != nil {
		t.Logf("Setup() returned error (expected if no OTLP collector): %v", err)
	}

	// A meter must be available either way.
	meter := GetMeter("test-service-e2e")
	if meter == nil {
		t.Fatal("GetMeter() returned nil")
	}

	// Fatal on failure so a failed counter is never used in the loop below.
	counter, err := meter.Int64Counter("e2e_test_counter")
	if err != nil {
		t.Fatalf("Failed to create counter: %v", err)
	}

	// Record a few data points; i stays in 0..4, so '0'+i is a single digit.
	for i := 0; i < 5; i++ {
		counter.Add(ctx, 1, metric.WithAttributes(
			attribute.String("iteration", string(rune('0'+i))),
			attribute.String("test_type", "e2e"),
		))
	}

	// Give some time for export (if a collector is running).
	time.Sleep(100 * time.Millisecond)

	if err := Shutdown(ctx); err != nil {
		t.Logf("Shutdown() returned error (may be expected): %v", err)
	}
}
// TestConcurrentMetricUsage records on a single counter from several
// goroutines at once and then shuts the provider down.
func TestConcurrentMetricUsage(t *testing.T) {
	// Clear any existing configuration.
	tracerconfig.Clear()

	ctx := context.Background()
	config := &tracerconfig.Config{
		ServiceName: "concurrent-test",
	}

	// The mock factory returns an exporter that accepts and discards data.
	mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) {
		return &mockMetricExporter{}, nil
	}

	tracerconfig.Store(ctx, config, nil, mockFactory, nil)
	if err := Setup(ctx); err != nil {
		t.Fatalf("Setup() returned error: %v", err)
	}

	meter := GetMeter("concurrent-test")
	counter, err := meter.Int64Counter("concurrent_counter")
	if err != nil {
		t.Fatalf("Failed to create counter: %v", err)
	}

	const numGoroutines = 10
	const metricsPerGoroutine = 100

	// Buffered to numGoroutines so no worker blocks when signalling completion.
	done := make(chan struct{}, numGoroutines)

	for i := 0; i < numGoroutines; i++ {
		go func(goroutineID int) {
			for j := 0; j < metricsPerGoroutine; j++ {
				counter.Add(ctx, 1, metric.WithAttributes(
					attribute.Int("goroutine", goroutineID),
					attribute.Int("iteration", j),
				))
			}
			done <- struct{}{}
		}(i)
	}

	// Wait for all goroutines to complete.
	for i := 0; i < numGoroutines; i++ {
		<-done
	}

	if err := Shutdown(ctx); err != nil {
		t.Errorf("Shutdown() returned error: %v", err)
	}
}
// mockMetricExporter is a stateless test double for a metric exporter: every
// operation succeeds and exported data is discarded.
type mockMetricExporter struct{}

// Export accepts the metrics and drops them.
func (*mockMetricExporter) Export(context.Context, *metricdata.ResourceMetrics) error {
	return nil
}

// ForceFlush has nothing buffered to flush and always succeeds.
func (*mockMetricExporter) ForceFlush(context.Context) error {
	return nil
}

// Shutdown holds no resources and always succeeds.
func (*mockMetricExporter) Shutdown(context.Context) error {
	return nil
}

// Temporality reports cumulative temporality for every instrument kind.
func (*mockMetricExporter) Temporality(sdkmetric.InstrumentKind) metricdata.Temporality {
	return metricdata.CumulativeTemporality
}

// Aggregation defers to the SDK's default aggregation selector.
func (*mockMetricExporter) Aggregation(kind sdkmetric.InstrumentKind) sdkmetric.Aggregation {
	return sdkmetric.DefaultAggregationSelector(kind)
}