feat(metrics): add OTLP metrics support with centralized config
- Create new metrics/ package for OpenTelemetry-native metrics with OTLP export
- Refactor OTLP configuration to internal/tracerconfig/ to eliminate code duplication
- Add consistent retry configuration across all HTTP OTLP exporters
- Add configuration validation and improved error messages
- Include test coverage for all new functionality
- Make OpenTelemetry metrics dependencies explicit in go.mod

Designed for new applications requiring structured metrics export to observability backends via OTLP protocol.
This commit is contained in:
122
metrics/metrics.go
Normal file
122
metrics/metrics.go
Normal file
@@ -0,0 +1,122 @@
|
||||
// Package metrics provides OpenTelemetry-native metrics with OTLP export support.
|
||||
//
|
||||
// This package implements a metrics system using the OpenTelemetry metrics data model
|
||||
// with OTLP export capabilities. It's designed for new applications that want to use
|
||||
// structured metrics export to observability backends.
|
||||
//
|
||||
// Key features:
|
||||
// - OpenTelemetry native metric types (Counter, Histogram, Gauge, etc.)
|
||||
// - OTLP export for sending metrics to observability backends
|
||||
// - Resource detection and correlation with traces/logs
|
||||
// - Graceful handling when OTLP configuration is not available
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// // Initialize metrics along with tracing
|
||||
// shutdown, err := tracing.InitTracer(ctx, cfg)
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// defer shutdown(ctx)
|
||||
//
|
||||
// // Get a meter and create instruments
|
||||
// meter := metrics.GetMeter("my-service")
|
||||
// counter, _ := meter.Int64Counter("requests_total")
|
||||
// counter.Add(ctx, 1, metric.WithAttributes(attribute.String("method", "GET")))
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.ntppool.org/common/internal/tracerconfig"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
var (
|
||||
meterProvider metric.MeterProvider
|
||||
setupOnce sync.Once
|
||||
setupErr error
|
||||
)
|
||||
|
||||
// Setup initializes the OpenTelemetry metrics provider with OTLP export.
|
||||
// This function uses the configuration stored by the tracing package and
|
||||
// creates a metrics provider that exports to the same OTLP endpoint.
|
||||
//
|
||||
// The function is safe to call multiple times - it will only initialize once.
|
||||
// If tracing configuration is not available, it returns a no-op provider that
|
||||
// doesn't export metrics.
|
||||
//
|
||||
// Returns an error only if there's a configuration problem. Missing tracing
|
||||
// configuration is handled gracefully with a warning log.
|
||||
func Setup(ctx context.Context) error {
|
||||
setupOnce.Do(func() {
|
||||
setupErr = initializeMetrics(ctx)
|
||||
})
|
||||
return setupErr
|
||||
}
|
||||
|
||||
// GetMeter returns a named meter for creating metric instruments.
|
||||
// The meter uses the configured metrics provider, or the global provider
|
||||
// if metrics haven't been set up yet.
|
||||
//
|
||||
// This is the primary entry point for creating metric instruments in your application.
|
||||
func GetMeter(name string, opts ...metric.MeterOption) metric.Meter {
|
||||
if meterProvider == nil {
|
||||
// Return the global provider as fallback (no-op if not configured)
|
||||
return otel.GetMeterProvider().Meter(name, opts...)
|
||||
}
|
||||
return meterProvider.Meter(name, opts...)
|
||||
}
|
||||
|
||||
// initializeMetrics sets up the OpenTelemetry metrics provider with OTLP export.
|
||||
func initializeMetrics(ctx context.Context) error {
|
||||
log := slog.Default()
|
||||
|
||||
// Check if tracing configuration is available
|
||||
cfg, configCtx, factory := tracerconfig.GetMetricExporter()
|
||||
if cfg == nil || configCtx == nil || factory == nil {
|
||||
log.Warn("metrics setup: tracing configuration not available, using no-op provider")
|
||||
// Set the global provider as fallback - metrics just won't be exported
|
||||
meterProvider = otel.GetMeterProvider()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create OTLP metrics exporter
|
||||
exporter, err := factory(ctx, cfg)
|
||||
if err != nil {
|
||||
log.Error("metrics setup: failed to create OTLP exporter", "error", err)
|
||||
// Fall back to global provider
|
||||
meterProvider = otel.GetMeterProvider()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create metrics provider with the exporter
|
||||
provider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(
|
||||
exporter,
|
||||
sdkmetric.WithInterval(15*time.Second),
|
||||
)),
|
||||
)
|
||||
|
||||
// Set the global provider
|
||||
otel.SetMeterProvider(provider)
|
||||
meterProvider = provider
|
||||
|
||||
log.Info("metrics setup: OTLP metrics provider initialized")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown gracefully shuts down the metrics provider.
|
||||
// This should be called during application shutdown to ensure all metrics
|
||||
// are properly flushed and exported.
|
||||
func Shutdown(ctx context.Context) error {
|
||||
if provider, ok := meterProvider.(*sdkmetric.MeterProvider); ok {
|
||||
return provider.Shutdown(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
296
metrics/metrics_test.go
Normal file
296
metrics/metrics_test.go
Normal file
@@ -0,0 +1,296 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"go.ntppool.org/common/internal/tracerconfig"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric/metricdata"
|
||||
)
|
||||
|
||||
func TestSetup_NoConfiguration(t *testing.T) {
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
ctx := context.Background()
|
||||
err := Setup(ctx)
|
||||
// Should not return an error even when no configuration is available
|
||||
if err != nil {
|
||||
t.Errorf("Setup() returned unexpected error: %v", err)
|
||||
}
|
||||
|
||||
// Should be able to get a meter (even if it's a no-op)
|
||||
meter := GetMeter("test-meter")
|
||||
if meter == nil {
|
||||
t.Error("GetMeter() returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetMeter(t *testing.T) {
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
ctx := context.Background()
|
||||
_ = Setup(ctx)
|
||||
|
||||
meter := GetMeter("test-service")
|
||||
if meter == nil {
|
||||
t.Fatal("GetMeter() returned nil")
|
||||
}
|
||||
|
||||
// Test creating a counter instrument
|
||||
counter, err := meter.Int64Counter("test_counter")
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create counter: %v", err)
|
||||
}
|
||||
|
||||
// Test using the counter (should not error even with no-op provider)
|
||||
counter.Add(ctx, 1, metric.WithAttributes(attribute.String("test", "value")))
|
||||
}
|
||||
|
||||
func TestSetup_MultipleCallsSafe(t *testing.T) {
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Call Setup multiple times
|
||||
err1 := Setup(ctx)
|
||||
err2 := Setup(ctx)
|
||||
err3 := Setup(ctx)
|
||||
|
||||
if err1 != nil {
|
||||
t.Errorf("First Setup() call returned error: %v", err1)
|
||||
}
|
||||
if err2 != nil {
|
||||
t.Errorf("Second Setup() call returned error: %v", err2)
|
||||
}
|
||||
if err3 != nil {
|
||||
t.Errorf("Third Setup() call returned error: %v", err3)
|
||||
}
|
||||
|
||||
// Should still be able to get meters
|
||||
meter := GetMeter("test-meter")
|
||||
if meter == nil {
|
||||
t.Error("GetMeter() returned nil after multiple Setup() calls")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetup_WithConfiguration(t *testing.T) {
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
ctx := context.Background()
|
||||
config := &tracerconfig.Config{
|
||||
ServiceName: "test-metrics-service",
|
||||
Environment: "test",
|
||||
Endpoint: "localhost:4317", // Will likely fail to connect, but should set up provider
|
||||
}
|
||||
|
||||
// Create a mock exporter factory that returns a working exporter
|
||||
mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) {
|
||||
// Create a simple in-memory exporter for testing
|
||||
return &mockMetricExporter{}, nil
|
||||
}
|
||||
|
||||
// Store configuration with mock factory
|
||||
tracerconfig.Store(ctx, config, nil, mockFactory, nil)
|
||||
|
||||
// Setup metrics
|
||||
err := Setup(ctx)
|
||||
if err != nil {
|
||||
t.Errorf("Setup() returned error: %v", err)
|
||||
}
|
||||
|
||||
// Should be able to get a meter
|
||||
meter := GetMeter("test-service")
|
||||
if meter == nil {
|
||||
t.Fatal("GetMeter() returned nil")
|
||||
}
|
||||
|
||||
// Test creating and using instruments
|
||||
counter, err := meter.Int64Counter("test_counter")
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create counter: %v", err)
|
||||
}
|
||||
|
||||
histogram, err := meter.Float64Histogram("test_histogram")
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create histogram: %v", err)
|
||||
}
|
||||
|
||||
gauge, err := meter.Int64UpDownCounter("test_gauge")
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create gauge: %v", err)
|
||||
}
|
||||
|
||||
// Use the instruments
|
||||
counter.Add(ctx, 1, metric.WithAttributes(attribute.String("test", "value")))
|
||||
histogram.Record(ctx, 1.5, metric.WithAttributes(attribute.String("test", "value")))
|
||||
gauge.Add(ctx, 10, metric.WithAttributes(attribute.String("test", "value")))
|
||||
|
||||
// Test shutdown
|
||||
err = Shutdown(ctx)
|
||||
if err != nil {
|
||||
t.Errorf("Shutdown() returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetup_WithRealOTLPConfig(t *testing.T) {
|
||||
// Skip this test in short mode since it may try to make network connections
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
// Set environment variables for OTLP configuration
|
||||
originalEndpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
|
||||
originalProtocol := os.Getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
|
||||
|
||||
defer func() {
|
||||
if originalEndpoint != "" {
|
||||
os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", originalEndpoint)
|
||||
} else {
|
||||
os.Unsetenv("OTEL_EXPORTER_OTLP_ENDPOINT")
|
||||
}
|
||||
if originalProtocol != "" {
|
||||
os.Setenv("OTEL_EXPORTER_OTLP_PROTOCOL", originalProtocol)
|
||||
} else {
|
||||
os.Unsetenv("OTEL_EXPORTER_OTLP_PROTOCOL")
|
||||
}
|
||||
}()
|
||||
|
||||
os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") // HTTP endpoint
|
||||
os.Setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf")
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
config := &tracerconfig.Config{
|
||||
ServiceName: "test-metrics-e2e",
|
||||
Environment: "test",
|
||||
Endpoint: "localhost:4318",
|
||||
}
|
||||
|
||||
// Store configuration with real factory
|
||||
tracerconfig.Store(ctx, config, nil, tracerconfig.CreateOTLPMetricExporter, nil)
|
||||
|
||||
// Setup metrics - this may fail if no OTLP collector is running, which is okay
|
||||
err := Setup(ctx)
|
||||
if err != nil {
|
||||
t.Logf("Setup() returned error (expected if no OTLP collector): %v", err)
|
||||
}
|
||||
|
||||
// Should still be able to get a meter
|
||||
meter := GetMeter("test-service-e2e")
|
||||
if meter == nil {
|
||||
t.Fatal("GetMeter() returned nil")
|
||||
}
|
||||
|
||||
// Create and use instruments
|
||||
counter, err := meter.Int64Counter("e2e_test_counter")
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create counter: %v", err)
|
||||
}
|
||||
|
||||
// Add some metrics
|
||||
for i := 0; i < 5; i++ {
|
||||
counter.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("iteration", string(rune('0'+i))),
|
||||
attribute.String("test_type", "e2e"),
|
||||
))
|
||||
}
|
||||
|
||||
// Give some time for export (if collector is running)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// Test shutdown
|
||||
err = Shutdown(ctx)
|
||||
if err != nil {
|
||||
t.Logf("Shutdown() returned error (may be expected): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConcurrentMetricUsage(t *testing.T) {
|
||||
// Clear any existing configuration
|
||||
tracerconfig.Clear()
|
||||
|
||||
ctx := context.Background()
|
||||
config := &tracerconfig.Config{
|
||||
ServiceName: "concurrent-test",
|
||||
}
|
||||
|
||||
// Use mock factory
|
||||
mockFactory := func(ctx context.Context, cfg *tracerconfig.Config) (sdkmetric.Exporter, error) {
|
||||
return &mockMetricExporter{}, nil
|
||||
}
|
||||
|
||||
tracerconfig.Store(ctx, config, nil, mockFactory, nil)
|
||||
Setup(ctx)
|
||||
|
||||
meter := GetMeter("concurrent-test")
|
||||
counter, err := meter.Int64Counter("concurrent_counter")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create counter: %v", err)
|
||||
}
|
||||
|
||||
// Test concurrent metric usage
|
||||
const numGoroutines = 10
|
||||
const metricsPerGoroutine = 100
|
||||
|
||||
done := make(chan bool, numGoroutines)
|
||||
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
go func(goroutineID int) {
|
||||
for j := 0; j < metricsPerGoroutine; j++ {
|
||||
counter.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.Int("goroutine", goroutineID),
|
||||
attribute.Int("iteration", j),
|
||||
))
|
||||
}
|
||||
done <- true
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Wait for all goroutines to complete
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
// Shutdown
|
||||
err = Shutdown(ctx)
|
||||
if err != nil {
|
||||
t.Errorf("Shutdown() returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// mockMetricExporter is a simple mock exporter for testing
|
||||
type mockMetricExporter struct{}
|
||||
|
||||
func (m *mockMetricExporter) Export(ctx context.Context, rm *metricdata.ResourceMetrics) error {
|
||||
// Just pretend to export
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockMetricExporter) ForceFlush(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockMetricExporter) Shutdown(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockMetricExporter) Temporality(kind sdkmetric.InstrumentKind) metricdata.Temporality {
|
||||
return metricdata.CumulativeTemporality
|
||||
}
|
||||
|
||||
func (m *mockMetricExporter) Aggregation(kind sdkmetric.InstrumentKind) sdkmetric.Aggregation {
|
||||
return sdkmetric.DefaultAggregationSelector(kind)
|
||||
}
|
Reference in New Issue
Block a user