feat(metrics): add OTLP metrics support with centralized config

- Create new metrics/ package for OpenTelemetry-native metrics with OTLP export
- Refactor OTLP configuration to internal/tracerconfig/ to eliminate code duplication
- Add consistent retry configuration across all HTTP OTLP exporters
- Add configuration validation and improved error messages
- Include test coverage for all new functionality
- Make OpenTelemetry metrics dependencies explicit in go.mod

Designed for new applications requiring structured metrics export to
observability backends via OTLP protocol.
This commit is contained in:
2025-08-02 09:29:27 -07:00
parent 796b2a8412
commit c6230be91e
7 changed files with 1274 additions and 224 deletions

View File

@@ -15,11 +15,11 @@ import (
func TestNew(t *testing.T) {
metrics := New()
if metrics == nil {
t.Fatal("New returned nil")
}
if metrics.r == nil {
t.Error("metrics registry is nil")
}
@@ -28,32 +28,32 @@ func TestNew(t *testing.T) {
func TestRegistry(t *testing.T) {
metrics := New()
registry := metrics.Registry()
if registry == nil {
t.Fatal("Registry() returned nil")
}
if registry != metrics.r {
t.Error("Registry() did not return the metrics registry")
}
// Test that we can register a metric
counter := prometheus.NewCounter(prometheus.CounterOpts{
Name: "test_counter",
Help: "A test counter",
})
err := registry.Register(counter)
if err != nil {
t.Errorf("failed to register metric: %v", err)
}
// Test that the metric is registered
metricFamilies, err := registry.Gather()
if err != nil {
t.Errorf("failed to gather metrics: %v", err)
}
found := false
for _, mf := range metricFamilies {
if mf.GetName() == "test_counter" {
@@ -61,7 +61,7 @@ func TestRegistry(t *testing.T) {
break
}
}
if !found {
t.Error("registered metric not found in registry")
}
@@ -69,7 +69,7 @@ func TestRegistry(t *testing.T) {
func TestHandler(t *testing.T) {
metrics := New()
// Register a test metric
counter := prometheus.NewCounterVec(
prometheus.CounterOpts{
@@ -80,40 +80,40 @@ func TestHandler(t *testing.T) {
)
metrics.Registry().MustRegister(counter)
counter.WithLabelValues("GET").Inc()
// Test the handler
handler := metrics.Handler()
if handler == nil {
t.Fatal("Handler() returned nil")
}
// Create a test request
req := httptest.NewRequest("GET", "/metrics", nil)
recorder := httptest.NewRecorder()
// Call the handler
handler.ServeHTTP(recorder, req)
// Check response
resp := recorder.Result()
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Errorf("expected status 200, got %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatalf("failed to read response body: %v", err)
}
bodyStr := string(body)
// Check for our test metric
if !strings.Contains(bodyStr, "test_requests_total") {
t.Error("test metric not found in metrics output")
}
// Check for OpenMetrics format indicators
if !strings.Contains(bodyStr, "# TYPE") {
t.Error("metrics output missing TYPE comments")
@@ -122,7 +122,7 @@ func TestHandler(t *testing.T) {
func TestListenAndServe(t *testing.T) {
metrics := New()
// Register a test metric
counter := prometheus.NewCounterVec(
prometheus.CounterOpts{
@@ -133,46 +133,46 @@ func TestListenAndServe(t *testing.T) {
)
metrics.Registry().MustRegister(counter)
counter.WithLabelValues("GET").Inc()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Start server in a goroutine
errCh := make(chan error, 1)
go func() {
// Use a high port number to avoid conflicts
errCh <- metrics.ListenAndServe(ctx, 9999)
}()
// Give the server a moment to start
time.Sleep(100 * time.Millisecond)
// Test metrics endpoint
resp, err := http.Get("http://localhost:9999/metrics")
if err != nil {
t.Fatalf("failed to GET /metrics: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Errorf("expected status 200, got %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatalf("failed to read response body: %v", err)
}
bodyStr := string(body)
// Check for our test metric
if !strings.Contains(bodyStr, "test_requests_total") {
t.Error("test metric not found in metrics output")
}
// Cancel context to stop server
cancel()
// Wait for server to stop
select {
case err := <-errCh:
@@ -186,21 +186,21 @@ func TestListenAndServe(t *testing.T) {
func TestListenAndServeContextCancellation(t *testing.T) {
metrics := New()
ctx, cancel := context.WithCancel(context.Background())
// Start server
errCh := make(chan error, 1)
go func() {
errCh <- metrics.ListenAndServe(ctx, 9998)
}()
// Give server time to start
time.Sleep(100 * time.Millisecond)
// Cancel context
cancel()
// Server should stop gracefully
select {
case err := <-errCh:
@@ -215,7 +215,7 @@ func TestListenAndServeContextCancellation(t *testing.T) {
// Benchmark the metrics handler response time
func BenchmarkMetricsHandler(b *testing.B) {
metrics := New()
// Register some test metrics
for i := 0; i < 10; i++ {
counter := prometheus.NewCounter(prometheus.CounterOpts{
@@ -225,18 +225,18 @@ func BenchmarkMetricsHandler(b *testing.B) {
metrics.Registry().MustRegister(counter)
counter.Add(float64(i * 100))
}
handler := metrics.Handler()
b.ResetTimer()
for i := 0; i < b.N; i++ {
req := httptest.NewRequest("GET", "/metrics", nil)
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != http.StatusOK {
b.Fatalf("unexpected status code: %d", recorder.Code)
}
}
}
}