From 33d7d1aacfa8b08ebed321c72dcd2a11d0cc4881 Mon Sep 17 00:00:00 2001 From: Philip Paeps Date: Sun, 25 Sep 2022 07:52:08 +0800 Subject: [PATCH] Improve collection of RRSIG expiration times (#3) * Improve collection of RRSIG expiration times A new record_earliest_rrsig_expiry metric contains the unixtime of the earliest expiring signature per resolver. This allows for different alerting configurations when monitoring a mix of authoritative and caching resolvers. Use a single DNS query instead of querying for RRSIG separately. While some resolvers (reasonably enough) return REFUSED when queried for type RRSIG, they will include relevant RRSIG records when queried for other types (as required by RFC 4034). * Document the improved RRSIG expiration handling While here, also clarify the limitations of the record_days_left metric. --- README.md | 29 ++++++++++++++ main.go | 108 ++++++++++++++++++++++++--------------------------- main_test.go | 48 ++++++++++++++++------- 3 files changed, 113 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index f6c9716..e52b5cf 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,27 @@ Labels: * `record` * `type` +If more than one resolver is configured, the metric will be calculated from the +resolver that is configured first. If more than one RRSIG covers the record, +the number of days until the first one expires will be returned. If the record +is not signed or the signature cannot be validated, this metric will contain a +bogus value. + +### Gauge: `dnssec_zone_record_earliest_rrsig_expiry` + +Earliest expiring RRSIG covering the record on resolver in unixtime. + +Labels: + +* `resolver` +* `zone` +* `record` +* `type` + +If more than one RRSIG covers the record, the expiration time returned will be +of the one that expires earliest. If the record does not resolve or cannot be +validated, this metric will be absent. 
+ ### Gauge: `dnssec_zone_record_resolves` Does the record resolve using the specified DNSSEC enabled resolvers. @@ -41,6 +62,8 @@ Labels: * `record` * `type` +This metric will return 1 only if the record resolves **and** validates. + ### Examples # HELP dnssec_zone_record_days_left Number of days the signature will be valid @@ -53,6 +76,12 @@ Labels: dnssec_zone_record_resolves{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1 dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1 dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1 + # HELP dnssec_zone_record_earliest_rrsig_expiry Earliest expiring RRSIG covering the record on resolver in unixtime + # TYPE dnssec_zone_record_earliest_rrsig_expiry gauge + dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="ietf.org"} 1.664872679e+09 + dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09 + dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1.664872679e+09 + dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09 ## Configuration diff --git a/main.go b/main.go index 2316120..87d8b94 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ type Exporter struct { records *prometheus.GaugeVec resolves *prometheus.GaugeVec + expiry *prometheus.GaugeVec resolvers []string dnsClient *dns.Client @@ -73,6 +74,20 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger) "type", }, ), + expiry: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "dnssec", + Subsystem: "zone", + Name: "record_earliest_rrsig_expiry", + Help: "Earliest expiring RRSIG covering the record on resolver in unixtime", + }, + []string{ + "resolver", + "zone", + "record", + "type", + }, + ), 
dnsClient: &dns.Client{ Net: "tcp", Timeout: timeout, @@ -85,32 +100,19 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger) func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { e.records.Describe(ch) e.resolves.Describe(ch) + e.expiry.Describe(ch) } func (e *Exporter) Collect(ch chan<- prometheus.Metric) { var wg sync.WaitGroup - wg.Add(len(e.Records) * (len(e.resolvers) + 1)) + wg.Add(len(e.Records) * (len(e.resolvers))) for _, rec := range e.Records { rec := rec - // Check the expiration - - go func() { - - exp := e.expiration(rec.Zone, rec.Record, rec.Type) - - e.records.WithLabelValues( - rec.Zone, rec.Record, rec.Type, - ).Set(float64(time.Until(exp)/time.Hour) / 24) - - wg.Done() - - }() - // Check the configured resolvers for _, resolver := range e.resolvers { @@ -119,12 +121,29 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { go func() { - resolves := e.resolve(rec.Zone, rec.Record, rec.Type, resolver) + resolves, expires := e.resolve(rec.Zone, rec.Record, rec.Type, resolver) e.resolves.WithLabelValues( resolver, rec.Zone, rec.Record, rec.Type, ).Set(map[bool]float64{true: 1}[resolves]) + // Only return the signature expiry if the record resolves. + if resolves { + e.expiry.WithLabelValues( + resolver, rec.Zone, rec.Record, rec.Type, + ).Set(float64(expires.Unix())) + } + + // For compatibility with historical behaviour, record_days_left + // returns the time until the earliest RRSIG expiration on the + // first configured resolver. This value will be bogus if that + // resolver fails to resolve and validate the record. 
+ if (resolver == e.resolvers[0]) { + e.records.WithLabelValues( + rec.Zone, rec.Record, rec.Type, + ).Set(float64(time.Until(expires)/time.Hour) / 24) + } + wg.Done() }() @@ -137,49 +156,11 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { e.records.Collect(ch) e.resolves.Collect(ch) + e.expiry.Collect(ch) } -func (e *Exporter) expiration(zone, record, recordType string) (exp time.Time) { - - msg := &dns.Msg{} - msg.SetQuestion(hostname(zone, record), dns.TypeRRSIG) - - response, _, err := e.dnsClient.Exchange(msg, e.resolvers[0]) - if err != nil { - e.logger.Printf("while looking up RRSIG for %v: %v", hostname(zone, record), err) - return - } - - var sig *dns.RRSIG - - for _, rr := range response.Answer { - - if rrsig, ok := rr.(*dns.RRSIG); ok && - rrsig.TypeCovered == dns.StringToType[recordType] { - - sig = rrsig - break - - } - } - - if sig == nil { - e.logger.Printf("didn't find RRSIG for %v covering type %v matching a tag of a DNSKEY", hostname(zone, record), recordType) - return - } - - exp = time.Unix(int64(sig.Expiration), 0) - if exp.IsZero() { - e.logger.Printf("zero exp for RRSIG for %v covering type %v", hostname(zone, record), recordType) - return - } - - return - -} - -func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool) { +func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool, expires time.Time) { msg := &dns.Msg{} msg.SetQuestion(hostname(zone, record), dns.StringToType[recordType]) @@ -187,14 +168,25 @@ func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves response, _, err := e.dnsClient.Exchange(msg, resolver) if err != nil { - e.logger.Printf("while resolving for %v: %v", hostname(zone, record), err) + e.logger.Printf("error resolving %v %v on %v: %v", hostname(zone, record), recordType, resolver, err) return } - return response.AuthenticatedData && + resolves = response.AuthenticatedData && !response.CheckingDisabled && response.Rcode == 
dns.RcodeSuccess + // If multiple RRSIGs cover our record, return the one that will expire the earliest. + for _, rr := range response.Answer { + if rrsig, ok := rr.(*dns.RRSIG); ok { + sigexp := time.Unix(int64(rrsig.Expiration), 0) + if (expires.IsZero() || sigexp.Before(expires) && !sigexp.IsZero()) { + expires = sigexp; + } + } + } + + return } func hostname(zone, record string) string { diff --git a/main_test.go b/main_test.go index 710ef2d..4c95f0f 100644 --- a/main_test.go +++ b/main_test.go @@ -16,6 +16,7 @@ type opts struct { expires time.Time rcode int unauthenticated bool + noedns0support bool } func nullLogger() *log.Logger { @@ -66,7 +67,7 @@ func runServer(t *testing.T, opts opts) ([]string, func()) { switch q.Qtype { - case dns.TypeRRSIG: + case dns.TypeSOA: rrHeader := dns.RR_Header{ Name: q.Name, @@ -74,8 +75,13 @@ func runServer(t *testing.T, opts opts) ([]string, func()) { Class: dns.ClassINET, Ttl: 3600, } + msg.Answer = append(msg.Answer, soa) - answer := &dns.RRSIG{ + if opts.noedns0support { + break + } + + rrsig := &dns.RRSIG{ Hdr: rrHeader, TypeCovered: dns.TypeSOA, Algorithm: dnskey.Algorithm, @@ -87,19 +93,15 @@ func runServer(t *testing.T, opts opts) ([]string, func()) { SignerName: q.Name, } - if err := answer.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil { + if err := rrsig.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil { t.Fatalf("couldn't sign SOA record: %v", err) } - msg.Answer = append(msg.Answer, answer) - - case dns.TypeSOA: - - msg.Answer = append(msg.Answer, soa) + msg.Answer = append(msg.Answer, rrsig) } - msg.AuthenticatedData = !opts.unauthenticated + msg.AuthenticatedData = !opts.unauthenticated && !opts.noedns0support msg.Rcode = opts.rcode rw.WriteMsg(msg) @@ -141,7 +143,7 @@ func TestExpirationOK(t *testing.T) { e := NewDNSSECExporter(time.Second, addr, nullLogger()) - exp := e.expiration("example.org", "@", "SOA") + _, exp := e.resolve("example.org", "@", "SOA", addr[0]) if 
exp.Before(time.Now()) { t.Fatalf("expected expiration to be in the future, was: %v", exp) @@ -160,7 +162,7 @@ func TestExpired(t *testing.T) { e := NewDNSSECExporter(time.Second, addr, nullLogger()) - exp := e.expiration("example.org", "@", "SOA") + _, exp := e.resolve("example.org", "@", "SOA", addr[0]) if exp.After(time.Now()) { t.Fatalf("expected expiration to be in the past, was: %v", exp) @@ -179,7 +181,7 @@ func TestValid(t *testing.T) { e := NewDNSSECExporter(time.Second, addr, nullLogger()) - valid := e.resolve("example.org", "@", "SOA", addr[0]) + valid, _ := e.resolve("example.org", "@", "SOA", addr[0]) if !valid { t.Fatal("expected valid result") @@ -197,7 +199,7 @@ func TestInvalidError(t *testing.T) { e := NewDNSSECExporter(time.Second, addr, nullLogger()) - valid := e.resolve("example.org", "@", "SOA", addr[0]) + valid, _ := e.resolve("example.org", "@", "SOA", addr[0]) if valid { t.Fatal("expected invalid result") @@ -215,7 +217,25 @@ func TestInvalidUnauthenticated(t *testing.T) { e := NewDNSSECExporter(time.Second, addr, nullLogger()) - valid := e.resolve("example.org", "@", "SOA", addr[0]) + valid, _ := e.resolve("example.org", "@", "SOA", addr[0]) + + if valid { + t.Fatal("expected invalid result") + } + +} + +func TestNoEDNS0Support(t *testing.T) { + + addr, cancel := runServer(t, opts{ + noedns0support: true, + }) + + defer cancel() + + e := NewDNSSECExporter(time.Second, addr, nullLogger()) + + valid, _ := e.resolve("example.org", "@", "SOA", addr[0]) if valid { t.Fatal("expected invalid result")