Improve collection of RRSIG expiration times (#3)

* Improve collection of RRSIG expiration times

A new record_earliest_rrsig_expiry metric contains the unixtime of the
earliest expiring signature per resolver.  This allows for different
alerting configurations when monitoring a mix of authoritative and
caching resolvers.

Use a single DNS query instead of querying for RRSIG separately.  While
some resolvers (reasonably enough) return REFUSED when queried for type
RRSIG, they will include relevant RRSIG records when queried for other
types (as required by RFC 4034).

* Document the improved RRSIG expiration handling

While here, also clarify the limitations of the record_days_left metric.
This commit is contained in:
Philip Paeps 2022-09-25 07:52:08 +08:00 committed by GitHub
parent ad1185ef55
commit 33d7d1aacf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 113 additions and 72 deletions

View File

@ -30,6 +30,27 @@ Labels:
* `record`
* `type`
If more than one resolver is configured, the metric will be calculated from the
resolver that is configured first. If more than one RRSIG covers the record,
the number of days until the first one expires will be returned. If the record
is not signed or the signature cannot be validated, this metric will contain a
bogus timestamp.
### Gauge: `dnssec_zone_record_earliest_rrsig_expiry`
Earliest expiring RRSIG covering the record on resolver in unixtime.
Labels:
* `resolver`
* `zone`
* `record`
* `type`
If more than one RRSIG covers the record, the expiration time returned will be
of the one that expires earliest. If the record does not resolve or cannot be
validated, this metric will be absent.
### Gauge: `dnssec_zone_record_resolves`
Does the record resolve using the specified DNSSEC enabled resolvers.
@ -41,6 +62,8 @@ Labels:
* `record`
* `type`
This metric will return 1 only if the record resolves **and** validates.
### Examples
# HELP dnssec_zone_record_days_left Number of days the signature will be valid
@ -53,6 +76,12 @@ Labels:
dnssec_zone_record_resolves{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1
dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1
dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1
# HELP dnssec_zone_record_earliest_rrsig_expiry Earliest expiring RRSIG covering the record on resolver in unixtime
# TYPE dnssec_zone_record_earliest_rrsig_expiry gauge
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="ietf.org"} 1.664872679e+09
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1.664872679e+09
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09
## Configuration

108
main.go
View File

@ -37,6 +37,7 @@ type Exporter struct {
records *prometheus.GaugeVec
resolves *prometheus.GaugeVec
expiry *prometheus.GaugeVec
resolvers []string
dnsClient *dns.Client
@ -73,6 +74,20 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger)
"type",
},
),
expiry: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "dnssec",
Subsystem: "zone",
Name: "record_earliest_rrsig_expiry",
Help: "Earliest expiring RRSIG covering the record on resolver in unixtime",
},
[]string{
"resolver",
"zone",
"record",
"type",
},
),
dnsClient: &dns.Client{
Net: "tcp",
Timeout: timeout,
@ -85,32 +100,19 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger)
// Describe sends the descriptors of the exporter's three gauge vectors
// (records, resolves and expiry) to ch by delegating to each vector's own
// Describe method.
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
e.records.Describe(ch)
e.resolves.Describe(ch)
e.expiry.Describe(ch)
}
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
var wg sync.WaitGroup
wg.Add(len(e.Records) * (len(e.resolvers) + 1))
wg.Add(len(e.Records) * (len(e.resolvers)))
for _, rec := range e.Records {
rec := rec
// Check the expiration
go func() {
exp := e.expiration(rec.Zone, rec.Record, rec.Type)
e.records.WithLabelValues(
rec.Zone, rec.Record, rec.Type,
).Set(float64(time.Until(exp)/time.Hour) / 24)
wg.Done()
}()
// Check the configured resolvers
for _, resolver := range e.resolvers {
@ -119,12 +121,29 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
go func() {
resolves := e.resolve(rec.Zone, rec.Record, rec.Type, resolver)
resolves, expires := e.resolve(rec.Zone, rec.Record, rec.Type, resolver)
e.resolves.WithLabelValues(
resolver, rec.Zone, rec.Record, rec.Type,
).Set(map[bool]float64{true: 1}[resolves])
// Only return the signature expiry if the record resolves.
if resolves {
e.expiry.WithLabelValues(
resolver, rec.Zone, rec.Record, rec.Type,
).Set(float64(expires.Unix()))
}
// For compatibility with historical behaviour, record_days_left
// returns the time until the earliest RRSIG expiration on the
// first configured resolver. This value will be bogus if that
// resolver fails to resolve and validate the record.
if (resolver == e.resolvers[0]) {
e.records.WithLabelValues(
rec.Zone, rec.Record, rec.Type,
).Set(float64(time.Until(expires)/time.Hour) / 24)
}
wg.Done()
}()
@ -137,49 +156,11 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
e.records.Collect(ch)
e.resolves.Collect(ch)
e.expiry.Collect(ch)
}
// expiration sends a query of type RRSIG for hostname(zone, record) to the
// first configured resolver (e.resolvers[0]) and returns the expiration time
// of the first RRSIG in the answer section whose TypeCovered matches
// recordType.
//
// The zero time.Time is returned when the exchange fails or when no matching
// RRSIG is present in the answer; both cases are logged through e.logger.
// Only the first matching signature is considered, not the earliest one.
func (e *Exporter) expiration(zone, record, recordType string) (exp time.Time) {
msg := &dns.Msg{}
msg.SetQuestion(hostname(zone, record), dns.TypeRRSIG)
response, _, err := e.dnsClient.Exchange(msg, e.resolvers[0])
if err != nil {
e.logger.Printf("while looking up RRSIG for %v: %v", hostname(zone, record), err)
return
}
var sig *dns.RRSIG
// Stop at the first RRSIG covering the requested record type.
for _, rr := range response.Answer {
if rrsig, ok := rr.(*dns.RRSIG); ok &&
rrsig.TypeCovered == dns.StringToType[recordType] {
sig = rrsig
break
}
}
if sig == nil {
e.logger.Printf("didn't find RRSIG for %v covering type %v matching a tag of a DNSKEY", hostname(zone, record), recordType)
return
}
// The signature expiration is interpreted as seconds since the Unix epoch.
exp = time.Unix(int64(sig.Expiration), 0)
// NOTE(review): time.Time.IsZero reports the year-1 zero instant, which a
// value built by time.Unix from a non-negative second count does not appear
// able to equal, so this branch looks unreachable — confirm.
if exp.IsZero() {
e.logger.Printf("zero exp for RRSIG for %v covering type %v", hostname(zone, record), recordType)
return
}
return
}
func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool) {
func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool, expires time.Time) {
msg := &dns.Msg{}
msg.SetQuestion(hostname(zone, record), dns.StringToType[recordType])
@ -187,14 +168,25 @@ func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves
response, _, err := e.dnsClient.Exchange(msg, resolver)
if err != nil {
e.logger.Printf("while resolving for %v: %v", hostname(zone, record), err)
e.logger.Printf("error resolving %v %v on %v: %v", hostname(zone, record), recordType, resolver, err)
return
}
return response.AuthenticatedData &&
resolves = response.AuthenticatedData &&
!response.CheckingDisabled &&
response.Rcode == dns.RcodeSuccess
// If multiple RRSIGs cover our record, return the one that will expire the earliest.
for _, rr := range response.Answer {
if rrsig, ok := rr.(*dns.RRSIG); ok {
sigexp := time.Unix(int64(rrsig.Expiration), 0)
if (expires.IsZero() || sigexp.Before(expires) && !sigexp.IsZero()) {
expires = sigexp;
}
}
}
return
}
func hostname(zone, record string) string {

View File

@ -16,6 +16,7 @@ type opts struct {
expires time.Time
rcode int
unauthenticated bool
noedns0support bool
}
func nullLogger() *log.Logger {
@ -66,7 +67,7 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
switch q.Qtype {
case dns.TypeRRSIG:
case dns.TypeSOA:
rrHeader := dns.RR_Header{
Name: q.Name,
@ -74,8 +75,13 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
Class: dns.ClassINET,
Ttl: 3600,
}
msg.Answer = append(msg.Answer, soa)
answer := &dns.RRSIG{
if opts.noedns0support {
break
}
rrsig := &dns.RRSIG{
Hdr: rrHeader,
TypeCovered: dns.TypeSOA,
Algorithm: dnskey.Algorithm,
@ -87,19 +93,15 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
SignerName: q.Name,
}
if err := answer.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil {
if err := rrsig.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil {
t.Fatalf("couldn't sign SOA record: %v", err)
}
msg.Answer = append(msg.Answer, answer)
case dns.TypeSOA:
msg.Answer = append(msg.Answer, soa)
msg.Answer = append(msg.Answer, rrsig)
}
msg.AuthenticatedData = !opts.unauthenticated
msg.AuthenticatedData = !opts.unauthenticated && !opts.noedns0support
msg.Rcode = opts.rcode
rw.WriteMsg(msg)
@ -141,7 +143,7 @@ func TestExpirationOK(t *testing.T) {
e := NewDNSSECExporter(time.Second, addr, nullLogger())
exp := e.expiration("example.org", "@", "SOA")
_, exp := e.resolve("example.org", "@", "SOA", addr[0])
if exp.Before(time.Now()) {
t.Fatalf("expected expiration to be in the future, was: %v", exp)
@ -160,7 +162,7 @@ func TestExpired(t *testing.T) {
e := NewDNSSECExporter(time.Second, addr, nullLogger())
exp := e.expiration("example.org", "@", "SOA")
_, exp := e.resolve("example.org", "@", "SOA", addr[0])
if exp.After(time.Now()) {
t.Fatalf("expected expiration to be in the past, was: %v", exp)
@ -179,7 +181,7 @@ func TestValid(t *testing.T) {
e := NewDNSSECExporter(time.Second, addr, nullLogger())
valid := e.resolve("example.org", "@", "SOA", addr[0])
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
if !valid {
t.Fatal("expected valid result")
@ -197,7 +199,7 @@ func TestInvalidError(t *testing.T) {
e := NewDNSSECExporter(time.Second, addr, nullLogger())
valid := e.resolve("example.org", "@", "SOA", addr[0])
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
if valid {
t.Fatal("expected invalid result")
@ -215,7 +217,25 @@ func TestInvalidUnauthenticated(t *testing.T) {
e := NewDNSSECExporter(time.Second, addr, nullLogger())
valid := e.resolve("example.org", "@", "SOA", addr[0])
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
if valid {
t.Fatal("expected invalid result")
}
}
func TestNoEDNS0Support(t *testing.T) {
addr, cancel := runServer(t, opts{
noedns0support: true,
})
defer cancel()
e := NewDNSSECExporter(time.Second, addr, nullLogger())
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
if valid {
t.Fatal("expected invalid result")