Improve collection of RRSIG expiration times (#3)
* Improve collection of RRSIG expiration times A new record_earliest_rrsig_expiry metric contains the unixtime of the earliest expiring signature per resolver. This allows for different alerting configurations when monitoring a mix of authoritative and caching resolvers. Use a single DNS query instead of querying for RRSIG separately. While some resolvers (reasonably enough) return REFUSED when queried for type RRSIG, they will include relevant RRSIG records when queried for other types (as required by RFC 4034). * Document the improved RRSIG expiration handling While here, also clarify the limitations of the record_days_left metric.
This commit is contained in:
parent
ad1185ef55
commit
33d7d1aacf
29
README.md
29
README.md
@ -30,6 +30,27 @@ Labels:
|
||||
* `record`
|
||||
* `type`
|
||||
|
||||
If more than one resolver is configured, the metric will be calculated from the
|
||||
resolver that is configured first. If more than one RRSIG covers the record,
|
||||
the number of days until the first one expires will be returned. If the record
|
||||
is not signed or the signature cannot be validated, this metric will contain a
|
||||
bogus timestamp.
|
||||
|
||||
### Gauge: `dnssec_zone_record_earliest_rrsig_expiry`
|
||||
|
||||
Expiration time, as a Unix timestamp, of the earliest-expiring RRSIG covering the record on the given resolver.
|
||||
|
||||
Labels:
|
||||
|
||||
* `resolver`
|
||||
* `zone`
|
||||
* `record`
|
||||
* `type`
|
||||
|
||||
If more than one RRSIG covers the record, the expiration time returned will be
|
||||
of the one that expires earliest. If the record does not resolve or cannot be
|
||||
validated, this metric will be absent.
|
||||
|
||||
### Gauge: `dnssec_zone_record_resolves`
|
||||
|
||||
Whether the record resolves using the specified DNSSEC-enabled resolvers.
|
||||
@ -41,6 +62,8 @@ Labels:
|
||||
* `record`
|
||||
* `type`
|
||||
|
||||
This metric will return 1 only if the record resolves **and** validates.
|
||||
|
||||
### Examples
|
||||
|
||||
# HELP dnssec_zone_record_days_left Number of days the signature will be valid
|
||||
@ -53,6 +76,12 @@ Labels:
|
||||
dnssec_zone_record_resolves{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1
|
||||
dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1
|
||||
dnssec_zone_record_resolves{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1
|
||||
# HELP dnssec_zone_record_earliest_rrsig_expiry Earliest expiring RRSIG covering the record on resolver in unixtime
|
||||
# TYPE dnssec_zone_record_earliest_rrsig_expiry gauge
|
||||
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="ietf.org"} 1.664872679e+09
|
||||
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="1.1.1.1:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09
|
||||
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="ietf.org"} 1.664872679e+09
|
||||
dnssec_zone_record_earliest_rrsig_expiry{record="@",resolver="8.8.8.8:53",type="SOA",zone="verisigninc.com"} 1.664778306e+09
|
||||
|
||||
## Configuration
|
||||
|
||||
|
108
main.go
108
main.go
@ -37,6 +37,7 @@ type Exporter struct {
|
||||
|
||||
records *prometheus.GaugeVec
|
||||
resolves *prometheus.GaugeVec
|
||||
expiry *prometheus.GaugeVec
|
||||
|
||||
resolvers []string
|
||||
dnsClient *dns.Client
|
||||
@ -73,6 +74,20 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger)
|
||||
"type",
|
||||
},
|
||||
),
|
||||
expiry: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "dnssec",
|
||||
Subsystem: "zone",
|
||||
Name: "record_earliest_rrsig_expiry",
|
||||
Help: "Earliest expiring RRSIG covering the record on resolver in unixtime",
|
||||
},
|
||||
[]string{
|
||||
"resolver",
|
||||
"zone",
|
||||
"record",
|
||||
"type",
|
||||
},
|
||||
),
|
||||
dnsClient: &dns.Client{
|
||||
Net: "tcp",
|
||||
Timeout: timeout,
|
||||
@ -85,32 +100,19 @@ func NewDNSSECExporter(timeout time.Duration, resolvers []string, logger Logger)
|
||||
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
||||
e.records.Describe(ch)
|
||||
e.resolves.Describe(ch)
|
||||
e.expiry.Describe(ch)
|
||||
}
|
||||
|
||||
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(len(e.Records) * (len(e.resolvers) + 1))
|
||||
wg.Add(len(e.Records) * (len(e.resolvers)))
|
||||
|
||||
for _, rec := range e.Records {
|
||||
|
||||
rec := rec
|
||||
|
||||
// Check the expiration
|
||||
|
||||
go func() {
|
||||
|
||||
exp := e.expiration(rec.Zone, rec.Record, rec.Type)
|
||||
|
||||
e.records.WithLabelValues(
|
||||
rec.Zone, rec.Record, rec.Type,
|
||||
).Set(float64(time.Until(exp)/time.Hour) / 24)
|
||||
|
||||
wg.Done()
|
||||
|
||||
}()
|
||||
|
||||
// Check the configured resolvers
|
||||
|
||||
for _, resolver := range e.resolvers {
|
||||
@ -119,12 +121,29 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
go func() {
|
||||
|
||||
resolves := e.resolve(rec.Zone, rec.Record, rec.Type, resolver)
|
||||
resolves, expires := e.resolve(rec.Zone, rec.Record, rec.Type, resolver)
|
||||
|
||||
e.resolves.WithLabelValues(
|
||||
resolver, rec.Zone, rec.Record, rec.Type,
|
||||
).Set(map[bool]float64{true: 1}[resolves])
|
||||
|
||||
// Only return the signature expiry if the record resolves.
|
||||
if resolves {
|
||||
e.expiry.WithLabelValues(
|
||||
resolver, rec.Zone, rec.Record, rec.Type,
|
||||
).Set(float64(expires.Unix()))
|
||||
}
|
||||
|
||||
// For compatibility with historical behaviour, record_days_left
|
||||
// returns the time until the earliest RRSIG expiration on the
|
||||
// first configured resolver. This value will be bogus if that
|
||||
// resolver fails to resolve and validate the record.
|
||||
if (resolver == e.resolvers[0]) {
|
||||
e.records.WithLabelValues(
|
||||
rec.Zone, rec.Record, rec.Type,
|
||||
).Set(float64(time.Until(expires)/time.Hour) / 24)
|
||||
}
|
||||
|
||||
wg.Done()
|
||||
|
||||
}()
|
||||
@ -137,49 +156,11 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
e.records.Collect(ch)
|
||||
e.resolves.Collect(ch)
|
||||
e.expiry.Collect(ch)
|
||||
|
||||
}
|
||||
|
||||
func (e *Exporter) expiration(zone, record, recordType string) (exp time.Time) {
|
||||
|
||||
msg := &dns.Msg{}
|
||||
msg.SetQuestion(hostname(zone, record), dns.TypeRRSIG)
|
||||
|
||||
response, _, err := e.dnsClient.Exchange(msg, e.resolvers[0])
|
||||
if err != nil {
|
||||
e.logger.Printf("while looking up RRSIG for %v: %v", hostname(zone, record), err)
|
||||
return
|
||||
}
|
||||
|
||||
var sig *dns.RRSIG
|
||||
|
||||
for _, rr := range response.Answer {
|
||||
|
||||
if rrsig, ok := rr.(*dns.RRSIG); ok &&
|
||||
rrsig.TypeCovered == dns.StringToType[recordType] {
|
||||
|
||||
sig = rrsig
|
||||
break
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if sig == nil {
|
||||
e.logger.Printf("didn't find RRSIG for %v covering type %v matching a tag of a DNSKEY", hostname(zone, record), recordType)
|
||||
return
|
||||
}
|
||||
|
||||
exp = time.Unix(int64(sig.Expiration), 0)
|
||||
if exp.IsZero() {
|
||||
e.logger.Printf("zero exp for RRSIG for %v covering type %v", hostname(zone, record), recordType)
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool) {
|
||||
func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves bool, expires time.Time) {
|
||||
|
||||
msg := &dns.Msg{}
|
||||
msg.SetQuestion(hostname(zone, record), dns.StringToType[recordType])
|
||||
@ -187,14 +168,25 @@ func (e *Exporter) resolve(zone, record, recordType, resolver string) (resolves
|
||||
|
||||
response, _, err := e.dnsClient.Exchange(msg, resolver)
|
||||
if err != nil {
|
||||
e.logger.Printf("while resolving for %v: %v", hostname(zone, record), err)
|
||||
e.logger.Printf("error resolving %v %v on %v: %v", hostname(zone, record), recordType, resolver, err)
|
||||
return
|
||||
}
|
||||
|
||||
return response.AuthenticatedData &&
|
||||
resolves = response.AuthenticatedData &&
|
||||
!response.CheckingDisabled &&
|
||||
response.Rcode == dns.RcodeSuccess
|
||||
|
||||
// If multiple RRSIGs cover our record, return the one that will expire the earliest.
|
||||
for _, rr := range response.Answer {
|
||||
if rrsig, ok := rr.(*dns.RRSIG); ok {
|
||||
sigexp := time.Unix(int64(rrsig.Expiration), 0)
|
||||
if (expires.IsZero() || sigexp.Before(expires) && !sigexp.IsZero()) {
|
||||
expires = sigexp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func hostname(zone, record string) string {
|
||||
|
48
main_test.go
48
main_test.go
@ -16,6 +16,7 @@ type opts struct {
|
||||
expires time.Time
|
||||
rcode int
|
||||
unauthenticated bool
|
||||
noedns0support bool
|
||||
}
|
||||
|
||||
func nullLogger() *log.Logger {
|
||||
@ -66,7 +67,7 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
|
||||
|
||||
switch q.Qtype {
|
||||
|
||||
case dns.TypeRRSIG:
|
||||
case dns.TypeSOA:
|
||||
|
||||
rrHeader := dns.RR_Header{
|
||||
Name: q.Name,
|
||||
@ -74,8 +75,13 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
|
||||
Class: dns.ClassINET,
|
||||
Ttl: 3600,
|
||||
}
|
||||
msg.Answer = append(msg.Answer, soa)
|
||||
|
||||
answer := &dns.RRSIG{
|
||||
if opts.noedns0support {
|
||||
break
|
||||
}
|
||||
|
||||
rrsig := &dns.RRSIG{
|
||||
Hdr: rrHeader,
|
||||
TypeCovered: dns.TypeSOA,
|
||||
Algorithm: dnskey.Algorithm,
|
||||
@ -87,19 +93,15 @@ func runServer(t *testing.T, opts opts) ([]string, func()) {
|
||||
SignerName: q.Name,
|
||||
}
|
||||
|
||||
if err := answer.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil {
|
||||
if err := rrsig.Sign(privkey.(*ecdsa.PrivateKey), []dns.RR{soa}); err != nil {
|
||||
t.Fatalf("couldn't sign SOA record: %v", err)
|
||||
}
|
||||
|
||||
msg.Answer = append(msg.Answer, answer)
|
||||
|
||||
case dns.TypeSOA:
|
||||
|
||||
msg.Answer = append(msg.Answer, soa)
|
||||
msg.Answer = append(msg.Answer, rrsig)
|
||||
|
||||
}
|
||||
|
||||
msg.AuthenticatedData = !opts.unauthenticated
|
||||
msg.AuthenticatedData = !opts.unauthenticated && !opts.noedns0support
|
||||
msg.Rcode = opts.rcode
|
||||
|
||||
rw.WriteMsg(msg)
|
||||
@ -141,7 +143,7 @@ func TestExpirationOK(t *testing.T) {
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
exp := e.expiration("example.org", "@", "SOA")
|
||||
_, exp := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if exp.Before(time.Now()) {
|
||||
t.Fatalf("expected expiration to be in the future, was: %v", exp)
|
||||
@ -160,7 +162,7 @@ func TestExpired(t *testing.T) {
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
exp := e.expiration("example.org", "@", "SOA")
|
||||
_, exp := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if exp.After(time.Now()) {
|
||||
t.Fatalf("expected expiration to be in the past, was: %v", exp)
|
||||
@ -179,7 +181,7 @@ func TestValid(t *testing.T) {
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
valid := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if !valid {
|
||||
t.Fatal("expected valid result")
|
||||
@ -197,7 +199,7 @@ func TestInvalidError(t *testing.T) {
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
valid := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if valid {
|
||||
t.Fatal("expected invalid result")
|
||||
@ -215,7 +217,25 @@ func TestInvalidUnauthenticated(t *testing.T) {
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
valid := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if valid {
|
||||
t.Fatal("expected invalid result")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestNoEDNS0Support(t *testing.T) {
|
||||
|
||||
addr, cancel := runServer(t, opts{
|
||||
noedns0support: true,
|
||||
})
|
||||
|
||||
defer cancel()
|
||||
|
||||
e := NewDNSSECExporter(time.Second, addr, nullLogger())
|
||||
|
||||
valid, _ := e.resolve("example.org", "@", "SOA", addr[0])
|
||||
|
||||
if valid {
|
||||
t.Fatal("expected invalid result")
|
||||
|
Loading…
Reference in New Issue
Block a user