net/netmon, wgengine/userspace: purge ChangeDelta.Major and address TODOs (#17823)

updates tailscale/corp#33891

Addresses several of the older TODOs in netmon. This removes the
Major flag and precomputes the ChangeDelta state, rather than making
consumers of ChangeDeltas sort that out themselves. We're also seeing
a lot of ChangeDeltas being flagged as "Major" when they are
not interesting, triggering rebinds in wgengine that are not needed. This
cleans that up and adds a host of additional tests.

The dependencies are cleaned up, notably removing the dependency on netmon
itself for calculating what is interesting and what is not. This includes letting
individual platforms set a bespoke global "IsInterestingInterface"
function. This is only used on Darwin.

RebindLikelyRequired now roughly follows how "Major" was historically
calculated but includes some additional checks for various
uninteresting events, such as changes in interface addresses that
shouldn't trigger a rebind. This significantly reduces thrashing (by
roughly half on Darwin clients when switching between NICs). The individual
values that we roll into RebindLikelyRequired are also exposed so that
components consuming netmon.ChangeDelta can ask more
targeted questions.

Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
Jonathan Nobels
2025-12-17 12:32:40 -05:00
committed by GitHub
parent 0fd1670a59
commit 3e89068792
19 changed files with 754 additions and 273 deletions
+31 -14
View File
@@ -1349,20 +1349,18 @@ func (e *userspaceEngine) Done() <-chan struct{} {
}
func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
changed := delta.Major // TODO(bradfitz): ask more specific questions?
cur := delta.New
up := cur.AnyInterfaceUp()
up := delta.AnyInterfaceUp()
if !up {
e.logf("LinkChange: all links down; pausing: %v", cur)
} else if changed {
e.logf("LinkChange: major, rebinding. New state: %v", cur)
e.logf("LinkChange: all links down; pausing: %v", delta.StateDesc())
} else if delta.RebindLikelyRequired {
e.logf("LinkChange: major, rebinding: %v", delta.StateDesc())
} else {
e.logf("[v1] LinkChange: minor")
}
e.health.SetAnyInterfaceUp(up)
e.magicConn.SetNetworkUp(up)
if !up || changed {
if !up || delta.RebindLikelyRequired {
if err := e.dns.FlushCaches(); err != nil {
e.logf("wgengine: dns flush failed after major link change: %v", err)
}
@@ -1372,9 +1370,20 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
// suspend/resume or whenever NetworkManager is started, it
// nukes all systemd-resolved configs. So reapply our DNS
// config on major link change.
// TODO: explain why this is ncessary not just on Linux but also android
// and Apple platforms.
if changed {
//
// On Darwin (netext), we reapply the DNS config when the interface flaps
// because the change in interface can potentially change the nameservers
// for the forwarder. On Darwin netext clients, magicDNS is ~always the default
// resolver so having no nameserver to forward queries to (or one on a network we
// are not currently on) breaks DNS resolution system-wide. There are notable
// timing issues here with Darwin's network stack. It is not guaranteed that
// the forward resolver will be available immediately after the interface
// comes up. We leave it to the network extension to also poke magicDNS directly
// via [dns.Manager.RecompileDNSConfig] when it detects any change in the
// nameservers.
//
// TODO: On Android, Darwin-tailscaled, and openbsd, why do we need this?
if delta.RebindLikelyRequired && up {
switch runtime.GOOS {
case "linux", "android", "ios", "darwin", "openbsd":
e.wgLock.Lock()
@@ -1392,15 +1401,23 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
}
}
e.magicConn.SetNetworkUp(up)
why := "link-change-minor"
if changed {
if delta.RebindLikelyRequired {
why = "link-change-major"
metricNumMajorChanges.Add(1)
e.magicConn.Rebind()
} else {
metricNumMinorChanges.Add(1)
}
e.magicConn.ReSTUN(why)
// If we're up and it's a minor change, just send a STUN ping
if up {
if delta.RebindLikelyRequired {
e.magicConn.Rebind()
}
e.magicConn.ReSTUN(why)
}
}
func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {