From a7703701b806184b39583bcf08b95fcd52f4ce2e Mon Sep 17 00:00:00 2001 From: huanld Date: Fri, 17 Apr 2026 17:38:37 +0700 Subject: [PATCH] fix(tray): stability, no-console, icon logic, Windows GUI build --- cmd/tailscale-tray/main.go | 191 ++++++++++++++++++++++++++++++------- 1 file changed, 154 insertions(+), 37 deletions(-) diff --git a/cmd/tailscale-tray/main.go b/cmd/tailscale-tray/main.go index f46bab356..5cae40c93 100644 --- a/cmd/tailscale-tray/main.go +++ b/cmd/tailscale-tray/main.go @@ -44,13 +44,22 @@ type app struct { } func main() { - // Single instance mutex + // Single instance mutex. Lock the OS thread so that CreateMutex and + // the subsequent GetLastError call run on the same OS thread: LastError is + // thread-local, and Go may otherwise move the goroutine between the two calls. + // NOTE(review): createErr presumably already reports ERROR_ALREADY_EXISTS; confirm against x/sys/windows. + runtime.LockOSThread() mutexName, _ := windows.UTF16PtrFromString("Global\\Tailscale-Custom-Tray-Mutex") - handle, err := windows.CreateMutex(nil, false, mutexName) - if err != nil { + handle, createErr := windows.CreateMutex(nil, false, mutexName) + lastErr := windows.GetLastError() + runtime.UnlockOSThread() + if handle == 0 { + // Real failure (access denied, invalid args, etc.) + log.Printf("CreateMutex failed: %v", createErr) os.Exit(1) } - if windows.GetLastError() == windows.ERROR_ALREADY_EXISTS { + if lastErr == windows.ERROR_ALREADY_EXISTS { + // Another instance is already running; exit silently with success. windows.CloseHandle(handle) os.Exit(0) } @@ -59,6 +68,14 @@ func main() { setupLogging() log.Println("tailscale-tray starting") + // Top-level panic recovery: logs a panic raised on the main goroutine. + // recover cannot intercept panics from other goroutines, which need their own handlers.
+ defer func() { + if r := recover(); r != nil { + log.Printf("PANIC in main: %v", r) + } + }() + a := &app{ lc: &local.Client{}, rebuildCh: make(chan struct{}, 1), @@ -68,9 +85,44 @@ func main() { a.updateState() go a.watchIPNBus() + go a.periodicRefresh() systray.Run(a.onReady, a.onExit) } +// periodicRefresh is a safety net against a silently-stuck IPN bus watcher (e.g. a broken pipe on tailscaled restart that does not surface as an error on Next()). +// Every 15s it calls updateState and, when status.BackendState differs from the value seen on the previous tick, logs the change and calls triggerRebuild. +// lastState starts empty, so the first tick that sees a non-empty BackendState always rebuilds once. +// The loop returns when a.bgCtx is done; a panic inside it is logged and ends the goroutine. +func (a *app) periodicRefresh() { + defer func() { + if r := recover(); r != nil { + log.Printf("PANIC in periodicRefresh: %v", r) + } + }() + ticker := time.NewTicker(15 * time.Second) + defer ticker.Stop() + var lastState string + for { + select { + case <-a.bgCtx.Done(): + return + case <-ticker.C: + } + a.updateState() + a.mu.Lock() + cur := "" + if a.status != nil { + cur = a.status.BackendState + } + a.mu.Unlock() + if cur != lastState { + log.Printf("periodicRefresh: state changed %q -> %q, rebuilding", lastState, cur) + lastState = cur + a.triggerRebuild() + } + } +} + func setupLogging() { dirs := []string{ filepath.Join(os.Getenv("ProgramData"), "Tailscale-Custom", "Logs"), @@ -163,9 +215,10 @@ func (a *app) rebuild() { // Show IP on a separate line when connected if isRunning && a.status != nil && a.status.Self != nil && len(a.status.Self.TailscaleIPs) > 0 { - ipLine := systray.AddMenuItem(" IP: "+a.status.Self.TailscaleIPs[0].String(), "Click to copy") + selfIP := a.status.Self.TailscaleIPs[0].String() + ipLine := systray.AddMenuItem(" IP: "+selfIP, "Click to copy") onClick(ctx, ipLine, func() { - copyToClipboard(a.status.Self.TailscaleIPs[0].String()) + copyToClipboard(selfIP) }) } @@ -413,7 +466,10 @@ func (a *app) doLogin() { } defer atomic.StoreInt32(&a.inAction, 0) + // Take a thread-safe snapshot of the current profile URL.
+ a.mu.Lock() serverURL := a.curProfile.ControlURL + a.mu.Unlock() if serverURL == "" { serverURL = "https://vpn.softs.business" } @@ -497,14 +553,21 @@ func (a *app) addServer() { } // openAuthURL polls Status.AuthURL and opens the browser when available. +// Uses bgCtx (not the caller's short operation context) because the auth URL +// can take longer than 5s to materialise on a cold tailscaled. func (a *app) openAuthURL() { - for i := 0; i < 20; i++ { // poll up to ~5 seconds + deadline := time.Now().Add(30 * time.Second) + for time.Now().Before(deadline) { + if a.bgCtx.Err() != nil { + return + } ctx, cancel := context.WithTimeout(a.bgCtx, 3*time.Second) st, err := a.lc.Status(ctx) cancel() if err != nil { - log.Printf("openAuthURL: Status error: %v", err) - return + log.Printf("openAuthURL: Status error (will retry): %v", err) + time.Sleep(500 * time.Millisecond) + continue } if st.AuthURL != "" { log.Printf("openAuthURL: opening %s", st.AuthURL) @@ -515,7 +578,7 @@ func (a *app) openAuthURL() { log.Println("openAuthURL: already running, no auth needed") return } - time.Sleep(250 * time.Millisecond) + time.Sleep(500 * time.Millisecond) } log.Println("openAuthURL: timed out waiting for AuthURL") } @@ -523,11 +586,32 @@ func (a *app) openAuthURL() { // ── IPN Bus Watcher ───────────────────────────────────── func (a *app) watchIPNBus() { - for { - err := a.watchIPNBusInner() - if err != nil { - log.Printf("watchIPNBus error: %v", err) + defer func() { + if r := recover(); r != nil { + log.Printf("PANIC in watchIPNBus (outer): %v", r) } + }() + for { + // Reset prevState before each connection attempt so stale state + // from a prior session does not trigger false transitions. 
+ a.mu.Lock() + a.prevState = "" + a.mu.Unlock() + + func() { + defer func() { + if r := recover(); r != nil { + log.Printf("PANIC in watchIPNBusInner: %v", r) + } + }() + if err := a.watchIPNBusInner(); err != nil { + log.Printf("watchIPNBus error: %v", err) + } + }() + // After the bus dies, force a status refresh + rebuild so the UI + // does not freeze on a stale snapshot while we wait to reconnect. + a.updateState() + a.triggerRebuild() select { case <-a.bgCtx.Done(): return @@ -553,7 +637,7 @@ func (a *app) watchIPNBusInner() error { return err } - // Detect node removed: Running -> NeedsLogin transition + // Track state transitions for logging. if n.State != nil { newState := n.State.String() a.mu.Lock() @@ -561,12 +645,14 @@ func (a *app) watchIPNBusInner() error { a.prevState = newState a.mu.Unlock() - if prev == "Running" && newState == "NeedsLogin" { - log.Println("Detected state Running->NeedsLogin, performing auto-logout") - logoutCtx, cancel := context.WithTimeout(a.bgCtx, 5*time.Second) - _ = a.lc.Logout(logoutCtx) - cancel() + if prev != "" && prev != newState { + log.Printf("State transition: %s -> %s", prev, newState) } + // NOTE: We intentionally do NOT auto-logout on Running->NeedsLogin. + // A NeedsLogin state can be transient (network hiccup, key rotation, + // server maintenance). Calling Logout() here would deregister the node + // entirely, forcing full re-authentication. Let the UI reflect the state + // and allow the user to decide. 
} if n.State != nil || n.Prefs != nil { @@ -780,7 +866,10 @@ func buildInputDialogTemplate(title, prompt string) []byte { d.w32(wsPopup | wsCaption | wsSysMenu | dsSetFont | dsModalFrm | ds3DLook) d.w32(0) d.w16(4) - d.ws16(0); d.ws16(0); d.ws16(280); d.ws16(90) + d.ws16(0) + d.ws16(0) + d.ws16(280) + d.ws16(90) d.w16(0) d.w16(0) d.wstr(title) @@ -789,31 +878,59 @@ func buildInputDialogTemplate(title, prompt string) []byte { // Static label id=100 d.align(4) - d.w32(wsChild | wsVisible); d.w32(0) - d.ws16(12); d.ws16(12); d.ws16(256); d.ws16(14) - d.w16(100); d.w16(0xFFFF); d.w16(0x0082) - d.wstr(prompt); d.w16(0) + d.w32(wsChild | wsVisible) + d.w32(0) + d.ws16(12) + d.ws16(12) + d.ws16(256) + d.ws16(14) + d.w16(100) + d.w16(0xFFFF) + d.w16(0x0082) + d.wstr(prompt) + d.w16(0) // Edit id=101 d.align(4) - d.w32(wsChild | wsVisible | wsTabStop | wsBorder | esAutoHS); d.w32(0) - d.ws16(12); d.ws16(32); d.ws16(256); d.ws16(14) - d.w16(101); d.w16(0xFFFF); d.w16(0x0081) - d.w16(0); d.w16(0) + d.w32(wsChild | wsVisible | wsTabStop | wsBorder | esAutoHS) + d.w32(0) + d.ws16(12) + d.ws16(32) + d.ws16(256) + d.ws16(14) + d.w16(101) + d.w16(0xFFFF) + d.w16(0x0081) + d.w16(0) + d.w16(0) // OK id=1 d.align(4) - d.w32(wsChild | wsVisible | wsTabStop | bsPushBtn); d.w32(0) - d.ws16(150); d.ws16(62); d.ws16(55); d.ws16(16) - d.w16(1); d.w16(0xFFFF); d.w16(0x0080) - d.wstr("Connect"); d.w16(0) + d.w32(wsChild | wsVisible | wsTabStop | bsPushBtn) + d.w32(0) + d.ws16(150) + d.ws16(62) + d.ws16(55) + d.ws16(16) + d.w16(1) + d.w16(0xFFFF) + d.w16(0x0080) + d.wstr("Connect") + d.w16(0) // Cancel id=2 d.align(4) - d.w32(wsChild | wsVisible | wsTabStop); d.w32(0) - d.ws16(213); d.ws16(62); d.ws16(55); d.ws16(16) - d.w16(2); d.w16(0xFFFF); d.w16(0x0080) - d.wstr("Cancel"); d.w16(0) + d.w32(wsChild | wsVisible | wsTabStop) + d.w32(0) + d.ws16(213) + d.ws16(62) + d.ws16(55) + d.ws16(16) + d.w16(2) + d.w16(0xFFFF) + d.w16(0x0080) + d.wstr("Cancel") + d.w16(0) return d.buf }