From 654407dc6d1dc5724420d7e08f2064d0e5b32a68 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 24 Feb 2019 13:24:55 -0600 Subject: [PATCH] close long-dead connections in link.go instead of in switch.go, this is important in case a connection opens but never bothers to send even one switch message --- src/yggdrasil/link.go | 12 +++++++++++- src/yggdrasil/switch.go | 23 ----------------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 27c3aa23..df9625d0 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -228,12 +228,15 @@ func (intf *linkInterface) handler() error { var isReady bool var sendTimerRunning bool var recvTimerRunning bool - recvTime := 6 * time.Second // TODO set to ReadTimeout from the config, reset if it gets changed + recvTime := 6 * time.Second // TODO set to ReadTimeout from the config, reset if it gets changed + closeTime := 2 * switch_timeout // TODO or maybe this makes more sense for ReadTimeout?... sendTime := time.Second sendTimer := time.NewTimer(sendTime) defer util.TimerStop(sendTimer) recvTimer := time.NewTimer(recvTime) defer util.TimerStop(recvTimer) + closeTimer := time.NewTimer(closeTime) + defer util.TimerStop(closeTimer) for { //intf.link.core.log.Debugf("State of %s: %s, source %s :: isAlive %t isReady %t sendTimerRunning %t recvTimerRunning %t", // strings.ToUpper(intf.info.linkType), themString, intf.info.local, @@ -243,6 +246,7 @@ func (intf *linkInterface) handler() error { if !ok { return } + util.TimerStop(closeTimer) util.TimerStop(recvTimer) recvTimerRunning = false isAlive = true @@ -274,6 +278,8 @@ func (intf *linkInterface) handler() error { // Start a timer, if it expires and we haven't gotten any return traffic (including a 0-sized ack), then assume there's a problem util.TimerStop(recvTimer) recvTimer.Reset(recvTime) + util.TimerStop(closeTimer) + closeTimer.Reset(closeTime) recvTimerRunning = true } case _, ok := <-signalReady: @@ -297,6 +303,10 @@ func (intf *linkInterface) handler() error { case <-recvTimer.C: // We haven't received anything, so assume there's a problem and don't return this node to the switch until they start responding isAlive = false + case <-closeTimer.C: + // We haven't received anything in a really long time, so things have died at the switch level and then some... + // Just close the connection at this point... + intf.msgIO.close() } } }() diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index d45b8855..1b611af2 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -215,7 +215,6 @@ func (t *switchTable) doMaintenance() { defer t.mutex.Unlock() // Release lock when we're done t.cleanRoot() t.cleanDropped() - t.cleanPeers() } // Updates the root periodically if it is ourself, or promotes ourself to root if we're better than the current root or if the current root has timed out. @@ -272,28 +271,6 @@ func (t *switchTable) forgetPeer(port switchPort) { } } -// Clean all unresponsive peers from the table, needed in case a peer stops updating. -// Needed in case a non-parent peer keeps the connection open but stops sending updates. -// Also reclaims space from deleted peers by copying the map. -func (t *switchTable) cleanPeers() { - now := time.Now() - for port, peer := range t.data.peers { - if now.Sub(peer.time) > switch_timeout+switch_throttle { - // Longer than switch_timeout to make sure we don't remove a working peer because the root stopped responding. - delete(t.data.peers, port) - go t.core.peers.removePeer(port) // TODO figure out if it's safe to do this without a goroutine, or make it safe - } - } - if _, isIn := t.data.peers[t.parent]; !isIn { - // The root timestamp would probably time out before this happens, but better safe than sorry. - // We removed the current parent, so find a new one. - t.parent = 0 - for _, peer := range t.data.peers { - t.unlockedHandleMsg(&peer.msg, peer.port, true) - } - } -} - // Dropped is a list of roots that are better than the current root, but stopped sending new timestamps. // If we switch to a new root, and that root is better than an old root that previously timed out, then we can clean up the old dropped root infos. // This function is called periodically to do that cleanup.