From f308e81bf3e906e3c02d43a12b953269a5f82f09 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Tue, 18 Feb 2020 20:13:39 -0600 Subject: [PATCH 01/51] in the switch, keep a separate set of queues per peer instead of a global queue --- src/yggdrasil/api.go | 17 ++--- src/yggdrasil/link.go | 19 ------ src/yggdrasil/switch.go | 140 +++++++++++++++++++++------------------- 3 files changed, 82 insertions(+), 94 deletions(-) diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 82d0aa93..4a6ae417 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -207,15 +207,16 @@ func (c *Core) GetSwitchQueues() SwitchQueues { HighestSize: switchTable.queues.maxsize, MaximumSize: switchTable.queues.totalMaxSize, } - for k, v := range switchTable.queues.bufs { - nexthop := switchTable.bestPortForCoords([]byte(k)) - queue := SwitchQueue{ - ID: k, - Size: v.size, - Packets: uint64(len(v.packets)), - Port: uint64(nexthop), + for port, pbuf := range switchTable.queues.bufs { + for k, v := range pbuf { + queue := SwitchQueue{ + ID: k, + Size: v.size, + Packets: uint64(len(v.packets)), + Port: uint64(port), + } + switchqueues.Queues = append(switchqueues.Queues, queue) } - switchqueues.Queues = append(switchqueues.Queues, queue) } } phony.Block(&c.switchTable, getSwitchQueues) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 157ea525..fb40fc08 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -282,13 +282,6 @@ func (intf *linkInterface) notifySending(size int, isLinkTraffic bool) { }) } -// called by an AfterFunc if we seem to be blocked in a send syscall for a long time -func (intf *linkInterface) _notifySyscall() { - intf.link.core.switchTable.Act(intf, func() { - intf.link.core.switchTable._sendingIn(intf.peer.port) - }) -} - // we just sent something, so cancel any pending timer to send keep-alive traffic func (intf *linkInterface) _cancelStallTimer() { if intf.stallTimer != nil { @@ -402,19 +395,7 @@ func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool size += len(bs) } w.intf.notifySending(size, isLinkTraffic) - // start a timer that will fire if we get stuck in writeMsgs for an oddly long time - var once sync.Once - timer := time.AfterFunc(time.Millisecond, func() { - // 1 ms is kind of arbitrary - // the rationale is that this should be very long compared to a syscall - // but it's still short compared to end-to-end latency or human perception - once.Do(func() { - w.intf.Act(nil, w.intf._notifySyscall) - }) - }) w.intf.msgIO.writeMsgs(bss) - // Make sure we either stop the timer from doing anything or wait until it's done - once.Do(func() { timer.Stop() }) w.intf.notifySent(size, isLinkTraffic) // Cleanup for _, bs := range bss { diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 653b12f1..899d143d 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -177,7 +177,6 @@ type switchTable struct { phony.Inbox // Owns the below queues switch_buffers // Queues - not atomic so ONLY use through the actor idle map[switchPort]struct{} // idle peers - not atomic so ONLY use through the actor - sending map[switchPort]struct{} // peers known to be blocked in a send (somehow) } // Minimum allowed total size of switch queues. @@ -202,9 +201,8 @@ func (t *switchTable) init(core *Core) { t.queues.totalMaxSize = SwitchQueueTotalMinSize } core.config.Mutex.RUnlock() - t.queues.bufs = make(map[string]switch_buffer) + t.queues.bufs = make(map[switchPort]map[string]switch_buffer) t.idle = make(map[switchPort]struct{}) - t.sending = make(map[switchPort]struct{}) }) } @@ -666,27 +664,17 @@ func (t *switchTable) bestPortForCoords(coords []byte) switchPort { // Handle an incoming packet // Either send it to ourself, or to the first idle peer that's free // Returns true if the packet has been handled somehow, false if it should be queued -func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}, sending map[switchPort]struct{}) bool { +func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}) (bool, switchPort) { coords := switch_getPacketCoords(packet) closer := t.getCloser(coords) - if len(closer) == 0 { - // TODO? call the router directly, and remove the whole concept of a self peer? - self := t.core.peers.getPorts()[0] - self.sendPacketsFrom(t, [][]byte{packet}) - return true - } var best *closerInfo ports := t.core.peers.getPorts() for _, cinfo := range closer { to := ports[cinfo.elem.port] - //_, isIdle := idle[cinfo.elem.port] - _, isSending := sending[cinfo.elem.port] var update bool switch { case to == nil: // no port was found, ignore it - case isSending: - // the port is busy, ignore it case best == nil: // this is the first idle port we've found, so select it until we find a // better candidate port to use instead @@ -715,15 +703,20 @@ func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}, sen best = &b } } - if best != nil { - if _, isIdle := idle[best.elem.port]; isIdle { - delete(idle, best.elem.port) - ports[best.elem.port].sendPacketsFrom(t, [][]byte{packet}) - return true - } + if best == nil { + // No closer peers + // TODO? call the router directly, and remove the whole concept of a self peer? + self := t.core.peers.getPorts()[0] + self.sendPacketsFrom(t, [][]byte{packet}) + return true, 0 } - // Didn't find anyone idle to send it to - return false + if _, isIdle := idle[best.elem.port]; isIdle { + delete(idle, best.elem.port) + ports[best.elem.port].sendPacketsFrom(t, [][]byte{packet}) + return true, best.elem.port + } + // Best node isn't idle, so return port and let the packet be buffered + return false, best.elem.port } // Info about a buffered packet @@ -740,24 +733,29 @@ type switch_buffer struct { type switch_buffers struct { totalMaxSize uint64 - bufs map[string]switch_buffer // Buffers indexed by StreamID - size uint64 // Total size of all buffers, in bytes + bufs map[switchPort]map[string]switch_buffer // Buffers indexed by port and StreamID + size uint64 // Total size of all buffers, in bytes maxbufs int maxsize uint64 closer []closerInfo // Scratch space } func (b *switch_buffers) _cleanup(t *switchTable) { - for streamID, buf := range b.bufs { - // Remove queues for which we have no next hop - packet := buf.packets[0] - coords := switch_getPacketCoords(packet.bytes) - if len(t.getCloser(coords)) == 0 { - for _, packet := range buf.packets { - util.PutBytes(packet.bytes) + for port, pbufs := range b.bufs { + for streamID, buf := range pbufs { + // Remove queues for which we have no next hop + packet := buf.packets[0] + coords := switch_getPacketCoords(packet.bytes) + if len(t.getCloser(coords)) == 0 { + for _, packet := range buf.packets { + util.PutBytes(packet.bytes) + } + b.size -= buf.size + delete(pbufs, streamID) } - b.size -= buf.size - delete(b.bufs, streamID) + } + if len(pbufs) == 0 { + delete(b.bufs, port) } } @@ -765,23 +763,28 @@ func (b *switch_buffers) _cleanup(t *switchTable) { // Drop a random queue target := rand.Uint64() % b.size var size uint64 // running total - for streamID, buf := range b.bufs { - size += buf.size - if size < target { - continue + for port, pbufs := range b.bufs { + for streamID, buf := range pbufs { + size += buf.size + if size < target { + continue + } + var packet switch_packetInfo + packet, buf.packets = buf.packets[0], buf.packets[1:] + buf.size -= uint64(len(packet.bytes)) + b.size -= uint64(len(packet.bytes)) + util.PutBytes(packet.bytes) + if len(buf.packets) == 0 { + delete(pbufs, streamID) + if len(pbufs) == 0 { + delete(b.bufs, port) + } + } else { + // Need to update the map, since buf was retrieved by value + pbufs[streamID] = buf + } + break } - var packet switch_packetInfo - packet, buf.packets = buf.packets[0], buf.packets[1:] - buf.size -= uint64(len(packet.bytes)) - b.size -= uint64(len(packet.bytes)) - util.PutBytes(packet.bytes) - if len(buf.packets) == 0 { - delete(b.bufs, streamID) - } else { - // Need to update the map, since buf was retrieved by value - b.bufs[streamID] = buf - } - break } } } @@ -799,32 +802,35 @@ func (t *switchTable) _handleIdle(port switchPort) bool { var psize int t.queues._cleanup(t) now := time.Now() + pbufs := t.queues.bufs[port] for psize < 65535 { var best string var bestPriority float64 - for streamID, buf := range t.queues.bufs { + for streamID, buf := range pbufs { // Filter over the streams that this node is closer to // Keep the one with the smallest queue packet := buf.packets[0] - coords := switch_getPacketCoords(packet.bytes) priority := float64(now.Sub(packet.time)) / float64(buf.size) - if priority >= bestPriority && t.portIsCloser(coords, port) { + if priority >= bestPriority { best = streamID bestPriority = priority } } if best != "" { - buf := t.queues.bufs[best] + buf := pbufs[best] var packet switch_packetInfo // TODO decide if this should be LIFO or FIFO packet, buf.packets = buf.packets[0], buf.packets[1:] buf.size -= uint64(len(packet.bytes)) t.queues.size -= uint64(len(packet.bytes)) if len(buf.packets) == 0 { - delete(t.queues.bufs, best) + delete(pbufs, best) + if len(pbufs) == 0 { + delete(t.queues.bufs, port) + } } else { // Need to update the map, since buf was retrieved by value - t.queues.bufs[best] = buf + pbufs[best] = buf } packets = append(packets, packet.bytes) psize += len(packet.bytes) @@ -848,11 +854,14 @@ func (t *switchTable) packetInFrom(from phony.Actor, bytes []byte) { func (t *switchTable) _packetIn(bytes []byte) { // Try to send it somewhere (or drop it if it's corrupt or at a dead end) - if !t._handleIn(bytes, t.idle, t.sending) { + if sent, best := t._handleIn(bytes, t.idle); !sent { // There's nobody free to take it right now, so queue it for later packet := switch_packetInfo{bytes, time.Now()} streamID := switch_getPacketStreamID(packet.bytes) - buf, bufExists := t.queues.bufs[streamID] + if _, isIn := t.queues.bufs[best]; !isIn { + t.queues.bufs[best] = make(map[string]switch_buffer) + } + buf, bufExists := t.queues.bufs[best][streamID] buf.packets = append(buf.packets, packet) buf.size += uint64(len(packet.bytes)) t.queues.size += uint64(len(packet.bytes)) @@ -860,13 +869,17 @@ func (t *switchTable) _packetIn(bytes []byte) { if t.queues.size > t.queues.maxsize { t.queues.maxsize = t.queues.size } - t.queues.bufs[streamID] = buf + t.queues.bufs[best][streamID] = buf if !bufExists { // Keep a track of the max total queue count. Only recalculate this // when the queue is new because otherwise repeating len(dict) might // cause unnecessary processing overhead - if len(t.queues.bufs) > t.queues.maxbufs { - t.queues.maxbufs = len(t.queues.bufs) + var count int + for _, pbufs := range t.queues.bufs { + count += len(pbufs) + } + if count > t.queues.maxbufs { + t.queues.maxbufs = count } } t.queues._cleanup(t) @@ -875,15 +888,8 @@ func (t *switchTable) _packetIn(bytes []byte) { func (t *switchTable) _idleIn(port switchPort) { // Try to find something to send to this peer - delete(t.sending, port) if !t._handleIdle(port) { // Didn't find anything ready to send yet, so stay idle t.idle[port] = struct{}{} } } - -func (t *switchTable) _sendingIn(port switchPort) { - if _, isIn := t.idle[port]; !isIn { - t.sending[port] = struct{}{} - } -} From 48098799958d5450c894821a1d768f475cb7c4d7 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Tue, 10 Mar 2020 01:03:07 -0500 Subject: [PATCH 02/51] refactor switch code so calling lookupTable.lookup does most of the important work --- src/yggdrasil/switch.go | 109 +++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 62 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index d4dd045a..ce5e3db6 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -596,14 +596,14 @@ func (t *switchTable) getCloser(dest []byte) []closerInfo { // Skip the iteration step if it's impossible to be closer return nil } - t.queues.closer = t.queues.closer[:0] + var closer []closerInfo for _, info := range table.elems { dist := info.locator.dist(dest) if dist < myDist { - t.queues.closer = append(t.queues.closer, closerInfo{info, dist}) + closer = append(closer, closerInfo{info, dist}) } } - return t.queues.closer + return closer } // Returns true if the peer is closer to the destination than ourself @@ -645,20 +645,41 @@ func switch_getFlowLabelFromCoords(in []byte) []byte { return []byte{} } -// Find the best port for a given set of coords -func (t *switchTable) bestPortForCoords(coords []byte) switchPort { - table := t.getTable() - var best switchPort - bestDist := table.self.dist(coords) - for to, elem := range table.elems { - dist := elem.locator.dist(coords) - if !(dist < bestDist) { +// Find the best port to forward to for a given set of coords +func (t *lookupTable) lookup(coords []byte) switchPort { + var bestPort switchPort + myDist := t.self.dist(coords) + bestDist := myDist + var bestElem tableElem + for _, info := range t.elems { + dist := info.locator.dist(coords) + if dist >= myDist { continue } - best = to - bestDist = dist + var update bool + switch { + case dist < bestDist: + // Closer to destination + update = true + case dist > bestDist: + // Further from destination + case info.locator.tstamp > bestElem.locator.tstamp: + // Newer root update + update = true + case info.locator.tstamp < bestElem.locator.tstamp: + // Older root update + case info.time.Before(bestElem.time): + // Received root update via this peer sooner + update = true + default: + } + if update { + bestPort = info.port + bestDist = dist + bestElem = info + } } - return best + return bestPort } // Handle an incoming packet @@ -666,57 +687,22 @@ func (t *switchTable) bestPortForCoords(coords []byte) switchPort { // Returns true if the packet has been handled somehow, false if it should be queued func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}) (bool, switchPort) { coords := switch_getPacketCoords(packet) - closer := t.getCloser(coords) - var best *closerInfo + table := t.getTable() + port := table.lookup(coords) ports := t.core.peers.getPorts() - for _, cinfo := range closer { - to := ports[cinfo.elem.port] - var update bool - switch { - case to == nil: - // no port was found, ignore it - case best == nil: - // this is the first idle port we've found, so select it until we find a - // better candidate port to use instead - update = true - case cinfo.dist < best.dist: - // the port takes a shorter path/is more direct than our current - // candidate, so select that instead - update = true - case cinfo.dist > best.dist: - // the port takes a longer path/is less direct than our current candidate, - // ignore it - case cinfo.elem.locator.tstamp > best.elem.locator.tstamp: - // has a newer tstamp from the root, so presumably a better path - update = true - case cinfo.elem.locator.tstamp < best.elem.locator.tstamp: - // has a n older tstamp, so presumably a worse path - case cinfo.elem.time.Before(best.elem.time): - // same tstamp, but got it earlier, so presumably a better path - //t.core.log.Println("DEBUG new best:", best.elem.time, cinfo.elem.time) - update = true - default: - // the search for a port has finished - } - if update { - b := cinfo // because cinfo gets mutated by the iteration - best = &b - } - } - if best == nil { - // No closer peers - // TODO? call the router directly, and remove the whole concept of a self peer? - self := t.core.peers.getPorts()[0] - self.sendPacketsFrom(t, [][]byte{packet}) + peer := ports[port] + if peer == nil { + // FIXME hack, if the peer disappeared durring a race then don't buffer return true, 0 } - if _, isIdle := idle[best.elem.port]; isIdle { - delete(idle, best.elem.port) - ports[best.elem.port].sendPacketsFrom(t, [][]byte{packet}) - return true, best.elem.port + if _, isIdle := idle[port]; isIdle || port == 0 { + // Either no closer peers, or the closest peer is idle + delete(idle, port) + peer.sendPacketsFrom(t, [][]byte{packet}) + return true, port } - // Best node isn't idle, so return port and let the packet be buffered - return false, best.elem.port + // There's a closer peer, but it's not idle, so buffer it + return false, port } // Info about a buffered packet @@ -737,7 +723,6 @@ type switch_buffers struct { size uint64 // Total size of all buffers, in bytes maxbufs int maxsize uint64 - closer []closerInfo // Scratch space } func (b *switch_buffers) _cleanup(t *switchTable) { From e926a3be6d2b9d475dd6b7a5677ec4442033ff74 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 29 Mar 2020 00:23:38 -0500 Subject: [PATCH 03/51] work in progress actorizing core.peers and replacing switch worker with per-peer switch-generated lookupTable --- contrib/ansible/genkeys.go | 4 +- src/yggdrasil/api.go | 23 ++-- src/yggdrasil/link.go | 7 +- src/yggdrasil/nodeinfo.go | 4 +- src/yggdrasil/peer.go | 119 ++++++++++-------- src/yggdrasil/router.go | 6 +- src/yggdrasil/switch.go | 244 +++++++++++++++++++------------------ 7 files changed, 221 insertions(+), 186 deletions(-) diff --git a/contrib/ansible/genkeys.go b/contrib/ansible/genkeys.go index 1d7c222d..681431b5 100644 --- a/contrib/ansible/genkeys.go +++ b/contrib/ansible/genkeys.go @@ -12,9 +12,9 @@ import ( "net" "os" + "github.com/cheggaaa/pb/v3" "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/cheggaaa/pb/v3" ) var numHosts = flag.Int("hosts", 1, "number of host vars to generate") @@ -30,7 +30,7 @@ type keySet struct { func main() { flag.Parse() - bar := pb.StartNew(*keyTries * 2 + *numHosts) + bar := pb.StartNew(*keyTries*2 + *numHosts) if *numHosts > *keyTries { println("Can't generate less keys than hosts.") diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 4a6ae417..15e2acd6 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -110,7 +110,8 @@ type Session struct { // there is exactly one entry then this node is not connected to any other nodes // and is therefore isolated. func (c *Core) GetPeers() []Peer { - ports := c.peers.ports.Load().(map[switchPort]*peer) + var ports map[switchPort]*peer + phony.Block(&c.peers, func() { ports = c.peers.ports }) var peers []Peer var ps []switchPort for port := range ports { @@ -143,10 +144,14 @@ func (c *Core) GetPeers() []Peer { // isolated or not connected to any peers. func (c *Core) GetSwitchPeers() []SwitchPeer { var switchpeers []SwitchPeer - table := c.switchTable.table.Load().(lookupTable) - peers := c.peers.ports.Load().(map[switchPort]*peer) + var table *lookupTable + var ports map[switchPort]*peer + phony.Block(&c.peers, func() { + table = c.peers.table + ports = c.peers.ports + }) for _, elem := range table.elems { - peer, isIn := peers[elem.port] + peer, isIn := ports[elem.port] if !isIn { continue } @@ -325,8 +330,8 @@ func (c *Core) EncryptionPublicKey() string { // connected to any other nodes (effectively making you the root of a // single-node network). func (c *Core) Coords() []uint64 { - table := c.switchTable.table.Load().(lookupTable) - return wire_coordsBytestoUint64s(table.self.getCoords()) + loc := c.switchTable.getLocator() + return wire_coordsBytestoUint64s(loc.getCoords()) } // Address gets the IPv6 address of the Yggdrasil node. This is always a /128 @@ -490,7 +495,11 @@ func (c *Core) CallPeer(addr string, sintf string) error { // DisconnectPeer disconnects a peer once. This should be specified as a port // number. func (c *Core) DisconnectPeer(port uint64) error { - c.peers.removePeer(switchPort(port)) + c.peers.Act(nil, func() { + if p, isIn := c.peers.ports[switchPort(port)]; isIn { + p.Act(&c.peers, p._removeSelf) + } + }) return nil } diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index fb40fc08..fa6563f1 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -217,13 +217,16 @@ func (intf *linkInterface) handler() error { intf.link.mutex.Unlock() // Create peer shared := crypto.GetSharedKey(myLinkPriv, &meta.link) - intf.peer = intf.link.core.peers.newPeer(&meta.box, &meta.sig, shared, intf, func() { intf.msgIO.close() }) + phony.Block(&intf.link.core.peers, func() { + // FIXME don't use phony.Block, it's bad practice, even if it's safe here + intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf, func() { intf.msgIO.close() }) + }) if intf.peer == nil { return errors.New("failed to create peer") } defer func() { // More cleanup can go here - intf.link.core.peers.removePeer(intf.peer.port) + intf.peer.Act(nil, intf.peer._removeSelf) }() intf.peer.out = func(msgs [][]byte) { intf.writer.sendFrom(intf.peer, msgs, false) diff --git a/src/yggdrasil/nodeinfo.go b/src/yggdrasil/nodeinfo.go index fc6250d6..745756fe 100644 --- a/src/yggdrasil/nodeinfo.go +++ b/src/yggdrasil/nodeinfo.go @@ -187,9 +187,9 @@ func (m *nodeinfo) sendNodeInfo(key crypto.BoxPubKey, coords []byte, isResponse } func (m *nodeinfo) _sendNodeInfo(key crypto.BoxPubKey, coords []byte, isResponse bool) { - table := m.core.switchTable.table.Load().(lookupTable) + loc := m.core.switchTable.getLocator() nodeinfo := nodeinfoReqRes{ - SendCoords: table.self.getCoords(), + SendCoords: loc.getCoords(), IsResponse: isResponse, NodeInfo: m._getNodeInfo(), } diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 4206857e..7fa2b317 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -6,8 +6,6 @@ package yggdrasil import ( "encoding/hex" - "sync" - "sync/atomic" "time" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" @@ -21,17 +19,17 @@ import ( // In most cases, this involves passing the packet to the handler for outgoing traffic to another peer. // In other cases, its link protocol traffic is used to build the spanning tree, in which case this checks signatures and passes the message along to the switch. type peers struct { + phony.Inbox core *Core - mutex sync.Mutex // Synchronize writes to atomic - ports atomic.Value //map[switchPort]*peer, use CoW semantics + ports map[switchPort]*peer // use CoW semantics, share updated version with each peer + table *lookupTable // Sent from switch, share updated version with each peer } // Initializes the peers struct. func (ps *peers) init(c *Core) { - ps.mutex.Lock() - defer ps.mutex.Unlock() - ps.putPorts(make(map[switchPort]*peer)) ps.core = c + ps.ports = make(map[switchPort]*peer) + ps.table = new(lookupTable) } func (ps *peers) reconfigure() { @@ -80,16 +78,6 @@ func (ps *peers) getAllowedEncryptionPublicKeys() []string { return ps.core.config.Current.AllowedEncryptionPublicKeys } -// Atomically gets a map[switchPort]*peer of known peers. -func (ps *peers) getPorts() map[switchPort]*peer { - return ps.ports.Load().(map[switchPort]*peer) -} - -// Stores a map[switchPort]*peer (note that you should take a mutex before store operations to avoid conflicts with other nodes attempting to read/change/store at the same time). -func (ps *peers) putPorts(ports map[switchPort]*peer) { - ps.ports.Store(ports) -} - // Information known about a peer, including their box/sig keys, precomputed shared keys (static and ephemeral) and a handler for their outgoing traffic type peer struct { phony.Inbox @@ -110,10 +98,31 @@ type peer struct { // The below aren't actually useful internally, they're just gathered for getPeers statistics bytesSent uint64 bytesRecvd uint64 + ports map[switchPort]*peer + table *lookupTable +} + +func (ps *peers) updateTables(from phony.Actor, table *lookupTable) { + ps.Act(from, func() { + ps.table = table + ps._updatePeers() + }) +} + +func (ps *peers) _updatePeers() { + ports := ps.ports + table := ps.table + for _, peer := range ps.ports { + p := peer // peer is mutated during iteration + p.Act(ps, func() { + p.ports = ports + p.table = table + }) + } } // Creates a new peer with the specified box, sig, and linkShared keys, using the lowest unoccupied port number. -func (ps *peers) newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf *linkInterface, closer func()) *peer { +func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf *linkInterface, closer func()) *peer { now := time.Now() p := peer{box: *box, sig: *sig, @@ -125,9 +134,7 @@ func (ps *peers) newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShare core: ps.core, intf: intf, } - ps.mutex.Lock() - defer ps.mutex.Unlock() - oldPorts := ps.getPorts() + oldPorts := ps.ports newPorts := make(map[switchPort]*peer) for k, v := range oldPorts { newPorts[k] = v @@ -139,46 +146,49 @@ func (ps *peers) newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShare break } } - ps.putPorts(newPorts) + ps.ports = newPorts + ps._updatePeers() return &p } -// Removes a peer for a given port, if one exists. -func (ps *peers) removePeer(port switchPort) { - if port == 0 { - return - } // Can't remove self peer - phony.Block(&ps.core.router, func() { - ps.core.switchTable.forgetPeer(port) +func (p *peer) _removeSelf() { + p.core.peers.Act(p, func() { + p.core.peers._removePeer(p) }) - ps.mutex.Lock() - oldPorts := ps.getPorts() - p, isIn := oldPorts[port] +} + +// Removes a peer for a given port, if one exists. +func (ps *peers) _removePeer(p *peer) { + if q := ps.ports[p.port]; p.port == 0 || q != p { + return + } // Can't remove self peer or nonexistant peer + ps.core.switchTable.forgetPeer(p.port) + oldPorts := ps.ports newPorts := make(map[switchPort]*peer) for k, v := range oldPorts { newPorts[k] = v } - delete(newPorts, port) - ps.putPorts(newPorts) - ps.mutex.Unlock() - if isIn { - if p.close != nil { - p.close() - } - close(p.done) + delete(newPorts, p.port) + if p.close != nil { + p.close() } + close(p.done) + ps.ports = newPorts + ps._updatePeers() } // If called, sends a notification to each peer that they should send a new switch message. // Mainly called by the switch after an update. func (ps *peers) sendSwitchMsgs(from phony.Actor) { - ports := ps.getPorts() - for _, p := range ports { - if p.port == 0 { - continue + ps.Act(from, func() { + for _, peer := range ps.ports { + p := peer + if p.port == 0 { + continue + } + p.Act(ps, p._sendSwitchMsg) } - p.Act(from, p._sendSwitchMsg) - } + }) } // This must be launched in a separate goroutine by whatever sets up the peer struct. @@ -236,12 +246,16 @@ func (p *peer) _handlePacket(packet []byte) { // Called to handle traffic or protocolTraffic packets. // In either case, this reads from the coords of the packet header, does a switch lookup, and forwards to the next node. func (p *peer) _handleTraffic(packet []byte) { - table := p.core.switchTable.getTable() - if _, isIn := table.elems[p.port]; !isIn && p.port != 0 { + if _, isIn := p.table.elems[p.port]; !isIn && p.port != 0 { // Drop traffic if the peer isn't in the switch return } - p.core.switchTable.packetInFrom(p, packet) + coords := switch_getPacketCoords(packet) + next := p.table.lookup(coords) + if nPeer, isIn := p.ports[next]; isIn { + nPeer.sendPacketsFrom(p, [][]byte{packet}) + } + //p.core.switchTable.packetInFrom(p, packet) } func (p *peer) sendPacketsFrom(from phony.Actor, packets [][]byte) { @@ -259,6 +273,7 @@ func (p *peer) _sendPackets(packets [][]byte) { size += len(packet) } p.bytesSent += uint64(size) + // FIXME need to manage queues here or else things can block! p.out(packets) } @@ -335,7 +350,8 @@ func (p *peer) _handleSwitchMsg(packet []byte) { return } if len(msg.Hops) < 1 { - p.core.peers.removePeer(p.port) + p._removeSelf() + return } var loc switchLocator prevKey := msg.Root @@ -346,7 +362,8 @@ func (p *peer) _handleSwitchMsg(packet []byte) { loc.coords = append(loc.coords, hop.Port) bs := getBytesForSig(&hop.Next, &sigMsg) if !crypto.Verify(&prevKey, bs, &hop.Sig) { - p.core.peers.removePeer(p.port) + p._removeSelf() + return } prevKey = hop.Next } diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index b08a12d3..ac4d655d 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -61,7 +61,11 @@ func (r *router) init(core *Core) { linkType: "self", }, } - p := r.core.peers.newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil) + var p *peer + phony.Block(&r.core.peers, func() { + // FIXME don't block here! + p = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil) + }) p.out = func(packets [][]byte) { r.handlePackets(p, packets) } r.out = func(bs []byte) { p.handlePacketFrom(r, bs) } r.nodeinfo.init(r.core) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index ce5e3db6..33f2a1bd 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -12,13 +12,12 @@ package yggdrasil // A little annoying to do with constant changes from backpressure import ( - "math/rand" + //"math/rand" "sync" - "sync/atomic" "time" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/yggdrasil-network/yggdrasil-go/src/util" + //"github.com/yggdrasil-network/yggdrasil-go/src/util" "github.com/Arceliar/phony" ) @@ -172,8 +171,6 @@ type switchTable struct { mutex sync.RWMutex // Lock for reads/writes of switchData parent switchPort // Port of whatever peer is our parent, or self if we're root data switchData // - updater atomic.Value // *sync.Once - table atomic.Value // lookupTable phony.Inbox // Owns the below queues switch_buffers // Queues - not atomic so ONLY use through the actor idle map[switchPort]struct{} // idle peers - not atomic so ONLY use through the actor @@ -190,8 +187,6 @@ func (t *switchTable) init(core *Core) { locator := switchLocator{root: t.key, tstamp: now.Unix()} peers := make(map[switchPort]peerInfo) t.data = switchData{locator: locator, peers: peers} - t.updater.Store(&sync.Once{}) - t.table.Store(lookupTable{}) t.drop = make(map[crypto.SigPubKey]int64) phony.Block(t, func() { core.config.Mutex.RLock() @@ -204,6 +199,7 @@ func (t *switchTable) init(core *Core) { t.queues.bufs = make(map[switchPort]map[string]switch_buffer) t.idle = make(map[switchPort]struct{}) }) + t.updateTable() } func (t *switchTable) reconfigure() { @@ -254,7 +250,7 @@ func (t *switchTable) cleanRoot() { t.time = now if t.data.locator.root != t.key { t.data.seq++ - t.updater.Store(&sync.Once{}) + t.updateTable() t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} @@ -292,7 +288,7 @@ func (t *switchTable) forgetPeer(port switchPort) { t.mutex.Lock() defer t.mutex.Unlock() delete(t.data.peers, port) - t.updater.Store(&sync.Once{}) + defer t.updateTable() if port != t.parent { return } @@ -528,7 +524,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep t.core.peers.sendSwitchMsgs(t) } if true || doUpdate { - t.updater.Store(&sync.Once{}) + t.updateTable() } return } @@ -566,13 +562,7 @@ func (t *switchTable) updateTable() { time: pinfo.time, } } - t.table.Store(newTable) -} - -// Returns a copy of the atomically-updated table used for switch lookups -func (t *switchTable) getTable() lookupTable { - t.updater.Load().(*sync.Once).Do(t.updateTable) - return t.table.Load().(lookupTable) + t.core.peers.updateTables(nil, &newTable) // TODO not be from nil } // Starts the switch worker @@ -589,6 +579,7 @@ type closerInfo struct { // Return a map of ports onto distance, keeping only ports closer to the destination than this node // If the map is empty (or nil), then no peer is closer +/* func (t *switchTable) getCloser(dest []byte) []closerInfo { table := t.getTable() myDist := table.self.dist(dest) @@ -605,8 +596,10 @@ func (t *switchTable) getCloser(dest []byte) []closerInfo { } return closer } +*/ // Returns true if the peer is closer to the destination than ourself +/* func (t *switchTable) portIsCloser(dest []byte, port switchPort) bool { table := t.getTable() if info, isIn := table.elems[port]; isIn { @@ -617,6 +610,7 @@ func (t *switchTable) portIsCloser(dest []byte, port switchPort) bool { return false } } +*/ // Get the coords of a packet without decoding func switch_getPacketCoords(packet []byte) []byte { @@ -686,23 +680,26 @@ func (t *lookupTable) lookup(coords []byte) switchPort { // Either send it to ourself, or to the first idle peer that's free // Returns true if the packet has been handled somehow, false if it should be queued func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}) (bool, switchPort) { - coords := switch_getPacketCoords(packet) - table := t.getTable() - port := table.lookup(coords) - ports := t.core.peers.getPorts() - peer := ports[port] - if peer == nil { - // FIXME hack, if the peer disappeared durring a race then don't buffer - return true, 0 - } - if _, isIdle := idle[port]; isIdle || port == 0 { - // Either no closer peers, or the closest peer is idle - delete(idle, port) - peer.sendPacketsFrom(t, [][]byte{packet}) - return true, port - } - // There's a closer peer, but it's not idle, so buffer it - return false, port + /* + coords := switch_getPacketCoords(packet) + table := t.getTable() + port := table.lookup(coords) + ports := t.core.peers.getPorts() + peer := ports[port] + if peer == nil { + // FIXME hack, if the peer disappeared durring a race then don't buffer + return true, 0 + } + if _, isIdle := idle[port]; isIdle || port == 0 { + // Either no closer peers, or the closest peer is idle + delete(idle, port) + peer.sendPacketsFrom(t, [][]byte{packet}) + return true, port + } + // There's a closer peer, but it's not idle, so buffer it + return false, port + */ + return true, 0 } // Info about a buffered packet @@ -726,52 +723,54 @@ type switch_buffers struct { } func (b *switch_buffers) _cleanup(t *switchTable) { - for port, pbufs := range b.bufs { - for streamID, buf := range pbufs { - // Remove queues for which we have no next hop - packet := buf.packets[0] - coords := switch_getPacketCoords(packet.bytes) - if len(t.getCloser(coords)) == 0 { - for _, packet := range buf.packets { - util.PutBytes(packet.bytes) - } - b.size -= buf.size - delete(pbufs, streamID) - } - } - if len(pbufs) == 0 { - delete(b.bufs, port) - } - } - - for b.size > b.totalMaxSize { - // Drop a random queue - target := rand.Uint64() % b.size - var size uint64 // running total + /* for port, pbufs := range b.bufs { for streamID, buf := range pbufs { - size += buf.size - if size < target { - continue - } - var packet switch_packetInfo - packet, buf.packets = buf.packets[0], buf.packets[1:] - buf.size -= uint64(len(packet.bytes)) - b.size -= uint64(len(packet.bytes)) - util.PutBytes(packet.bytes) - if len(buf.packets) == 0 { - delete(pbufs, streamID) - if len(pbufs) == 0 { - delete(b.bufs, port) + // Remove queues for which we have no next hop + packet := buf.packets[0] + coords := switch_getPacketCoords(packet.bytes) + if len(t.getCloser(coords)) == 0 { + for _, packet := range buf.packets { + util.PutBytes(packet.bytes) } - } else { - // Need to update the map, since buf was retrieved by value - pbufs[streamID] = buf + b.size -= buf.size + delete(pbufs, streamID) } - break + } + if len(pbufs) == 0 { + delete(b.bufs, port) } } - } + + for b.size > b.totalMaxSize { + // Drop a random queue + target := rand.Uint64() % b.size + var size uint64 // running total + for port, pbufs := range b.bufs { + for streamID, buf := range pbufs { + size += buf.size + if size < target { + continue + } + var packet switch_packetInfo + packet, buf.packets = buf.packets[0], buf.packets[1:] + buf.size -= uint64(len(packet.bytes)) + b.size -= uint64(len(packet.bytes)) + util.PutBytes(packet.bytes) + if len(buf.packets) == 0 { + delete(pbufs, streamID) + if len(pbufs) == 0 { + delete(b.bufs, port) + } + } else { + // Need to update the map, since buf was retrieved by value + pbufs[streamID] = buf + } + break + } + } + } + */ } // Handles incoming idle notifications @@ -779,57 +778,60 @@ func (b *switch_buffers) _cleanup(t *switchTable) { // Returns true if the peer is no longer idle, false if it should be added to the idle list func (t *switchTable) _handleIdle(port switchPort) bool { // TODO? only send packets for which this is the best next hop that isn't currently blocked sending - to := t.core.peers.getPorts()[port] - if to == nil { - return true - } - var packets [][]byte - var psize int - t.queues._cleanup(t) - now := time.Now() - pbufs := t.queues.bufs[port] - for psize < 65535 { - var best *string - var bestPriority float64 - for streamID, buf := range pbufs { - // Filter over the streams that this node is closer to - // Keep the one with the smallest queue - packet := buf.packets[0] - priority := float64(now.Sub(packet.time)) / float64(buf.size) - if priority >= bestPriority { - b := streamID // copy since streamID is mutated in the loop - best = &b - bestPriority = priority - } + /* + to := t.core.peers.getPorts()[port] + if to == nil { + return true } - if best != nil { - buf := pbufs[*best] - var packet switch_packetInfo - // TODO decide if this should be LIFO or FIFO - packet, buf.packets = buf.packets[0], buf.packets[1:] - buf.size -= uint64(len(packet.bytes)) - t.queues.size -= uint64(len(packet.bytes)) - if len(buf.packets) == 0 { - delete(pbufs, *best) - if len(pbufs) == 0 { - delete(t.queues.bufs, port) + var packets [][]byte + var psize int + t.queues._cleanup(t) + now := time.Now() + pbufs := t.queues.bufs[port] + for psize < 65535 { + var best *string + var bestPriority float64 + for streamID, buf := range pbufs { + // Filter over the streams that this node is closer to + // Keep the one with the smallest queue + packet := buf.packets[0] + priority := float64(now.Sub(packet.time)) / float64(buf.size) + if priority >= bestPriority { + b := streamID // copy since streamID is mutated in the loop + best = &b + bestPriority = priority } - } else { - // Need to update the map, since buf was retrieved by value - pbufs[*best] = buf - } - packets = append(packets, packet.bytes) - psize += len(packet.bytes) - } else { - // Finished finding packets - break + if best != nil { + buf := pbufs[*best] + var packet switch_packetInfo + // TODO decide if this should be LIFO or FIFO + packet, buf.packets = buf.packets[0], buf.packets[1:] + buf.size -= uint64(len(packet.bytes)) + t.queues.size -= uint64(len(packet.bytes)) + if len(buf.packets) == 0 { + delete(pbufs, *best) + if len(pbufs) == 0 { + delete(t.queues.bufs, port) + } + } else { + // Need to update the map, since buf was retrieved by value + pbufs[*best] = buf + + } + packets = append(packets, packet.bytes) + psize += len(packet.bytes) + } else { + // Finished finding packets + break + } } - } - if len(packets) > 0 { - to.sendPacketsFrom(t, packets) - return true - } + if len(packets) > 0 { + to.sendPacketsFrom(t, packets) + return true + } + return false + */ return false } From d47797088f52ebfb32ee292b8f6563634863b7d0 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 29 Mar 2020 00:48:41 -0500 Subject: [PATCH 04/51] fix shutdown deadlock --- src/yggdrasil/core.go | 2 ++ src/yggdrasil/switch.go | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/yggdrasil/core.go b/src/yggdrasil/core.go index 98a5c6e1..dcb5bc7a 100644 --- a/src/yggdrasil/core.go +++ b/src/yggdrasil/core.go @@ -195,8 +195,10 @@ func (c *Core) _stop() { c.addPeerTimer.Stop() } c.link.stop() + /* FIXME this deadlocks, need a waitgroup or something to coordinate shutdown for _, peer := range c.GetPeers() { c.DisconnectPeer(peer.Port) } + */ c.log.Infoln("Stopped") } diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 33f2a1bd..7ccb6c94 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -250,7 +250,7 @@ func (t *switchTable) cleanRoot() { t.time = now if t.data.locator.root != t.key { t.data.seq++ - t.updateTable() + defer t.updateTable() t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} @@ -524,7 +524,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep t.core.peers.sendSwitchMsgs(t) } if true || doUpdate { - t.updateTable() + defer t.updateTable() } return } From 15b850be6e6e1bd02753edbcd0155ac08928149d Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 29 Mar 2020 01:38:32 -0500 Subject: [PATCH 05/51] fix deadlock when running updateTable in the switch --- src/yggdrasil/switch.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 7ccb6c94..ab2e1194 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -199,7 +199,7 @@ func (t *switchTable) init(core *Core) { t.queues.bufs = make(map[switchPort]map[string]switch_buffer) t.idle = make(map[switchPort]struct{}) }) - t.updateTable() + t._updateTable() } func (t *switchTable) reconfigure() { @@ -250,7 +250,7 @@ func (t *switchTable) cleanRoot() { t.time = now if t.data.locator.root != t.key { t.data.seq++ - defer t.updateTable() + defer t._updateTable() t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} @@ -288,7 +288,7 @@ func (t *switchTable) forgetPeer(port switchPort) { t.mutex.Lock() defer t.mutex.Unlock() delete(t.data.peers, port) - defer t.updateTable() + defer t._updateTable() if port != t.parent { return } @@ -524,7 +524,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep t.core.peers.sendSwitchMsgs(t) } if true || doUpdate { - defer t.updateTable() + defer t._updateTable() } return } @@ -534,7 +534,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep // The rest of these are related to the switch worker // This is called via a sync.Once to update the atomically readable subset of switch information that gets used for routing decisions. -func (t *switchTable) updateTable() { +func (t *switchTable) _updateTable() { // WARNING this should only be called from within t.data.updater.Do() // It relies on the sync.Once for synchronization with messages and lookups // TODO use a pre-computed faster lookup table @@ -543,8 +543,6 @@ func (t *switchTable) updateTable() { // Each struct has stores the best port to forward to, and a next coord map // Move to struct, then iterate over coord maps until you dead end // The last port before the dead end should be the closest - t.mutex.RLock() - defer t.mutex.RUnlock() newTable := lookupTable{ self: t.data.locator.clone(), elems: make(map[switchPort]tableElem, len(t.data.peers)), From 9834f222db65efab838a1a2403b8e039109742f2 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 29 Mar 2020 19:01:50 -0500 Subject: [PATCH 06/51] more work in progress actorizing the remaining parts of the switch --- src/yggdrasil/api.go | 7 ++- src/yggdrasil/dht.go | 12 ++-- src/yggdrasil/link.go | 4 +- src/yggdrasil/nodeinfo.go | 3 +- src/yggdrasil/peer.go | 37 +++++++----- src/yggdrasil/router.go | 18 +++++- src/yggdrasil/search.go | 3 +- src/yggdrasil/session.go | 5 +- src/yggdrasil/switch.go | 123 +++++++++++++++++--------------------- 9 files changed, 111 insertions(+), 101 deletions(-) diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 15e2acd6..a722dc52 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -330,8 +330,11 @@ func (c *Core) EncryptionPublicKey() string { // connected to any other nodes (effectively making you the root of a // single-node network). func (c *Core) Coords() []uint64 { - loc := c.switchTable.getLocator() - return wire_coordsBytestoUint64s(loc.getCoords()) + var coords []byte + phony.Block(&c.router, func() { + coords = c.router.table.self.getCoords() + }) + return wire_coordsBytestoUint64s(coords) } // Address gets the IPv6 address of the Yggdrasil node. This is always a /128 diff --git a/src/yggdrasil/dht.go b/src/yggdrasil/dht.go index 8efc549f..56d03ed1 100644 --- a/src/yggdrasil/dht.go +++ b/src/yggdrasil/dht.go @@ -186,11 +186,9 @@ func dht_ordered(first, second, third *crypto.NodeID) bool { // Update info about the node that sent the request. func (t *dht) handleReq(req *dhtReq) { // Send them what they asked for - loc := t.router.core.switchTable.getLocator() - coords := loc.getCoords() res := dhtRes{ Key: t.router.core.boxPub, - Coords: coords, + Coords: t.router.table.self.getCoords(), Dest: req.Dest, Infos: t.lookup(&req.Dest, false), } @@ -300,11 +298,9 @@ func (t *dht) ping(info *dhtInfo, target *crypto.NodeID) { if target == nil { target = &t.nodeID } - loc := t.router.core.switchTable.getLocator() - coords := loc.getCoords() req := dhtReq{ Key: t.router.core.boxPub, - Coords: coords, + Coords: t.router.table.self.getCoords(), Dest: *target, } t.sendReq(&req, info) @@ -378,7 +374,7 @@ func (t *dht) getImportant() []*dhtInfo { }) // Keep the ones that are no further than the closest seen so far minDist := ^uint64(0) - loc := t.router.core.switchTable.getLocator() + loc := t.router.table.self important := infos[:0] for _, info := range infos { dist := uint64(loc.dist(info.coords)) @@ -416,7 +412,7 @@ func (t *dht) isImportant(ninfo *dhtInfo) bool { } important := t.getImportant() // Check if ninfo is of equal or greater importance to what we already know - loc := t.router.core.switchTable.getLocator() + loc := t.router.table.self ndist := uint64(loc.dist(ninfo.coords)) minDist := ^uint64(0) for _, info := range important { diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index fa6563f1..978e8eab 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -300,7 +300,7 @@ func (intf *linkInterface) notifyBlockedSend() { intf.Act(nil, func() { if intf.sendTimer != nil { //As far as we know, we're still trying to send, and the timer fired. - intf.link.core.switchTable.blockPeer(intf.peer.port) + intf.link.core.switchTable.blockPeer(intf, intf.peer.port) } }) } @@ -340,7 +340,7 @@ func (intf *linkInterface) notifyStalled() { intf.stallTimer.Stop() intf.stallTimer = nil intf.stalled = true - intf.link.core.switchTable.blockPeer(intf.peer.port) + intf.link.core.switchTable.blockPeer(intf, intf.peer.port) } }) } diff --git a/src/yggdrasil/nodeinfo.go b/src/yggdrasil/nodeinfo.go index 745756fe..b179d20b 100644 --- a/src/yggdrasil/nodeinfo.go +++ b/src/yggdrasil/nodeinfo.go @@ -18,6 +18,7 @@ type nodeinfo struct { myNodeInfo NodeInfoPayload callbacks map[crypto.BoxPubKey]nodeinfoCallback cache map[crypto.BoxPubKey]nodeinfoCached + table *lookupTable } type nodeinfoCached struct { @@ -187,7 +188,7 @@ func (m *nodeinfo) sendNodeInfo(key crypto.BoxPubKey, coords []byte, isResponse } func (m *nodeinfo) _sendNodeInfo(key crypto.BoxPubKey, coords []byte, isResponse bool) { - loc := m.core.switchTable.getLocator() + loc := m.table.self nodeinfo := nodeinfoReqRes{ SendCoords: loc.getCoords(), IsResponse: isResponse, diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 7fa2b317..9acb9321 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -162,7 +162,7 @@ func (ps *peers) _removePeer(p *peer) { if q := ps.ports[p.port]; p.port == 0 || q != p { return } // Can't remove self peer or nonexistant peer - ps.core.switchTable.forgetPeer(p.port) + ps.core.switchTable.forgetPeer(ps, p.port) oldPorts := ps.ports newPorts := make(map[switchPort]*peer) for k, v := range oldPorts { @@ -328,7 +328,7 @@ func (p *peer) _handleLinkTraffic(bs []byte) { // Gets a switchMsg from the switch, adds signed next-hop info for this peer, and sends it to them. func (p *peer) _sendSwitchMsg() { - msg := p.core.switchTable.getMsg() + msg := p.table.getMsg() if msg == nil { return } @@ -367,19 +367,26 @@ func (p *peer) _handleSwitchMsg(packet []byte) { } prevKey = hop.Next } - p.core.switchTable.handleMsg(&msg, p.port) - if !p.core.switchTable.checkRoot(&msg) { - // Bad switch message - p.dinfo = nil - return - } - // Pass a message to the dht informing it that this peer (still) exists - loc.coords = loc.coords[:len(loc.coords)-1] - p.dinfo = &dhtInfo{ - key: p.box, - coords: loc.getCoords(), - } - p._updateDHT() + p.core.switchTable.Act(p, func() { + if !p.core.switchTable._checkRoot(&msg) { + // Bad switch message + p.Act(&p.core.switchTable, func() { + p.dinfo = nil + }) + } else { + // handle the message + p.core.switchTable._handleMsg(&msg, p.port, false) + p.Act(&p.core.switchTable, func() { + // Pass a message to the dht informing it that this peer (still) exists + loc.coords = loc.coords[:len(loc.coords)-1] + p.dinfo = &dhtInfo{ + key: p.box, + coords: loc.getCoords(), + } + p._updateDHT() + }) + } + }) } // This generates the bytes that we sign or check the signature of for a switchMsg. diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index ac4d655d..40b8303f 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -46,6 +46,7 @@ type router struct { nodeinfo nodeinfo searches searches sessions sessions + table *lookupTable // has a copy of our locator } // Initializes the router struct, which includes setting up channels to/from the adapter. @@ -77,6 +78,21 @@ func (r *router) init(core *Core) { r.sessions.init(r) } +func (r *router) updateTable(from phony.Actor, table *lookupTable) { + r.Act(from, func() { + r.table = table + r.nodeinfo.Act(r, func() { + r.nodeinfo.table = table + }) + for _, ses := range r.sessions.sinfos { + sinfo := ses + sinfo.Act(r, func() { + sinfo.table = table + }) + } + }) +} + // Reconfigures the router and any child modules. This should only ever be run // by the router actor. func (r *router) reconfigure() { @@ -130,7 +146,7 @@ func (r *router) reset(from phony.Actor) { func (r *router) doMaintenance() { phony.Block(r, func() { // Any periodic maintenance stuff goes here - r.core.switchTable.doMaintenance() + r.core.switchTable.doMaintenance(r) r.dht.doMaintenance() r.sessions.cleanup() }) diff --git a/src/yggdrasil/search.go b/src/yggdrasil/search.go index 91f0490c..febde3d8 100644 --- a/src/yggdrasil/search.go +++ b/src/yggdrasil/search.go @@ -161,11 +161,10 @@ func (sinfo *searchInfo) continueSearch(infos []*dhtInfo) { // Initially start a search func (sinfo *searchInfo) startSearch() { - loc := sinfo.searches.router.core.switchTable.getLocator() var infos []*dhtInfo infos = append(infos, &dhtInfo{ key: sinfo.searches.router.core.boxPub, - coords: loc.getCoords(), + coords: sinfo.searches.router.table.self.getCoords(), }) // Start the search by asking ourself, useful if we're the destination sinfo.continueSearch(infos) diff --git a/src/yggdrasil/session.go b/src/yggdrasil/session.go index eaa67fd0..01c2cdfb 100644 --- a/src/yggdrasil/session.go +++ b/src/yggdrasil/session.go @@ -52,6 +52,7 @@ type sessionInfo struct { cancel util.Cancellation // Used to terminate workers conn *Conn // The associated Conn object callbacks []chan func() // Finished work from crypto workers + table *lookupTable // table.self is a locator where we get our coords } // Represents a session ping/pong packet, and includes information like public keys, a session handle, coords, a timestamp to prevent replays, and the tun/tap MTU. @@ -217,6 +218,7 @@ func (ss *sessions) createSession(theirPermKey *crypto.BoxPubKey) *sessionInfo { sinfo.myHandle = *crypto.NewHandle() sinfo.theirAddr = *address.AddrForNodeID(crypto.GetNodeID(&sinfo.theirPermPub)) sinfo.theirSubnet = *address.SubnetForNodeID(crypto.GetNodeID(&sinfo.theirPermPub)) + sinfo.table = ss.router.table ss.sinfos[sinfo.myHandle] = &sinfo ss.byTheirPerm[sinfo.theirPermPub] = &sinfo.myHandle return &sinfo @@ -266,8 +268,7 @@ func (ss *sessions) removeSession(sinfo *sessionInfo) { // Returns a session ping appropriate for the given session info. func (sinfo *sessionInfo) _getPing() sessionPing { - loc := sinfo.sessions.router.core.switchTable.getLocator() - coords := loc.getCoords() + coords := sinfo.table.self.getCoords() ping := sessionPing{ SendPermPub: sinfo.sessions.router.core.boxPub, Handle: sinfo.myHandle, diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index ab2e1194..2661b460 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -12,12 +12,9 @@ package yggdrasil // A little annoying to do with constant changes from backpressure import ( - //"math/rand" - "sync" "time" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - //"github.com/yggdrasil-network/yggdrasil-go/src/util" "github.com/Arceliar/phony" ) @@ -149,6 +146,7 @@ type tableElem struct { type lookupTable struct { self switchLocator elems map[switchPort]tableElem + _msg switchMsg } // This is switch information which is mutable and needs to be modified by other goroutines, but is not accessed atomically. @@ -168,7 +166,6 @@ type switchTable struct { key crypto.SigPubKey // Our own key time time.Time // Time when locator.tstamp was last updated drop map[crypto.SigPubKey]int64 // Tstamp associated with a dropped root - mutex sync.RWMutex // Lock for reads/writes of switchData parent switchPort // Port of whatever peer is our parent, or self if we're root data switchData // phony.Inbox // Owns the below @@ -208,24 +205,17 @@ func (t *switchTable) reconfigure() { t.core.peers.reconfigure() } -// Safely gets a copy of this node's locator. -func (t *switchTable) getLocator() switchLocator { - t.mutex.RLock() - defer t.mutex.RUnlock() - return t.data.locator.clone() -} - // Regular maintenance to possibly timeout/reset the root and similar. -func (t *switchTable) doMaintenance() { - // Periodic maintenance work to keep things internally consistent - t.mutex.Lock() // Write lock - defer t.mutex.Unlock() // Release lock when we're done - t.cleanRoot() - t.cleanDropped() +func (t *switchTable) doMaintenance(from phony.Actor) { + t.Act(from, func() { + // Periodic maintenance work to keep things internally consistent + t._cleanRoot() + t._cleanDropped() + }) } // Updates the root periodically if it is ourself, or promotes ourself to root if we're better than the current root or if the current root has timed out. -func (t *switchTable) cleanRoot() { +func (t *switchTable) _cleanRoot() { // TODO rethink how this is done?... // Get rid of the root if it looks like its timed out now := time.Now() @@ -259,49 +249,49 @@ func (t *switchTable) cleanRoot() { } // Blocks and, if possible, unparents a peer -func (t *switchTable) blockPeer(port switchPort) { - t.mutex.Lock() - defer t.mutex.Unlock() - peer, isIn := t.data.peers[port] - if !isIn { - return - } - peer.blocked = true - t.data.peers[port] = peer - if port != t.parent { - return - } - t.parent = 0 - for _, info := range t.data.peers { - if info.port == port { - continue +func (t *switchTable) blockPeer(from phony.Actor, port switchPort) { + t.Act(from, func() { + peer, isIn := t.data.peers[port] + if !isIn { + return } - t.unlockedHandleMsg(&info.msg, info.port, true) - } - t.unlockedHandleMsg(&peer.msg, peer.port, true) + peer.blocked = true + t.data.peers[port] = peer + if port != t.parent { + return + } + t.parent = 0 + for _, info := range t.data.peers { + if info.port == port { + continue + } + t._handleMsg(&info.msg, info.port, true) + } + t._handleMsg(&peer.msg, peer.port, true) + }) } // Removes a peer. // Must be called by the router actor with a lambda that calls this. // If the removed peer was this node's parent, it immediately tries to find a new parent. -func (t *switchTable) forgetPeer(port switchPort) { - t.mutex.Lock() - defer t.mutex.Unlock() - delete(t.data.peers, port) - defer t._updateTable() - if port != t.parent { - return - } - t.parent = 0 - for _, info := range t.data.peers { - t.unlockedHandleMsg(&info.msg, info.port, true) - } +func (t *switchTable) forgetPeer(from phony.Actor, port switchPort) { + t.Act(from, func() { + delete(t.data.peers, port) + defer t._updateTable() + if port != t.parent { + return + } + t.parent = 0 + for _, info := range t.data.peers { + t._handleMsg(&info.msg, info.port, true) + } + }) } // Dropped is a list of roots that are better than the current root, but stopped sending new timestamps. // If we switch to a new root, and that root is better than an old root that previously timed out, then we can clean up the old dropped root infos. // This function is called periodically to do that cleanup. -func (t *switchTable) cleanDropped() { +func (t *switchTable) _cleanDropped() { // TODO? only call this after root changes, not periodically for root := range t.drop { if !firstIsBetter(&root, &t.data.locator.root) { @@ -327,9 +317,7 @@ type switchMsgHop struct { } // This returns a *switchMsg to a copy of this node's current switchMsg, which can safely have additional information appended to Hops and sent to a peer. -func (t *switchTable) getMsg() *switchMsg { - t.mutex.RLock() - defer t.mutex.RUnlock() +func (t *switchTable) _getMsg() *switchMsg { if t.parent == 0 { return &switchMsg{Root: t.key, TStamp: t.data.locator.tstamp} } else if parent, isIn := t.data.peers[t.parent]; isIn { @@ -341,14 +329,18 @@ func (t *switchTable) getMsg() *switchMsg { } } +func (t *lookupTable) getMsg() *switchMsg { + msg := t._msg + msg.Hops = append([]switchMsgHop(nil), t._msg.Hops...) + return &msg +} + // This function checks that the root information in a switchMsg is OK. // In particular, that the root is better, or else the same as the current root but with a good timestamp, and that this root+timestamp haven't been dropped due to timeout. -func (t *switchTable) checkRoot(msg *switchMsg) bool { +func (t *switchTable) _checkRoot(msg *switchMsg) bool { // returns false if it's a dropped root, not a better root, or has an older timestamp // returns true otherwise // used elsewhere to keep inserting peers into the dht only if root info is OK - t.mutex.RLock() - defer t.mutex.RUnlock() dropTstamp, isIn := t.drop[msg.Root] switch { case isIn && dropTstamp >= msg.TStamp: @@ -364,20 +356,13 @@ func (t *switchTable) checkRoot(msg *switchMsg) bool { } } -// This is a mutexed wrapper to unlockedHandleMsg, and is called by the peer structs in peers.go to pass a switchMsg for that peer into the switch. -func (t *switchTable) handleMsg(msg *switchMsg, fromPort switchPort) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.unlockedHandleMsg(msg, fromPort, false) -} - // This updates the switch with information about a peer. // Then the tricky part, it decides if it should update our own locator as a result. // That happens if this node is already our parent, or is advertising a better root, or is advertising a better path to the same root, etc... // There are a lot of very delicate order sensitive checks here, so its' best to just read the code if you need to understand what it's doing. // It's very important to not change the order of the statements in the case function unless you're absolutely sure that it's safe, including safe if used alongside nodes that used the previous order. // Set the third arg to true if you're reprocessing an old message, e.g. to find a new parent after one disconnects, to avoid updating some timing related things. -func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, reprocessing bool) { +func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessing bool) { // TODO directly use a switchMsg instead of switchMessage + sigs now := time.Now() // Set up the sender peerInfo @@ -500,10 +485,10 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep if peer.port == sender.port { continue } - t.unlockedHandleMsg(&peer.msg, peer.port, true) + t._handleMsg(&peer.msg, peer.port, true) } // Process the sender last, to avoid keeping them as a parent if at all possible. - t.unlockedHandleMsg(&sender.msg, sender.port, true) + t._handleMsg(&sender.msg, sender.port, true) case now.Sub(t.time) < switch_throttle: // We've already gotten an update from this root recently, so ignore this one to avoid flooding. case sender.locator.tstamp > t.data.locator.tstamp: @@ -521,7 +506,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep } t.data.locator = sender.locator t.parent = sender.port - t.core.peers.sendSwitchMsgs(t) + defer t.core.peers.sendSwitchMsgs(t) } if true || doUpdate { defer t._updateTable() @@ -560,7 +545,9 @@ func (t *switchTable) _updateTable() { time: pinfo.time, } } - t.core.peers.updateTables(nil, &newTable) // TODO not be from nil + newTable._msg = *t._getMsg() + t.core.peers.updateTables(t, &newTable) + t.core.router.updateTable(t, &newTable) } // Starts the switch worker From 945930aa2ccbc327ae6bef0ec8db36b65a398a17 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Fri, 3 Apr 2020 00:32:26 -0500 Subject: [PATCH 07/51] WIP have peer actors queue packets, temporarily a single simple FIFO queue with head drop --- src/yggdrasil/api.go | 29 ---- src/yggdrasil/link.go | 18 +-- src/yggdrasil/packetqueue.go | 39 +++++ src/yggdrasil/peer.go | 43 ++++-- src/yggdrasil/router.go | 9 +- src/yggdrasil/switch.go | 291 +---------------------------------- 6 files changed, 91 insertions(+), 338 deletions(-) create mode 100644 src/yggdrasil/packetqueue.go diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index a722dc52..31ece6b8 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -199,35 +199,6 @@ func (c *Core) GetDHT() []DHTEntry { return dhtentries } -// GetSwitchQueues returns information about the switch queues that are -// currently in effect. These values can change within an instant. -func (c *Core) GetSwitchQueues() SwitchQueues { - var switchqueues SwitchQueues - switchTable := &c.switchTable - getSwitchQueues := func() { - switchqueues = SwitchQueues{ - Count: uint64(len(switchTable.queues.bufs)), - Size: switchTable.queues.size, - HighestCount: uint64(switchTable.queues.maxbufs), - HighestSize: switchTable.queues.maxsize, - MaximumSize: switchTable.queues.totalMaxSize, - } - for port, pbuf := range switchTable.queues.bufs { - for k, v := range pbuf { - queue := SwitchQueue{ - ID: k, - Size: v.size, - Packets: uint64(len(v.packets)), - Port: uint64(port), - } - switchqueues.Queues = append(switchqueues.Queues, queue) - } - } - } - phony.Block(&c.switchTable, getSwitchQueues) - return switchqueues -} - // GetSessions returns a list of open sessions from this node to other nodes. func (c *Core) GetSessions() []Session { var sessions []Session diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 978e8eab..15017993 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -62,7 +62,7 @@ type linkInterface struct { keepAliveTimer *time.Timer // Fires to send keep-alive traffic stallTimer *time.Timer // Fires to signal that no incoming traffic (including keep-alive) has been seen closeTimer *time.Timer // Fires when the link has been idle so long we need to close it - inSwitch bool // True if the switch is tracking this link + isIdle bool // True if the peer actor knows the link is idle stalled bool // True if we haven't been receiving any response traffic unstalled bool // False if an idle notification to the switch hasn't been sent because we stalled (or are first starting up) } @@ -278,7 +278,7 @@ const ( func (intf *linkInterface) notifySending(size int, isLinkTraffic bool) { intf.Act(&intf.writer, func() { if !isLinkTraffic { - intf.inSwitch = false + intf.isIdle = false } intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) intf._cancelStallTimer() @@ -311,7 +311,7 @@ func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { intf.sendTimer.Stop() intf.sendTimer = nil if !isLinkTraffic { - intf._notifySwitch() + intf._notifyIdle() } if size > 0 && intf.stallTimer == nil { intf.stallTimer = time.AfterFunc(stallTime, intf.notifyStalled) @@ -320,15 +320,13 @@ func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { } // Notify the switch that we're ready for more traffic, assuming we're not in a stalled state -func (intf *linkInterface) _notifySwitch() { - if !intf.inSwitch { +func (intf *linkInterface) _notifyIdle() { + if !intf.isIdle { if intf.stalled { intf.unstalled = false } else { - intf.inSwitch = true - intf.link.core.switchTable.Act(intf, func() { - intf.link.core.switchTable._idleIn(intf.peer.port) - }) + intf.isIdle = true + intf.peer.Act(intf, intf.peer._handleIdle) } } } @@ -364,7 +362,7 @@ func (intf *linkInterface) notifyRead(size int) { } intf.stalled = false if !intf.unstalled { - intf._notifySwitch() + intf._notifyIdle() intf.unstalled = true } if size > 0 && intf.stallTimer == nil { diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go new file mode 100644 index 00000000..ac66c0da --- /dev/null +++ b/src/yggdrasil/packetqueue.go @@ -0,0 +1,39 @@ +package yggdrasil + +import "github.com/yggdrasil-network/yggdrasil-go/src/util" + +// TODO take max size from config +const MAX_PACKET_QUEUE_SIZE = 1048576 // 1 MB + +// TODO separate queues per e.g. traffic flow +type packetQueue struct { + packets [][]byte + size uint32 +} + +func (q *packetQueue) cleanup() { + for q.size > MAX_PACKET_QUEUE_SIZE { + if packet, success := q.pop(); success { + util.PutBytes(packet) + } else { + panic("attempted to drop packet from empty queue") + break + } + } +} + +func (q *packetQueue) push(packet []byte) { + q.packets = append(q.packets, packet) + q.size += uint32(len(packet)) + q.cleanup() +} + +func (q *packetQueue) pop() ([]byte, bool) { + if len(q.packets) > 0 { + packet := q.packets[0] + q.packets = q.packets[1:] + q.size -= uint32(len(packet)) + return packet, true + } + return nil, false +} diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 9acb9321..bc9de04c 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -100,6 +100,8 @@ type peer struct { bytesRecvd uint64 ports map[switchPort]*peer table *lookupTable + queue packetQueue + idle bool } func (ps *peers) updateTables(from phony.Actor, table *lookupTable) { @@ -243,6 +245,13 @@ func (p *peer) _handlePacket(packet []byte) { } } +// Get the coords of a packet without decoding +func peer_getPacketCoords(packet []byte) []byte { + _, pTypeLen := wire_decode_uint64(packet) + coords, _ := wire_decode_coords(packet[pTypeLen:]) + return coords +} + // Called to handle traffic or protocolTraffic packets. // In either case, this reads from the coords of the packet header, does a switch lookup, and forwards to the next node. func (p *peer) _handleTraffic(packet []byte) { @@ -250,7 +259,7 @@ func (p *peer) _handleTraffic(packet []byte) { // Drop traffic if the peer isn't in the switch return } - coords := switch_getPacketCoords(packet) + coords := peer_getPacketCoords(packet) next := p.table.lookup(coords) if nPeer, isIn := p.ports[next]; isIn { nPeer.sendPacketsFrom(p, [][]byte{packet}) @@ -264,17 +273,33 @@ func (p *peer) sendPacketsFrom(from phony.Actor, packets [][]byte) { }) } -// This just calls p.out(packet) for now. func (p *peer) _sendPackets(packets [][]byte) { - // Is there ever a case where something more complicated is needed? - // What if p.out blocks? - var size int for _, packet := range packets { - size += len(packet) + p.queue.push(packet) + } + if p.idle { + p.idle = false + p._handleIdle() + } +} + +func (p *peer) _handleIdle() { + var packets [][]byte + var size uint64 + for size < 65535 { + if packet, success := p.queue.pop(); success { + packets = append(packets, packet) + size += uint64(len(packet)) + } else { + break + } + } + if len(packets) > 0 { + p.bytesSent += uint64(size) + p.out(packets) + } else { + p.idle = true } - p.bytesSent += uint64(size) - // FIXME need to manage queues here or else things can block! - p.out(packets) } // This wraps the packet in the inner (ephemeral) and outer (permanent) crypto layers. diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 40b8303f..1be94661 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -67,7 +67,14 @@ func (r *router) init(core *Core) { // FIXME don't block here! p = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil) }) - p.out = func(packets [][]byte) { r.handlePackets(p, packets) } + p.out = func(packets [][]byte) { + r.handlePackets(p, packets) + r.Act(p, func() { + // after the router handle the packets, notify the peer that it's ready for more + p.Act(r, p._handleIdle) + }) + } + p.Act(r, p._handleIdle) r.out = func(bs []byte) { p.handlePacketFrom(r, bs) } r.nodeinfo.init(r.core) r.core.config.Mutex.RLock() diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 2661b460..091596b5 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -164,13 +164,11 @@ type switchData struct { type switchTable struct { core *Core key crypto.SigPubKey // Our own key + phony.Inbox // Owns the below time time.Time // Time when locator.tstamp was last updated drop map[crypto.SigPubKey]int64 // Tstamp associated with a dropped root parent switchPort // Port of whatever peer is our parent, or self if we're root data switchData // - phony.Inbox // Owns the below - queues switch_buffers // Queues - not atomic so ONLY use through the actor - idle map[switchPort]struct{} // idle peers - not atomic so ONLY use through the actor } // Minimum allowed total size of switch queues. @@ -185,18 +183,7 @@ func (t *switchTable) init(core *Core) { peers := make(map[switchPort]peerInfo) t.data = switchData{locator: locator, peers: peers} t.drop = make(map[crypto.SigPubKey]int64) - phony.Block(t, func() { - core.config.Mutex.RLock() - if core.config.Current.SwitchOptions.MaxTotalQueueSize > SwitchQueueTotalMinSize { - t.queues.totalMaxSize = core.config.Current.SwitchOptions.MaxTotalQueueSize - } else { - t.queues.totalMaxSize = SwitchQueueTotalMinSize - } - core.config.Mutex.RUnlock() - t.queues.bufs = make(map[switchPort]map[string]switch_buffer) - t.idle = make(map[switchPort]struct{}) - }) - t._updateTable() + phony.Block(t, t._updateTable) } func (t *switchTable) reconfigure() { @@ -557,73 +544,6 @@ func (t *switchTable) start() error { return nil } -type closerInfo struct { - elem tableElem - dist int -} - -// Return a map of ports onto distance, keeping only ports closer to the destination than this node -// If the map is empty (or nil), then no peer is closer -/* -func (t *switchTable) getCloser(dest []byte) []closerInfo { - table := t.getTable() - myDist := table.self.dist(dest) - if myDist == 0 { - // Skip the iteration step if it's impossible to be closer - return nil - } - var closer []closerInfo - for _, info := range table.elems { - dist := info.locator.dist(dest) - if dist < myDist { - closer = append(closer, closerInfo{info, dist}) - } - } - return closer -} -*/ - -// Returns true if the peer is closer to the destination than ourself -/* -func (t *switchTable) portIsCloser(dest []byte, port switchPort) bool { - table := t.getTable() - if info, isIn := table.elems[port]; isIn { - theirDist := info.locator.dist(dest) - myDist := table.self.dist(dest) - return theirDist < myDist - } else { - return false - } -} -*/ - -// Get the coords of a packet without decoding -func switch_getPacketCoords(packet []byte) []byte { - _, pTypeLen := wire_decode_uint64(packet) - coords, _ := wire_decode_coords(packet[pTypeLen:]) - return coords -} - -// Returns a unique string for each stream of traffic -// Equal to coords -// The sender may append arbitrary info to the end of coords (as long as it's begins with a 0x00) to designate separate traffic streams -// Currently, it's the IPv6 next header type and the first 2 uint16 of the next header -// This is equivalent to the TCP/UDP protocol numbers and the source / dest ports -// TODO figure out if something else would make more sense (other transport protocols?) -func switch_getPacketStreamID(packet []byte) string { - return string(switch_getPacketCoords(packet)) -} - -// Returns the flowlabel from a given set of coords -func switch_getFlowLabelFromCoords(in []byte) []byte { - for i, v := range in { - if v == 0 { - return in[i+1:] - } - } - return []byte{} -} - // Find the best port to forward to for a given set of coords func (t *lookupTable) lookup(coords []byte) switchPort { var bestPort switchPort @@ -660,210 +580,3 @@ func (t *lookupTable) lookup(coords []byte) switchPort { } return bestPort } - -// Handle an incoming packet -// Either send it to ourself, or to the first idle peer that's free -// Returns true if the packet has been handled somehow, false if it should be queued -func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}) (bool, switchPort) { - /* - coords := switch_getPacketCoords(packet) - table := t.getTable() - port := table.lookup(coords) - ports := t.core.peers.getPorts() - peer := ports[port] - if peer == nil { - // FIXME hack, if the peer disappeared durring a race then don't buffer - return true, 0 - } - if _, isIdle := idle[port]; isIdle || port == 0 { - // Either no closer peers, or the closest peer is idle - delete(idle, port) - peer.sendPacketsFrom(t, [][]byte{packet}) - return true, port - } - // There's a closer peer, but it's not idle, so buffer it - return false, port - */ - return true, 0 -} - -// Info about a buffered packet -type switch_packetInfo struct { - bytes []byte - time time.Time // Timestamp of when the packet arrived -} - -// Used to keep track of buffered packets -type switch_buffer struct { - packets []switch_packetInfo // Currently buffered packets, which may be dropped if it grows too large - size uint64 // Total queue size in bytes -} - -type switch_buffers struct { - totalMaxSize uint64 - bufs map[switchPort]map[string]switch_buffer // Buffers indexed by port and StreamID - size uint64 // Total size of all buffers, in bytes - maxbufs int - maxsize uint64 -} - -func (b *switch_buffers) _cleanup(t *switchTable) { - /* - for port, pbufs := range b.bufs { - for streamID, buf := range pbufs { - // Remove queues for which we have no next hop - packet := buf.packets[0] - coords := switch_getPacketCoords(packet.bytes) - if len(t.getCloser(coords)) == 0 { - for _, packet := range buf.packets { - util.PutBytes(packet.bytes) - } - b.size -= buf.size - delete(pbufs, streamID) - } - } - if len(pbufs) == 0 { - delete(b.bufs, port) - } - } - - for b.size > b.totalMaxSize { - // Drop a random queue - target := rand.Uint64() % b.size - var size uint64 // running total - for port, pbufs := range b.bufs { - for streamID, buf := range pbufs { - size += buf.size - if size < target { - continue - } - var packet switch_packetInfo - packet, buf.packets = buf.packets[0], buf.packets[1:] - buf.size -= uint64(len(packet.bytes)) - b.size -= uint64(len(packet.bytes)) - util.PutBytes(packet.bytes) - if len(buf.packets) == 0 { - delete(pbufs, streamID) - if len(pbufs) == 0 { - delete(b.bufs, port) - } - } else { - // Need to update the map, since buf was retrieved by value - pbufs[streamID] = buf - } - break - } - } - } - */ -} - -// Handles incoming idle notifications -// Loops over packets and sends the newest one that's OK for this peer to send -// Returns true if the peer is no longer idle, false if it should be added to the idle list -func (t *switchTable) _handleIdle(port switchPort) bool { - // TODO? only send packets for which this is the best next hop that isn't currently blocked sending - /* - to := t.core.peers.getPorts()[port] - if to == nil { - return true - } - var packets [][]byte - var psize int - t.queues._cleanup(t) - now := time.Now() - pbufs := t.queues.bufs[port] - for psize < 65535 { - var best *string - var bestPriority float64 - for streamID, buf := range pbufs { - // Filter over the streams that this node is closer to - // Keep the one with the smallest queue - packet := buf.packets[0] - priority := float64(now.Sub(packet.time)) / float64(buf.size) - if priority >= bestPriority { - b := streamID // copy since streamID is mutated in the loop - best = &b - bestPriority = priority - } - } - if best != nil { - buf := pbufs[*best] - var packet switch_packetInfo - // TODO decide if this should be LIFO or FIFO - packet, buf.packets = buf.packets[0], buf.packets[1:] - buf.size -= uint64(len(packet.bytes)) - t.queues.size -= uint64(len(packet.bytes)) - if len(buf.packets) == 0 { - delete(pbufs, *best) - if len(pbufs) == 0 { - delete(t.queues.bufs, port) - } - } else { - // Need to update the map, since buf was retrieved by value - pbufs[*best] = buf - - } - packets = append(packets, packet.bytes) - psize += len(packet.bytes) - } else { - // Finished finding packets - break - } - } - if len(packets) > 0 { - to.sendPacketsFrom(t, packets) - return true - } - return false - */ - return false -} - -func (t *switchTable) packetInFrom(from phony.Actor, bytes []byte) { - t.Act(from, func() { - t._packetIn(bytes) - }) -} - -func (t *switchTable) _packetIn(bytes []byte) { - // Try to send it somewhere (or drop it if it's corrupt or at a dead end) - if sent, best := t._handleIn(bytes, t.idle); !sent { - // There's nobody free to take it right now, so queue it for later - packet := switch_packetInfo{bytes, time.Now()} - streamID := switch_getPacketStreamID(packet.bytes) - if _, isIn := t.queues.bufs[best]; !isIn { - t.queues.bufs[best] = make(map[string]switch_buffer) - } - buf, bufExists := t.queues.bufs[best][streamID] - buf.packets = append(buf.packets, packet) - buf.size += uint64(len(packet.bytes)) - t.queues.size += uint64(len(packet.bytes)) - // Keep a track of the max total queue size - if t.queues.size > t.queues.maxsize { - t.queues.maxsize = t.queues.size - } - t.queues.bufs[best][streamID] = buf - if !bufExists { - // Keep a track of the max total queue count. Only recalculate this - // when the queue is new because otherwise repeating len(dict) might - // cause unnecessary processing overhead - var count int - for _, pbufs := range t.queues.bufs { - count += len(pbufs) - } - if count > t.queues.maxbufs { - t.queues.maxbufs = count - } - } - t.queues._cleanup(t) - } -} - -func (t *switchTable) _idleIn(port switchPort) { - // Try to find something to send to this peer - if !t._handleIdle(port) { - // Didn't find anything ready to send yet, so stay idle - t.idle[port] = struct{}{} - } -} From 09efdfef9a5ec99ac8ce38c179063127bb6cebad Mon Sep 17 00:00:00 2001 From: Arceliar Date: Fri, 3 Apr 2020 19:26:48 -0500 Subject: [PATCH 08/51] fix bug in switch actor's cleanRoot, strict nonce handling at the session level, and add separate queues per stream to the packetqueue code --- src/yggdrasil/packetqueue.go | 115 ++++++++++++++++++++++++++++++----- src/yggdrasil/session.go | 12 +--- src/yggdrasil/switch.go | 4 +- 3 files changed, 105 insertions(+), 26 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index ac66c0da..ff717258 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -1,38 +1,125 @@ package yggdrasil -import "github.com/yggdrasil-network/yggdrasil-go/src/util" +import ( + "time" + + "github.com/yggdrasil-network/yggdrasil-go/src/util" +) // TODO take max size from config -const MAX_PACKET_QUEUE_SIZE = 1048576 // 1 MB +const MAX_PACKET_QUEUE_SIZE = 4 * 1048576 // 4 MB + +type pqStreamID string + +type pqPacketInfo struct { + packet []byte + time time.Time +} + +type pqStream struct { + infos []pqPacketInfo + size uint64 +} // TODO separate queues per e.g. traffic flow type packetQueue struct { - packets [][]byte - size uint32 + streams map[pqStreamID]pqStream + size uint64 } func (q *packetQueue) cleanup() { for q.size > MAX_PACKET_QUEUE_SIZE { - if packet, success := q.pop(); success { - util.PutBytes(packet) + // TODO? drop from a random stream + // odds proportional to size? bandwidth? + // always using the worst is exploitable -> flood 1 packet per random stream + // find the stream that's using the most bandwidth + now := time.Now() + var worst pqStreamID + for id := range q.streams { + worst = id + break // get a random ID to start + } + worstStream := q.streams[worst] + worstSize := float64(worstStream.size) + worstAge := now.Sub(worstStream.infos[0].time).Seconds() + for id, stream := range q.streams { + thisSize := float64(stream.size) + thisAge := now.Sub(stream.infos[0].time).Seconds() + // cross multiply to avoid division by zero issues + if worstSize*thisAge < thisSize*worstAge { + // worstSize/worstAge < thisSize/thisAge -> this uses more bandwidth + worst = id + worstStream = stream + worstSize = thisSize + worstAge = thisAge + } + } + // Drop the oldest packet from the worst stream + packet := worstStream.infos[0].packet + worstStream.infos = worstStream.infos[1:] + worstStream.size -= uint64(len(packet)) + q.size -= uint64(len(packet)) + util.PutBytes(packet) + // save the modified stream to queues + if len(worstStream.infos) > 0 { + q.streams[worst] = worstStream } else { - panic("attempted to drop packet from empty queue") - break + delete(q.streams, worst) } } } func (q *packetQueue) push(packet []byte) { - q.packets = append(q.packets, packet) - q.size += uint32(len(packet)) + if q.streams == nil { + q.streams = make(map[pqStreamID]pqStream) + } + // get stream + id := pqStreamID(peer_getPacketCoords(packet)) // just coords for now + stream := q.streams[id] + // update stream + stream.infos = append(stream.infos, pqPacketInfo{packet, time.Now()}) + stream.size += uint64(len(packet)) + // save update to queues + q.streams[id] = stream + q.size += uint64(len(packet)) q.cleanup() } func (q *packetQueue) pop() ([]byte, bool) { - if len(q.packets) > 0 { - packet := q.packets[0] - q.packets = q.packets[1:] - q.size -= uint32(len(packet)) + if len(q.streams) > 0 { + // get the stream that uses the least bandwidth + now := time.Now() + var best pqStreamID + for id := range q.streams { + best = id + break // get a random ID to start + } + bestStream := q.streams[best] + bestSize := float64(bestStream.size) + bestAge := now.Sub(bestStream.infos[0].time).Seconds() + for id, stream := range q.streams { + thisSize := float64(stream.size) + thisAge := now.Sub(stream.infos[0].time).Seconds() + // cross multiply to avoid division by zero issues + if bestSize*thisAge > thisSize*bestAge { + // bestSize/bestAge > thisSize/thisAge -> this uses less bandwidth + best = id + bestStream = stream + bestSize = thisSize + bestAge = thisAge + } + } + // get the oldest packet from the best stream + packet := bestStream.infos[0].packet + bestStream.infos = bestStream.infos[1:] + bestStream.size -= uint64(len(packet)) + q.size -= uint64(len(packet)) + // save the modified stream to queues + if len(bestStream.infos) > 0 { + q.streams[best] = bestStream + } else { + delete(q.streams, best) + } return packet, true } return nil, false diff --git a/src/yggdrasil/session.go b/src/yggdrasil/session.go index 01c2cdfb..223ea33f 100644 --- a/src/yggdrasil/session.go +++ b/src/yggdrasil/session.go @@ -16,9 +16,6 @@ import ( "github.com/Arceliar/phony" ) -// Duration that we keep track of old nonces per session, to allow some out-of-order packet delivery -const nonceWindow = time.Second - // All the information we know about an active session. // This includes coords, permanent and ephemeral keys, handles and nonces, various sorts of timing information for timeout and maintenance, and some metadata for the admin API. type sessionInfo struct { @@ -394,14 +391,9 @@ func (sinfo *sessionInfo) _getMTU() MTU { return sinfo.myMTU } -// Checks if a packet's nonce is recent enough to fall within the window of allowed packets, and not already received. +// Checks if a packet's nonce is newer than any previously received func (sinfo *sessionInfo) _nonceIsOK(theirNonce *crypto.BoxNonce) bool { - // The bitmask is to allow for some non-duplicate out-of-order packets - if theirNonce.Minus(&sinfo.theirNonce) > 0 { - // This is newer than the newest nonce we've seen - return true - } - return time.Since(sinfo.time) < nonceWindow + return theirNonce.Minus(&sinfo.theirNonce) > 0 } // Updates the nonce mask by (possibly) shifting the bitmask and setting the bit corresponding to this nonce to 1, and then updating the most recent nonce diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 091596b5..4f9044cd 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -227,10 +227,10 @@ func (t *switchTable) _cleanRoot() { t.time = now if t.data.locator.root != t.key { t.data.seq++ - defer t._updateTable() - t.core.router.reset(nil) + defer t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} + t._updateTable() // updates base copy of switch msg in lookupTable t.core.peers.sendSwitchMsgs(t) } } From 9d0969db2be1bff624a641158544db687f3d2427 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 5 Apr 2020 14:57:05 -0500 Subject: [PATCH 09/51] prevent a hypothetical block on link message sending --- src/yggdrasil/link.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 15017993..78986286 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -229,10 +229,18 @@ func (intf *linkInterface) handler() error { intf.peer.Act(nil, intf.peer._removeSelf) }() intf.peer.out = func(msgs [][]byte) { - intf.writer.sendFrom(intf.peer, msgs, false) + // nil to prevent it from blocking if the link is somehow frozen + // this is safe because another packet won't be sent until the link notifies + // the peer that it's ready for one + intf.writer.sendFrom(nil, msgs, false) } intf.peer.linkOut = func(bs []byte) { - intf.writer.sendFrom(intf.peer, [][]byte{bs}, true) + // nil to prevent it from blocking if the link is somehow frozen + // FIXME this is hypothetically not safe, the peer shouldn't be sending + // additional packets until this one finishes, otherwise this could leak + // memory if writing happens slower than link packets are generated... + // that seems unlikely, so it's a lesser evil than deadlocking for now + intf.writer.sendFrom(nil, [][]byte{bs}, true) } themAddr := address.AddrForNodeID(crypto.GetNodeID(&intf.info.box)) themAddrString := net.IP(themAddr[:]).String() From 9c818c6278473a923e400832b94daf93ab977c12 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 26 Apr 2020 07:33:03 -0500 Subject: [PATCH 10/51] work-in-progress on a new sim --- build | 2 +- cmd/yggdrasilsim/main.go | 15 +++++++ cmd/yggdrasilsim/node.go | 23 ++++++++++ cmd/yggdrasilsim/store.go | 41 ++++++++++++++++++ src/yggdrasil/simlink.go | 88 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 cmd/yggdrasilsim/main.go create mode 100644 cmd/yggdrasilsim/node.go create mode 100644 cmd/yggdrasilsim/store.go create mode 100644 src/yggdrasil/simlink.go diff --git a/build b/build index 66f94403..6b93ca77 100755 --- a/build +++ b/build @@ -45,7 +45,7 @@ elif [ $ANDROID ]; then github.com/yggdrasil-network/yggdrasil-extras/src/mobile \ github.com/yggdrasil-network/yggdrasil-extras/src/dummy else - for CMD in yggdrasil yggdrasilctl ; do + for CMD in yggdrasil yggdrasilctl yggdrasilsim; do echo "Building: $CMD" go build $ARGS -ldflags="$LDFLAGS" -gcflags="$GCFLAGS" ./cmd/$CMD diff --git a/cmd/yggdrasilsim/main.go b/cmd/yggdrasilsim/main.go new file mode 100644 index 00000000..40fd9ce4 --- /dev/null +++ b/cmd/yggdrasilsim/main.go @@ -0,0 +1,15 @@ +package main + +import ( +//"github.com/yggdrasil-network/yggdrasil-go/src/address" +//"github.com/yggdrasil-network/yggdrasil-go/src/config" +//"github.com/yggdrasil-network/yggdrasil-go/src/crypto" +//"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" +) + +func main() { + store := makeStoreSquareGrid(4) + var block chan struct{} + <-block + panic(store) +} diff --git a/cmd/yggdrasilsim/node.go b/cmd/yggdrasilsim/node.go new file mode 100644 index 00000000..e23b5109 --- /dev/null +++ b/cmd/yggdrasilsim/node.go @@ -0,0 +1,23 @@ +package main + +import ( + "io/ioutil" + + "github.com/gologme/log" + + //"github.com/yggdrasil-network/yggdrasil-go/src/address" + "github.com/yggdrasil-network/yggdrasil-go/src/config" + //"github.com/yggdrasil-network/yggdrasil-go/src/crypto" + "github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" +) + +type simNode struct { + core yggdrasil.Core + id int +} + +func newNode(id int) *simNode { + n := simNode{id: id} + n.core.Start(config.GenerateConfig(), log.New(ioutil.Discard, "", 0)) + return &n +} diff --git a/cmd/yggdrasilsim/store.go b/cmd/yggdrasilsim/store.go new file mode 100644 index 00000000..6fce81a4 --- /dev/null +++ b/cmd/yggdrasilsim/store.go @@ -0,0 +1,41 @@ +package main + +type nodeStore map[int]*simNode + +func makeStoreSingle() nodeStore { + s := make(nodeStore) + s[0] = newNode(0) + return s +} + +func linkNodes(a *simNode, b *simNode) { + la := a.core.NewSimlink() + lb := b.core.NewSimlink() + la.SetDestination(lb) + lb.SetDestination(la) + la.Start() + lb.Start() +} + +func makeStoreSquareGrid(sideLength int) nodeStore { + store := make(nodeStore) + nNodes := sideLength * sideLength + idxs := make([]int, 0, nNodes) + // TODO shuffle nodeIDs + for idx := 1; idx <= nNodes; idx++ { + idxs = append(idxs, idx) + } + for _, idx := range idxs { + n := newNode(idx) + store[idx] = n + } + for idx := 0; idx < nNodes; idx++ { + if (idx % sideLength) != 0 { + linkNodes(store[idxs[idx]], store[idxs[idx-1]]) + } + if idx >= sideLength { + linkNodes(store[idxs[idx]], store[idxs[idx-sideLength]]) + } + } + return store +} diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go new file mode 100644 index 00000000..33332645 --- /dev/null +++ b/src/yggdrasil/simlink.go @@ -0,0 +1,88 @@ +package yggdrasil + +import ( + "errors" + "github.com/Arceliar/phony" + "github.com/yggdrasil-network/yggdrasil-go/src/util" +) + +type Simlink struct { + phony.Inbox + rch chan []byte + dest *Simlink + link *linkInterface + started bool +} + +func (s *Simlink) readMsg() ([]byte, error) { + bs := <-s.rch + if bs != nil { + return bs, nil + } else { + return nil, errors.New("read from closed Simlink") + } +} + +func (s *Simlink) _recvMetaBytes() ([]byte, error) { + return s.readMsg() +} + +func (s *Simlink) _sendMetaBytes(bs []byte) error { + _, err := s.writeMsgs([][]byte{bs}) + return err +} + +func (s *Simlink) close() error { + close(s.rch) + return nil +} + +func (s *Simlink) writeMsgs(msgs [][]byte) (int, error) { + if s.dest == nil { + return 0, errors.New("write to unpaired Simlink") + } + var size int + for _, msg := range msgs { + size += len(msg) + bs := append(util.GetBytes(), msg...) + phony.Block(s, func() { + s.dest.Act(s, func() { + defer func() { recover() }() + s.dest.rch <- bs + }) + }) + } + return size, nil +} + +func (c *Core) NewSimlink() *Simlink { + s := &Simlink{rch: make(chan []byte, 1)} + n := "Simlink" + s.link, _ = c.link.create(s, n, n, n, n, false, true) + return s +} + +func (s *Simlink) SetDestination(dest *Simlink) error { + var err error + phony.Block(s, func() { + if s.dest != nil { + err = errors.New("destination already set") + } else { + s.dest = dest + } + }) + return err +} + +func (s *Simlink) Start() error { + var err error + phony.Block(s, func() { + if s.started { + err = errors.New("already started") + } else { + s.started = true + go s.link.handler() + } + }) + return err +} From 5db93be4df4ae6eabe749bdeedffd6e3f4acf63c Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 26 Apr 2020 09:59:30 -0500 Subject: [PATCH 11/51] more sim work --- cmd/yggdrasilsim/node.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd/yggdrasilsim/node.go b/cmd/yggdrasilsim/node.go index e23b5109..65e6a805 100644 --- a/cmd/yggdrasilsim/node.go +++ b/cmd/yggdrasilsim/node.go @@ -5,19 +5,24 @@ import ( "github.com/gologme/log" - //"github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/config" - //"github.com/yggdrasil-network/yggdrasil-go/src/crypto" + "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" ) type simNode struct { - core yggdrasil.Core - id int + core yggdrasil.Core + id int + nodeID crypto.NodeID + dialer *yggdrasil.Dialer + listener *yggdrasil.Listener } func newNode(id int) *simNode { n := simNode{id: id} n.core.Start(config.GenerateConfig(), log.New(ioutil.Discard, "", 0)) + n.nodeID = *n.core.NodeID() + n.dialer, _ = n.core.ConnDialer() + n.listener, _ = n.core.ConnListen() return &n } From 6d895708602bd8c623c0f4f4b6a90fc5ce496e62 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 2 May 2020 06:44:51 -0500 Subject: [PATCH 12/51] eliminate most sync.Pool use, gives a safer but slightly slower interface --- src/crypto/crypto.go | 18 ++++++++++++------ src/tuntap/conn.go | 5 ----- src/tuntap/iface.go | 16 +++++++--------- src/util/bytes_mobile.go | 21 --------------------- src/util/bytes_other.go | 18 ------------------ src/yggdrasil/conn.go | 6 +++--- src/yggdrasil/link.go | 4 ---- src/yggdrasil/packetqueue.go | 4 +--- src/yggdrasil/peer.go | 3 --- src/yggdrasil/pool.go | 20 ++++++++++++++++++++ src/yggdrasil/router.go | 4 ---- src/yggdrasil/session.go | 15 +-------------- src/yggdrasil/stream.go | 7 ++++--- src/yggdrasil/wire.go | 8 +++++--- 14 files changed, 53 insertions(+), 96 deletions(-) delete mode 100644 src/util/bytes_mobile.go delete mode 100644 src/util/bytes_other.go create mode 100644 src/yggdrasil/pool.go diff --git a/src/crypto/crypto.go b/src/crypto/crypto.go index 211a0e54..595e6f40 100644 --- a/src/crypto/crypto.go +++ b/src/crypto/crypto.go @@ -17,12 +17,11 @@ import ( "crypto/rand" "crypto/sha512" "encoding/hex" + "sync" "golang.org/x/crypto/curve25519" "golang.org/x/crypto/ed25519" "golang.org/x/crypto/nacl/box" - - "github.com/yggdrasil-network/yggdrasil-go/src/util" ) //////////////////////////////////////////////////////////////////////////////// @@ -225,29 +224,36 @@ func GetSharedKey(myPrivKey *BoxPrivKey, return (*BoxSharedKey)(&shared) } +// pool is used internally by BoxOpen and BoxSeal to avoid allocating temporary space +var pool = sync.Pool{New: func() interface{} { return []byte(nil) }} + // BoxOpen returns a message and true if it successfully opens a crypto box using the provided shared key and nonce. +// The boxed input slice's backing array is reused for the unboxed output when possible. func BoxOpen(shared *BoxSharedKey, boxed []byte, nonce *BoxNonce) ([]byte, bool) { - out := util.GetBytes() s := (*[BoxSharedKeyLen]byte)(shared) n := (*[BoxNonceLen]byte)(nonce) - unboxed, success := box.OpenAfterPrecomputation(out, boxed, n, s) + temp := append(pool.Get().([]byte), boxed...) + unboxed, success := box.OpenAfterPrecomputation(boxed[:0], temp, n, s) + pool.Put(temp[:0]) return unboxed, success } // BoxSeal seals a crypto box using the provided shared key, returning the box and the nonce needed to decrypt it. // If nonce is nil, a random BoxNonce will be used and returned. // If nonce is non-nil, then nonce.Increment() will be called before using it, and the incremented BoxNonce is what is returned. +// The unboxed input slice's backing array is reused for the boxed output when possible. func BoxSeal(shared *BoxSharedKey, unboxed []byte, nonce *BoxNonce) ([]byte, *BoxNonce) { if nonce == nil { nonce = NewBoxNonce() } nonce.Increment() - out := util.GetBytes() s := (*[BoxSharedKeyLen]byte)(shared) n := (*[BoxNonceLen]byte)(nonce) - boxed := box.SealAfterPrecomputation(out, unboxed, n, s) + temp := append(pool.Get().([]byte), unboxed...) + boxed := box.SealAfterPrecomputation(unboxed[:0], temp, n, s) + pool.Put(temp[:0]) return boxed, nonce } diff --git a/src/tuntap/conn.go b/src/tuntap/conn.go index 24ea5ef3..ddd89e9b 100644 --- a/src/tuntap/conn.go +++ b/src/tuntap/conn.go @@ -44,13 +44,11 @@ func (s *tunConn) _read(bs []byte) (err error) { select { case <-s.stop: err = errors.New("session was already closed") - util.PutBytes(bs) return default: } if len(bs) == 0 { err = errors.New("read packet with 0 size") - util.PutBytes(bs) return } ipv4 := len(bs) > 20 && bs[0]&0xf0 == 0x40 @@ -107,7 +105,6 @@ func (s *tunConn) _read(bs []byte) (err error) { } if skip { err = errors.New("address not allowed") - util.PutBytes(bs) return } s.tun.writer.writeFrom(s, bs) @@ -125,7 +122,6 @@ func (s *tunConn) _write(bs []byte) (err error) { select { case <-s.stop: err = errors.New("session was already closed") - util.PutBytes(bs) return default: } @@ -183,7 +179,6 @@ func (s *tunConn) _write(bs []byte) (err error) { } if skip { err = errors.New("address not allowed") - util.PutBytes(bs) return } msg := yggdrasil.FlowKeyMessage{ diff --git a/src/tuntap/iface.go b/src/tuntap/iface.go index 1e5902e8..9250665a 100644 --- a/src/tuntap/iface.go +++ b/src/tuntap/iface.go @@ -3,7 +3,6 @@ package tuntap import ( "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/yggdrasil-network/yggdrasil-go/src/util" "github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" "github.com/Arceliar/phony" @@ -14,6 +13,7 @@ const TUN_OFFSET_BYTES = 4 type tunWriter struct { phony.Inbox tun *TunAdapter + buf [TUN_OFFSET_BYTES + 65536]byte } func (w *tunWriter) writeFrom(from phony.Actor, b []byte) { @@ -25,15 +25,13 @@ func (w *tunWriter) writeFrom(from phony.Actor, b []byte) { // write is pretty loose with the memory safety rules, e.g. it assumes it can // read w.tun.iface.IsTap() safely func (w *tunWriter) _write(b []byte) { - defer util.PutBytes(b) var written int var err error n := len(b) if n == 0 { return } - temp := append(util.ResizeBytes(util.GetBytes(), TUN_OFFSET_BYTES), b...) - defer util.PutBytes(temp) + temp := append(w.buf[:TUN_OFFSET_BYTES], b...) written, err = w.tun.iface.Write(temp, TUN_OFFSET_BYTES) if err != nil { w.tun.Act(w, func() { @@ -51,22 +49,23 @@ func (w *tunWriter) _write(b []byte) { type tunReader struct { phony.Inbox tun *TunAdapter + buf [TUN_OFFSET_BYTES + 65536]byte } func (r *tunReader) _read() { // Get a slice to store the packet in - recvd := util.ResizeBytes(util.GetBytes(), int(r.tun.mtu)+TUN_OFFSET_BYTES) // Wait for a packet to be delivered to us through the TUN adapter - n, err := r.tun.iface.Read(recvd, TUN_OFFSET_BYTES) + n, err := r.tun.iface.Read(r.buf[:], TUN_OFFSET_BYTES) if n <= TUN_OFFSET_BYTES || err != nil { r.tun.log.Errorln("Error reading TUN:", err) ferr := r.tun.iface.Flush() if ferr != nil { r.tun.log.Errorln("Unable to flush packets:", ferr) } - util.PutBytes(recvd) } else { - r.tun.handlePacketFrom(r, recvd[TUN_OFFSET_BYTES:n+TUN_OFFSET_BYTES], err) + bs := make([]byte, n, n+crypto.BoxOverhead) // extra capacity for later... + copy(bs, r.buf[TUN_OFFSET_BYTES:n+TUN_OFFSET_BYTES]) + r.tun.handlePacketFrom(r, bs, err) } if err == nil { // Now read again @@ -175,7 +174,6 @@ func (tun *TunAdapter) _handlePacket(recvd []byte, err error) { _, known := tun.dials[dstString] tun.dials[dstString] = append(tun.dials[dstString], bs) for len(tun.dials[dstString]) > 32 { - util.PutBytes(tun.dials[dstString][0]) tun.dials[dstString] = tun.dials[dstString][1:] } if !known { diff --git a/src/util/bytes_mobile.go b/src/util/bytes_mobile.go deleted file mode 100644 index f862c0cd..00000000 --- a/src/util/bytes_mobile.go +++ /dev/null @@ -1,21 +0,0 @@ -//+build mobile - -package util - -import "runtime/debug" - -func init() { - debug.SetGCPercent(25) -} - -// GetBytes always returns a nil slice on mobile platforms. -func GetBytes() []byte { - return nil -} - -// PutBytes does literally nothing on mobile platforms. -// This is done rather than keeping a free list of bytes on platforms with memory constraints. -// It's needed to help keep memory usage low enough to fall under the limits set for e.g. iOS NEPacketTunnelProvider apps. -func PutBytes(bs []byte) { - return -} diff --git a/src/util/bytes_other.go b/src/util/bytes_other.go deleted file mode 100644 index 7c966087..00000000 --- a/src/util/bytes_other.go +++ /dev/null @@ -1,18 +0,0 @@ -//+build !mobile - -package util - -import "sync" - -// This is used to buffer recently used slices of bytes, to prevent allocations in the hot loops. -var byteStore = sync.Pool{New: func() interface{} { return []byte(nil) }} - -// GetBytes returns a 0-length (possibly nil) slice of bytes from a free list, so it may have a larger capacity. -func GetBytes() []byte { - return byteStore.Get().([]byte)[:0] -} - -// PutBytes stores a slice in a free list, where it can potentially be reused to prevent future allocations. -func PutBytes(bs []byte) { - byteStore.Put(bs) -} diff --git a/src/yggdrasil/conn.go b/src/yggdrasil/conn.go index eef57683..ae34e45a 100644 --- a/src/yggdrasil/conn.go +++ b/src/yggdrasil/conn.go @@ -252,7 +252,6 @@ func (c *Conn) Read(b []byte) (int, error) { } // Copy results to the output slice and clean up copy(b, bs) - util.PutBytes(bs) // Return the number of bytes copied to the slice, along with any error return n, err } @@ -323,10 +322,11 @@ func (c *Conn) writeNoCopy(msg FlowKeyMessage) error { // returned. func (c *Conn) Write(b []byte) (int, error) { written := len(b) - msg := FlowKeyMessage{Message: append(util.GetBytes(), b...)} + bs := make([]byte, 0, len(b)+crypto.BoxOverhead) + bs = append(bs, b...) + msg := FlowKeyMessage{Message: bs} err := c.writeNoCopy(msg) if err != nil { - util.PutBytes(msg.Message) written = 0 } return written, err diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 78986286..733b9ac1 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -406,10 +406,6 @@ func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool w.intf.notifySending(size, isLinkTraffic) w.intf.msgIO.writeMsgs(bss) w.intf.notifySent(size, isLinkTraffic) - // Cleanup - for _, bs := range bss { - util.PutBytes(bs) - } }) } diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index ff717258..2000ffa6 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -2,8 +2,6 @@ package yggdrasil import ( "time" - - "github.com/yggdrasil-network/yggdrasil-go/src/util" ) // TODO take max size from config @@ -59,7 +57,7 @@ func (q *packetQueue) cleanup() { worstStream.infos = worstStream.infos[1:] worstStream.size -= uint64(len(packet)) q.size -= uint64(len(packet)) - util.PutBytes(packet) + pool_putBytes(packet) // save the modified stream to queues if len(worstStream.infos) > 0 { q.streams[worst] = worstStream diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index bc9de04c..7eef9a11 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -9,7 +9,6 @@ import ( "time" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/yggdrasil-network/yggdrasil-go/src/util" "github.com/Arceliar/phony" ) @@ -241,7 +240,6 @@ func (p *peer) _handlePacket(packet []byte) { case wire_LinkProtocolTraffic: p._handleLinkTraffic(packet) default: - util.PutBytes(packet) } } @@ -347,7 +345,6 @@ func (p *peer) _handleLinkTraffic(bs []byte) { case wire_SwitchMsg: p._handleSwitchMsg(payload) default: - util.PutBytes(bs) } } diff --git a/src/yggdrasil/pool.go b/src/yggdrasil/pool.go new file mode 100644 index 00000000..e95236a5 --- /dev/null +++ b/src/yggdrasil/pool.go @@ -0,0 +1,20 @@ +package yggdrasil + +import "sync" + +// Used internally to reduce allocations in the hot loop +// I.e. packets being switched or between the crypto and the switch +// For safety reasons, these must not escape this package +var pool = sync.Pool{New: func() interface{} { return []byte(nil) }} + +func pool_getBytes(size int) []byte { + bs := pool.Get().([]byte) + if cap(bs) < size { + bs = make([]byte, size) + } + return bs[:size] +} + +func pool_putBytes(bs []byte) { + pool.Put(bs) +} diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 1be94661..71d92609 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -29,7 +29,6 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/yggdrasil-network/yggdrasil-go/src/util" "github.com/Arceliar/phony" ) @@ -178,14 +177,12 @@ func (r *router) _handlePacket(packet []byte) { // Handles incoming traffic, i.e. encapuslated ordinary IPv6 packets. // Passes them to the crypto session worker to be decrypted and sent to the adapter. func (r *router) _handleTraffic(packet []byte) { - defer util.PutBytes(packet) p := wire_trafficPacket{} if !p.decode(packet) { return } sinfo, isIn := r.sessions.getSessionForHandle(&p.Handle) if !isIn { - util.PutBytes(p.Payload) return } sinfo.recv(r, &p) @@ -231,7 +228,6 @@ func (r *router) _handleProto(packet []byte) { case wire_DHTLookupResponse: r._handleDHTRes(bs, &p.FromKey) default: - util.PutBytes(packet) } } diff --git a/src/yggdrasil/session.go b/src/yggdrasil/session.go index 223ea33f..360f2a1b 100644 --- a/src/yggdrasil/session.go +++ b/src/yggdrasil/session.go @@ -448,12 +448,9 @@ func (sinfo *sessionInfo) _recvPacket(p *wire_trafficPacket) { select { case <-sinfo.init: default: - // TODO find a better way to drop things until initialized - util.PutBytes(p.Payload) return } if !sinfo._nonceIsOK(&p.Nonce) { - util.PutBytes(p.Payload) return } k := sinfo.sharedSesKey @@ -463,11 +460,9 @@ func (sinfo *sessionInfo) _recvPacket(p *wire_trafficPacket) { poolFunc := func() { bs, isOK = crypto.BoxOpen(&k, p.Payload, &p.Nonce) callback := func() { - util.PutBytes(p.Payload) if !isOK || k != sinfo.sharedSesKey || !sinfo._nonceIsOK(&p.Nonce) { // Either we failed to decrypt, or the session was updated, or we // received this packet in the mean time - util.PutBytes(bs) return } sinfo._updateNonce(&p.Nonce) @@ -485,8 +480,6 @@ func (sinfo *sessionInfo) _send(msg FlowKeyMessage) { select { case <-sinfo.init: default: - // TODO find a better way to drop things until initialized - util.PutBytes(msg.Message) return } sinfo.bytesSent += uint64(len(msg.Message)) @@ -505,14 +498,8 @@ func (sinfo *sessionInfo) _send(msg FlowKeyMessage) { ch := make(chan func(), 1) poolFunc := func() { p.Payload, _ = crypto.BoxSeal(&k, msg.Message, &p.Nonce) + packet := p.encode() callback := func() { - // Encoding may block on a util.GetBytes(), so kept out of the worker pool - packet := p.encode() - // Cleanup - util.PutBytes(msg.Message) - util.PutBytes(p.Payload) - // Send the packet - // TODO replace this with a send to the peer struct if that becomes an actor sinfo.sessions.router.Act(sinfo, func() { sinfo.sessions.router.out(packet) }) diff --git a/src/yggdrasil/stream.go b/src/yggdrasil/stream.go index 4ab37c29..be1398fc 100644 --- a/src/yggdrasil/stream.go +++ b/src/yggdrasil/stream.go @@ -6,8 +6,6 @@ import ( "fmt" "io" "net" - - "github.com/yggdrasil-network/yggdrasil-go/src/util" ) // Test that this matches the interface we expect @@ -46,6 +44,9 @@ func (s *stream) writeMsgs(bss [][]byte) (int, error) { } s.outputBuffer = buf[:0] // So we can reuse the same underlying array later _, err := buf.WriteTo(s.rwc) + for _, bs := range bss { + pool_putBytes(bs) + } // TODO only include number of bytes from bs *successfully* written? return written, err } @@ -112,7 +113,7 @@ func (s *stream) readMsgFromBuffer() ([]byte, error) { if msgLen > streamMsgSize { return nil, errors.New("oversized message") } - msg := util.ResizeBytes(util.GetBytes(), int(msgLen)) + msg := pool_getBytes(int(msgLen)) _, err = io.ReadFull(s.inputBuffer, msg) return msg, err } diff --git a/src/yggdrasil/wire.go b/src/yggdrasil/wire.go index 18a647d8..9746d46e 100644 --- a/src/yggdrasil/wire.go +++ b/src/yggdrasil/wire.go @@ -9,7 +9,6 @@ package yggdrasil import ( "github.com/yggdrasil-network/yggdrasil-go/src/crypto" - "github.com/yggdrasil-network/yggdrasil-go/src/util" ) const ( @@ -230,8 +229,9 @@ type wire_trafficPacket struct { } // Encodes a wire_trafficPacket into its wire format. +// The returned slice was taken from the pool. func (p *wire_trafficPacket) encode() []byte { - bs := util.GetBytes() + bs := pool_getBytes(0) bs = wire_put_uint64(wire_Traffic, bs) bs = wire_put_coords(p.Coords, bs) bs = append(bs, p.Handle[:]...) @@ -241,7 +241,9 @@ func (p *wire_trafficPacket) encode() []byte { } // Decodes an encoded wire_trafficPacket into the struct, returning true if successful. +// Either way, the argument slice is added to the pool. func (p *wire_trafficPacket) decode(bs []byte) bool { + defer pool_putBytes(bs) var pType uint64 switch { case !wire_chop_uint64(&pType, &bs): @@ -255,7 +257,7 @@ func (p *wire_trafficPacket) decode(bs []byte) bool { case !wire_chop_slice(p.Nonce[:], &bs): return false } - p.Payload = append(util.GetBytes(), bs...) + p.Payload = append(p.Payload, bs...) return true } From 72afa0502990b12e6a172090c3621cf35fc9f3de Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 2 May 2020 10:01:09 -0500 Subject: [PATCH 13/51] test dial/listen in the sim --- build | 2 +- cmd/yggdrasilsim/dial.go | 60 ++++++++++++++++++++++++++++++++++++++++ cmd/yggdrasilsim/main.go | 4 +-- src/yggdrasil/simlink.go | 3 +- 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 cmd/yggdrasilsim/dial.go diff --git a/build b/build index 6b93ca77..0d0da7e2 100755 --- a/build +++ b/build @@ -45,7 +45,7 @@ elif [ $ANDROID ]; then github.com/yggdrasil-network/yggdrasil-extras/src/mobile \ github.com/yggdrasil-network/yggdrasil-extras/src/dummy else - for CMD in yggdrasil yggdrasilctl yggdrasilsim; do + for CMD in yggdrasil yggdrasilctl; do echo "Building: $CMD" go build $ARGS -ldflags="$LDFLAGS" -gcflags="$GCFLAGS" ./cmd/$CMD diff --git a/cmd/yggdrasilsim/dial.go b/cmd/yggdrasilsim/dial.go new file mode 100644 index 00000000..5713fdd7 --- /dev/null +++ b/cmd/yggdrasilsim/dial.go @@ -0,0 +1,60 @@ +package main + +import ( + "fmt" + "sort" + "time" + + "github.com/yggdrasil-network/yggdrasil-go/src/crypto" +) + +func doListen(recvNode *simNode) { + for { + c, err := recvNode.listener.Accept() + if err != nil { + panic(err) + } + c.Close() + } +} + +func dialTest(sendNode, recvNode *simNode) { + if sendNode.id == recvNode.id { + fmt.Println("Skipping dial to self") + return + } + var mask crypto.NodeID + for idx := range mask { + mask[idx] = 0xff + } + for { + c, err := sendNode.dialer.DialByNodeIDandMask(nil, &recvNode.nodeID, &mask) + if c != nil { + c.Close() + return + } + if err != nil { + fmt.Println("Dial failed:", err) + } + time.Sleep(time.Second) + } +} + +func dialStore(store nodeStore) { + var nodeIdxs []int + for idx, n := range store { + nodeIdxs = append(nodeIdxs, idx) + go doListen(n) + } + sort.Slice(nodeIdxs, func(i, j int) bool { + return nodeIdxs[i] < nodeIdxs[j] + }) + for _, idx := range nodeIdxs { + sendNode := store[idx] + for _, jdx := range nodeIdxs { + recvNode := store[jdx] + fmt.Printf("Dialing from node %d to node %d / %d...\n", idx, jdx, len(store)) + dialTest(sendNode, recvNode) + } + } +} diff --git a/cmd/yggdrasilsim/main.go b/cmd/yggdrasilsim/main.go index 40fd9ce4..fcbcfc97 100644 --- a/cmd/yggdrasilsim/main.go +++ b/cmd/yggdrasilsim/main.go @@ -9,7 +9,5 @@ import ( func main() { store := makeStoreSquareGrid(4) - var block chan struct{} - <-block - panic(store) + dialStore(store) } diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go index 33332645..e846f3ba 100644 --- a/src/yggdrasil/simlink.go +++ b/src/yggdrasil/simlink.go @@ -3,7 +3,6 @@ package yggdrasil import ( "errors" "github.com/Arceliar/phony" - "github.com/yggdrasil-network/yggdrasil-go/src/util" ) type Simlink struct { @@ -44,7 +43,7 @@ func (s *Simlink) writeMsgs(msgs [][]byte) (int, error) { var size int for _, msg := range msgs { size += len(msg) - bs := append(util.GetBytes(), msg...) + bs := append([]byte(nil), msg...) phony.Block(s, func() { s.dest.Act(s, func() { defer func() { recover() }() From 15162ee952c3789cd5aed7ae74e36f9a31a11866 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 2 May 2020 10:51:26 -0500 Subject: [PATCH 14/51] fix a panic from a doubly closed channel in the simlink --- src/yggdrasil/simlink.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go index e846f3ba..736ee633 100644 --- a/src/yggdrasil/simlink.go +++ b/src/yggdrasil/simlink.go @@ -14,12 +14,11 @@ type Simlink struct { } func (s *Simlink) readMsg() ([]byte, error) { - bs := <-s.rch - if bs != nil { - return bs, nil - } else { + bs, ok := <-s.rch + if !ok { return nil, errors.New("read from closed Simlink") } + return bs, nil } func (s *Simlink) _recvMetaBytes() ([]byte, error) { @@ -32,6 +31,7 @@ func (s *Simlink) _sendMetaBytes(bs []byte) error { } func (s *Simlink) close() error { + defer func() { recover() }() close(s.rch) return nil } From 402cfc0f005219580e3bb8da48e36c77ad65a377 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 2 May 2020 10:56:17 -0500 Subject: [PATCH 15/51] undo remaining trivial change to build --- build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build b/build index 0d0da7e2..66f94403 100755 --- a/build +++ b/build @@ -45,7 +45,7 @@ elif [ $ANDROID ]; then github.com/yggdrasil-network/yggdrasil-extras/src/mobile \ github.com/yggdrasil-network/yggdrasil-extras/src/dummy else - for CMD in yggdrasil yggdrasilctl; do + for CMD in yggdrasil yggdrasilctl ; do echo "Building: $CMD" go build $ARGS -ldflags="$LDFLAGS" -gcflags="$GCFLAGS" ./cmd/$CMD From 20ef5910136c1aa214dce826fee0ce5d85b76150 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 2 May 2020 11:16:11 -0500 Subject: [PATCH 16/51] fix some crashes with races during peer setup --- cmd/yggdrasilsim/dial.go | 1 + cmd/yggdrasilsim/main.go | 7 ------- src/yggdrasil/link.go | 30 +++++++++++++++--------------- src/yggdrasil/peer.go | 4 +++- src/yggdrasil/router.go | 10 +++++----- src/yggdrasil/simlink.go | 6 +++++- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/cmd/yggdrasilsim/dial.go b/cmd/yggdrasilsim/dial.go index 5713fdd7..c7892d40 100644 --- a/cmd/yggdrasilsim/dial.go +++ b/cmd/yggdrasilsim/dial.go @@ -9,6 +9,7 @@ import ( ) func doListen(recvNode *simNode) { + // TODO be able to stop the listeners somehow so they don't leak across different tests for { c, err := recvNode.listener.Accept() if err != nil { diff --git a/cmd/yggdrasilsim/main.go b/cmd/yggdrasilsim/main.go index fcbcfc97..25504c92 100644 --- a/cmd/yggdrasilsim/main.go +++ b/cmd/yggdrasilsim/main.go @@ -1,12 +1,5 @@ package main -import ( -//"github.com/yggdrasil-network/yggdrasil-go/src/address" -//"github.com/yggdrasil-network/yggdrasil-go/src/config" -//"github.com/yggdrasil-network/yggdrasil-go/src/crypto" -//"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" -) - func main() { store := makeStoreSquareGrid(4) dialStore(store) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 733b9ac1..7f6b9b56 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -217,9 +217,23 @@ func (intf *linkInterface) handler() error { intf.link.mutex.Unlock() // Create peer shared := crypto.GetSharedKey(myLinkPriv, &meta.link) + out := func(msgs [][]byte) { + // nil to prevent it from blocking if the link is somehow frozen + // this is safe because another packet won't be sent until the link notifies + // the peer that it's ready for one + intf.writer.sendFrom(nil, msgs, false) + } + linkOut := func(bs []byte) { + // nil to prevent it from blocking if the link is somehow frozen + // FIXME this is hypothetically not safe, the peer shouldn't be sending + // additional packets until this one finishes, otherwise this could leak + // memory if writing happens slower than link packets are generated... + // that seems unlikely, so it's a lesser evil than deadlocking for now + intf.writer.sendFrom(nil, [][]byte{bs}, true) + } phony.Block(&intf.link.core.peers, func() { // FIXME don't use phony.Block, it's bad practice, even if it's safe here - intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf, func() { intf.msgIO.close() }) + intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf, func() { intf.msgIO.close() }, out, linkOut) }) if intf.peer == nil { return errors.New("failed to create peer") @@ -228,20 +242,6 @@ func (intf *linkInterface) handler() error { // More cleanup can go here intf.peer.Act(nil, intf.peer._removeSelf) }() - intf.peer.out = func(msgs [][]byte) { - // nil to prevent it from blocking if the link is somehow frozen - // this is safe because another packet won't be sent until the link notifies - // the peer that it's ready for one - intf.writer.sendFrom(nil, msgs, false) - } - intf.peer.linkOut = func(bs []byte) { - // nil to prevent it from blocking if the link is somehow frozen - // FIXME this is hypothetically not safe, the peer shouldn't be sending - // additional packets until this one finishes, otherwise this could leak - // memory if writing happens slower than link packets are generated... - // that seems unlikely, so it's a lesser evil than deadlocking for now - intf.writer.sendFrom(nil, [][]byte{bs}, true) - } themAddr := address.AddrForNodeID(crypto.GetNodeID(&intf.info.box)) themAddrString := net.IP(themAddr[:]).String() themString := fmt.Sprintf("%s@%s", themAddrString, intf.info.remote) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 7eef9a11..801691a0 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -123,7 +123,7 @@ func (ps *peers) _updatePeers() { } // Creates a new peer with the specified box, sig, and linkShared keys, using the lowest unoccupied port number. -func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf *linkInterface, closer func()) *peer { +func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf *linkInterface, closer func(), out func([][]byte), linkOut func([]byte)) *peer { now := time.Now() p := peer{box: *box, sig: *sig, @@ -134,6 +134,8 @@ func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShar close: closer, core: ps.core, intf: intf, + out: out, + linkOut: linkOut, } oldPorts := ps.ports newPorts := make(map[switchPort]*peer) diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 71d92609..1bb14c4c 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -62,17 +62,17 @@ func (r *router) init(core *Core) { }, } var p *peer - phony.Block(&r.core.peers, func() { - // FIXME don't block here! - p = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil) - }) - p.out = func(packets [][]byte) { + peerOut := func(packets [][]byte) { r.handlePackets(p, packets) r.Act(p, func() { // after the router handle the packets, notify the peer that it's ready for more p.Act(r, p._handleIdle) }) } + phony.Block(&r.core.peers, func() { + // FIXME don't block here! + p = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil, peerOut, nil) + }) p.Act(r, p._handleIdle) r.out = func(bs []byte) { p.handlePacketFrom(r, bs) } r.nodeinfo.init(r.core) diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go index 736ee633..f830c215 100644 --- a/src/yggdrasil/simlink.go +++ b/src/yggdrasil/simlink.go @@ -57,7 +57,11 @@ func (s *Simlink) writeMsgs(msgs [][]byte) (int, error) { func (c *Core) NewSimlink() *Simlink { s := &Simlink{rch: make(chan []byte, 1)} n := "Simlink" - s.link, _ = c.link.create(s, n, n, n, n, false, true) + var err error + s.link, err = c.link.create(s, n, n, n, n, false, true) + if err != nil { + panic(err) + } return s } From dc128121e57bd659ece83217b58a50d74a330ae0 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 16 May 2020 09:25:57 -0500 Subject: [PATCH 17/51] update switch blockPeer/unblockPeer logic and dht reset when coords change --- src/yggdrasil/dht.go | 13 +++++++------ src/yggdrasil/link.go | 1 + src/yggdrasil/peer.go | 27 +++++++++++++-------------- src/yggdrasil/switch.go | 20 +++++++++++++++++--- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/yggdrasil/dht.go b/src/yggdrasil/dht.go index 56d03ed1..f40ac3c7 100644 --- a/src/yggdrasil/dht.go +++ b/src/yggdrasil/dht.go @@ -89,6 +89,11 @@ func (t *dht) reconfigure() { // Resets the DHT in response to coord changes. // This empties all info from the DHT and drops outstanding requests. func (t *dht) reset() { + for _, info := range t.table { + if t.isImportant(info) { + t.ping(info, nil) + } + } t.reqs = make(map[dhtReqKey]time.Time) t.table = make(map[crypto.NodeID]*dhtInfo) t.imp = nil @@ -144,12 +149,8 @@ func (t *dht) insert(info *dhtInfo) { // Insert a peer into the table if it hasn't been pinged lately, to keep peers from dropping func (t *dht) insertPeer(info *dhtInfo) { - oldInfo, isIn := t.table[*info.getNodeID()] - if !isIn || time.Since(oldInfo.recv) > dht_max_delay+30*time.Second { - // TODO? also check coords? - newInfo := *info // Insert a copy - t.insert(&newInfo) - } + t.insert(info) // FIXME this resets timers / ping counts / etc, so it seems kind of dangerous + t.ping(info, nil) // This is a quick fix to the above, ping them immediately... } // Return true if first/second/third are (partially) ordered correctly. diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 7f6b9b56..539d0488 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -376,6 +376,7 @@ func (intf *linkInterface) notifyRead(size int) { if size > 0 && intf.stallTimer == nil { intf.stallTimer = time.AfterFunc(keepAliveTime, intf.notifyDoKeepAlive) } + intf.link.core.switchTable.unblockPeer(intf, intf.peer.port) }) } diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 801691a0..31bba661 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -194,21 +194,20 @@ func (ps *peers) sendSwitchMsgs(from phony.Actor) { }) } -// This must be launched in a separate goroutine by whatever sets up the peer struct. -// It handles link protocol traffic. -func (p *peer) start() { - var updateDHT func() - updateDHT = func() { - phony.Block(p, func() { - select { - case <-p.done: - default: - p._updateDHT() - time.AfterFunc(time.Second, updateDHT) +func (ps *peers) updateDHT(from phony.Actor) { + ps.Act(from, func() { + for _, peer := range ps.ports { + p := peer + if p.port == 0 { + continue } - }) - } - updateDHT() + p.Act(ps, p._updateDHT) + } + }) +} + +// This must be launched in a separate goroutine by whatever sets up the peer struct. +func (p *peer) start() { // Just for good measure, immediately send a switch message to this peer when we start p.Act(nil, p._sendSwitchMsg) } diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 4f9044cd..6ab9a02b 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -239,11 +239,12 @@ func (t *switchTable) _cleanRoot() { func (t *switchTable) blockPeer(from phony.Actor, port switchPort) { t.Act(from, func() { peer, isIn := t.data.peers[port] - if !isIn { + if !isIn || peer.blocked { return } peer.blocked = true t.data.peers[port] = peer + t._updateTable() if port != t.parent { return } @@ -258,6 +259,18 @@ func (t *switchTable) blockPeer(from phony.Actor, port switchPort) { }) } +func (t *switchTable) unblockPeer(from phony.Actor, port switchPort) { + t.Act(from, func() { + peer, isIn := t.data.peers[port] + if !isIn || !peer.blocked { + return + } + peer.blocked = false + t.data.peers[port] = peer + t._updateTable() + }) +} + // Removes a peer. // Must be called by the router actor with a lambda that calls this. // If the removed peer was this node's parent, it immediately tries to find a new parent. @@ -482,11 +495,12 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi // The timestamp was updated, so we need to update locally and send to our peers. updateRoot = true } + // Note that we depend on the LIFO order of the stack of defers here... if updateRoot { if !equiv(&sender.locator, &t.data.locator) { doUpdate = true t.data.seq++ - t.core.router.reset(nil) + defer t.core.router.reset(t) } if t.data.locator.tstamp != sender.locator.tstamp { t.time = now @@ -495,7 +509,7 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi t.parent = sender.port defer t.core.peers.sendSwitchMsgs(t) } - if true || doUpdate { + if doUpdate { defer t._updateTable() } return From 052de98f126f50341ca60e41064fe485f77858d6 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 16 May 2020 17:07:47 -0500 Subject: [PATCH 18/51] work-in-progress on buffering overhaul --- src/yggdrasil/api.go | 8 ++-- src/yggdrasil/link.go | 74 +++++++++++++++++++++------- src/yggdrasil/packetqueue.go | 82 +++++++++++++++---------------- src/yggdrasil/peer.go | 56 ++++++++++++++-------- src/yggdrasil/router.go | 93 +++++++++++++++++++++++++----------- 5 files changed, 202 insertions(+), 111 deletions(-) diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 31ece6b8..66ee9b81 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -123,10 +123,10 @@ func (c *Core) GetPeers() []Peer { var info Peer phony.Block(p, func() { info = Peer{ - Endpoint: p.intf.name, + Endpoint: p.intf.name(), BytesSent: p.bytesSent, BytesRecvd: p.bytesRecvd, - Protocol: p.intf.info.linkType, + Protocol: p.intf.interfaceType(), Port: uint64(port), Uptime: time.Since(p.firstSeen), } @@ -163,8 +163,8 @@ func (c *Core) GetSwitchPeers() []SwitchPeer { BytesSent: peer.bytesSent, BytesRecvd: peer.bytesRecvd, Port: uint64(elem.port), - Protocol: peer.intf.info.linkType, - Endpoint: peer.intf.info.remote, + Protocol: peer.intf.interfaceType(), + Endpoint: peer.intf.remote(), } copy(info.PublicKey[:], peer.box[:]) }) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 539d0488..3b3cfdb6 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -47,7 +47,7 @@ type linkInterfaceMsgIO interface { } type linkInterface struct { - name string + lname string link *link peer *peer msgIO linkInterfaceMsgIO @@ -125,7 +125,7 @@ func (l *link) listen(uri string) error { func (l *link) create(msgIO linkInterfaceMsgIO, name, linkType, local, remote string, incoming, force bool) (*linkInterface, error) { // Technically anything unique would work for names, but let's pick something human readable, just for debugging intf := linkInterface{ - name: name, + lname: name, link: l, msgIO: msgIO, info: linkInfo{ @@ -178,7 +178,7 @@ func (intf *linkInterface) handler() error { } base := version_getBaseMetadata() if meta.ver > base.ver || meta.ver == base.ver && meta.minorVer > base.minorVer { - intf.link.core.log.Errorln("Failed to connect to node: " + intf.name + " version: " + fmt.Sprintf("%d.%d", meta.ver, meta.minorVer)) + intf.link.core.log.Errorln("Failed to connect to node: " + intf.lname + " version: " + fmt.Sprintf("%d.%d", meta.ver, meta.minorVer)) return errors.New("failed to connect: wrong version") } // Check if we're authorized to connect to this key / IP @@ -217,23 +217,9 @@ func (intf *linkInterface) handler() error { intf.link.mutex.Unlock() // Create peer shared := crypto.GetSharedKey(myLinkPriv, &meta.link) - out := func(msgs [][]byte) { - // nil to prevent it from blocking if the link is somehow frozen - // this is safe because another packet won't be sent until the link notifies - // the peer that it's ready for one - intf.writer.sendFrom(nil, msgs, false) - } - linkOut := func(bs []byte) { - // nil to prevent it from blocking if the link is somehow frozen - // FIXME this is hypothetically not safe, the peer shouldn't be sending - // additional packets until this one finishes, otherwise this could leak - // memory if writing happens slower than link packets are generated... - // that seems unlikely, so it's a lesser evil than deadlocking for now - intf.writer.sendFrom(nil, [][]byte{bs}, true) - } phony.Block(&intf.link.core.peers, func() { // FIXME don't use phony.Block, it's bad practice, even if it's safe here - intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf, func() { intf.msgIO.close() }, out, linkOut) + intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf) }) if intf.peer == nil { return errors.New("failed to create peer") @@ -275,6 +261,58 @@ func (intf *linkInterface) handler() error { //////////////////////////////////////////////////////////////////////////////// +// linkInterface needs to match the peerInterface type needed by the peers + +func (intf *linkInterface) out(bss [][]byte) { + intf.Act(nil, func() { + // nil to prevent it from blocking if the link is somehow frozen + // this is safe because another packet won't be sent until the link notifies + // the peer that it's ready for one + intf.writer.sendFrom(nil, bss, false) + }) +} + +func (intf *linkInterface) linkOut(bs []byte) { + intf.Act(nil, func() { + // nil to prevent it from blocking if the link is somehow frozen + // FIXME this is hypothetically not safe, the peer shouldn't be sending + // additional packets until this one finishes, otherwise this could leak + // memory if writing happens slower than link packets are generated... + // that seems unlikely, so it's a lesser evil than deadlocking for now + intf.writer.sendFrom(nil, [][]byte{bs}, true) + }) +} + +func (intf *linkInterface) notifyQueued(seq uint64) { + // This is the part where we want non-nil 'from' fields + intf.Act(intf.peer, func() { + if !intf.isIdle { + intf.peer.dropFromQueue(intf, seq) + } + }) +} + +func (intf *linkInterface) close() { + intf.Act(nil, func() { intf.msgIO.close() }) +} + +func (intf *linkInterface) name() string { + return intf.lname +} + +func (intf *linkInterface) local() string { + return intf.info.local +} + +func (intf *linkInterface) remote() string { + return intf.info.remote +} + +func (intf *linkInterface) interfaceType() string { + return intf.info.linkType +} + +//////////////////////////////////////////////////////////////////////////////// const ( sendTime = 1 * time.Second // How long to wait before deciding a send is blocked keepAliveTime = 2 * time.Second // How long to wait before sending a keep-alive response if we have no real traffic to send diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index 2000ffa6..7abdaea7 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -4,9 +4,6 @@ import ( "time" ) -// TODO take max size from config -const MAX_PACKET_QUEUE_SIZE = 4 * 1048576 // 4 MB - type pqStreamID string type pqPacketInfo struct { @@ -25,46 +22,50 @@ type packetQueue struct { size uint64 } -func (q *packetQueue) cleanup() { - for q.size > MAX_PACKET_QUEUE_SIZE { - // TODO? drop from a random stream - // odds proportional to size? bandwidth? - // always using the worst is exploitable -> flood 1 packet per random stream - // find the stream that's using the most bandwidth - now := time.Now() - var worst pqStreamID - for id := range q.streams { +// drop will remove a packet from the queue, returning it to the pool +// returns true if a packet was removed, false otherwise +func (q *packetQueue) drop() bool { + if q.size == 0 { + return false + } + // TODO? drop from a random stream + // odds proportional to size? bandwidth? + // always using the worst is exploitable -> flood 1 packet per random stream + // find the stream that's using the most bandwidth + now := time.Now() + var worst pqStreamID + for id := range q.streams { + worst = id + break // get a random ID to start + } + worstStream := q.streams[worst] + worstSize := float64(worstStream.size) + worstAge := now.Sub(worstStream.infos[0].time).Seconds() + for id, stream := range q.streams { + thisSize := float64(stream.size) + thisAge := now.Sub(stream.infos[0].time).Seconds() + // cross multiply to avoid division by zero issues + if worstSize*thisAge < thisSize*worstAge { + // worstSize/worstAge < thisSize/thisAge -> this uses more bandwidth worst = id - break // get a random ID to start - } - worstStream := q.streams[worst] - worstSize := float64(worstStream.size) - worstAge := now.Sub(worstStream.infos[0].time).Seconds() - for id, stream := range q.streams { - thisSize := float64(stream.size) - thisAge := now.Sub(stream.infos[0].time).Seconds() - // cross multiply to avoid division by zero issues - if worstSize*thisAge < thisSize*worstAge { - // worstSize/worstAge < thisSize/thisAge -> this uses more bandwidth - worst = id - worstStream = stream - worstSize = thisSize - worstAge = thisAge - } - } - // Drop the oldest packet from the worst stream - packet := worstStream.infos[0].packet - worstStream.infos = worstStream.infos[1:] - worstStream.size -= uint64(len(packet)) - q.size -= uint64(len(packet)) - pool_putBytes(packet) - // save the modified stream to queues - if len(worstStream.infos) > 0 { - q.streams[worst] = worstStream - } else { - delete(q.streams, worst) + worstStream = stream + worstSize = thisSize + worstAge = thisAge } } + // Drop the oldest packet from the worst stream + packet := worstStream.infos[0].packet + worstStream.infos = worstStream.infos[1:] + worstStream.size -= uint64(len(packet)) + q.size -= uint64(len(packet)) + pool_putBytes(packet) + // save the modified stream to queues + if len(worstStream.infos) > 0 { + q.streams[worst] = worstStream + } else { + delete(q.streams, worst) + } + return true } func (q *packetQueue) push(packet []byte) { @@ -80,7 +81,6 @@ func (q *packetQueue) push(packet []byte) { // save update to queues q.streams[id] = stream q.size += uint64(len(packet)) - q.cleanup() } func (q *packetQueue) pop() ([]byte, bool) { diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 31bba661..31ea5f46 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -77,29 +77,38 @@ func (ps *peers) getAllowedEncryptionPublicKeys() []string { return ps.core.config.Current.AllowedEncryptionPublicKeys } +type peerInterface interface { + out([][]byte) + linkOut([]byte) + notifyQueued(uint64) + close() + // These next ones are only used by the API + name() string + local() string + remote() string + interfaceType() string +} + // Information known about a peer, including their box/sig keys, precomputed shared keys (static and ephemeral) and a handler for their outgoing traffic type peer struct { phony.Inbox core *Core - intf *linkInterface + intf peerInterface port switchPort box crypto.BoxPubKey sig crypto.SigPubKey shared crypto.BoxSharedKey linkShared crypto.BoxSharedKey endpoint string - firstSeen time.Time // To track uptime for getPeers - linkOut func([]byte) // used for protocol traffic (bypasses the switch) - dinfo *dhtInfo // used to keep the DHT working - out func([][]byte) // Set up by whatever created the peers struct, used to send packets to other nodes - done (chan struct{}) // closed to exit the linkLoop - close func() // Called when a peer is removed, to close the underlying connection, or via admin api + firstSeen time.Time // To track uptime for getPeers + dinfo *dhtInfo // used to keep the DHT working // The below aren't actually useful internally, they're just gathered for getPeers statistics bytesSent uint64 bytesRecvd uint64 ports map[switchPort]*peer table *lookupTable queue packetQueue + seq uint64 // this and idle are used to detect when to drop packets from queue idle bool } @@ -123,19 +132,15 @@ func (ps *peers) _updatePeers() { } // Creates a new peer with the specified box, sig, and linkShared keys, using the lowest unoccupied port number. -func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf *linkInterface, closer func(), out func([][]byte), linkOut func([]byte)) *peer { +func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf peerInterface) *peer { now := time.Now() p := peer{box: *box, + core: ps.core, + intf: intf, sig: *sig, shared: *crypto.GetSharedKey(&ps.core.boxPriv, box), linkShared: *linkShared, firstSeen: now, - done: make(chan struct{}), - close: closer, - core: ps.core, - intf: intf, - out: out, - linkOut: linkOut, } oldPorts := ps.ports newPorts := make(map[switchPort]*peer) @@ -172,10 +177,7 @@ func (ps *peers) _removePeer(p *peer) { newPorts[k] = v } delete(newPorts, p.port) - if p.close != nil { - p.close() - } - close(p.done) + p.intf.close() ps.ports = newPorts ps._updatePeers() } @@ -295,12 +297,26 @@ func (p *peer) _handleIdle() { } if len(packets) > 0 { p.bytesSent += uint64(size) - p.out(packets) + p.intf.out(packets) } else { p.idle = true } } +func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { + p.Act(from, func() { + switch { + case seq != p.seq: + case p.queue.drop(): + p.intf.notifyQueued(p.seq) + } + if seq != p.seq { + return + } + + }) +} + // This wraps the packet in the inner (ephemeral) and outer (permanent) crypto layers. // It sends it to p.linkOut, which bypasses the usual packet queues. func (p *peer) _sendLinkPacket(packet []byte) { @@ -316,7 +332,7 @@ func (p *peer) _sendLinkPacket(packet []byte) { Payload: bs, } packet = linkPacket.encode() - p.linkOut(packet) + p.intf.linkOut(packet) } // Decrypts the outer (permanent) and inner (ephemeral) crypto layers on link traffic. diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 1bb14c4c..303ada69 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -45,6 +45,8 @@ type router struct { nodeinfo nodeinfo searches searches sessions sessions + intf routerInterface + peer *peer table *lookupTable // has a copy of our locator } @@ -53,28 +55,17 @@ func (r *router) init(core *Core) { r.core = core r.addr = *address.AddrForNodeID(&r.dht.nodeID) r.subnet = *address.SubnetForNodeID(&r.dht.nodeID) - self := linkInterface{ - name: "(self)", - info: linkInfo{ - local: "(self)", - remote: "(self)", - linkType: "self", - }, - } - var p *peer - peerOut := func(packets [][]byte) { - r.handlePackets(p, packets) - r.Act(p, func() { - // after the router handle the packets, notify the peer that it's ready for more - p.Act(r, p._handleIdle) - }) - } + r.intf.router = r phony.Block(&r.core.peers, func() { // FIXME don't block here! - p = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil, peerOut, nil) + r.peer = r.core.peers._newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &r.intf) }) - p.Act(r, p._handleIdle) - r.out = func(bs []byte) { p.handlePacketFrom(r, bs) } + r.peer.Act(r, r.peer._handleIdle) + r.out = func(bs []byte) { + r.intf.Act(r, func() { + r.peer.handlePacketFrom(&r.intf, bs) + }) + } r.nodeinfo.init(r.core) r.core.config.Mutex.RLock() r.nodeinfo.setNodeInfo(r.core.config.Current.NodeInfo, r.core.config.Current.NodeInfoPrivacy) @@ -123,15 +114,6 @@ func (r *router) start() error { return nil } -// In practice, the switch will call this with 1 packet -func (r *router) handlePackets(from phony.Actor, packets [][]byte) { - r.Act(from, func() { - for _, packet := range packets { - r._handlePacket(packet) - } - }) -} - // Insert a peer info into the dht, TODO? make the dht a separate actor func (r *router) insertPeer(from phony.Actor, info *dhtInfo) { r.Act(from, func() { @@ -275,3 +257,58 @@ func (r *router) _handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { req.SendPermPub = *fromKey r.nodeinfo.handleNodeInfo(r, &req) } + +//////////////////////////////////////////////////////////////////////////////// + +// routerInterface is a helper that implements peerInterface +type routerInterface struct { + phony.Inbox + router *router + busy bool +} + +func (intf *routerInterface) out(bss [][]byte) { + intf.Act(intf.router.peer, func() { + intf.router.Act(intf, func() { + for _, bs := range bss { + intf.router._handlePacket(bs) + } + // we may block due to the above + // so we send a message to ourself, that we'd handle after unblocking + // that message tells us to tell the interface that we're finally idle again + intf.router.Act(nil, func() { + intf.Act(intf.router, intf._handleIdle) + }) + intf.Act(intf.router, intf._handleBusy) + }) + }) +} + +func (intf *routerInterface) _handleBusy() { + intf.busy = true +} + +func (intf *routerInterface) _handleIdle() { + intf.busy = false + intf.router.peer.Act(intf, intf.router.peer._handleIdle) +} + +func (intf *routerInterface) linkOut(_ []byte) {} + +func (intf *routerInterface) notifyQueued(seq uint64) { + intf.Act(intf.router.peer, func() { + if intf.busy { + intf.router.peer.dropFromQueue(intf, seq) + } + }) +} + +func (intf *routerInterface) close() {} + +func (intf *routerInterface) name() string { return "(self)" } + +func (intf *routerInterface) local() string { return "(self)" } + +func (intf *routerInterface) remote() string { return "(self)" } + +func (intf *routerInterface) interfaceType() string { return "self" } From b132560f651ad5f04b977517803a1d6016b1e9a5 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 16 May 2020 17:24:26 -0500 Subject: [PATCH 19/51] it helps to actually run the notifyQueued stuff... --- src/yggdrasil/peer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 31ea5f46..ada29214 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -281,6 +281,8 @@ func (p *peer) _sendPackets(packets [][]byte) { if p.idle { p.idle = false p._handleIdle() + } else { + p.intf.notifyQueued(p.seq) } } @@ -296,6 +298,7 @@ func (p *peer) _handleIdle() { } } if len(packets) > 0 { + p.seq++ p.bytesSent += uint64(size) p.intf.out(packets) } else { From b17a035a05214c8066eaea613f86f176cfd7d33c Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 16 May 2020 17:40:11 -0500 Subject: [PATCH 20/51] workarounds to dropping being too aggressive --- src/yggdrasil/packetqueue.go | 5 +++++ src/yggdrasil/peer.go | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index 7abdaea7..e37d5bb3 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -55,6 +55,11 @@ func (q *packetQueue) drop() bool { } // Drop the oldest packet from the worst stream packet := worstStream.infos[0].packet + if q.size-uint64(len(packet)) < streamMsgSize { + // TODO something better + // We don't want to drop *all* packets, so lets save 1 batch worth... + return false + } worstStream.infos = worstStream.infos[1:] worstStream.size -= uint64(len(packet)) q.size -= uint64(len(packet)) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index ada29214..0c195c6d 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -289,7 +289,7 @@ func (p *peer) _sendPackets(packets [][]byte) { func (p *peer) _handleIdle() { var packets [][]byte var size uint64 - for size < 65535 { + for size < streamMsgSize { if packet, success := p.queue.pop(); success { packets = append(packets, packet) size += uint64(len(packet)) From 62b9fab5f822f018940062ca8914852f41a8ce0b Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 16 May 2020 18:56:04 -0500 Subject: [PATCH 21/51] more work-in-progress, debugging why things are dropping so often --- src/yggdrasil/link.go | 31 +++++++++++++------------------ src/yggdrasil/packetqueue.go | 2 +- src/yggdrasil/peer.go | 5 +---- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 3b3cfdb6..80989507 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -63,8 +63,7 @@ type linkInterface struct { stallTimer *time.Timer // Fires to signal that no incoming traffic (including keep-alive) has been seen closeTimer *time.Timer // Fires when the link has been idle so long we need to close it isIdle bool // True if the peer actor knows the link is idle - stalled bool // True if we haven't been receiving any response traffic - unstalled bool // False if an idle notification to the switch hasn't been sent because we stalled (or are first starting up) + blocked bool // True if we've blocked the peer in the switch } func (l *link) init(c *Core) error { @@ -235,6 +234,7 @@ func (intf *linkInterface) handler() error { strings.ToUpper(intf.info.linkType), themString, intf.info.local) // Start things go intf.peer.start() + intf.Act(nil, intf._notifyIdle) intf.reader.Act(nil, intf.reader._read) // Wait for the reader to finish // TODO find a way to do this without keeping live goroutines around @@ -344,8 +344,9 @@ func (intf *linkInterface) _cancelStallTimer() { // through other links, if alternatives exist func (intf *linkInterface) notifyBlockedSend() { intf.Act(nil, func() { - if intf.sendTimer != nil { + if intf.sendTimer != nil && !intf.blocked { //As far as we know, we're still trying to send, and the timer fired. + intf.blocked = true intf.link.core.switchTable.blockPeer(intf, intf.peer.port) } }) @@ -365,25 +366,21 @@ func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { }) } -// Notify the switch that we're ready for more traffic, assuming we're not in a stalled state +// Notify the peer that we're ready for more traffic func (intf *linkInterface) _notifyIdle() { if !intf.isIdle { - if intf.stalled { - intf.unstalled = false - } else { - intf.isIdle = true - intf.peer.Act(intf, intf.peer._handleIdle) - } + intf.isIdle = true + intf.peer.Act(intf, intf.peer._handleIdle) } } // Set the peer as stalled, to prevent them from returning to the switch until a read succeeds func (intf *linkInterface) notifyStalled() { intf.Act(nil, func() { // Sent from a time.AfterFunc - if intf.stallTimer != nil { + if intf.stallTimer != nil && !intf.blocked { intf.stallTimer.Stop() intf.stallTimer = nil - intf.stalled = true + intf.blocked = true intf.link.core.switchTable.blockPeer(intf, intf.peer.port) } }) @@ -406,15 +403,13 @@ func (intf *linkInterface) notifyRead(size int) { intf.stallTimer.Stop() intf.stallTimer = nil } - intf.stalled = false - if !intf.unstalled { - intf._notifyIdle() - intf.unstalled = true - } if size > 0 && intf.stallTimer == nil { intf.stallTimer = time.AfterFunc(keepAliveTime, intf.notifyDoKeepAlive) } - intf.link.core.switchTable.unblockPeer(intf, intf.peer.port) + if intf.blocked { + intf.blocked = false + intf.link.core.switchTable.unblockPeer(intf, intf.peer.port) + } }) } diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index e37d5bb3..caabe671 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -55,7 +55,7 @@ func (q *packetQueue) drop() bool { } // Drop the oldest packet from the worst stream packet := worstStream.infos[0].packet - if q.size-uint64(len(packet)) < streamMsgSize { + if false && q.size-uint64(len(packet)) < streamMsgSize { // TODO something better // We don't want to drop *all* packets, so lets save 1 batch worth... return false diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 0c195c6d..f88eb8bf 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -311,12 +311,9 @@ func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { switch { case seq != p.seq: case p.queue.drop(): + p.core.log.Debugln("DEBUG dropped:", p.port, p.queue.size) p.intf.notifyQueued(p.seq) } - if seq != p.seq { - return - } - }) } From 527d44391666305d818d0b83fd9c38a85d74406c Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 07:21:09 -0500 Subject: [PATCH 22/51] move where the queue size check before dropping would occur --- src/yggdrasil/packetqueue.go | 5 ----- src/yggdrasil/peer.go | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index caabe671..7abdaea7 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -55,11 +55,6 @@ func (q *packetQueue) drop() bool { } // Drop the oldest packet from the worst stream packet := worstStream.infos[0].packet - if false && q.size-uint64(len(packet)) < streamMsgSize { - // TODO something better - // We don't want to drop *all* packets, so lets save 1 batch worth... - return false - } worstStream.infos = worstStream.infos[1:] worstStream.size -= uint64(len(packet)) q.size -= uint64(len(packet)) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index f88eb8bf..d3f7047d 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -310,6 +310,7 @@ func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { switch { case seq != p.seq: + //case p.queue.size < streamMsgSize: case p.queue.drop(): p.core.log.Debugln("DEBUG dropped:", p.port, p.queue.size) p.intf.notifyQueued(p.seq) From 15ac2595aa96a0c1a29fe6a1927a4366dd03f3f4 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 08:22:02 -0500 Subject: [PATCH 23/51] use a dedicated per-stream writer goroutine, send messages to it over a 1-buffered channel, this eliminates most of the false positive blocking that causes drops --- src/yggdrasil/link.go | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 80989507..d439c309 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -256,6 +256,11 @@ func (intf *linkInterface) handler() error { intf.link.core.log.Infof("Disconnected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) } + intf.writer.Act(nil, func() { + if intf.writer.worker != nil { + close(intf.writer.worker) + } + }) return err } @@ -428,7 +433,8 @@ func (intf *linkInterface) notifyDoKeepAlive() { type linkWriter struct { phony.Inbox - intf *linkInterface + intf *linkInterface + worker chan [][]byte } func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool) { @@ -437,8 +443,19 @@ func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool for _, bs := range bss { size += len(bs) } + if w.worker == nil { + w.worker = make(chan [][]byte, 1) + go func() { + for bss := range w.worker { + w.intf.msgIO.writeMsgs(bss) + } + }() + } w.intf.notifySending(size, isLinkTraffic) - w.intf.msgIO.writeMsgs(bss) + func() { + defer func() { recover() }() + w.worker <- bss + }() w.intf.notifySent(size, isLinkTraffic) }) } From 0dcc555eabd40cb8cce10ef5fdcf2589aec18ca5 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 08:34:22 -0500 Subject: [PATCH 24/51] cleaner startup/shutdown of the link writer's worker --- src/yggdrasil/link.go | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index d439c309..5676ebe5 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -136,6 +136,7 @@ func (l *link) create(msgIO linkInterfaceMsgIO, name, linkType, local, remote st force: force, } intf.writer.intf = &intf + intf.writer.worker = make(chan [][]byte, 1) intf.reader.intf = &intf intf.reader.err = make(chan error) return &intf, nil @@ -151,6 +152,15 @@ func (l *link) stop() error { func (intf *linkInterface) handler() error { // TODO split some of this into shorter functions, so it's easier to read, and for the FIXME duplicate peer issue mentioned later + go func() { + for bss := range intf.writer.worker { + intf.msgIO.writeMsgs(bss) + } + }() + defer intf.writer.Act(nil, func() { + intf.writer.closed = true + close(intf.writer.worker) + }) myLinkPub, myLinkPriv := crypto.NewBoxKeys() meta := version_getBaseMetadata() meta.box = intf.link.core.boxPub @@ -256,11 +266,6 @@ func (intf *linkInterface) handler() error { intf.link.core.log.Infof("Disconnected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) } - intf.writer.Act(nil, func() { - if intf.writer.worker != nil { - close(intf.writer.worker) - } - }) return err } @@ -435,27 +440,20 @@ type linkWriter struct { phony.Inbox intf *linkInterface worker chan [][]byte + closed bool } func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool) { w.Act(from, func() { + if w.closed { + return + } var size int for _, bs := range bss { size += len(bs) } - if w.worker == nil { - w.worker = make(chan [][]byte, 1) - go func() { - for bss := range w.worker { - w.intf.msgIO.writeMsgs(bss) - } - }() - } w.intf.notifySending(size, isLinkTraffic) - func() { - defer func() { recover() }() - w.worker <- bss - }() + w.worker <- bss w.intf.notifySent(size, isLinkTraffic) }) } From 6e92af1cd26da323db8d83bf52ca1d850ab6df8a Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 08:49:40 -0500 Subject: [PATCH 25/51] re-enable a minimum queue size of ~1 big packet --- src/yggdrasil/peer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index d3f7047d..361a0ea1 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -310,7 +310,7 @@ func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { switch { case seq != p.seq: - //case p.queue.size < streamMsgSize: + case p.queue.size < streamMsgSize: case p.queue.drop(): p.core.log.Debugln("DEBUG dropped:", p.port, p.queue.size) p.intf.notifyQueued(p.seq) From 7720e169f26d4609de81ee5dcd83fe96df4cfe64 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 12:09:40 -0500 Subject: [PATCH 26/51] when we detect we're blocked, only drop packets often enough to make sure the existing queue's size is non-increasing, and always drop the worst packet from a random flow with odds based on the total size of packets queued for that flow --- src/yggdrasil/packetqueue.go | 32 ++++++++++---------------------- src/yggdrasil/peer.go | 20 ++++++++++++-------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index 7abdaea7..464bc6ce 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -1,6 +1,7 @@ package yggdrasil import ( + "math/rand" "time" ) @@ -28,32 +29,19 @@ func (q *packetQueue) drop() bool { if q.size == 0 { return false } - // TODO? drop from a random stream - // odds proportional to size? bandwidth? - // always using the worst is exploitable -> flood 1 packet per random stream - // find the stream that's using the most bandwidth - now := time.Now() + // select a random stream, odds based on stream size + offset := rand.Uint64() % q.size var worst pqStreamID - for id := range q.streams { - worst = id - break // get a random ID to start - } - worstStream := q.streams[worst] - worstSize := float64(worstStream.size) - worstAge := now.Sub(worstStream.infos[0].time).Seconds() + var size uint64 for id, stream := range q.streams { - thisSize := float64(stream.size) - thisAge := now.Sub(stream.infos[0].time).Seconds() - // cross multiply to avoid division by zero issues - if worstSize*thisAge < thisSize*worstAge { - // worstSize/worstAge < thisSize/thisAge -> this uses more bandwidth - worst = id - worstStream = stream - worstSize = thisSize - worstAge = thisAge + worst = id + size += stream.size + if size >= offset { + break } } - // Drop the oldest packet from the worst stream + // drop the oldest packet from the stream + worstStream := q.streams[worst] packet := worstStream.infos[0].packet worstStream.infos = worstStream.infos[1:] worstStream.size -= uint64(len(packet)) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 361a0ea1..31103074 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -110,6 +110,7 @@ type peer struct { queue packetQueue seq uint64 // this and idle are used to detect when to drop packets from queue idle bool + drop bool // set to true if we're dropping packets from the queue } func (ps *peers) updateTables(from phony.Actor, table *lookupTable) { @@ -275,13 +276,19 @@ func (p *peer) sendPacketsFrom(from phony.Actor, packets [][]byte) { } func (p *peer) _sendPackets(packets [][]byte) { + size := p.queue.size for _, packet := range packets { p.queue.push(packet) } - if p.idle { + switch { + case p.idle: p.idle = false p._handleIdle() - } else { + case p.drop: + for p.queue.size > size { + p.queue.drop() + } + default: p.intf.notifyQueued(p.seq) } } @@ -303,17 +310,14 @@ func (p *peer) _handleIdle() { p.intf.out(packets) } else { p.idle = true + p.drop = false } } func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { - switch { - case seq != p.seq: - case p.queue.size < streamMsgSize: - case p.queue.drop(): - p.core.log.Debugln("DEBUG dropped:", p.port, p.queue.size) - p.intf.notifyQueued(p.seq) + if seq == p.seq { + p.drop = true } }) } From d96ae156a11f03d491d987954662bf83b1cb2482 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 12:27:43 -0500 Subject: [PATCH 27/51] slight change to peer function names/args --- src/yggdrasil/peer.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 31103074..d8d14cfc 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -264,22 +264,20 @@ func (p *peer) _handleTraffic(packet []byte) { coords := peer_getPacketCoords(packet) next := p.table.lookup(coords) if nPeer, isIn := p.ports[next]; isIn { - nPeer.sendPacketsFrom(p, [][]byte{packet}) + nPeer.sendPacketFrom(p, packet) } //p.core.switchTable.packetInFrom(p, packet) } -func (p *peer) sendPacketsFrom(from phony.Actor, packets [][]byte) { +func (p *peer) sendPacketFrom(from phony.Actor, packet []byte) { p.Act(from, func() { - p._sendPackets(packets) + p._sendPacket(packet) }) } -func (p *peer) _sendPackets(packets [][]byte) { +func (p *peer) _sendPacket(packet []byte) { size := p.queue.size - for _, packet := range packets { - p.queue.push(packet) - } + p.queue.push(packet) switch { case p.idle: p.idle = false From ff3c8cb687561b2485f7d6ad8e5723a8127aaa7a Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 12:58:57 -0500 Subject: [PATCH 28/51] less aggresive queue size reduction --- src/yggdrasil/peer.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index d8d14cfc..02e92f9a 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -108,6 +108,7 @@ type peer struct { ports map[switchPort]*peer table *lookupTable queue packetQueue + max uint64 seq uint64 // this and idle are used to detect when to drop packets from queue idle bool drop bool // set to true if we're dropping packets from the queue @@ -276,14 +277,13 @@ func (p *peer) sendPacketFrom(from phony.Actor, packet []byte) { } func (p *peer) _sendPacket(packet []byte) { - size := p.queue.size p.queue.push(packet) switch { case p.idle: p.idle = false p._handleIdle() case p.drop: - for p.queue.size > size { + for p.queue.size > p.max { p.queue.drop() } default: @@ -306,6 +306,9 @@ func (p *peer) _handleIdle() { p.seq++ p.bytesSent += uint64(size) p.intf.out(packets) + if p.drop { + p.max = p.queue.size + } } else { p.idle = true p.drop = false From d43b93f60a771566280d34bc058f42b48f168ae6 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 13:23:15 -0500 Subject: [PATCH 29/51] safer check for the queues if we're blocked on a send, should work even if we're blocked on a link packet send --- src/yggdrasil/link.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 5676ebe5..a0ce5d87 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -63,6 +63,7 @@ type linkInterface struct { stallTimer *time.Timer // Fires to signal that no incoming traffic (including keep-alive) has been seen closeTimer *time.Timer // Fires when the link has been idle so long we need to close it isIdle bool // True if the peer actor knows the link is idle + isSending bool // True between a notifySending and a notifySent blocked bool // True if we've blocked the peer in the switch } @@ -296,7 +297,7 @@ func (intf *linkInterface) linkOut(bs []byte) { func (intf *linkInterface) notifyQueued(seq uint64) { // This is the part where we want non-nil 'from' fields intf.Act(intf.peer, func() { - if !intf.isIdle { + if intf.isSending { intf.peer.dropFromQueue(intf, seq) } }) @@ -336,6 +337,7 @@ func (intf *linkInterface) notifySending(size int, isLinkTraffic bool) { if !isLinkTraffic { intf.isIdle = false } + intf.isSending = true intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) intf._cancelStallTimer() }) @@ -370,6 +372,7 @@ func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { if !isLinkTraffic { intf._notifyIdle() } + intf.isSending = false if size > 0 && intf.stallTimer == nil { intf.stallTimer = time.AfterFunc(stallTime, intf.notifyStalled) } From cf2edc99d1d7d16316c30522c4fac0be3a45da34 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 17 May 2020 13:32:58 -0500 Subject: [PATCH 30/51] correctly set peer.max --- src/yggdrasil/peer.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 02e92f9a..339ea5a7 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -306,9 +306,7 @@ func (p *peer) _handleIdle() { p.seq++ p.bytesSent += uint64(size) p.intf.out(packets) - if p.drop { - p.max = p.queue.size - } + p.max = p.queue.size } else { p.idle = true p.drop = false @@ -319,6 +317,7 @@ func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { if seq == p.seq { p.drop = true + p.max = p.queue.size } }) } From 59c5644a52cbc5316fd5c2dc596706aa1c35d635 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 23 May 2020 10:08:23 -0500 Subject: [PATCH 31/51] some peer/link cleanup --- src/yggdrasil/link.go | 42 ++++++++++++++++++++---------------------- src/yggdrasil/peer.go | 2 +- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index a0ce5d87..dc61892f 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -62,7 +62,6 @@ type linkInterface struct { keepAliveTimer *time.Timer // Fires to send keep-alive traffic stallTimer *time.Timer // Fires to signal that no incoming traffic (including keep-alive) has been seen closeTimer *time.Timer // Fires when the link has been idle so long we need to close it - isIdle bool // True if the peer actor knows the link is idle isSending bool // True between a notifySending and a notifySent blocked bool // True if we've blocked the peer in the switch } @@ -279,7 +278,7 @@ func (intf *linkInterface) out(bss [][]byte) { // nil to prevent it from blocking if the link is somehow frozen // this is safe because another packet won't be sent until the link notifies // the peer that it's ready for one - intf.writer.sendFrom(nil, bss, false) + intf.writer.sendFrom(nil, bss) }) } @@ -290,7 +289,7 @@ func (intf *linkInterface) linkOut(bs []byte) { // additional packets until this one finishes, otherwise this could leak // memory if writing happens slower than link packets are generated... // that seems unlikely, so it's a lesser evil than deadlocking for now - intf.writer.sendFrom(nil, [][]byte{bs}, true) + intf.writer.sendFrom(nil, [][]byte{bs}) }) } @@ -332,11 +331,8 @@ const ( ) // notify the intf that we're currently sending -func (intf *linkInterface) notifySending(size int, isLinkTraffic bool) { +func (intf *linkInterface) notifySending(size int) { intf.Act(&intf.writer, func() { - if !isLinkTraffic { - intf.isIdle = false - } intf.isSending = true intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) intf._cancelStallTimer() @@ -365,13 +361,18 @@ func (intf *linkInterface) notifyBlockedSend() { } // notify the intf that we've finished sending, returning the peer to the switch -func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { +func (intf *linkInterface) notifySent(size int) { intf.Act(&intf.writer, func() { - intf.sendTimer.Stop() - intf.sendTimer = nil - if !isLinkTraffic { - intf._notifyIdle() + if intf.sendTimer != nil { + intf.sendTimer.Stop() + intf.sendTimer = nil } + if intf.keepAliveTimer != nil { + // TODO? unset this when we start sending, not when we finish... + intf.keepAliveTimer.Stop() + intf.keepAliveTimer = nil + } + intf._notifyIdle() intf.isSending = false if size > 0 && intf.stallTimer == nil { intf.stallTimer = time.AfterFunc(stallTime, intf.notifyStalled) @@ -381,10 +382,7 @@ func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { // Notify the peer that we're ready for more traffic func (intf *linkInterface) _notifyIdle() { - if !intf.isIdle { - intf.isIdle = true - intf.peer.Act(intf, intf.peer._handleIdle) - } + intf.peer.Act(intf, intf.peer._handleIdle) } // Set the peer as stalled, to prevent them from returning to the switch until a read succeeds @@ -416,8 +414,8 @@ func (intf *linkInterface) notifyRead(size int) { intf.stallTimer.Stop() intf.stallTimer = nil } - if size > 0 && intf.stallTimer == nil { - intf.stallTimer = time.AfterFunc(keepAliveTime, intf.notifyDoKeepAlive) + if size > 0 && intf.keepAliveTimer == nil { + intf.keepAliveTimer = time.AfterFunc(keepAliveTime, intf.notifyDoKeepAlive) } if intf.blocked { intf.blocked = false @@ -432,7 +430,7 @@ func (intf *linkInterface) notifyDoKeepAlive() { if intf.stallTimer != nil { intf.stallTimer.Stop() intf.stallTimer = nil - intf.writer.sendFrom(nil, [][]byte{nil}, true) // Empty keep-alive traffic + intf.writer.sendFrom(nil, [][]byte{nil}) // Empty keep-alive traffic } }) } @@ -446,7 +444,7 @@ type linkWriter struct { closed bool } -func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool) { +func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte) { w.Act(from, func() { if w.closed { return @@ -455,9 +453,9 @@ func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool for _, bs := range bss { size += len(bs) } - w.intf.notifySending(size, isLinkTraffic) + w.intf.notifySending(size) w.worker <- bss - w.intf.notifySent(size, isLinkTraffic) + w.intf.notifySent(size) }) } diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 339ea5a7..3cfc0b4f 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -317,7 +317,7 @@ func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { if seq == p.seq { p.drop = true - p.max = p.queue.size + p.max = p.queue.size + streamMsgSize } }) } From ef1e506a0c05dc163b5db89b5bf4e30be4cbf761 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 23 May 2020 10:23:55 -0500 Subject: [PATCH 32/51] work-in-progress on more cleanup --- src/yggdrasil/api.go | 6 ++-- src/yggdrasil/core.go | 6 ++-- src/yggdrasil/link.go | 70 +++++++++++++++++++------------------- src/yggdrasil/simlink.go | 2 +- src/yggdrasil/stream.go | 2 +- src/yggdrasil/switch.go | 2 +- src/yggdrasil/tcp.go | 50 +++++++++++++-------------- src/yggdrasil/tcp_linux.go | 6 ++-- src/yggdrasil/tls.go | 4 +-- 9 files changed, 74 insertions(+), 74 deletions(-) diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 66ee9b81..b5b8d362 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -257,14 +257,14 @@ func (c *Core) ConnDialer() (*Dialer, error) { // "Listen" configuration item, e.g. // tcp://a.b.c.d:e func (c *Core) ListenTCP(uri string) (*TcpListener, error) { - return c.link.tcp.listen(uri, nil) + return c.links.tcp.listen(uri, nil) } // ListenTLS starts a new TLS listener. The input URI should match that of the // "Listen" configuration item, e.g. // tls://a.b.c.d:e func (c *Core) ListenTLS(uri string) (*TcpListener, error) { - return c.link.tcp.listen(uri, c.link.tcp.tls.forListener) + return c.links.tcp.listen(uri, c.links.tcp.tls.forListener) } // NodeID gets the node ID. This is derived from your router encryption keys. @@ -463,7 +463,7 @@ func (c *Core) RemovePeer(addr string, sintf string) error { // This does not add the peer to the peer list, so if the connection drops, the // peer will not be called again automatically. func (c *Core) CallPeer(addr string, sintf string) error { - return c.link.call(addr, sintf) + return c.links.call(addr, sintf) } // DisconnectPeer disconnects a peer once. This should be specified as a port diff --git a/src/yggdrasil/core.go b/src/yggdrasil/core.go index f7664942..4ac678df 100644 --- a/src/yggdrasil/core.go +++ b/src/yggdrasil/core.go @@ -29,7 +29,7 @@ type Core struct { switchTable switchTable peers peers router router - link link + links links log *log.Logger addPeerTimer *time.Timer } @@ -165,7 +165,7 @@ func (c *Core) _start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState return nil, err } - if err := c.link.init(c); err != nil { + if err := c.links.init(c); err != nil { c.log.Errorln("Failed to start link interfaces") return nil, err } @@ -197,7 +197,7 @@ func (c *Core) _stop() { if c.addPeerTimer != nil { c.addPeerTimer.Stop() } - c.link.stop() + c.links.stop() /* FIXME this deadlocks, need a waitgroup or something to coordinate shutdown for _, peer := range c.GetPeers() { c.DisconnectPeer(peer.Port) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index dc61892f..9776ee50 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -20,7 +20,7 @@ import ( "github.com/Arceliar/phony" ) -type link struct { +type links struct { core *Core mutex sync.RWMutex // protects interfaces below interfaces map[linkInfo]*linkInterface @@ -37,7 +37,7 @@ type linkInfo struct { remote string // Remote name or address } -type linkInterfaceMsgIO interface { +type linkMsgIO interface { readMsg() ([]byte, error) writeMsgs([][]byte) (int, error) close() error @@ -48,9 +48,9 @@ type linkInterfaceMsgIO interface { type linkInterface struct { lname string - link *link + links *links peer *peer - msgIO linkInterfaceMsgIO + msgIO linkMsgIO info linkInfo incoming bool force bool @@ -66,7 +66,7 @@ type linkInterface struct { blocked bool // True if we've blocked the peer in the switch } -func (l *link) init(c *Core) error { +func (l *links) init(c *Core) error { l.core = c l.mutex.Lock() l.interfaces = make(map[linkInfo]*linkInterface) @@ -81,11 +81,11 @@ func (l *link) init(c *Core) error { return nil } -func (l *link) reconfigure() { +func (l *links) reconfigure() { l.tcp.reconfigure() } -func (l *link) call(uri string, sintf string) error { +func (l *links) call(uri string, sintf string) error { u, err := url.Parse(uri) if err != nil { return fmt.Errorf("peer %s is not correctly formatted (%s)", uri, err) @@ -104,7 +104,7 @@ func (l *link) call(uri string, sintf string) error { return nil } -func (l *link) listen(uri string) error { +func (l *links) listen(uri string) error { u, err := url.Parse(uri) if err != nil { return fmt.Errorf("listener %s is not correctly formatted (%s)", uri, err) @@ -121,11 +121,11 @@ func (l *link) listen(uri string) error { } } -func (l *link) create(msgIO linkInterfaceMsgIO, name, linkType, local, remote string, incoming, force bool) (*linkInterface, error) { +func (l *links) create(msgIO linkMsgIO, name, linkType, local, remote string, incoming, force bool) (*linkInterface, error) { // Technically anything unique would work for names, but let's pick something human readable, just for debugging intf := linkInterface{ lname: name, - link: l, + links: l, msgIO: msgIO, info: linkInfo{ linkType: linkType, @@ -142,7 +142,7 @@ func (l *link) create(msgIO linkInterfaceMsgIO, name, linkType, local, remote st return &intf, nil } -func (l *link) stop() error { +func (l *links) stop() error { close(l.stopped) if err := l.tcp.stop(); err != nil { return err @@ -163,8 +163,8 @@ func (intf *linkInterface) handler() error { }) myLinkPub, myLinkPriv := crypto.NewBoxKeys() meta := version_getBaseMetadata() - meta.box = intf.link.core.boxPub - meta.sig = intf.link.core.sigPub + meta.box = intf.links.core.boxPub + meta.sig = intf.links.core.sigPub meta.link = *myLinkPub metaBytes := meta.encode() // TODO timeouts on send/recv (goroutine for send/recv, channel select w/ timer) @@ -187,12 +187,12 @@ func (intf *linkInterface) handler() error { } base := version_getBaseMetadata() if meta.ver > base.ver || meta.ver == base.ver && meta.minorVer > base.minorVer { - intf.link.core.log.Errorln("Failed to connect to node: " + intf.lname + " version: " + fmt.Sprintf("%d.%d", meta.ver, meta.minorVer)) + intf.links.core.log.Errorln("Failed to connect to node: " + intf.lname + " version: " + fmt.Sprintf("%d.%d", meta.ver, meta.minorVer)) return errors.New("failed to connect: wrong version") } // Check if we're authorized to connect to this key / IP - if intf.incoming && !intf.force && !intf.link.core.peers.isAllowedEncryptionPublicKey(&meta.box) { - intf.link.core.log.Warnf("%s connection from %s forbidden: AllowedEncryptionPublicKeys does not contain key %s", + if intf.incoming && !intf.force && !intf.links.core.peers.isAllowedEncryptionPublicKey(&meta.box) { + intf.links.core.log.Warnf("%s connection from %s forbidden: AllowedEncryptionPublicKeys does not contain key %s", strings.ToUpper(intf.info.linkType), intf.info.remote, hex.EncodeToString(meta.box[:])) intf.msgIO.close() return nil @@ -200,12 +200,12 @@ func (intf *linkInterface) handler() error { // Check if we already have a link to this node intf.info.box = meta.box intf.info.sig = meta.sig - intf.link.mutex.Lock() - if oldIntf, isIn := intf.link.interfaces[intf.info]; isIn { - intf.link.mutex.Unlock() + intf.links.mutex.Lock() + if oldIntf, isIn := intf.links.interfaces[intf.info]; isIn { + intf.links.mutex.Unlock() // FIXME we should really return an error and let the caller block instead // That lets them do things like close connections on its own, avoid printing a connection message in the first place, etc. - intf.link.core.log.Debugln("DEBUG: found existing interface for", intf.name) + intf.links.core.log.Debugln("DEBUG: found existing interface for", intf.name) intf.msgIO.close() if !intf.incoming { // Block outgoing connection attempts until the existing connection closes @@ -214,21 +214,21 @@ func (intf *linkInterface) handler() error { return nil } else { intf.closed = make(chan struct{}) - intf.link.interfaces[intf.info] = intf + intf.links.interfaces[intf.info] = intf defer func() { - intf.link.mutex.Lock() - delete(intf.link.interfaces, intf.info) - intf.link.mutex.Unlock() + intf.links.mutex.Lock() + delete(intf.links.interfaces, intf.info) + intf.links.mutex.Unlock() close(intf.closed) }() - intf.link.core.log.Debugln("DEBUG: registered interface for", intf.name) + intf.links.core.log.Debugln("DEBUG: registered interface for", intf.name) } - intf.link.mutex.Unlock() + intf.links.mutex.Unlock() // Create peer shared := crypto.GetSharedKey(myLinkPriv, &meta.link) - phony.Block(&intf.link.core.peers, func() { + phony.Block(&intf.links.core.peers, func() { // FIXME don't use phony.Block, it's bad practice, even if it's safe here - intf.peer = intf.link.core.peers._newPeer(&meta.box, &meta.sig, shared, intf) + intf.peer = intf.links.core.peers._newPeer(&meta.box, &meta.sig, shared, intf) }) if intf.peer == nil { return errors.New("failed to create peer") @@ -240,7 +240,7 @@ func (intf *linkInterface) handler() error { themAddr := address.AddrForNodeID(crypto.GetNodeID(&intf.info.box)) themAddrString := net.IP(themAddr[:]).String() themString := fmt.Sprintf("%s@%s", themAddrString, intf.info.remote) - intf.link.core.log.Infof("Connected %s: %s, source %s", + intf.links.core.log.Infof("Connected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) // Start things go intf.peer.start() @@ -252,7 +252,7 @@ func (intf *linkInterface) handler() error { defer close(done) go func() { select { - case <-intf.link.stopped: + case <-intf.links.stopped: intf.msgIO.close() case <-done: } @@ -260,10 +260,10 @@ func (intf *linkInterface) handler() error { err = <-intf.reader.err // TODO don't report an error if it's just a 'use of closed network connection' if err != nil { - intf.link.core.log.Infof("Disconnected %s: %s, source %s; error: %s", + intf.links.core.log.Infof("Disconnected %s: %s, source %s; error: %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local, err) } else { - intf.link.core.log.Infof("Disconnected %s: %s, source %s", + intf.links.core.log.Infof("Disconnected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) } return err @@ -355,7 +355,7 @@ func (intf *linkInterface) notifyBlockedSend() { if intf.sendTimer != nil && !intf.blocked { //As far as we know, we're still trying to send, and the timer fired. intf.blocked = true - intf.link.core.switchTable.blockPeer(intf, intf.peer.port) + intf.links.core.switchTable.blockPeer(intf, intf.peer.port) } }) } @@ -392,7 +392,7 @@ func (intf *linkInterface) notifyStalled() { intf.stallTimer.Stop() intf.stallTimer = nil intf.blocked = true - intf.link.core.switchTable.blockPeer(intf, intf.peer.port) + intf.links.core.switchTable.blockPeer(intf, intf.peer.port) } }) } @@ -419,7 +419,7 @@ func (intf *linkInterface) notifyRead(size int) { } if intf.blocked { intf.blocked = false - intf.link.core.switchTable.unblockPeer(intf, intf.peer.port) + intf.links.core.switchTable.unblockPeer(intf, intf.peer.port) } }) } diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go index f830c215..6c04a8c0 100644 --- a/src/yggdrasil/simlink.go +++ b/src/yggdrasil/simlink.go @@ -58,7 +58,7 @@ func (c *Core) NewSimlink() *Simlink { s := &Simlink{rch: make(chan []byte, 1)} n := "Simlink" var err error - s.link, err = c.link.create(s, n, n, n, n, false, true) + s.link, err = c.links.create(s, n, n, n, n, false, true) if err != nil { panic(err) } diff --git a/src/yggdrasil/stream.go b/src/yggdrasil/stream.go index be1398fc..afa97c76 100644 --- a/src/yggdrasil/stream.go +++ b/src/yggdrasil/stream.go @@ -9,7 +9,7 @@ import ( ) // Test that this matches the interface we expect -var _ = linkInterfaceMsgIO(&stream{}) +var _ = linkMsgIO(&stream{}) type stream struct { rwc io.ReadWriteCloser diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 6ab9a02b..a5c099ba 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -188,7 +188,7 @@ func (t *switchTable) init(core *Core) { func (t *switchTable) reconfigure() { // This is where reconfiguration would go, if we had anything useful to do. - t.core.link.reconfigure() + t.core.links.reconfigure() t.core.peers.reconfigure() } diff --git a/src/yggdrasil/tcp.go b/src/yggdrasil/tcp.go index 9cca4193..17b34e91 100644 --- a/src/yggdrasil/tcp.go +++ b/src/yggdrasil/tcp.go @@ -33,7 +33,7 @@ const tcp_ping_interval = (default_timeout * 2 / 3) // The TCP listener and information about active TCP connections, to avoid duplication. type tcp struct { - link *link + links *links waitgroup sync.WaitGroup mutex sync.Mutex // Protecting the below listeners map[string]*TcpListener @@ -86,8 +86,8 @@ func (t *tcp) getAddr() *net.TCPAddr { } // Initializes the struct. -func (t *tcp) init(l *link) error { - t.link = l +func (t *tcp) init(l *links) error { + t.links = l t.tls.init(t) t.mutex.Lock() t.calls = make(map[string]struct{}) @@ -95,9 +95,9 @@ func (t *tcp) init(l *link) error { t.listeners = make(map[string]*TcpListener) t.mutex.Unlock() - t.link.core.config.Mutex.RLock() - defer t.link.core.config.Mutex.RUnlock() - for _, listenaddr := range t.link.core.config.Current.Listen { + t.links.core.config.Mutex.RLock() + defer t.links.core.config.Mutex.RUnlock() + for _, listenaddr := range t.links.core.config.Current.Listen { switch listenaddr[:6] { case "tcp://": if _, err := t.listen(listenaddr[6:], nil); err != nil { @@ -108,7 +108,7 @@ func (t *tcp) init(l *link) error { return err } default: - t.link.core.log.Errorln("Failed to add listener: listener", listenaddr, "is not correctly formatted, ignoring") + t.links.core.log.Errorln("Failed to add listener: listener", listenaddr, "is not correctly formatted, ignoring") } } @@ -126,35 +126,35 @@ func (t *tcp) stop() error { } func (t *tcp) reconfigure() { - t.link.core.config.Mutex.RLock() - added := util.Difference(t.link.core.config.Current.Listen, t.link.core.config.Previous.Listen) - deleted := util.Difference(t.link.core.config.Previous.Listen, t.link.core.config.Current.Listen) - t.link.core.config.Mutex.RUnlock() + t.links.core.config.Mutex.RLock() + added := util.Difference(t.links.core.config.Current.Listen, t.links.core.config.Previous.Listen) + deleted := util.Difference(t.links.core.config.Previous.Listen, t.links.core.config.Current.Listen) + t.links.core.config.Mutex.RUnlock() if len(added) > 0 || len(deleted) > 0 { for _, a := range added { switch a[:6] { case "tcp://": if _, err := t.listen(a[6:], nil); err != nil { - t.link.core.log.Errorln("Error adding TCP", a[6:], "listener:", err) + t.links.core.log.Errorln("Error adding TCP", a[6:], "listener:", err) } case "tls://": if _, err := t.listen(a[6:], t.tls.forListener); err != nil { - t.link.core.log.Errorln("Error adding TLS", a[6:], "listener:", err) + t.links.core.log.Errorln("Error adding TLS", a[6:], "listener:", err) } default: - t.link.core.log.Errorln("Failed to add listener: listener", a, "is not correctly formatted, ignoring") + t.links.core.log.Errorln("Failed to add listener: listener", a, "is not correctly formatted, ignoring") } } for _, d := range deleted { if d[:6] != "tcp://" && d[:6] != "tls://" { - t.link.core.log.Errorln("Failed to delete listener: listener", d, "is not correctly formatted, ignoring") + t.links.core.log.Errorln("Failed to delete listener: listener", d, "is not correctly formatted, ignoring") continue } t.mutex.Lock() if listener, ok := t.listeners[d[6:]]; ok { t.mutex.Unlock() listener.Stop() - t.link.core.log.Infoln("Stopped TCP listener:", d[6:]) + t.links.core.log.Infoln("Stopped TCP listener:", d[6:]) } else { t.mutex.Unlock() } @@ -202,13 +202,13 @@ func (t *tcp) listener(l *TcpListener, listenaddr string) { } // And here we go! defer func() { - t.link.core.log.Infoln("Stopping TCP listener on:", l.Listener.Addr().String()) + t.links.core.log.Infoln("Stopping TCP listener on:", l.Listener.Addr().String()) l.Listener.Close() t.mutex.Lock() delete(t.listeners, listenaddr) t.mutex.Unlock() }() - t.link.core.log.Infoln("Listening for TCP on:", l.Listener.Addr().String()) + t.links.core.log.Infoln("Listening for TCP on:", l.Listener.Addr().String()) go func() { <-l.stop l.Listener.Close() @@ -217,7 +217,7 @@ func (t *tcp) listener(l *TcpListener, listenaddr string) { for { sock, err := l.Listener.Accept() if err != nil { - t.link.core.log.Errorln("Failed to accept connection:", err) + t.links.core.log.Errorln("Failed to accept connection:", err) return } t.waitgroup.Add(1) @@ -344,7 +344,7 @@ func (t *tcp) call(saddr string, options interface{}, sintf string, upgrade *Tcp } conn, err = dialer.Dial("tcp", dst.String()) if err != nil { - t.link.core.log.Debugf("Failed to dial %s: %s", callproto, err) + t.links.core.log.Debugf("Failed to dial %s: %s", callproto, err) return } t.waitgroup.Add(1) @@ -361,7 +361,7 @@ func (t *tcp) handler(sock net.Conn, incoming bool, options interface{}, upgrade if upgrade != nil { var err error if sock, err = upgrade.upgrade(sock); err != nil { - t.link.core.log.Errorln("TCP handler upgrade failed:", err) + t.links.core.log.Errorln("TCP handler upgrade failed:", err) return } else { upgraded = true @@ -387,12 +387,12 @@ func (t *tcp) handler(sock net.Conn, incoming bool, options interface{}, upgrade remote, _, _ = net.SplitHostPort(sock.RemoteAddr().String()) } force := net.ParseIP(strings.Split(remote, "%")[0]).IsLinkLocalUnicast() - link, err := t.link.core.link.create(&stream, name, proto, local, remote, incoming, force) + link, err := t.links.create(&stream, name, proto, local, remote, incoming, force) if err != nil { - t.link.core.log.Println(err) + t.links.core.log.Println(err) panic(err) } - t.link.core.log.Debugln("DEBUG: starting handler for", name) + t.links.core.log.Debugln("DEBUG: starting handler for", name) err = link.handler() - t.link.core.log.Debugln("DEBUG: stopped handler for", name, err) + t.links.core.log.Debugln("DEBUG: stopped handler for", name, err) } diff --git a/src/yggdrasil/tcp_linux.go b/src/yggdrasil/tcp_linux.go index 9ec3c10f..e18f92b1 100644 --- a/src/yggdrasil/tcp_linux.go +++ b/src/yggdrasil/tcp_linux.go @@ -20,10 +20,10 @@ func (t *tcp) tcpContext(network, address string, c syscall.RawConn) error { // Log any errors if bbr != nil { - t.link.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, SetsockoptString error:", bbr) + t.links.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, SetsockoptString error:", bbr) } if control != nil { - t.link.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, Control error:", control) + t.links.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, Control error:", control) } // Return nil because errors here are not considered fatal for the connection, it just means congestion control is suboptimal @@ -38,7 +38,7 @@ func (t *tcp) getControl(sintf string) func(string, string, syscall.RawConn) err } c.Control(btd) if err != nil { - t.link.core.log.Debugln("Failed to set SO_BINDTODEVICE:", sintf) + t.links.core.log.Debugln("Failed to set SO_BINDTODEVICE:", sintf) } return t.tcpContext(network, address, c) } diff --git a/src/yggdrasil/tls.go b/src/yggdrasil/tls.go index 7212c4df..e2861aca 100644 --- a/src/yggdrasil/tls.go +++ b/src/yggdrasil/tls.go @@ -34,7 +34,7 @@ func (t *tcptls) init(tcp *tcp) { } edpriv := make(ed25519.PrivateKey, ed25519.PrivateKeySize) - copy(edpriv[:], tcp.link.core.sigPriv[:]) + copy(edpriv[:], tcp.links.core.sigPriv[:]) certBuf := &bytes.Buffer{} @@ -42,7 +42,7 @@ func (t *tcptls) init(tcp *tcp) { pubtemp := x509.Certificate{ SerialNumber: big.NewInt(1), Subject: pkix.Name{ - CommonName: hex.EncodeToString(tcp.link.core.sigPub[:]), + CommonName: hex.EncodeToString(tcp.links.core.sigPub[:]), }, NotBefore: time.Now(), NotAfter: time.Now().Add(time.Hour * 24 * 365), From 59896f17fd66566b49bc56cc8e963dade5e06c29 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 23 May 2020 10:28:57 -0500 Subject: [PATCH 33/51] more cleanup --- src/yggdrasil/link.go | 82 +++++++++++++++++++++++----------------- src/yggdrasil/peer.go | 16 +------- src/yggdrasil/simlink.go | 2 +- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 9776ee50..067c2ecc 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -21,11 +21,11 @@ import ( ) type links struct { - core *Core - mutex sync.RWMutex // protects interfaces below - interfaces map[linkInfo]*linkInterface - tcp tcp // TCP interface support - stopped chan struct{} + core *Core + mutex sync.RWMutex // protects links below + links map[linkInfo]*link + tcp tcp // TCP interface support + stopped chan struct{} // TODO timeout (to remove from switch), read from config.ReadTimeout } @@ -46,7 +46,7 @@ type linkMsgIO interface { _recvMetaBytes() ([]byte, error) } -type linkInterface struct { +type link struct { lname string links *links peer *peer @@ -55,8 +55,8 @@ type linkInterface struct { incoming bool force bool closed chan struct{} - reader linkReader // Reads packets, notifies this linkInterface, passes packets to switch - writer linkWriter // Writes packets, notifies this linkInterface + reader linkReader // Reads packets, notifies this link, passes packets to switch + writer linkWriter // Writes packets, notifies this link phony.Inbox // Protects the below sendTimer *time.Timer // Fires to signal that sending is blocked keepAliveTimer *time.Timer // Fires to send keep-alive traffic @@ -69,7 +69,7 @@ type linkInterface struct { func (l *links) init(c *Core) error { l.core = c l.mutex.Lock() - l.interfaces = make(map[linkInfo]*linkInterface) + l.links = make(map[linkInfo]*link) l.mutex.Unlock() l.stopped = make(chan struct{}) @@ -121,9 +121,9 @@ func (l *links) listen(uri string) error { } } -func (l *links) create(msgIO linkMsgIO, name, linkType, local, remote string, incoming, force bool) (*linkInterface, error) { +func (l *links) create(msgIO linkMsgIO, name, linkType, local, remote string, incoming, force bool) (*link, error) { // Technically anything unique would work for names, but let's pick something human readable, just for debugging - intf := linkInterface{ + intf := link{ lname: name, links: l, msgIO: msgIO, @@ -150,7 +150,7 @@ func (l *links) stop() error { return nil } -func (intf *linkInterface) handler() error { +func (intf *link) handler() error { // TODO split some of this into shorter functions, so it's easier to read, and for the FIXME duplicate peer issue mentioned later go func() { for bss := range intf.writer.worker { @@ -201,7 +201,7 @@ func (intf *linkInterface) handler() error { intf.info.box = meta.box intf.info.sig = meta.sig intf.links.mutex.Lock() - if oldIntf, isIn := intf.links.interfaces[intf.info]; isIn { + if oldIntf, isIn := intf.links.links[intf.info]; isIn { intf.links.mutex.Unlock() // FIXME we should really return an error and let the caller block instead // That lets them do things like close connections on its own, avoid printing a connection message in the first place, etc. @@ -214,10 +214,10 @@ func (intf *linkInterface) handler() error { return nil } else { intf.closed = make(chan struct{}) - intf.links.interfaces[intf.info] = intf + intf.links.links[intf.info] = intf defer func() { intf.links.mutex.Lock() - delete(intf.links.interfaces, intf.info) + delete(intf.links.links, intf.info) intf.links.mutex.Unlock() close(intf.closed) }() @@ -271,9 +271,21 @@ func (intf *linkInterface) handler() error { //////////////////////////////////////////////////////////////////////////////// -// linkInterface needs to match the peerInterface type needed by the peers +// link needs to match the linkInterface type needed by the peers -func (intf *linkInterface) out(bss [][]byte) { +type linkInterface interface { + out([][]byte) + linkOut([]byte) + notifyQueued(uint64) + close() + // These next ones are only used by the API + name() string + local() string + remote() string + interfaceType() string +} + +func (intf *link) out(bss [][]byte) { intf.Act(nil, func() { // nil to prevent it from blocking if the link is somehow frozen // this is safe because another packet won't be sent until the link notifies @@ -282,7 +294,7 @@ func (intf *linkInterface) out(bss [][]byte) { }) } -func (intf *linkInterface) linkOut(bs []byte) { +func (intf *link) linkOut(bs []byte) { intf.Act(nil, func() { // nil to prevent it from blocking if the link is somehow frozen // FIXME this is hypothetically not safe, the peer shouldn't be sending @@ -293,7 +305,7 @@ func (intf *linkInterface) linkOut(bs []byte) { }) } -func (intf *linkInterface) notifyQueued(seq uint64) { +func (intf *link) notifyQueued(seq uint64) { // This is the part where we want non-nil 'from' fields intf.Act(intf.peer, func() { if intf.isSending { @@ -302,23 +314,23 @@ func (intf *linkInterface) notifyQueued(seq uint64) { }) } -func (intf *linkInterface) close() { +func (intf *link) close() { intf.Act(nil, func() { intf.msgIO.close() }) } -func (intf *linkInterface) name() string { +func (intf *link) name() string { return intf.lname } -func (intf *linkInterface) local() string { +func (intf *link) local() string { return intf.info.local } -func (intf *linkInterface) remote() string { +func (intf *link) remote() string { return intf.info.remote } -func (intf *linkInterface) interfaceType() string { +func (intf *link) interfaceType() string { return intf.info.linkType } @@ -331,7 +343,7 @@ const ( ) // notify the intf that we're currently sending -func (intf *linkInterface) notifySending(size int) { +func (intf *link) notifySending(size int) { intf.Act(&intf.writer, func() { intf.isSending = true intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) @@ -340,7 +352,7 @@ func (intf *linkInterface) notifySending(size int) { } // we just sent something, so cancel any pending timer to send keep-alive traffic -func (intf *linkInterface) _cancelStallTimer() { +func (intf *link) _cancelStallTimer() { if intf.stallTimer != nil { intf.stallTimer.Stop() intf.stallTimer = nil @@ -350,7 +362,7 @@ func (intf *linkInterface) _cancelStallTimer() { // This gets called from a time.AfterFunc, and notifies the switch that we appear // to have gotten blocked on a write, so the switch should start routing traffic // through other links, if alternatives exist -func (intf *linkInterface) notifyBlockedSend() { +func (intf *link) notifyBlockedSend() { intf.Act(nil, func() { if intf.sendTimer != nil && !intf.blocked { //As far as we know, we're still trying to send, and the timer fired. @@ -361,7 +373,7 @@ func (intf *linkInterface) notifyBlockedSend() { } // notify the intf that we've finished sending, returning the peer to the switch -func (intf *linkInterface) notifySent(size int) { +func (intf *link) notifySent(size int) { intf.Act(&intf.writer, func() { if intf.sendTimer != nil { intf.sendTimer.Stop() @@ -381,12 +393,12 @@ func (intf *linkInterface) notifySent(size int) { } // Notify the peer that we're ready for more traffic -func (intf *linkInterface) _notifyIdle() { +func (intf *link) _notifyIdle() { intf.peer.Act(intf, intf.peer._handleIdle) } // Set the peer as stalled, to prevent them from returning to the switch until a read succeeds -func (intf *linkInterface) notifyStalled() { +func (intf *link) notifyStalled() { intf.Act(nil, func() { // Sent from a time.AfterFunc if intf.stallTimer != nil && !intf.blocked { intf.stallTimer.Stop() @@ -398,7 +410,7 @@ func (intf *linkInterface) notifyStalled() { } // reset the close timer -func (intf *linkInterface) notifyReading() { +func (intf *link) notifyReading() { intf.Act(&intf.reader, func() { if intf.closeTimer != nil { intf.closeTimer.Stop() @@ -408,7 +420,7 @@ func (intf *linkInterface) notifyReading() { } // wake up the link if it was stalled, and (if size > 0) prepare to send keep-alive traffic -func (intf *linkInterface) notifyRead(size int) { +func (intf *link) notifyRead(size int) { intf.Act(&intf.reader, func() { if intf.stallTimer != nil { intf.stallTimer.Stop() @@ -425,7 +437,7 @@ func (intf *linkInterface) notifyRead(size int) { } // We need to send keep-alive traffic now -func (intf *linkInterface) notifyDoKeepAlive() { +func (intf *link) notifyDoKeepAlive() { intf.Act(nil, func() { // Sent from a time.AfterFunc if intf.stallTimer != nil { intf.stallTimer.Stop() @@ -439,7 +451,7 @@ func (intf *linkInterface) notifyDoKeepAlive() { type linkWriter struct { phony.Inbox - intf *linkInterface + intf *link worker chan [][]byte closed bool } @@ -463,7 +475,7 @@ func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte) { type linkReader struct { phony.Inbox - intf *linkInterface + intf *link err chan error } diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 3cfc0b4f..4463bc6d 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -77,23 +77,11 @@ func (ps *peers) getAllowedEncryptionPublicKeys() []string { return ps.core.config.Current.AllowedEncryptionPublicKeys } -type peerInterface interface { - out([][]byte) - linkOut([]byte) - notifyQueued(uint64) - close() - // These next ones are only used by the API - name() string - local() string - remote() string - interfaceType() string -} - // Information known about a peer, including their box/sig keys, precomputed shared keys (static and ephemeral) and a handler for their outgoing traffic type peer struct { phony.Inbox core *Core - intf peerInterface + intf linkInterface port switchPort box crypto.BoxPubKey sig crypto.SigPubKey @@ -134,7 +122,7 @@ func (ps *peers) _updatePeers() { } // Creates a new peer with the specified box, sig, and linkShared keys, using the lowest unoccupied port number. -func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf peerInterface) *peer { +func (ps *peers) _newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShared *crypto.BoxSharedKey, intf linkInterface) *peer { now := time.Now() p := peer{box: *box, core: ps.core, diff --git a/src/yggdrasil/simlink.go b/src/yggdrasil/simlink.go index 6c04a8c0..6675981a 100644 --- a/src/yggdrasil/simlink.go +++ b/src/yggdrasil/simlink.go @@ -9,7 +9,7 @@ type Simlink struct { phony.Inbox rch chan []byte dest *Simlink - link *linkInterface + link *link started bool } From f2b9e95895167d54d53837c5b5ab731ec327982f Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 23 May 2020 12:21:01 -0500 Subject: [PATCH 34/51] simplify routerInterface --- src/yggdrasil/router.go | 43 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 303ada69..cfb75a06 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -62,9 +62,7 @@ func (r *router) init(core *Core) { }) r.peer.Act(r, r.peer._handleIdle) r.out = func(bs []byte) { - r.intf.Act(r, func() { - r.peer.handlePacketFrom(&r.intf, bs) - }) + r.peer.handlePacketFrom(r, bs) } r.nodeinfo.init(r.core) r.core.config.Mutex.RLock() @@ -262,46 +260,23 @@ func (r *router) _handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { // routerInterface is a helper that implements peerInterface type routerInterface struct { - phony.Inbox router *router - busy bool } func (intf *routerInterface) out(bss [][]byte) { - intf.Act(intf.router.peer, func() { - intf.router.Act(intf, func() { - for _, bs := range bss { - intf.router._handlePacket(bs) - } - // we may block due to the above - // so we send a message to ourself, that we'd handle after unblocking - // that message tells us to tell the interface that we're finally idle again - intf.router.Act(nil, func() { - intf.Act(intf.router, intf._handleIdle) - }) - intf.Act(intf.router, intf._handleBusy) - }) + // Note that this is run in the peer's goroutine + intf.router.Act(intf.router.peer, func() { + for _, bs := range bss { + intf.router._handlePacket(bs) + } }) -} - -func (intf *routerInterface) _handleBusy() { - intf.busy = true -} - -func (intf *routerInterface) _handleIdle() { - intf.busy = false - intf.router.peer.Act(intf, intf.router.peer._handleIdle) + //intf.router.peer.Act(nil, intf.router.peer._handleIdle) + intf.router.peer._handleIdle() } func (intf *routerInterface) linkOut(_ []byte) {} -func (intf *routerInterface) notifyQueued(seq uint64) { - intf.Act(intf.router.peer, func() { - if intf.busy { - intf.router.peer.dropFromQueue(intf, seq) - } - }) -} +func (intf *routerInterface) notifyQueued(seq uint64) {} func (intf *routerInterface) close() {} From 77ded84ea580ae219802b96d5fb6bed389bfe998 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 23 May 2020 12:21:23 -0500 Subject: [PATCH 35/51] simplify routerInterface --- src/yggdrasil/router.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index cfb75a06..2ab38555 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -258,7 +258,7 @@ func (r *router) _handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { //////////////////////////////////////////////////////////////////////////////// -// routerInterface is a helper that implements peerInterface +// routerInterface is a helper that implements linkInterface type routerInterface struct { router *router } From 95743085457314b4b0e8115133a3aaef19e494de Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 24 May 2020 17:24:50 -0500 Subject: [PATCH 36/51] have the peer delay setting a max buffer size, in case things have unblocked in the mean time --- src/yggdrasil/peer.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 4463bc6d..ce120b8d 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -303,10 +303,12 @@ func (p *peer) _handleIdle() { func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { - if seq == p.seq { - p.drop = true - p.max = p.queue.size + streamMsgSize - } + p.Act(nil, func() { + if seq == p.seq { + p.drop = true + p.max = p.queue.size + streamMsgSize + } + }) }) } From 4382368b0891d75aa6bbcd5fd4d9de8e01a67f40 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 24 May 2020 17:43:35 -0500 Subject: [PATCH 37/51] make sure the peer isn't idle before entering drop mode --- src/yggdrasil/peer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index ce120b8d..b9641fc0 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -304,7 +304,7 @@ func (p *peer) _handleIdle() { func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { p.Act(nil, func() { - if seq == p.seq { + if seq == p.seq && !p.idle { p.drop = true p.max = p.queue.size + streamMsgSize } From 38dcbb1e2f8c755d718abe12c41d540698ed5bab Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sun, 24 May 2020 17:49:48 -0500 Subject: [PATCH 38/51] cleaner way to handle seq/idle checks for the peer --- src/yggdrasil/peer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index b9641fc0..3976c8f6 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -290,8 +290,8 @@ func (p *peer) _handleIdle() { break } } + p.seq++ if len(packets) > 0 { - p.seq++ p.bytesSent += uint64(size) p.intf.out(packets) p.max = p.queue.size @@ -304,7 +304,7 @@ func (p *peer) _handleIdle() { func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { p.Act(from, func() { p.Act(nil, func() { - if seq == p.seq && !p.idle { + if seq == p.seq { p.drop = true p.max = p.queue.size + streamMsgSize } From f9bc0b7aee8086bcac6cd35ba78b93cbf2ee5148 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Mon, 25 May 2020 11:49:25 -0500 Subject: [PATCH 39/51] use a more elaborate precomputed lookup table from the switch --- src/yggdrasil/switch.go | 116 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 6 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index a5c099ba..a3961c31 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -93,6 +93,20 @@ func (l *switchLocator) dist(dest []byte) int { return dist } +func (l *switchLocator) ldist(sl *switchLocator) int { + lca := -1 + for idx := 0; idx < len(l.coords); idx++ { + if idx >= len(sl.coords) { + break + } + if l.coords[idx] != sl.coords[idx] { + break + } + lca = idx + } + return len(l.coords) + len(sl.coords) - 2*(lca+1) +} + // Gets coords in wire encoded format, with *no* length prefix. func (l *switchLocator) getCoords() []byte { bs := make([]byte, 0, len(l.coords)) @@ -140,13 +154,15 @@ type tableElem struct { port switchPort locator switchLocator time time.Time + next map[switchPort]*tableElem } // This is the subset of the information about all peers needed to make routing decisions, and it stored separately in an atomically accessed table, which gets hammered in the "hot loop" of the routing logic (see: peer.handleTraffic in peers.go). type lookupTable struct { - self switchLocator - elems map[switchPort]tableElem - _msg switchMsg + self switchLocator + elems map[switchPort]tableElem // all switch peers, just for sanity checks + API/debugging + _start tableElem // used for lookups + _msg switchMsg } // This is switch information which is mutable and needs to be modified by other goroutines, but is not accessed atomically. @@ -517,10 +533,83 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi //////////////////////////////////////////////////////////////////////////////// -// The rest of these are related to the switch worker +// The rest of these are related to the switch lookup table + +func (t *switchTable) _updateTable() { + newTable := lookupTable{ + self: t.data.locator.clone(), + elems: make(map[switchPort]tableElem, len(t.data.peers)), + _msg: *t._getMsg(), + } + newTable._init() + for _, pinfo := range t.data.peers { + if pinfo.locator.root != newTable.self.root { + continue + } + loc := pinfo.locator.clone() + loc.coords = loc.coords[:len(loc.coords)-1] // Remove the them->self link + elem := tableElem{ + locator: loc, + port: pinfo.port, + time: pinfo.time, + } + newTable._insert(&elem) + newTable.elems[pinfo.port] = elem + } + t.core.peers.updateTables(t, &newTable) + t.core.router.updateTable(t, &newTable) +} + +func (t *lookupTable) _init() { + // WARNING: this relies on the convention that the self port is 0 + self := tableElem{locator: t.self} // create self elem + t._start = self // initialize _start to self + t._insert(&self) // insert self into table +} + +func (t *lookupTable) _insert(elem *tableElem) { + // This is a helper that should only be run during _updateTable + here := &t._start + for idx := 0; idx <= len(elem.locator.coords); idx++ { + refLoc := here.locator + refLoc.coords = refLoc.coords[:idx] // Note that this is length idx (starts at length 0) + oldDist := refLoc.ldist(&here.locator) + newDist := refLoc.ldist(&elem.locator) + var update bool + switch { + case newDist < oldDist: // new elem is closer to this point in the tree + update = true + case newDist > oldDist: // new elem is too far + case elem.locator.tstamp > refLoc.tstamp: // new elem has a closer timestamp + update = true + case elem.locator.tstamp < refLoc.tstamp: // new elem's timestamp is too old + case elem.time.Before(here.time): // same dist+timestamp, but new elem delivered it faster + update = true + } + if update { + here.port = elem.port + here.locator = elem.locator + here.time = elem.time + // Problem: here is a value, so this doesn't actually update anything... + } + if idx < len(elem.locator.coords) { + if here.next == nil { + here.next = make(map[switchPort]*tableElem) + } + var next *tableElem + var ok bool + if next, ok = here.next[elem.locator.coords[idx]]; !ok { + nextVal := *elem + next = &nextVal + here.next[next.locator.coords[idx]] = next + } + here = next + } + } +} // This is called via a sync.Once to update the atomically readable subset of switch information that gets used for routing decisions. -func (t *switchTable) _updateTable() { +func (t *switchTable) old_updateTable() { // WARNING this should only be called from within t.data.updater.Do() // It relies on the sync.Once for synchronization with messages and lookups // TODO use a pre-computed faster lookup table @@ -558,8 +647,23 @@ func (t *switchTable) start() error { return nil } -// Find the best port to forward to for a given set of coords func (t *lookupTable) lookup(coords []byte) switchPort { + var offset int + here := &t._start + for offset < len(coords) { + port, l := wire_decode_uint64(coords[offset:]) + offset += l + if next, ok := here.next[switchPort(port)]; ok { + here = next + } else { + break + } + } + return here.port +} + +// Find the best port to forward to for a given set of coords +func (t *lookupTable) old_lookup(coords []byte) switchPort { var bestPort switchPort myDist := t.self.dist(coords) bestDist := myDist From 761ae531cb7eddf5c88779c746a0f41b67934b6d Mon Sep 17 00:00:00 2001 From: Arceliar Date: Mon, 25 May 2020 15:19:32 -0500 Subject: [PATCH 40/51] work-in-progress faster queue logic --- src/yggdrasil/packetqueue.go | 85 +++++++----------------------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index 464bc6ce..d91a18ef 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -1,10 +1,15 @@ package yggdrasil +/* import ( "math/rand" "time" ) +*/ +// TODO separate queues per e.g. traffic flow +// For now, we put everything in queue +/* type pqStreamID string type pqPacketInfo struct { @@ -13,13 +18,15 @@ type pqPacketInfo struct { } type pqStream struct { + id string infos []pqPacketInfo - size uint64 + size int } +*/ -// TODO separate queues per e.g. traffic flow type packetQueue struct { - streams map[pqStreamID]pqStream + //streams []pqStream + packets [][]byte size uint64 } @@ -29,83 +36,23 @@ func (q *packetQueue) drop() bool { if q.size == 0 { return false } - // select a random stream, odds based on stream size - offset := rand.Uint64() % q.size - var worst pqStreamID - var size uint64 - for id, stream := range q.streams { - worst = id - size += stream.size - if size >= offset { - break - } - } - // drop the oldest packet from the stream - worstStream := q.streams[worst] - packet := worstStream.infos[0].packet - worstStream.infos = worstStream.infos[1:] - worstStream.size -= uint64(len(packet)) + packet := q.packets[0] + q.packets = q.packets[1:] q.size -= uint64(len(packet)) pool_putBytes(packet) - // save the modified stream to queues - if len(worstStream.infos) > 0 { - q.streams[worst] = worstStream - } else { - delete(q.streams, worst) - } return true } func (q *packetQueue) push(packet []byte) { - if q.streams == nil { - q.streams = make(map[pqStreamID]pqStream) - } - // get stream - id := pqStreamID(peer_getPacketCoords(packet)) // just coords for now - stream := q.streams[id] - // update stream - stream.infos = append(stream.infos, pqPacketInfo{packet, time.Now()}) - stream.size += uint64(len(packet)) - // save update to queues - q.streams[id] = stream + q.packets = append(q.packets, packet) q.size += uint64(len(packet)) } func (q *packetQueue) pop() ([]byte, bool) { - if len(q.streams) > 0 { - // get the stream that uses the least bandwidth - now := time.Now() - var best pqStreamID - for id := range q.streams { - best = id - break // get a random ID to start - } - bestStream := q.streams[best] - bestSize := float64(bestStream.size) - bestAge := now.Sub(bestStream.infos[0].time).Seconds() - for id, stream := range q.streams { - thisSize := float64(stream.size) - thisAge := now.Sub(stream.infos[0].time).Seconds() - // cross multiply to avoid division by zero issues - if bestSize*thisAge > thisSize*bestAge { - // bestSize/bestAge > thisSize/thisAge -> this uses less bandwidth - best = id - bestStream = stream - bestSize = thisSize - bestAge = thisAge - } - } - // get the oldest packet from the best stream - packet := bestStream.infos[0].packet - bestStream.infos = bestStream.infos[1:] - bestStream.size -= uint64(len(packet)) + if q.size > 0 { + packet := q.packets[0] + q.packets = q.packets[1:] q.size -= uint64(len(packet)) - // save the modified stream to queues - if len(bestStream.infos) > 0 { - q.streams[best] = bestStream - } else { - delete(q.streams, best) - } return packet, true } return nil, false From 1f65ffb310c12343bcd56a53399661046f5490a7 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Mon, 25 May 2020 16:07:56 -0500 Subject: [PATCH 41/51] work-in-progress heap-based queue structure --- src/yggdrasil/packetqueue.go | 89 ++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 19 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index d91a18ef..358aaeb1 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -1,15 +1,13 @@ package yggdrasil -/* import ( - "math/rand" + "container/heap" "time" ) -*/ // TODO separate queues per e.g. traffic flow // For now, we put everything in queue -/* + type pqStreamID string type pqPacketInfo struct { @@ -18,15 +16,13 @@ type pqPacketInfo struct { } type pqStream struct { - id string + id pqStreamID infos []pqPacketInfo - size int + size uint64 } -*/ type packetQueue struct { - //streams []pqStream - packets [][]byte + streams []pqStream size uint64 } @@ -36,24 +32,79 @@ func (q *packetQueue) drop() bool { if q.size == 0 { return false } - packet := q.packets[0] - q.packets = q.packets[1:] - q.size -= uint64(len(packet)) - pool_putBytes(packet) + var longestIdx int + for idx := range q.streams { + if q.streams[idx].size > q.streams[longestIdx].size { + longestIdx = idx + } + } + stream := heap.Remove(q, longestIdx).(pqStream) + info := stream.infos[0] + if len(stream.infos) > 1 { + stream.infos = stream.infos[1:] + stream.size -= uint64(len(info.packet)) + heap.Push(q, stream) + } + pool_putBytes(info.packet) return true } func (q *packetQueue) push(packet []byte) { - q.packets = append(q.packets, packet) - q.size += uint64(len(packet)) + id := pqStreamID(peer_getPacketCoords(packet)) // just coords for now + info := pqPacketInfo{packet: packet, time: time.Now()} + for idx := range q.streams { + if q.streams[idx].id == id { + q.streams[idx].infos = append(q.streams[idx].infos, info) + q.streams[idx].size += uint64(len(packet)) + q.size += uint64(len(packet)) + return + } + } + stream := pqStream{id: id, size: uint64(len(packet))} + stream.infos = append(stream.infos, info) + heap.Push(q, stream) } func (q *packetQueue) pop() ([]byte, bool) { if q.size > 0 { - packet := q.packets[0] - q.packets = q.packets[1:] - q.size -= uint64(len(packet)) - return packet, true + stream := heap.Pop(q).(pqStream) + info := stream.infos[0] + if len(stream.infos) > 1 { + stream.infos = stream.infos[1:] + stream.size -= uint64(len(info.packet)) + heap.Push(q, stream) + } + return info.packet, true } return nil, false } + +//////////////////////////////////////////////////////////////////////////////// + +// Interface methods for packetQueue to satisfy heap.Interface + +func (q *packetQueue) Len() int { + return len(q.streams) +} + +func (q *packetQueue) Less(i, j int) bool { + return q.streams[i].infos[0].time.Before(q.streams[j].infos[0].time) +} + +func (q *packetQueue) Swap(i, j int) { + q.streams[i], q.streams[j] = q.streams[j], q.streams[i] +} + +func (q *packetQueue) Push(x interface{}) { + stream := x.(pqStream) + q.streams = append(q.streams, stream) + q.size += stream.size +} + +func (q *packetQueue) Pop() interface{} { + idx := len(q.streams) - 1 + stream := q.streams[idx] + q.streams = q.streams[:idx] + q.size -= stream.size + return stream +} From 674d8b58b652967b65994518cfc438f00017ee9b Mon Sep 17 00:00:00 2001 From: Arceliar Date: Mon, 25 May 2020 19:27:17 -0500 Subject: [PATCH 42/51] get things compiling again --- src/yggdrasil/tcp.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/yggdrasil/tcp.go b/src/yggdrasil/tcp.go index d37d6e62..f686553f 100644 --- a/src/yggdrasil/tcp.go +++ b/src/yggdrasil/tcp.go @@ -407,7 +407,10 @@ func (t *tcp) handler(sock net.Conn, incoming bool, options tcpOptions) { if laddr.IsValid() || lsubnet.IsValid() { // The local address is with the network address/prefix range // This would route ygg over ygg, which we don't want - t.link.core.log.Debugln("Dropping ygg-tunneled connection", local, remote) + // FIXME ideally this check should happen outside of the core library + // Maybe dial/listen at the application level + // Then pass a net.Conn to the core library (after these kinds of checks are done) + t.links.core.log.Debugln("Dropping ygg-tunneled connection", local, remote) return } } From 09f9f4e8e4fdc2aafb65bbc31a88551341dded80 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Mon, 25 May 2020 20:09:57 -0500 Subject: [PATCH 43/51] use heap.Fix instead of heap.Remove + heap.Push when updating queues, this is theoretically faster --- src/yggdrasil/packetqueue.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/yggdrasil/packetqueue.go b/src/yggdrasil/packetqueue.go index 358aaeb1..6273e6c8 100644 --- a/src/yggdrasil/packetqueue.go +++ b/src/yggdrasil/packetqueue.go @@ -38,12 +38,16 @@ func (q *packetQueue) drop() bool { longestIdx = idx } } - stream := heap.Remove(q, longestIdx).(pqStream) + stream := q.streams[longestIdx] info := stream.infos[0] if len(stream.infos) > 1 { stream.infos = stream.infos[1:] stream.size -= uint64(len(info.packet)) - heap.Push(q, stream) + q.streams[longestIdx] = stream + q.size -= uint64(len(info.packet)) + heap.Fix(q, longestIdx) + } else { + heap.Remove(q, longestIdx) } pool_putBytes(info.packet) return true @@ -67,12 +71,16 @@ func (q *packetQueue) push(packet []byte) { func (q *packetQueue) pop() ([]byte, bool) { if q.size > 0 { - stream := heap.Pop(q).(pqStream) + stream := q.streams[0] info := stream.infos[0] if len(stream.infos) > 1 { stream.infos = stream.infos[1:] stream.size -= uint64(len(info.packet)) - heap.Push(q, stream) + q.streams[0] = stream + q.size -= uint64(len(info.packet)) + heap.Fix(q, 0) + } else { + heap.Remove(q, 0) } return info.packet, true } From 1df305d31cde064ae5ca3fddc46d1e94d891015d Mon Sep 17 00:00:00 2001 From: Arceliar Date: Wed, 27 May 2020 18:53:14 -0500 Subject: [PATCH 44/51] simplify how blocking is detected and packets are dequeued --- src/yggdrasil/link.go | 11 +---------- src/yggdrasil/peer.go | 16 +++++++--------- src/yggdrasil/router.go | 6 +++--- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index e0620d10..0dd97ec5 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -326,7 +326,6 @@ func (intf *link) handler() error { type linkInterface interface { out([][]byte) linkOut([]byte) - notifyQueued(uint64) close() // These next ones are only used by the API name() string @@ -355,15 +354,6 @@ func (intf *link) linkOut(bs []byte) { }) } -func (intf *link) notifyQueued(seq uint64) { - // This is the part where we want non-nil 'from' fields - intf.Act(intf.peer, func() { - if intf.isSending { - intf.peer.dropFromQueue(intf, seq) - } - }) -} - func (intf *link) close() { intf.Act(nil, func() { intf.msgIO.close() }) } @@ -398,6 +388,7 @@ func (intf *link) notifySending(size int) { intf.isSending = true intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) intf._cancelStallTimer() + intf.peer.notifyBlocked(intf) }) } diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 3976c8f6..f04ab280 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -266,23 +266,20 @@ func (p *peer) sendPacketFrom(from phony.Actor, packet []byte) { func (p *peer) _sendPacket(packet []byte) { p.queue.push(packet) - switch { - case p.idle: + if p.idle { p.idle = false p._handleIdle() - case p.drop: + } else if p.drop { for p.queue.size > p.max { p.queue.drop() } - default: - p.intf.notifyQueued(p.seq) } } func (p *peer) _handleIdle() { var packets [][]byte var size uint64 - for size < streamMsgSize { + for { if packet, success := p.queue.pop(); success { packets = append(packets, packet) size += uint64(len(packet)) @@ -297,16 +294,17 @@ func (p *peer) _handleIdle() { p.max = p.queue.size } else { p.idle = true - p.drop = false } + p.drop = false } -func (p *peer) dropFromQueue(from phony.Actor, seq uint64) { +func (p *peer) notifyBlocked(from phony.Actor) { p.Act(from, func() { + seq := p.seq p.Act(nil, func() { if seq == p.seq { p.drop = true - p.max = p.queue.size + streamMsgSize + p.max = 2*p.queue.size + streamMsgSize } }) }) diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index 2ab38555..d387346e 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -270,14 +270,14 @@ func (intf *routerInterface) out(bss [][]byte) { intf.router._handlePacket(bs) } }) - //intf.router.peer.Act(nil, intf.router.peer._handleIdle) + // This should now immediately make the peer idle again + // So the self-peer shouldn't end up buffering anything + // We let backpressure act as a throttle instead intf.router.peer._handleIdle() } func (intf *routerInterface) linkOut(_ []byte) {} -func (intf *routerInterface) notifyQueued(seq uint64) {} - func (intf *routerInterface) close() {} func (intf *routerInterface) name() string { return "(self)" } From 905c28f7b213ac2f6cdf471e4af1587e307ea32f Mon Sep 17 00:00:00 2001 From: Arceliar Date: Wed, 27 May 2020 19:31:17 -0500 Subject: [PATCH 45/51] fix some issues with the rewritten switch lookup tables --- src/yggdrasil/switch.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index a3961c31..ae14d772 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -444,6 +444,9 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi } } } + if sender.blocked != oldSender.blocked { + doUpdate = true + } // Update sender t.data.peers[fromPort] = sender // Decide if we should also update our root info to make the sender our parent @@ -543,7 +546,9 @@ func (t *switchTable) _updateTable() { } newTable._init() for _, pinfo := range t.data.peers { - if pinfo.locator.root != newTable.self.root { + if pinfo.blocked || + pinfo.locator.root != newTable.self.root || + pinfo.key == t.key { continue } loc := pinfo.locator.clone() From 8775075c18192676d8a61e2d9e6218df06a9fb05 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Wed, 27 May 2020 19:35:19 -0500 Subject: [PATCH 46/51] debugging --- src/yggdrasil/switch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index ae14d772..ebdeea45 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -528,7 +528,7 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi t.parent = sender.port defer t.core.peers.sendSwitchMsgs(t) } - if doUpdate { + if true || doUpdate { defer t._updateTable() } return From 3dc22427125103f2ac3e1dbe3664bf38c7a8b167 Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 30 May 2020 10:32:15 -0500 Subject: [PATCH 47/51] fix handling of keepAliveTimer and blocked state in link.go --- src/yggdrasil/link.go | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 0dd97ec5..7a843b68 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -387,19 +387,14 @@ func (intf *link) notifySending(size int) { intf.Act(&intf.writer, func() { intf.isSending = true intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) - intf._cancelStallTimer() + if intf.keepAliveTimer != nil { + intf.keepAliveTimer.Stop() + intf.keepAliveTimer = nil + } intf.peer.notifyBlocked(intf) }) } -// we just sent something, so cancel any pending timer to send keep-alive traffic -func (intf *link) _cancelStallTimer() { - if intf.stallTimer != nil { - intf.stallTimer.Stop() - intf.stallTimer = nil - } -} - // This gets called from a time.AfterFunc, and notifies the switch that we appear // to have gotten blocked on a write, so the switch should start routing traffic // through other links, if alternatives exist @@ -441,11 +436,13 @@ func (intf *link) _notifyIdle() { // Set the peer as stalled, to prevent them from returning to the switch until a read succeeds func (intf *link) notifyStalled() { intf.Act(nil, func() { // Sent from a time.AfterFunc - if intf.stallTimer != nil && !intf.blocked { + if intf.stallTimer != nil { intf.stallTimer.Stop() intf.stallTimer = nil - intf.blocked = true - intf.links.core.switchTable.blockPeer(intf, intf.peer.port) + if !intf.blocked { + intf.blocked = true + intf.links.core.switchTable.blockPeer(intf, intf.peer.port) + } } }) } @@ -480,9 +477,9 @@ func (intf *link) notifyRead(size int) { // We need to send keep-alive traffic now func (intf *link) notifyDoKeepAlive() { intf.Act(nil, func() { // Sent from a time.AfterFunc - if intf.stallTimer != nil { - intf.stallTimer.Stop() - intf.stallTimer = nil + if intf.keepAliveTimer != nil { + intf.keepAliveTimer.Stop() + intf.keepAliveTimer = nil intf.writer.sendFrom(nil, [][]byte{nil}) // Empty keep-alive traffic } }) From 5e170e22e116423fb44310482709062614b5b07d Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 30 May 2020 10:47:54 -0500 Subject: [PATCH 48/51] more switch fixes --- src/yggdrasil/switch.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index ebdeea45..9cea91eb 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -392,6 +392,9 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi sender.key = prevKey prevKey = hop.Next } + if sender.key == t.key { + return // Don't peer with ourself via different interfaces + } sender.msg = *msg sender.port = fromPort sender.time = now @@ -516,8 +519,8 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi } // Note that we depend on the LIFO order of the stack of defers here... if updateRoot { + doUpdate = true if !equiv(&sender.locator, &t.data.locator) { - doUpdate = true t.data.seq++ defer t.core.router.reset(t) } @@ -528,8 +531,8 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi t.parent = sender.port defer t.core.peers.sendSwitchMsgs(t) } - if true || doUpdate { - defer t._updateTable() + if doUpdate { + t._updateTable() } return } @@ -546,9 +549,7 @@ func (t *switchTable) _updateTable() { } newTable._init() for _, pinfo := range t.data.peers { - if pinfo.blocked || - pinfo.locator.root != newTable.self.root || - pinfo.key == t.key { + if pinfo.blocked || pinfo.locator.root != newTable.self.root { continue } loc := pinfo.locator.clone() From 0f28862e99f43b2fee347cb51a4c250daf0d3f2e Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 30 May 2020 10:48:59 -0500 Subject: [PATCH 49/51] remove unused sequence number from switch --- src/yggdrasil/switch.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index 9cea91eb..be97466c 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -171,7 +171,6 @@ type switchData struct { // All data that's mutable and used by exported Table methods // To be read/written with atomic.Value Store/Load calls locator switchLocator - seq uint64 // Sequence number, reported to peers, so they know about changes peers map[switchPort]peerInfo msg *switchMsg } @@ -242,7 +241,6 @@ func (t *switchTable) _cleanRoot() { t.parent = switchPort(0) t.time = now if t.data.locator.root != t.key { - t.data.seq++ defer t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} @@ -521,7 +519,6 @@ func (t *switchTable) _handleMsg(msg *switchMsg, fromPort switchPort, reprocessi if updateRoot { doUpdate = true if !equiv(&sender.locator, &t.data.locator) { - t.data.seq++ defer t.core.router.reset(t) } if t.data.locator.tstamp != sender.locator.tstamp { From c83b070c692de8c1c62fed9836c23a08ef0e39ad Mon Sep 17 00:00:00 2001 From: Arceliar Date: Sat, 30 May 2020 13:12:49 -0500 Subject: [PATCH 50/51] remove old switch lookup functions --- src/yggdrasil/switch.go | 69 ----------------------------------------- 1 file changed, 69 deletions(-) diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index be97466c..ed2edf2e 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -611,38 +611,6 @@ func (t *lookupTable) _insert(elem *tableElem) { } } -// This is called via a sync.Once to update the atomically readable subset of switch information that gets used for routing decisions. -func (t *switchTable) old_updateTable() { - // WARNING this should only be called from within t.data.updater.Do() - // It relies on the sync.Once for synchronization with messages and lookups - // TODO use a pre-computed faster lookup table - // Instead of checking distance for every destination every time - // Array of structs, indexed by first coord that differs from self - // Each struct has stores the best port to forward to, and a next coord map - // Move to struct, then iterate over coord maps until you dead end - // The last port before the dead end should be the closest - newTable := lookupTable{ - self: t.data.locator.clone(), - elems: make(map[switchPort]tableElem, len(t.data.peers)), - } - for _, pinfo := range t.data.peers { - //if !pinfo.forward { continue } - if pinfo.locator.root != newTable.self.root { - continue - } - loc := pinfo.locator.clone() - loc.coords = loc.coords[:len(loc.coords)-1] // Remove the them->self link - newTable.elems[pinfo.port] = tableElem{ - locator: loc, - port: pinfo.port, - time: pinfo.time, - } - } - newTable._msg = *t._getMsg() - t.core.peers.updateTables(t, &newTable) - t.core.router.updateTable(t, &newTable) -} - // Starts the switch worker func (t *switchTable) start() error { t.core.log.Infoln("Starting switch") @@ -664,40 +632,3 @@ func (t *lookupTable) lookup(coords []byte) switchPort { } return here.port } - -// Find the best port to forward to for a given set of coords -func (t *lookupTable) old_lookup(coords []byte) switchPort { - var bestPort switchPort - myDist := t.self.dist(coords) - bestDist := myDist - var bestElem tableElem - for _, info := range t.elems { - dist := info.locator.dist(coords) - if dist >= myDist { - continue - } - var update bool - switch { - case dist < bestDist: - // Closer to destination - update = true - case dist > bestDist: - // Further from destination - case info.locator.tstamp > bestElem.locator.tstamp: - // Newer root update - update = true - case info.locator.tstamp < bestElem.locator.tstamp: - // Older root update - case info.time.Before(bestElem.time): - // Received root update via this peer sooner - update = true - default: - } - if update { - bestPort = info.port - bestDist = dist - bestElem = info - } - } - return bestPort -} From d9fd68f18cd4214004e969b546c8f26e915f8934 Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Mon, 6 Jul 2020 14:21:28 +0100 Subject: [PATCH 51/51] Fix build --- src/yggdrasil/api.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 13f4cb10..c800cb0d 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -465,12 +465,14 @@ func (c *Core) RemovePeer(addr string, sintf string) error { } } - ports := c.peers.ports.Load().(map[switchPort]*peer) - for p, peer := range ports { - if addr == peer.intf.name { - c.peers.removePeer(p) + c.peers.Act(nil, func() { + ports := c.peers.ports + for _, peer := range ports { + if addr == peer.intf.name() { + c.peers._removePeer(peer) + } } - } + }) return nil }