store 'faster' relationships between all pairs of peers, to make fallback easier when a parent goes offline

This commit is contained in:
Arceliar 2018-12-02 16:36:25 -06:00
parent 38093219fd
commit dcfe55dae8

View File

@ -18,10 +18,12 @@ import (
"time" "time"
) )
const switch_timeout = time.Minute const (
const switch_updateInterval = switch_timeout / 2 switch_timeout = time.Minute
const switch_throttle = switch_updateInterval / 2 switch_updateInterval = switch_timeout / 2
const switch_faster_threshold = 2880 // 1 update per 30 seconds for 24 hours switch_throttle = switch_updateInterval / 2
switch_faster_threshold = 240 // Number of switch updates before switching to a faster parent
)
// The switch locator represents the topology and network state dependent info about a node, minus the signatures that go with it. // The switch locator represents the topology and network state dependent info about a node, minus the signatures that go with it.
// Nodes will pick the best root they see, provided that the root continues to push out updates with new timestamps. // Nodes will pick the best root they see, provided that the root continues to push out updates with new timestamps.
@ -119,13 +121,13 @@ func (x *switchLocator) isAncestorOf(y *switchLocator) bool {
// Information about a peer, used by the switch to build the tree and eventually make routing decisions. // Information about a peer, used by the switch to build the tree and eventually make routing decisions.
type peerInfo struct { type peerInfo struct {
key sigPubKey // ID of this peer key sigPubKey // ID of this peer
locator switchLocator // Should be able to respond with signatures upon request locator switchLocator // Should be able to respond with signatures upon request
degree uint64 // Self-reported degree degree uint64 // Self-reported degree
time time.Time // Time this node was last seen time time.Time // Time this node was last seen
faster uint16 // Counter of how often a node is faster than the current parent, penalized extra if slower faster map[switchPort]uint64 // Counter of how often a node is faster than the current parent, penalized extra if slower
port switchPort // Interface number of this peer port switchPort // Interface number of this peer
msg switchMsg // The wire switchMsg used msg switchMsg // The wire switchMsg used
} }
// This is just a uint64 with a named type for clarity reasons. // This is just a uint64 with a named type for clarity reasons.
@ -350,8 +352,6 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
prevKey = hop.Next prevKey = hop.Next
} }
sender.msg = *msg sender.msg = *msg
oldSender, isIn := t.data.peers[fromPort]
sender.faster = oldSender.faster
sender.port = fromPort sender.port = fromPort
sender.time = now sender.time = now
// Decide what to do // Decide what to do
@ -370,34 +370,39 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
return true return true
} }
doUpdate := false doUpdate := false
oldSender := t.data.peers[fromPort]
if !equiv(&sender.locator, &oldSender.locator) { if !equiv(&sender.locator, &oldSender.locator) {
doUpdate = true doUpdate = true
} }
// Check if faster than the current parent, and update sender.faster accordingly // Update the matrix of peer "faster" thresholds
switch { if reprocessing {
case reprocessing: sender.faster = oldSender.faster
// Don't change anything if we're just reprocessing old messages. } else {
case !isIn: sender.faster = make(map[switchPort]uint64, len(oldSender.faster))
// Not known, sender.faster == 0, but set it explicitly just to make that obvious to the reader. for port, peer := range t.data.peers {
sender.faster = 0 if port == fromPort {
case msg.Root != oldSender.locator.root: continue
// This is a new root. }
// Honestly not sure if we should reset or do something else. For now, we'll just leave it alone. switch {
case sender.port == t.parent: case msg.Root != peer.locator.root:
// This is the current parent. If roots change, there's a good chance that they're still the best route to the root, so we probably don't want them to converge towards 0. // Different roots, blindly guess that the relationships will stay the same?
// If we leave them alone, then when a different node gets parented, this one will get penalized by a couple of points, so it hopefully shouldn't flap too hard to leave this alone for now. sender.faster[port] = oldSender.faster[peer.port]
case sender.locator.tstamp <= t.data.locator.tstamp: case sender.locator.tstamp <= peer.locator.tstamp:
// This timestamp came in slower than our parent's, so we should penalize them by more than we reward faster nodes. // Slower than this node, penalize (more than the reward amount)
if sender.faster > 1 { if oldSender.faster[port] > 1 {
sender.faster -= 2 sender.faster[port] = oldSender.faster[peer.port] - 2
} else { } else {
// If exactly 1, don't let it roll under sender.faster[port] = 0
sender.faster = 0 }
default:
// We were faster than this node, so increment, as long as we don't overflow because of it
if oldSender.faster[peer.port] < switch_faster_threshold {
sender.faster[port] = oldSender.faster[peer.port] + 1
} else {
sender.faster[port] = switch_faster_threshold
}
}
} }
default:
// They sent us an update faster than our parent did, so reward them.
// FIXME make sure this can't ever roll over. It shouldn't be possible, we'd switch to them as a parent first, but still...
sender.faster++
} }
// Update sender // Update sender
t.data.peers[fromPort] = sender t.data.peers[fromPort] = sender
@ -433,30 +438,30 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
case noParent: case noParent:
// We currently have no working parent, and at this point in the switch statement, anything is better than nothing. // We currently have no working parent, and at this point in the switch statement, anything is better than nothing.
updateRoot = true updateRoot = true
case sender.faster > switch_faster_threshold: case sender.faster[t.parent] >= switch_faster_threshold:
// This node is reliably faster than the current parent. // This node is reliably faster than the current parent.
updateRoot = true updateRoot = true
case reprocessing && len(sender.locator.coords) < len(oldParent.locator.coords): case reprocessing && sender.faster[t.parent] > oldParent.faster[sender.port]:
// We're reprocessing old messages to find a new parent. // The sender seems to be reliably faster than the current parent, so switch to them instead.
// That means we're in the middle of a route flap.
// We don't know how often each node is faster than the others, only relative to the old parent.
// If any of them was faster than the old parent, then we'd probably already be using them.
// So the best we can really do is pick the shortest route and hope it's OK as a starting point.
// TODO: Find some way to reliably store relative order between all peers. Basically a pxp "faster" matrix, more likely a faster port->uint map per peer, but preferably not literally that, since it'd be tedious to manage and probably slows down updates.
updateRoot = true updateRoot = true
case sender.port != t.parent: case sender.port != t.parent:
// Ignore further cases if the sender isn't our parent. // Ignore further cases if the sender isn't our parent.
case !equiv(&sender.locator, &t.data.locator): case !reprocessing && !equiv(&sender.locator, &t.data.locator):
// Special case: // Special case:
// If coords changed, then this may now be a worse parent than before. // If coords changed, then we need to penalize this node somehow, to prevent flapping.
// Re-parent the node (de-parent and reprocess the message). // First, reset all faster-related info to 0.
// Then reprocess *all* messages to look for a better parent. // Then, de-parent the node and reprocess all messages to find a new parent.
// This is so we don't keep using this node as our parent if there's something better.
t.parent = 0 t.parent = 0
t.unlockedHandleMsg(msg, fromPort, true) sender.faster = nil
for _, info := range t.data.peers { for _, peer := range t.data.peers {
t.unlockedHandleMsg(&info.msg, info.port, true) if peer.port == sender.port {
continue
}
delete(peer.faster, sender.port)
t.unlockedHandleMsg(&peer.msg, peer.port, true)
} }
// Process the sender last, to avoid keeping them as a parent if at all possible.
t.unlockedHandleMsg(&sender.msg, sender.port, true)
case now.Sub(t.time) < switch_throttle: case now.Sub(t.time) < switch_throttle:
// We've already gotten an update from this root recently, so ignore this one to avoid flooding. // We've already gotten an update from this root recently, so ignore this one to avoid flooding.
case sender.locator.tstamp > t.data.locator.tstamp: case sender.locator.tstamp > t.data.locator.tstamp: