mirror of
https://github.com/yggdrasil-network/yggdrasil-go.git
synced 2024-11-24 10:35:20 +00:00
904 lines
33 KiB
Python
904 lines
33 KiB
Python
# Tree routing scheme (named Yggdrasil, after the world tree from Norse mythology)
|
|
# Steps:
|
|
# 1: Pick any node, here I'm using highest nodeID
|
|
# 2: Build spanning tree, each node stores path back to root
|
|
# Optionally with weights for each hop
|
|
# Ties broken by preferring a parent with higher degree
|
|
# 3: Distance metric: self->peer + (via tree) peer->dest
|
|
# 4: Perform (modified) greedy lookup via this metric for each direction (A->B and B->A)
|
|
# 5: Source-route traffic using the better of those two paths
|
|
|
|
# Note: This makes no attempt to simulate a dynamic network
|
|
# E.g. A node's peers cannot be disconnected
|
|
|
|
# TODO:
|
|
# Make better use of drop?
|
|
# In particular, we should be ignoring *all* recently dropped *paths* to the root
|
|
# To minimize route flapping
|
|
# Not really an issue in the sim, but probably needed for a real network
|
|
|
|
import array
|
|
import gc
|
|
import glob
|
|
import gzip
|
|
import heapq
|
|
import os
|
|
import random
|
|
import time
|
|
|
|
#############
|
|
# Constants #
|
|
#############
|
|
|
|
# Reminder of where link cost comes in
# (dijkstra() and dijkstrall() add LINK_COST for every hop they extend a path by)
LINK_COST = 1

# Timeout before dropping something, in simulated seconds
# (Node.cleanRoot() drops a root whose info is older than TIMEOUT ticks,
#  Node.cleanDropped() forgets dropped entries after 4*TIMEOUT ticks)
TIMEOUT = 60
|
|
|
|
###########
|
|
# Classes #
|
|
###########
|
|
|
|
class PathInfo:
    """Plain record describing one node as seen through routing messages.

    Fields:
      nodeID -- identifier of the described node (e.g. an IP)
      coords -- position in the tree, as a list of node IDs
      tstamp -- timestamp from the sender, to tell old info from new
      degree -- number of peers the sender has, used to break ties
      path   -- path to the node (path-vector route), starts as [nodeID]
      time   -- local time the info was last updated (for timeouts)
      treeID -- ID used for root election; starts equal to nodeID
    """

    def __init__(self, nodeID):
        self.nodeID = nodeID  # e.g. IP
        self.coords = []  # Position in tree
        self.tstamp = 0  # Timestamp from sender, to keep track of old vs new info
        self.degree = 0  # Number of peers the sender has, used to break ties
        # The above should be signed
        self.path = [nodeID]  # Path to node (in path-vector route)
        self.time = 0  # Time info was updated, to keep track of e.g. timeouts
        # Hack, let tree use different ID than IP, used so we can dijkstra
        # once and test many roots
        self.treeID = nodeID

    def clone(self):
        """Return a copy whose coords/path lists are independent of ours."""
        twin = PathInfo(self.nodeID)
        twin.coords = list(self.coords)
        twin.path = list(self.path)
        twin.tstamp, twin.degree = self.tstamp, self.degree
        twin.time, twin.treeID = self.time, self.treeID
        return twin
# End class PathInfo
|
|
|
|
class Node:
    """One simulated network node running the spanning-tree protocol.

    A node keeps its own PathInfo, the PathInfo of the root it currently
    follows, the PathInfo most recently received from each peer, and direct
    references to neighbouring Node objects through which messages are
    delivered (by appending to the neighbour's msgs list).
    """

    def __init__(self, nodeID):
        self.info = PathInfo(nodeID)  # Self NodeInfo
        self.root = None  # PathInfo to node at root of tree
        self.drop = dict()  # PathInfo to nodes from clus that have timed out
        self.peers = dict()  # PathInfo to peers
        self.links = dict()  # Links to peers (to pass messages)
        self.msgs = []  # Said messages
        self.table = dict()  # Pre-computed lookup table of peer info

    def tick(self):
        """Advance the local clock one step and do periodic maintenance.

        Refreshes our own timestamp once it is more than TIMEOUT/4 old,
        cleans up the root and the drop list, and pushes an update to every
        link on the tick where the timestamp was refreshed.

        Returns True if cleanRoot() changed the root, False otherwise.
        """
        self.info.time += 1
        if self.info.time > self.info.tstamp + TIMEOUT/4:
            # Update timestamp at least once every 1/4 timeout period
            # This should probably be randomized in a real implementation
            self.info.tstamp = self.info.time
            self.info.degree = 0  # TODO decide if degree should be used, len(self.peers)
        changed = False  # Used to track when the network has converged
        changed |= self.cleanRoot()
        self.cleanDropped()
        # Should probably send messages infrequently if there's nothing new to report
        if self.info.tstamp == self.info.time:
            self.sendMessage(self.createMessage())
        return changed

    def cleanRoot(self):
        """Drop a timed-out root and (re)elect ourselves when appropriate.

        Returns True if the root was dropped or replaced by ourselves
        because we had no root or a root with a lower treeID.
        """
        changed = False
        if self.root and self.info.time - self.root.time > TIMEOUT:
            print("DEBUG: clean root, {}".format(self.root.path))
            self.drop[self.root.treeID] = self.root
            self.root = None
            changed = True
        if not self.root or self.root.treeID < self.info.treeID:
            # No need to drop someone who's worse than us
            self.info.coords = [self.info.nodeID]
            self.root = self.info.clone()
            changed = True
        elif self.root.treeID == self.info.treeID:
            # We are the root: keep the stored copy in sync with our own info
            self.root = self.info.clone()
        return changed

    def cleanDropped(self):
        """Forget dropped roots whose info is more than 4*TIMEOUT old."""
        # May actually be a treeID... better to iterate over keys explicitly
        # (a snapshot of the keys, because entries are deleted while looping)
        for nodeID in list(self.drop.keys()):
            if self.info.time - self.drop[nodeID].time > 4*TIMEOUT:
                del self.drop[nodeID]
        return None

    def createMessage(self):
        """Build the (sender info, root info) tuple that gets broadcast."""
        # Message is just a tuple
        # First element is the sender
        # Second element is the root
        # We will .clone() everything during the send operation
        return (self.info, self.root)

    def sendMessage(self, msg):
        """Append an independent clone of msg to the inbox of every link."""
        for link in self.links.values():
            link.msgs.append((msg[0].clone(), msg[1].clone()))
        return None

    def handleMessages(self):
        """Process every queued message (most recently queued first).

        Returns True if any message changed the path to our root.
        """
        changed = False
        while self.msgs:
            changed |= self.handleMessage(self.msgs.pop())
        return changed

    def handleMessage(self, msg):
        """Apply one (sender, root) message; return True if our root path changed."""
        changed = False
        for node in msg:
            # Update the path and timestamp for the sender and root info
            node.path.append(self.info.nodeID)
            node.time = self.info.time
        # Update the sender's info in our list of peers
        sender = msg[0]
        self.peers[sender.nodeID] = sender
        # Decide if we want to update the root
        root = msg[1]
        updateRoot = False
        isSameParent = False
        isBetterParent = False
        # Guard on self.root: the chain below has an explicit "no root yet"
        # case, so a missing root must not crash here first
        if self.root and len(self.root.path) > 1 and len(root.path) > 1:
            parent = self.peers[self.root.path[-2]]
            if parent.nodeID == sender.nodeID: isSameParent = True
            if sender.degree > parent.degree:
                # This would also be where you check path uptime/reliability/whatever
                # All else being equal, we prefer parents with high degree
                # We are trusting peers to report degree correctly in this case
                # So expect some performance reduction if your peers aren't trustworthy
                # (Lies can increase average stretch by a few %)
                isBetterParent = True
        if self.info.nodeID in root.path[:-1]: pass  # No loopy routes allowed
        elif root.treeID in self.drop and self.drop[root.treeID].tstamp >= root.tstamp: pass
        elif not self.root: updateRoot = True
        elif self.root.treeID < root.treeID: updateRoot = True
        elif self.root.treeID != root.treeID: pass
        elif self.root.tstamp > root.tstamp: pass
        elif len(root.path) < len(self.root.path): updateRoot = True
        elif isBetterParent and len(root.path) == len(self.root.path): updateRoot = True
        elif isSameParent and self.root.tstamp < root.tstamp: updateRoot = True
        if updateRoot:
            if not self.root or self.root.path != root.path: changed = True
            self.root = root
            # Copy, so our coords never share a list object with the root's path
            self.info.coords = list(self.root.path)
        return changed

    def lookup(self, dest):
        """Return the nodeID of the peer that is the best next hop to dest.

        dest is a PathInfo; only dest.coords is read.  The best peer is the
        one minimising (hops to the peer) + (tree distance from the peer to
        dest); on ties the first peer encountered wins.

        Raises AssertionError if this node has no peers at all.
        """
        # Note: Can loop in an unconverged network
        # The person looking up the route is responsible for checking for loops
        best = None
        bestDist = 0
        for node in self.peers.values():
            # dist = distance to node + dist (on tree) from node to dest
            dist = len(node.path)-1 + treeDist(node.coords, dest.coords)
            if not best or dist < bestDist:
                best = node
                bestDist = dist
        if not best:
            # We failed to look something up
            # TODO some way to signal this which doesn't crash
            # (explicit raise, so the failure is not silenced by python -O)
            raise AssertionError("lookup failed: node {} has no peers".format(self.info.nodeID))
        nextHop = best.path[-2]
        assert nextHop in self.peers
        return nextHop

    def initTable(self):
        """Rebuild self.table, a trie keyed by successive destination coords.

        Each trie entry is a tuple (peer nodeID to use as next hop, sub-trie).
        """
        # Pre-computes a lookup table for destination coords
        # Insert parent first so you prefer them as a next-hop
        self.table.clear()
        parent = self.info.nodeID
        if len(self.info.coords) >= 2: parent = self.info.coords[-2]
        for peer in self.peers.values():
            current = self.table
            for coord in peer.coords:
                if coord not in current: current[coord] = (peer.nodeID, dict())
                old = current[coord]
                subTable = old[1]
                oldPeer = self.peers[old[0]]
                oldDist = len(oldPeer.coords)
                oldDeg = oldPeer.degree
                newDist = len(peer.coords)
                newDeg = peer.degree
                # Prefer parent
                # Else prefer short distance from root
                # If equal distance, prefer high degree
                if peer.nodeID == parent: current[coord] = (peer.nodeID, subTable)
                elif newDist < oldDist: current[coord] = (peer.nodeID, subTable)
                elif newDist == oldDist and newDeg > oldDeg: current[coord] = (peer.nodeID, subTable)
                current = subTable
        return None

    def lookup_new(self, dest):
        """Return the next hop for dest using the table built by initTable().

        dest is a PathInfo; only dest.coords is read.  Falls back to our own
        nodeID when no usable next hop exists.
        """
        # Use pre-computed lookup table to look up next hop for dest coords
        assert self.table
        if len(self.info.coords) >= 2: parent = self.info.coords[-2]
        else: parent = None
        current = (parent, self.table)
        lastCoord = None
        for coord in dest.coords:
            lastCoord = coord
            if coord not in current[1]: break
            current = current[1][coord]
        nextHop = current[0]
        # If the last coord we examined is itself a peer, go there directly
        if lastCoord in self.peers: nextHop = lastCoord
        if nextHop not in self.peers:
            assert nextHop is None
            # You're the root of a different connected component
            # You'd drop the packet in this case
            # To make the path cache not die, need to return a valid next hop...
            # Returning self for that reason
            nextHop = self.info.nodeID
        return nextHop
# End class Node
|
|
|
|
####################
|
|
# Helper Functions #
|
|
####################
|
|
|
|
def getIndexOfLCA(source, dest):
    """Return the index of the last common ancestor of two coord lists.

    source and dest are sequences of node IDs read from the root downward.
    The result is the last index of their common prefix, or -1 if they have
    no common ancestor (e.g. different roots, or an empty sequence).
    """
    lcaIdx = -1
    # zip() stops at the shorter sequence, which is the furthest a common
    # prefix can extend
    for idx, (sourceCoord, destCoord) in enumerate(zip(source, dest)):
        if sourceCoord != destCoord: break
        lcaIdx = idx
    return lcaIdx
|
|
|
|
def treePath(source, dest):
    # Return the path through the tree between two coord lists.
    # As enforced by the asserts below, the returned list starts with the
    # dest node (dest[-1]) and ends with the source node (source[-1]):
    # it walks up from dest to just below the last common ancestor, then
    # from the common ancestor down to source.
    lastMatch = getIndexOfLCA(source, dest)
    # NOTE(review): when there is no common ancestor (lastMatch == -1) the
    # slice dest[-1:-1:-1] is empty, so the first assert fails unless
    # source[-1] == dest[-1]
    path = dest[-1:lastMatch:-1] + source[lastMatch:]
    assert path[0] == dest[-1]
    assert path[-1] == source[-1]
    return path
|
|
|
|
def treeDist(source, dest):
    """Return the number of tree hops between two coord lists.

    This is the number of coords each side has beyond the common prefix
    (as found by getIndexOfLCA), summed over both sides.
    """
    sharedLen = getIndexOfLCA(source, dest) + 1
    return (len(source) - sharedLen) + (len(dest) - sharedLen)
|
|
|
|
def dijkstra(nodestore, startingNodeID):
    """Return shortest-path distances from one node to every node.

    nodestore maps nodeID -> object whose .links iterates over peer nodeIDs.
    The result is an array.array("H") indexed by each node's position in
    sorted(nodestore.keys()).  Every hop costs LINK_COST.  The starting node
    and any unreachable node have distance 0.
    """
    # Idea to use heapq and basic implementation taken from stackexchange post
    # http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
    nodeIDs = sorted(nodestore.keys())
    nNodes = len(nodeIDs)
    idxs = {nodeID: nodeIdx for nodeIdx, nodeID in enumerate(nodeIDs)}
    dists = array.array("H", [0]*nNodes)
    # Visited flags are kept separately from the distances: a distance of 0
    # is the legitimate value for the starting node, so it cannot double as
    # an "unvisited" marker (doing so let the start be revisited and
    # overwritten with 2*LINK_COST)
    visited = bytearray(nNodes)
    queue = [(0, startingNodeID)]
    while queue:
        dist, nodeID = heapq.heappop(queue)
        idx = idxs[nodeID]
        if visited[idx]: continue  # Already settled with a shorter distance
        visited[idx] = 1
        dists[idx] = dist
        for peer in nodestore[nodeID].links:
            if not visited[idxs[peer]]:
                # Peer is also unvisited, so add to queue
                heapq.heappush(queue, (dist+LINK_COST, peer))
    return dists
|
|
|
|
def dijkstrall(nodestore):
    """Return all-pairs shortest-path distances as one flat array.

    nodestore maps nodeID -> object whose .links iterates over peer nodeIDs.
    Nodes are numbered by their position in sorted(nodestore.keys()); the
    distance from node index s to node index d is stored at
    getCacheIndex(nNodes, s, d) in an array.array("H").  Every hop costs
    LINK_COST.  A node's distance to itself, and to any unreachable node,
    is 0.
    """
    # Idea to use heapq and basic implementation taken from stackexchange post
    # http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
    nodeIDs = sorted(nodestore.keys())
    nNodes = len(nodeIDs)
    idxs = {nodeID: nodeIdx for nodeIdx, nodeID in enumerate(nodeIDs)}
    dists = array.array("H", [0]*nNodes*nNodes)  # use GetCacheIndex(nNodes, start, end)
    for sourceIdx, sourceID in enumerate(nodeIDs):
        print("Finding shortest paths for node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID))
        # Separate visited flags: distance 0 is valid for the source itself,
        # so it must not be reused as the "unvisited" marker
        visited = bytearray(nNodes)
        queue = [(0, sourceIdx)]
        while queue:
            dist, nodeIdx = heapq.heappop(queue)
            if visited[nodeIdx]: continue  # Already settled
            visited[nodeIdx] = 1
            dists[getCacheIndex(nNodes, sourceIdx, nodeIdx)] = dist
            for peer in nodestore[nodeIDs[nodeIdx]].links:
                pIdx = idxs[peer]
                if not visited[pIdx]:
                    # Peer is also unvisited, so add to queue
                    heapq.heappush(queue, (dist+LINK_COST, pIdx))
    return dists
|
|
|
|
def linkNodes(node1, node2):
    """Register node1 and node2 in each other's links dict, keyed by nodeID."""
    for near, far in ((node1, node2), (node2, node1)):
        near.links[far.info.nodeID] = far
|
|
|
|
############################
|
|
# Store topology functions #
|
|
############################
|
|
|
|
def makeStoreSquareGrid(sideLength, randomize=True):
    """Build a store of sideLength*sideLength nodes linked as a square grid.

    Grid positions run row by row; each position is linked to its left and
    upper neighbour.  When randomize is true the node IDs are shuffled over
    the grid positions (using the random module's global state).

    Returns a dict mapping nodeID -> Node.
    """
    # Simple grid in a sideLength*sideLength square
    # Just used to validate that the code runs
    store = dict()
    nodeIDs = list(range(sideLength*sideLength))
    if randomize: random.shuffle(nodeIDs)
    for nodeID in nodeIDs:
        store[nodeID] = Node(nodeID)
    for index, nodeID in enumerate(nodeIDs):
        # Not in the first column: link to the node on the left
        if (index % sideLength != 0): linkNodes(store[nodeID], store[nodeIDs[index-1]])
        # Not in the first row: link to the node above
        if (index >= sideLength): linkNodes(store[nodeID], store[nodeIDs[index-sideLength]])
    print("Grid store created, size {}".format(len(store)))
    return store
|
|
|
|
def makeStoreASRelGraph(pathToGraph):
    """Build a store from a caida.org AS-relationship file.

    Each data line has the form ASx|ASy|z (z denotes the relationship type
    and is ignored).  Blank lines and lines starting with "#" are skipped.

    Returns a dict mapping integer AS number -> Node, with both endpoints of
    every line linked to each other.
    """
    #Existing network graphs, in caida.org's asrel format (ASx|ASy|z per line, z denotes relationship type)
    with open(pathToGraph, "r") as f:
        inData = f.readlines()
    store = dict()
    for line in inData:
        line = line.strip()
        if not line or line[0] == "#": continue  # Skip blank and comment lines
        nodes = [int(field) for field in line.replace('|', " ").split()[0:2]]
        for nodeID in nodes:
            if nodeID not in store: store[nodeID] = Node(nodeID)
        linkNodes(store[nodes[0]], store[nodes[1]])
    print("CAIDA AS-relation graph successfully imported, size {}".format(len(store)))
    return store
|
|
|
|
def makeStoreASRelGraphMaxDeg(pathToGraph, degIdx=0):
    """Build an AS-relationship store rooted at a high-degree node.

    Counts how many lines of the file mention each node, ranks nodes by
    (count, nodeID) in descending order, and passes the node at rank degIdx
    (0 = highest) to makeStoreASRelGraphFixedRoot as the root.

    Blank lines and lines starting with "#" are skipped.
    Returns whatever makeStoreASRelGraphFixedRoot returns.
    """
    with open(pathToGraph, "r") as f:
        inData = f.readlines()
    nodeDeg = dict()
    for line in inData:
        line = line.strip()
        if not line or line[0] == "#": continue  # Skip blank and comment lines
        nodes = [int(field) for field in line.replace('|', " ").split()[0:2]]
        # Index explicitly so a line with a single field still fails loudly
        for nodeID in (nodes[0], nodes[1]):
            nodeDeg[nodeID] = nodeDeg.get(nodeID, 0) + 1
    sortedNodes = sorted(nodeDeg.keys(),
                         key=lambda x: (nodeDeg[x], x),
                         reverse=True)
    maxDegNodeID = sortedNodes[degIdx]
    return makeStoreASRelGraphFixedRoot(pathToGraph, maxDegNodeID)
|
|
|
|
def makeStoreASRelGraphFixedRoot(pathToGraph, rootNodeID):
    """Build an AS-relationship store in which rootNodeID wins root election.

    Same file format as makeStoreASRelGraph (ASx|ASy|z per line).  The node
    whose ID equals rootNodeID keeps its nodeID but gets 1000000000 added to
    its treeID when it is created.  Blank lines and lines starting with "#"
    are skipped.

    Returns a dict mapping integer AS number -> Node.
    """
    with open(pathToGraph, "r") as f:
        inData = f.readlines()
    store = dict()
    for line in inData:
        line = line.strip()
        if not line or line[0] == "#": continue  # Skip blank and comment lines
        nodes = [int(field) for field in line.replace('|', " ").split()[0:2]]
        for nodeID in nodes:
            if nodeID not in store:
                store[nodeID] = Node(nodeID)
                # Only the treeID is boosted, so node IDs stay the same for
                # every choice of root
                if nodeID == rootNodeID: store[nodeID].info.treeID += 1000000000
        linkNodes(store[nodes[0]], store[nodes[1]])
    print("CAIDA AS-relation graph successfully imported, size {}".format(len(store)))
    return store
|
|
|
|
def makeStoreDimesEdges(pathToGraph, rootNodeID=None):
    """Build a store from a gzip-compressed DIMES csv edge list.

    The first two comma-separated fields of each line are the edge's
    endpoints; node IDs are those fields prefixed with "N".  An endpoint
    whose "N"-prefixed ID equals rootNodeID is renamed with an "R" prefix
    instead.  Edges with a "?" in either endpoint, and lines with fewer than
    two fields (e.g. blank lines), are skipped.  Self-edges create the node
    but no link.

    Returns a dict mapping nodeID string -> Node.
    """
    # Read from a DIMES csv-formatted graph from a gzip file
    store = dict()
    with gzip.open(pathToGraph, "r") as f:
        inData = f.readlines()
    size = len(inData)
    for index, edge in enumerate(inData):
        if not index % 1000:
            pct = 100.0*index/size
            print("Processing edge {}, {:.2f}%".format(index, pct))
        # gzip yields bytes on Python 3; work on text either way
        if not isinstance(edge, str): edge = edge.decode("utf-8")
        dat = edge.rstrip().split(',')
        if len(dat) < 2: continue  # Blank or malformed line, no edge here
        node1 = "N" + str(dat[0].strip())
        node2 = "N" + str(dat[1].strip())
        if '?' in node1 or '?' in node2: continue  #Unknown node
        if node1 == rootNodeID: node1 = "R" + str(dat[0].strip())
        if node2 == rootNodeID: node2 = "R" + str(dat[1].strip())
        if node1 not in store: store[node1] = Node(node1)
        if node2 not in store: store[node2] = Node(node2)
        if node1 != node2: linkNodes(store[node1], store[node2])
    print("DIMES graph successfully imported, size {}".format(len(store)))
    return store
|
|
|
|
def makeStoreGeneratedGraph(pathToGraph, root=None):
    """Build a store from a whitespace-separated integer edge list.

    The first two fields of each line are the edge's endpoints.  An endpoint
    equal to root has 1000000 added to its node ID.  Blank lines and lines
    starting with "#" are skipped.

    Returns a dict mapping integer nodeID -> Node.
    """
    with open(pathToGraph, "r") as f:
        inData = f.readlines()
    store = dict()
    for line in inData:
        line = line.strip()
        if not line or line[0] == "#": continue  # Skip blank and comment lines
        # split() with no argument tolerates repeated spaces and tabs
        nodes = [int(field) for field in line.split()[0:2]]
        node1 = nodes[0]
        node2 = nodes[1]
        if node1 == root: node1 += 1000000
        if node2 == root: node2 += 1000000
        if node1 not in store: store[node1] = Node(node1)
        if node2 not in store: store[node2] = Node(node2)
        linkNodes(store[node1], store[node2])
    print("Generated graph successfully imported, size {}".format(len(store)))
    return store
|
|
|
|
|
|
############################################
|
|
# Functions used as parts of network tests #
|
|
############################################
|
|
|
|
def idleUntilConverged(store):
    """Step the simulated network until nothing changed for 4*TIMEOUT steps.

    Every step first calls tick() on every node (in sorted nodeID order) and
    then handleMessages() on every node.  Afterwards the lookup tables are
    rebuilt with initTables().

    Returns the same store that was passed in.
    """
    nodeIDs = sorted(store.keys())
    timeOfLastChange = 0
    step = 0
    # Idle until the network has converged
    while step - timeOfLastChange < 4*TIMEOUT:
        step += 1
        print("Step: {}, last change: {}".format(step, timeOfLastChange))
        changed = False
        for nodeID in nodeIDs:
            # Update node status, send messages
            changed |= store[nodeID].tick()
        for nodeID in nodeIDs:
            # Process messages
            changed |= store[nodeID].handleMessages()
        if changed: timeOfLastChange = step
    initTables(store)
    return store
|
|
|
|
def getCacheIndex(nodes, sourceIndex, destIndex):
    """Return the flat row-major index of (sourceIndex, destIndex) in a nodes-wide matrix."""
    rowStart = sourceIndex*nodes
    return rowStart + destIndex
|
|
|
|
def initTables(store):
    """Call initTable() on every node in the store, in sorted nodeID order."""
    nodeIDs = sorted(store.keys())
    nNodes = len(nodeIDs)
    print("Initializing routing tables for {} nodes".format(nNodes))
    for nodeID in nodeIDs:
        store[nodeID].initTable()
    print("Routing tables initialized")
    return None
|
|
|
|
def getCache(store):
    """Pre-compute the next hop for every (source, dest) pair of nodes.

    Nodes are numbered by their position in sorted(store.keys()).  The
    result is a flat array.array("H") in which the entry at
    getCacheIndex(nNodes, sourceIdx, destIdx) is the index of the node that
    source's lookup() picks as next hop towards dest (or dest itself when
    source == dest).
    """
    nodeIDs = sorted(store.keys())
    nNodes = len(nodeIDs)
    nodeIdxs = {nodeID: nodeIdx for nodeIdx, nodeID in enumerate(nodeIDs)}
    cache = array.array("H", [0]*nNodes*nNodes)
    for sourceIdx, sourceID in enumerate(nodeIDs):
        print("Building fast lookup table for node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID))
        for destIdx, destID in enumerate(nodeIDs):
            if sourceID == destID: nextHop = destID  # lookup would fail
            else: nextHop = store[sourceID].lookup(store[destID].info)
            cache[getCacheIndex(nNodes, sourceIdx, destIdx)] = nodeIdxs[nextHop]
    return cache
|
|
|
|
def testPaths(store, dists):
    """Compare routed path lengths against shortest-path lengths.

    dists is a flat all-pairs distance array laid out with getCacheIndex
    over sorted(store.keys()) (as produced by dijkstrall).  For every
    ordered pair of distinct nodes with a non-zero distance, the route is
    walked hop by hop through the next-hop cache from getCache().

    Returns a dict of dicts: results[eHops][hops] is the number of pairs
    whose shortest path is eHops long and whose routed path is hops long.

    Raises RuntimeError if a route takes more hops than there are nodes,
    which can only happen when the next hops form a loop.
    """
    cache = getCache(store)
    nodeIDs = sorted(store.keys())
    nNodes = len(nodeIDs)
    results = dict()
    for sourceIdx, sourceID in enumerate(nodeIDs):
        print("Testing paths from node {} / {} ({})".format(sourceIdx+1, len(nodeIDs), sourceID))
        #dists = dijkstra(store, sourceID)
        for destIdx in range(nNodes):
            if destIdx == sourceIdx: continue  # Skip self
            eHops = dists[getCacheIndex(nNodes, sourceIdx, destIdx)]
            if not eHops: continue  # The network is split, no path exists
            hops = 0
            locIdx = sourceIdx
            while locIdx != destIdx:
                locIdx = cache[getCacheIndex(nNodes, locIdx, destIdx)]
                hops += 1
                # lookup() can loop in an unconverged network and leaves loop
                # detection to the caller; fail instead of spinning forever
                if hops > nNodes:
                    raise RuntimeError("Routing loop between {} and {}".format(sourceID, nodeIDs[destIdx]))
            byHops = results.setdefault(eHops, dict())
            byHops[hops] = byHops.get(hops, 0) + 1
    return results
|
|
|
|
def getAvgStretch(pathMatrix):
    """Return the count-weighted mean of nHops/eHops over a path matrix.

    pathMatrix[eHops][nHops] is a count of paths.  An eHops of 0 is treated
    as 1, and an empty matrix gives 0.0.
    """
    total = 0.
    seen = 0.
    for eHops in sorted(pathMatrix.keys()):
        row = pathMatrix[eHops]
        expected = float(max(1, eHops))
        for nHops in sorted(row.keys()):
            total += (float(nHops)/expected)*row[nHops]
            seen += row[nHops]
    return total / max(1, seen)
|
|
|
|
def getMaxStretch(pathMatrix):
    """Return the largest nHops/eHops ratio present in a path matrix.

    pathMatrix[eHops][nHops] is a count of paths (the counts themselves are
    not used).  An eHops of 0 is treated as 1; an empty matrix gives 0.0.
    """
    worst = 0.
    for eHops, row in sorted(pathMatrix.items()):
        expected = float(max(1, eHops))
        for nHops in sorted(row.keys()):
            worst = max(worst, float(nHops)/expected)
    return worst
|
|
|
|
def getCertSizes(store):
    """Return a frequency distribution of distinct certs held per node.

    For each peer of a node, the hop sequence is the peer's coords followed
    by the rest of the peer's path; one cert is counted per hop whose sender
    is not the node itself.  Certs shared between peers are counted once.

    Returns a dict mapping cert count -> number of nodes with that count.
    """
    # Returns nCerts frequency distribution
    # De-duplicates common certs (for shared prefixes in the path)
    sizes = dict()
    for node in store.values():
        certs = set()
        for peer in node.peers.values():
            assert len(peer.path) == 2
            assert peer.coords[-1] == peer.path[0]
            hops = peer.coords + peer.path[1:]
            for hopIdx, send in enumerate(hops[:-1]):
                if send == node.info.nodeID: continue  # We created it, already have it
                path = hops[0:hopIdx+2]
                # Each cert is signed by the sender
                # Includes information about the path from the sender to the next hop
                # Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
                certs.add("{}:{}".format(send, path))
        size = len(certs)
        sizes[size] = sizes.get(size, 0) + 1
    return sizes
|
|
|
|
def getMinLinkCertSizes(store):
    """Return a frequency distribution of certs obtainable only via one link.

    For every (node, peer) pair, the certs for that peer are built from the
    peer's coords followed by the rest of its path (skipping hops sent by
    the node itself); a cert counts towards the pair only if no other peer
    of the same node has it too.

    Returns a dict mapping cert count -> number of (node, peer) pairs.
    """
    # Returns nCerts frequency distribution
    # De-duplicates common certs (for shared prefixes in the path)
    # Based on the minimum number of certs that must be traded through a particular link
    # Handled per link
    sizes = dict()
    for node in store.values():
        peerCerts = dict()
        for peer in node.peers.values():
            pCerts = set()
            assert len(peer.path) == 2
            assert peer.coords[-1] == peer.path[0]
            hops = peer.coords + peer.path[1:]
            for hopIdx, send in enumerate(hops[:-1]):
                if send == node.info.nodeID: continue  # We created it, already have it
                path = hops[0:hopIdx+2]
                # Each cert is signed by the sender
                # Includes information about the path from the sender to the next hop
                # Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
                pCerts.add("{}:{}".format(send, path))
            peerCerts[peer.nodeID] = pCerts
        for peer, pCerts in peerCerts.items():
            size = 0
            for cert in pCerts:
                # Required through this link only if no other peer offers it
                elsewhere = any(cert in p2Certs
                                for p2, p2Certs in peerCerts.items()
                                if p2 != peer)
                if not elsewhere: size += 1
            sizes[size] = sizes.get(size, 0) + 1
    return sizes
|
|
|
|
def getPathSizes(store):
    """Return a frequency distribution of total routing-table hops per node.

    Each peer contributes len(coords) + len(path) - 1 hops (the peer itself
    appears in both lists, hence the -1).  E.g. a node with 3 peers, each
    with a 5 hop coord+path, counts as 3x5=15.

    Returns a dict mapping total hop count -> number of nodes.
    """
    sizes = dict()
    for node in store.values():
        total = 0
        for peer in node.peers.values():
            assert len(peer.path) == 2
            assert peer.coords[-1] == peer.path[0]
            total += len(peer.coords) + len(peer.path) - 1  # double-counts peer, -1
        sizes[total] = sizes.get(total, 0) + 1
    return sizes
|
|
|
|
def getPeerSizes(store):
    """Return a dict mapping peer count -> number of nodes with that many peers."""
    sizes = dict()
    for node in store.values():
        nPeers = len(node.peers)
        sizes[nPeers] = sizes.get(nPeers, 0) + 1
    return sizes
|
|
|
|
def getAvgSize(sizes):
    """Return the mean size of a {size: count} frequency distribution.

    An empty distribution gives 0.0.
    """
    weighted = sum(size*count for size, count in sizes.items())
    population = sum(sizes.values())
    return float(weighted)/max(1, population)
|
|
|
|
def getMaxSize(sizes):
    """Return the largest size (key) present in a {size: count} distribution."""
    largest = max(iter(sizes))
    return largest
|
|
|
|
def getMinSize(sizes):
    """Return the smallest size (key) present in a {size: count} distribution."""
    smallest = min(iter(sizes))
    return smallest
|
|
|
|
def getResults(pathMatrix):
    """Render a path matrix as text, one "eHops nHops count" line per entry.

    Lines are ordered by eHops, then nHops; an empty matrix gives "".
    """
    return '\n'.join(
        "{} {} {}".format(eHops, nHops, pathMatrix[eHops][nHops])
        for eHops in sorted(pathMatrix.keys())
        for nHops in sorted(pathMatrix[eHops].keys())
    )
|
|
|
|
####################################
|
|
# Functions to run different tests #
|
|
####################################
|
|
|
|
def runTest(store):
    """Run the usual set of tests on a store and print a summary.

    Randomizes every node's starting time, computes all-pairs shortest
    paths, lets the network converge, then prints stretch, peer, cert and
    path-size statistics.  Nothing is saved or returned.
    """
    # Runs the usual set of tests on the store
    # Does not save results, so only meant for quick tests
    # To e.g. check the code works, maybe warm up the pypy jit
    for node in store.values():
        node.info.time = random.randint(0, TIMEOUT)
        node.info.tstamp = TIMEOUT
    print("Begin testing network")
    dists = dijkstrall(store)
    idleUntilConverged(store)
    pathMatrix = testPaths(store, dists)
    avgStretch = getAvgStretch(pathMatrix)
    maxStretch = getMaxStretch(pathMatrix)
    peers = getPeerSizes(store)
    certs = getCertSizes(store)
    paths = getPathSizes(store)
    linkCerts = getMinLinkCertSizes(store)
    avgPeerSize = getAvgSize(peers)
    maxPeerSize = getMaxSize(peers)
    avgCertSize = getAvgSize(certs)
    maxCertSize = getMaxSize(certs)
    avgPathSize = getAvgSize(paths)
    maxPathSize = getMaxSize(paths)
    avgLinkCert = getAvgSize(linkCerts)
    maxLinkCert = getMaxSize(linkCerts)
    totalCerts = sum(size*count for size, count in certs.items())
    totalLinks = sum(size*count for size, count in peers.items())  # one-way links
    avgCertsPerLink = float(totalCerts)/max(1, totalLinks)
    print("Finished testing network")
    print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
    print("Avg / Max nPeers size: {} / {}".format(avgPeerSize, maxPeerSize))
    print("Avg / Max nCerts size: {} / {}".format(avgCertSize, maxCertSize))
    print("Avg / Max total hops in any node's routing table: {} / {}".format(avgPathSize, maxPathSize))
    print("Avg / Max lower bound cert requests per link (one-way): {} / {}".format(avgLinkCert, maxLinkCert))
    print("Avg certs per link (one-way): {}".format(avgCertsPerLink))
    return  # End of function
|
|
|
|
def rootNodeASTest(path, outDir="output-treesim-AS", dists=None, proc = 1):
    """Measure routing performance for different choices of root node.

    path   -- input path to some caida.org formatted AS-relationship graph
    outDir -- directory receiving one result file per root (named after the
              root's node ID); roots whose file already exists are skipped
    dists  -- optional pre-computed all-pairs distance array; computed with
              dijkstrall() on first use when empty or None, then reused
    proc   -- only roots whose index in the sorted node list is divisible by
              proc are processed
    """
    # Checks performance for every possible choice of root node
    # Saves output for each root node to a separate file on disk
    if not os.path.exists(outDir): os.makedirs(outDir)
    assert os.path.exists(outDir)
    store = makeStoreASRelGraph(path)
    nodes = sorted(store.keys())
    for nodeIdx, rootNodeID in enumerate(nodes):
        if nodeIdx % proc != 0: continue  # Work belongs to someone else
        outpath = outDir+"/{}".format(rootNodeID)
        if os.path.exists(outpath):
            print("Skipping {}, already processed".format(rootNodeID))
            continue
        store = makeStoreASRelGraphFixedRoot(path, rootNodeID)
        for node in store.values():
            node.info.time = random.randint(0, TIMEOUT)
            node.info.tstamp = TIMEOUT
        print("Beginning {}, size {}".format(nodeIdx, len(store)))
        # Node IDs (and so the matrix layout) are the same for every root
        # here, because only the root's treeID is changed
        if not dists: dists = dijkstrall(store)
        idleUntilConverged(store)
        pathMatrix = testPaths(store, dists)
        avgStretch = getAvgStretch(pathMatrix)
        maxStretch = getMaxStretch(pathMatrix)
        results = getResults(pathMatrix)
        with open(outpath, "w") as f:
            f.write(results)
        print("Finished test for root AS {} ({} / {})".format(rootNodeID, nodeIdx+1, len(store)))
        print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
        #break # Stop after 1, because they can take forever
    return  # End of function
|
|
|
|
def timelineASTest():
    """Run the stretch test on every graph in asrel/datasets/.

    Each graph is rooted at its highest-degree node.  Results are written to
    output-treesim-timeline-AS/<name>, where <name> is the input file's base
    name up to the first "."; inputs whose result file exists are skipped.
    """
    # Meant to study the performance of the network as a function of network size
    # Loops over a set of AS-relationship graphs
    # Runs a test on each graph, selecting highest-degree node as the root
    # Saves results for each graph to a separate file on disk
    outDir = "output-treesim-timeline-AS"
    if not os.path.exists(outDir): os.makedirs(outDir)
    assert os.path.exists(outDir)
    paths = sorted(glob.glob("asrel/datasets/*"))
    for path in paths:
        date = os.path.basename(path).split(".")[0]
        outpath = outDir+"/{}".format(date)
        if os.path.exists(outpath):
            print("Skipping {}, already processed".format(date))
            continue
        store = makeStoreASRelGraphMaxDeg(path)
        for node in store.values():
            node.info.time = random.randint(0, TIMEOUT)
            node.info.tstamp = TIMEOUT
        print("Beginning {}, size {}".format(date, len(store)))
        # Every input file is a different graph, so distances are recomputed
        dists = dijkstrall(store)
        idleUntilConverged(store)
        pathMatrix = testPaths(store, dists)
        avgStretch = getAvgStretch(pathMatrix)
        maxStretch = getMaxStretch(pathMatrix)
        results = getResults(pathMatrix)
        with open(outpath, "w") as f:
            f.write(results)
        print("Finished {} with {} nodes".format(date, len(store)))
        print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
        #break # Stop after 1, because they can take forever
    return  # End of function
|
|
|
|
def timelineDimesTest():
    """Run the stretch test on the first unprocessed DIMES edge file.

    Input files are taken from DIMES/ASEdges/*.gz in sorted order.  The graph
    is rooted at its highest-degree node and results are written to
    output-treesim-timeline-dimes/<name>, where <name> is the input file's
    base name up to the first ".".  Inputs whose result file exists are
    skipped; the loop stops after one graph has been processed.
    """
    # Meant to study the performance of the network as a function of network size
    # Loops over a set of DIMES edge graphs
    # Runs a test on each graph, selecting highest-degree node as the root
    # Saves results for each graph to a separate file on disk
    outDir = "output-treesim-timeline-dimes"
    if not os.path.exists(outDir): os.makedirs(outDir)
    assert os.path.exists(outDir)
    # Input files are named ASEdgesX_Y where X = month (no leading 0), Y = year
    paths = sorted(glob.glob("DIMES/ASEdges/*.gz"))
    exists = set(glob.glob(outDir+"/*"))
    for path in paths:
        date = os.path.basename(path).split(".")[0]
        outpath = outDir+"/{}".format(date)
        if outpath in exists:
            print("Skipping {}, already processed".format(date))
            continue
        store = makeStoreDimesEdges(path)
        # Get the highest degree node and make it root
        # Sorted by nodeID just to make it stable in the event of a tie
        nodeIDs = sorted(store.keys())
        bestRoot = ""
        bestDeg = 0
        for nodeID in nodeIDs:
            node = store[nodeID]
            if len(node.links) > bestDeg:
                bestRoot = nodeID
                bestDeg = len(node.links)
        assert bestRoot
        store = makeStoreDimesEdges(path, bestRoot)
        rootID = "R" + bestRoot[1:]
        assert rootID in store
        # Don't forget to set random seed before setting times
        # To make results reproducible
        nodeIDs = sorted(store.keys())
        random.seed(12345)
        for nodeID in nodeIDs:
            node = store[nodeID]
            node.info.time = random.randint(0, TIMEOUT)
            node.info.tstamp = TIMEOUT
        print("Beginning {}, size {}".format(date, len(store)))
        # Computed for this graph (previously read before any assignment)
        dists = dijkstrall(store)
        idleUntilConverged(store)
        pathMatrix = testPaths(store, dists)
        avgStretch = getAvgStretch(pathMatrix)
        maxStretch = getMaxStretch(pathMatrix)
        results = getResults(pathMatrix)
        with open(outpath, "w") as f:
            f.write(results)
        print("Finished {} with {} nodes".format(date, len(store)))
        print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
        break  # Stop after 1, because they can take forever
    return  # End of function
|
|
|
|
def scalingTest(maxTests=None, inputDir="graphs"):
    """Run the stretch test on previously generated graphs, per root.

    maxTests -- if set, test only that many roots per graph (highest degree
                first); otherwise every node is tested as root
    inputDir -- directory holding the generated graph files; results go to
                output-treesim-<inputDir>/<graph>-<zero-padded root ID>

    Roots whose result file already exists are skipped.
    """
    # Meant to study the performance of the network as a function of network size
    # Loops over a set of nodes in a previously generated graph
    # Runs a test on each graph, testing each node as the root
    # if maxTests is set, tests only that number of roots (highest degree first)
    # Saves results for each graph to a separate file on disk
    outDir = "output-treesim-{}".format(inputDir)
    if not os.path.exists(outDir): os.makedirs(outDir)
    assert os.path.exists(outDir)
    paths = sorted(glob.glob("{}/*".format(inputDir)))
    exists = set(glob.glob(outDir+"/*"))
    for path in paths:
        gc.collect()  # pypy waits for gc to close files
        graph = os.path.basename(path).split(".")[0]
        store = makeStoreGeneratedGraph(path)
        # Order candidate roots by degree, highest first
        # Pre-sorted by nodeID so that ties are broken in a stable way
        # (sorted() keeps equal-degree nodes in their incoming order)
        nodeIDs = sorted(sorted(store.keys()), key=lambda x: len(store[x].links), reverse=True)
        if maxTests: nodeIDs = nodeIDs[:maxTests]
        for nodeID in nodeIDs:
            nodeIDStr = str(nodeID).zfill(len(str(len(store)-1)))
            outpath = outDir+"/{}-{}".format(graph, nodeIDStr)
            if outpath in exists:
                print("Skipping {}-{}, already processed".format(graph, nodeIDStr))
                continue
            store = makeStoreGeneratedGraph(path, nodeID)
            # Don't forget to set random seed before setting times
            random.seed(12345)  # To make results reproducible
            nIDs = sorted(store.keys())
            for nID in nIDs:
                node = store[nID]
                node.info.time = random.randint(0, TIMEOUT)
                node.info.tstamp = TIMEOUT
            print("Beginning {}, size {}".format(graph, len(store)))
            # Recomputed for every root: the root's node ID is rewritten when
            # the store is built, which changes the sorted order of node IDs
            # that both the distance matrix and testPaths() index by, so a
            # matrix from another root would be misaligned
            dists = dijkstrall(store)
            idleUntilConverged(store)
            pathMatrix = testPaths(store, dists)
            avgStretch = getAvgStretch(pathMatrix)
            maxStretch = getMaxStretch(pathMatrix)
            results = getResults(pathMatrix)
            with open(outpath, "w") as f:
                f.write(results)
            print("Finished {} with {} nodes for root {}".format(graph, len(store), nodeID))
            print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
    return  # End of function
|
|
|
|
##################
|
|
# Main Execution #
|
|
##################
|
|
|
|
if __name__ == "__main__":
    # Script entry point: runs a quick self-test on a 4x4 grid, then, if a
    # single command-line argument is given, runs rootNodeASTest with it
    if True:  # Run a quick test
        random.seed(12345)  # DEBUG
        store = makeStoreSquareGrid(4)
        runTest(store)  # Quick test
        store = None
    # Do some real work
    #runTest(makeStoreDimesEdges("DIMES/ASEdges/ASEdges1_2007.csv.gz"))
    #timelineDimesTest()
    #rootNodeASTest("asrel/datasets/19980101.as-rel.txt")
    #timelineASTest()
    #rootNodeASTest("hype-2016-09-19.list", "output-treesim-hype")
    #scalingTest(None, "graphs-20") # First argument 1 to only test 1 root per graph
    #store = makeStoreGeneratedGraph("bgp_tables")
    #store = makeStoreGeneratedGraph("skitter")
    #store = makeStoreASRelGraphMaxDeg("hype-2016-09-19.list") #http://hia.cjdns.ca/watchlist/c/walk.peers.20160919
    #store = makeStoreGeneratedGraph("fc00-2017-08-12.txt")
    if store: runTest(store)
    #rootNodeASTest("skitter", "output-treesim-skitter", None, 0, 1)
    #scalingTest(1, "graphs-20") # First argument 1 to only test 1 root per graph
    #scalingTest(1, "graphs-21") # First argument 1 to only test 1 root per graph
    #scalingTest(1, "graphs-22") # First argument 1 to only test 1 root per graph
    #scalingTest(1, "graphs-23") # First argument 1 to only test 1 root per graph
    if not store:
        import sys
        args = sys.argv
        if len(args) == 2:
            # The argument is passed through as rootNodeASTest's proc value
            job_number = int(sys.argv[1])
            #rootNodeASTest("fc00-2017-08-12.txt", "fc00", None, job_number)
            #rootNodeASTest("skitter", "out-skitter", None, job_number)
            rootNodeASTest("walk-1517414401.txt.map", "out-walk", None, job_number)
        else:
            print("Usage: {} job_number".format(args[0]))
            print("job_number = which job set to run on this node (1-indexed)")
|
|
|