diff --git a/.circleci/config.yml b/.circleci/config.yml index f9cd0720..1456332a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,6 +29,7 @@ jobs: - run: name: Install RPM utilities command: | + sudo apt-get update sudo apt-get install -y rpm file mkdir -p ~/rpmbuild/BUILD ~/rpmbuild/RPMS ~/rpmbuild/SOURCES ~/rpmbuild/SPECS ~/rpmbuild/SRPMS @@ -67,13 +68,15 @@ jobs: find ~/rpmbuild/SRPMS/ -name '*.rpm' -exec mv {} /tmp/upload \; - run: - name: Build for EdgeRouter + name: Build for EdgeRouter and VyOS command: | rm -f {yggdrasil,yggdrasilctl} git clone https://github.com/neilalexander/vyatta-yggdrasil /tmp/vyatta-yggdrasil; cd /tmp/vyatta-yggdrasil; BUILDDIR_YGG=$CIRCLE_WORKING_DIRECTORY ./build-edgerouter-x $CIRCLE_BRANCH; BUILDDIR_YGG=$CIRCLE_WORKING_DIRECTORY ./build-edgerouter-lite $CIRCLE_BRANCH; + BUILDDIR_YGG=$CIRCLE_WORKING_DIRECTORY ./build-vyos-i386 $CIRCLE_BRANCH + BUILDDIR_YGG=$CIRCLE_WORKING_DIRECTORY ./build-vyos-amd64 $CIRCLE_BRANCH mv *.deb /tmp/upload; - persist_to_workspace: diff --git a/CHANGELOG.md b/CHANGELOG.md index b8133a29..fdac254f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,34 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - in case of vulnerabilities. --> +## [0.3.9] - 2019-09-27 +### Added +- Yggdrasil will now complain more verbosely when a peer URI is incorrectly formatted +- Soft-shutdown methods have been added, allowing a node to shut down gracefully when terminated +- New multicast interval logic which sends multicast beacons more often when Yggdrasil is first started to increase the chance of finding nearby nodes quickly after startup + +### Changed +- The switch now buffers packets more eagerly in an attempt to give the best link a chance to send, which appears to reduce packet reordering when crossing aggregate sets of peerings +- Substantial amounts of the codebase have been refactored to use the actor model, which should substantially reduce the chance of deadlocks +- Nonce tracking in sessions has been modified so that memory usage is reduced whilst still only allowing duplicate packets within a small window +- Soft-reconfiguration support has been simplified using new actor functions +- The garbage collector threshold has been adjusted for mobile builds +- The maximum queue size is now managed exclusively by the switch rather than by the core + +### Fixed +- The broken `hjson-go` dependency which affected builds of the previous version has now been resolved in the module manifest +- Some minor memory leaks in the switch have been fixed, which improves memory usage on mobile builds +- A memory leak in the add-peer loop has been fixed +- The admin socket now reports the correct URI strings for SOCKS peers in `getPeers` +- A race condition when dialling a remote node by both the node address and routed prefix simultaneously has been fixed +- A race condition between the router and the dial code resulting in a panic has been fixed +- A panic which could occur when the TUN/TAP interface disappears (e.g. 
during soft-shutdown) has been fixed +- A bug in the semantic versioning script which accompanies Yggdrasil for builds has been fixed +- A panic which could occur when the TUN/TAP interface reads an undersized/corrupted packet has been fixed + +### Removed +- A number of legacy debug functions have now been removed and a number of exported API functions are now better documented + ## [0.3.8] - 2019-08-21 ### Changed - Yggdrasil can now send multiple packets from the switch at once, which results in improved throughput with smaller packets or lower MTUs @@ -39,10 +67,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - New nonce tracking should help to reduce the number of packets dropped as a result of multiple/aggregate paths or congestion control in the switch ### Fixed -- **Security vulnerability**: Address verification was not strict enough, which could result in a malicious session sending traffic with unexpected or spoofed source or destination addresses which Yggdrasil could fail to reject +- A deadlock was fixed in the session code which could result in Yggdrasil failing to pass traffic after some time + +### Security +- Address verification was not strict enough, which could result in a malicious session sending traffic with unexpected or spoofed source or destination addresses which Yggdrasil could fail to reject - Versions `0.3.6` and `0.3.7` are vulnerable - users of these versions should upgrade as soon as possible - Versions `0.3.5` and earlier are not affected -- A deadlock was fixed in the session code which could result in Yggdrasil failing to pass traffic after some time ## [0.3.7] - 2019-08-14 ### Changed diff --git a/README.md b/README.md index ec7fe74c..07b202f1 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ some of the below: - NetBSD - OpenWrt -Please see our [Platforms](https://yggdrasil-network.github.io/) pages for more +Please see our [Platforms](https://yggdrasil-network.github.io/platforms.html) pages for more specific information about each of our supported platforms, including installation steps and caveats. 
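Reviewer note: the changelog above credits much of this release to a refactor onto the actor model, backed by the new github.com/Arceliar/phony dependency added to go.mod further down. As a rough, minimal sketch of the pattern this diff leans on (embedding phony.Inbox and serialising all state changes through Act), using only the Act method that the diff itself demonstrates; the counter type and its field are invented for illustration and are not part of Yggdrasil:

```go
package main

import (
	"fmt"
	"sync"

	"github.com/Arceliar/phony"
)

// counter is a hypothetical actor: embedding phony.Inbox means every function
// passed to Act runs serially on the actor's own goroutine, so the field
// below needs no mutex.
type counter struct {
	phony.Inbox
	n int
}

func main() {
	c := &counter{}
	var wg sync.WaitGroup
	wg.Add(1)
	// Schedule work on the actor, in the same style as m.Act(m, m.announce)
	// in the multicast changes further down this diff.
	c.Act(c, func() {
		c.n++
		fmt.Println("counter is now", c.n)
		wg.Done()
	})
	wg.Wait()
}
```

Because every closure queued on the same Inbox runs one at a time, fields owned by an actor need no locking, which is what lets the mutexes, worker goroutines and channels below be removed.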
diff --git a/build b/build index c1e5f863..7ca5f4fa 100755 --- a/build +++ b/build @@ -34,13 +34,15 @@ if [ $IOS ]; then gomobile bind -target ios -tags mobile -ldflags="$LDFLAGS $STRIP" -gcflags="$GCFLAGS" \ github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil \ github.com/yggdrasil-network/yggdrasil-go/src/config \ - github.com/yggdrasil-network/yggdrasil-extras/src/mobile + github.com/yggdrasil-network/yggdrasil-extras/src/mobile \ + github.com/yggdrasil-network/yggdrasil-extras/src/dummy elif [ $ANDROID ]; then echo "Building aar for Android" gomobile bind -target android -tags mobile -ldflags="$LDFLAGS $STRIP" -gcflags="$GCFLAGS" \ github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil \ github.com/yggdrasil-network/yggdrasil-go/src/config \ - github.com/yggdrasil-network/yggdrasil-extras/src/mobile + github.com/yggdrasil-network/yggdrasil-extras/src/mobile \ + github.com/yggdrasil-network/yggdrasil-extras/src/dummy else for CMD in `ls cmd/` ; do echo "Building: $CMD" diff --git a/cmd/yggdrasil/main.go b/cmd/yggdrasil/main.go index 2fa10720..33a8769d 100644 --- a/cmd/yggdrasil/main.go +++ b/cmd/yggdrasil/main.go @@ -279,6 +279,7 @@ func main() { case _ = <-r: if *useconffile != "" { cfg = readConfig(useconf, useconffile, normaliseconf) + logger.Infoln("Reloading configuration from", *useconffile) n.core.UpdateConfig(cfg) n.tuntap.UpdateConfig(cfg) n.multicast.UpdateConfig(cfg) @@ -291,11 +292,10 @@ exit: } func (n *node) shutdown() { - n.core.Stop() n.admin.Stop() n.multicast.Stop() n.tuntap.Stop() - os.Exit(0) + n.core.Stop() } func (n *node) sessionFirewall(pubkey *crypto.BoxPubKey, initiator bool) bool { diff --git a/contrib/rpm/yggdrasil.spec b/contrib/rpm/yggdrasil.spec deleted file mode 100644 index bab50906..00000000 --- a/contrib/rpm/yggdrasil.spec +++ /dev/null @@ -1,47 +0,0 @@ -Name: yggdrasil -Version: 0.3.0 -Release: 1%{?dist} -Summary: End-to-end encrypted IPv6 networking - -License: GPLv3 -URL: https://yggdrasil-network.github.io -Source0: https://codeload.github.com/yggdrasil-network/yggdrasil-go/tar.gz/v0.3.0 - -%{?systemd_requires} -BuildRequires: systemd golang >= 1.11 - -%description -Yggdrasil is a proof-of-concept to explore a wholly different approach to -network routing. Whereas current computer networks depend heavily on very -centralised design and configuration, Yggdrasil breaks this mould by making -use of a global spanning tree to form a scalable IPv6 encrypted mesh network. 
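Reviewer note on the cmd/yggdrasil/main.go hunk above: shutdown is reordered so that the admin socket, multicast and TUN/TAP modules stop before the core, and the os.Exit(0) call is dropped so main() can return normally. A minimal sketch of that ordering under the same signal-handling shape, using a hypothetical module type in place of the real objects:

```go
package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
)

// module is a hypothetical stand-in for the admin socket, multicast, TUN/TAP
// and core objects, each of which exposes a Stop method in the real code.
type module struct{ name string }

func (m *module) Stop() { log.Println("stopped", m.name) }

func main() {
	admin, multicast, tuntap, core := &module{"admin"}, &module{"multicast"}, &module{"tuntap"}, &module{"core"}

	// Wait for an interrupt or termination signal, as the real main() does.
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
	<-c

	// Mirror the new ordering: stop the peripheral modules first and the core
	// last, and return from main() rather than calling os.Exit so that any
	// remaining cleanup still runs.
	admin.Stop()
	multicast.Stop()
	tuntap.Stop()
	core.Stop()
}
```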
- -%prep -%setup -qn yggdrasil-go-%{version} - -%build -./build -t -l "-linkmode=external" - -%install -rm -rf %{buildroot} -mkdir -p %{buildroot}/%{_bindir} -mkdir -p %{buildroot}/%{_sysconfdir}/systemd/system -install -m 0755 yggdrasil %{buildroot}/%{_bindir}/yggdrasil -install -m 0755 yggdrasilctl %{buildroot}/%{_bindir}/yggdrasilctl -install -m 0755 contrib/systemd/yggdrasil.service %{buildroot}/%{_sysconfdir}/systemd/system/yggdrasil.service -install -m 0755 contrib/systemd/yggdrasil-resume.service %{buildroot}/%{_sysconfdir}/systemd/system/yggdrasil-resume.service - -%files -%{_bindir}/yggdrasil -%{_bindir}/yggdrasilctl -%{_sysconfdir}/systemd/system/yggdrasil.service -%{_sysconfdir}/systemd/system/yggdrasil-resume.service - -%post -%systemd_post yggdrasil.service - -%preun -%systemd_preun yggdrasil.service - -%postun -%systemd_postun_with_restart yggdrasil.service diff --git a/contrib/semver/name.sh b/contrib/semver/name.sh index 1fa2ce07..308e07bc 100644 --- a/contrib/semver/name.sh +++ b/contrib/semver/name.sh @@ -6,7 +6,7 @@ BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) # Complain if the git history is not available if [ $? != 0 ] || [ -z "$BRANCH" ]; then printf "yggdrasil" - exit 1 + exit 0 fi # Remove "/" characters from the branch name if present diff --git a/contrib/semver/version.sh b/contrib/semver/version.sh index 3052094a..db96e339 100644 --- a/contrib/semver/version.sh +++ b/contrib/semver/version.sh @@ -6,7 +6,7 @@ TAG=$(git describe --abbrev=0 --tags --match="v[0-9]*\.[0-9]*\.[0-9]*" 2>/dev/nu # Did getting the tag succeed? if [ $? != 0 ] || [ -z "$TAG" ]; then printf -- "unknown" - exit 1 + exit 0 fi # Get the current branch @@ -36,7 +36,7 @@ if [ "$BRANCH" != "master" ]; then # Did getting the count of commits since the tag succeed? if [ $? != 0 ] || [ -z "$BUILD" ]; then printf -- "-unknown" - exit 1 + exit 0 fi # Is the build greater than zero? diff --git a/contrib/systemd/yggdrasil.service b/contrib/systemd/yggdrasil.service index bd52ec9f..37859e79 100644 --- a/contrib/systemd/yggdrasil.service +++ b/contrib/systemd/yggdrasil.service @@ -16,6 +16,7 @@ ExecStartPre=/bin/sh -ec "if ! 
test -s /etc/yggdrasil.conf; \ ExecStart=/usr/bin/yggdrasil -useconffile /etc/yggdrasil.conf ExecReload=/bin/kill -HUP $MAINPID Restart=always +TimeoutStopSec=5 [Install] WantedBy=multi-user.target diff --git a/doc/yggdrasil-network.github.io b/doc/yggdrasil-network.github.io index 10672210..c876890a 160000 --- a/doc/yggdrasil-network.github.io +++ b/doc/yggdrasil-network.github.io @@ -1 +1 @@ -Subproject commit 10672210f2fdce97dd5c301dfeed47284d4a28f2 +Subproject commit c876890a51d9140e68d5cec7fbeb2146c2562792 diff --git a/go.mod b/go.mod index 5569496c..d86101bf 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,10 @@ module github.com/yggdrasil-network/yggdrasil-go require ( + github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6 github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8 github.com/hashicorp/go-syslog v1.0.0 - github.com/hjson/hjson-go v0.0.0-20181010104306-a25ecf6bd222 + github.com/hjson/hjson-go v3.0.1-0.20190209023717-9147687966d9+incompatible github.com/kardianos/minwinsvc v0.0.0-20151122163309-cad6b2b879b0 github.com/mitchellh/mapstructure v1.1.2 github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 diff --git a/go.sum b/go.sum index 756ed5af..cdabc402 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,11 @@ +github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6 h1:zMj5Q1V0yF4WNfV/FpXG6iXfPJ965Xc5asR2vHXanXc= +github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6/go.mod h1:6Lkn+/zJilRMsKmbmG1RPoamiArC6HS73xbwRyp3UyI= github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8 h1:WD8iJ37bRNwvETMfVTusVSAi0WdXTpfNVGY2aHycNKY= github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8/go.mod h1:gq31gQ8wEHkR+WekdWsqDuf8pXTUZA9BnnzTuPz1Y9U= github.com/hashicorp/go-syslog v1.0.0 h1:KaodqZuhUoZereWVIYmpUgZysurB1kBLX2j0MwMrUAE= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hjson/hjson-go v0.0.0-20181010104306-a25ecf6bd222 h1:xmvkbxXDeN1ffWq8kvrhyqVYAO2aXuRBsbpxVTR+JyU= -github.com/hjson/hjson-go v0.0.0-20181010104306-a25ecf6bd222/go.mod h1:qsetwF8NlsTsOTwZTApNlTCerV+b2GjYRRcIk4JMFio= +github.com/hjson/hjson-go v3.0.1-0.20190209023717-9147687966d9+incompatible h1:bLQ2Ve+eW65id3b8xEMQiAwJT4qGZeywAEMLvXjznvw= +github.com/hjson/hjson-go v3.0.1-0.20190209023717-9147687966d9+incompatible/go.mod h1:qsetwF8NlsTsOTwZTApNlTCerV+b2GjYRRcIk4JMFio= github.com/kardianos/minwinsvc v0.0.0-20151122163309-cad6b2b879b0 h1:YnZmFjg0Nvk8851WTVWlqMC1ecJH07Ctz+Ezxx4u54g= github.com/kardianos/minwinsvc v0.0.0-20151122163309-cad6b2b879b0/go.mod h1:rUi0/YffDo1oXBOGn1KRq7Fr07LX48XEBecQnmwjsAo= github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= diff --git a/src/address/address.go b/src/address/address.go index 5c13257b..3960b783 100644 --- a/src/address/address.go +++ b/src/address/address.go @@ -1,22 +1,24 @@ +// Package address contains the types used by yggdrasil to represent IPv6 addresses or prefixes, as well as functions for working with these types. +// Of particular importance are the functions used to derive addresses or subnets from a NodeID, or to get the NodeID and bitmask of the bits visible from an address, which is needed for DHT searches. package address import "github.com/yggdrasil-network/yggdrasil-go/src/crypto" -// address represents an IPv6 address in the yggdrasil address range. +// Address represents an IPv6 address in the yggdrasil address range. type Address [16]byte -// subnet represents an IPv6 /64 subnet in the yggdrasil subnet range. 
+// Subnet represents an IPv6 /64 subnet in the yggdrasil subnet range. type Subnet [8]byte -// address_prefix is the prefix used for all addresses and subnets in the network. +// GetPrefix returns the address prefix used by yggdrasil. // The current implementation requires this to be a muliple of 8 bits + 7 bits. // The 8th bit of the last byte is used to signal nodes (0) or /64 prefixes (1). -// Nodes that configure this differently will be unable to communicate with eachother, though routing and the DHT machinery *should* still work. +// Nodes that configure this differently will be unable to communicate with each other using IP packets, though routing and the DHT machinery *should* still work. func GetPrefix() [1]byte { return [...]byte{0x02} } -// isValid returns true if an address falls within the range used by nodes in the network. +// IsValid returns true if an address falls within the range used by nodes in the network. func (a *Address) IsValid() bool { prefix := GetPrefix() for idx := range prefix { @@ -27,7 +29,7 @@ func (a *Address) IsValid() bool { return true } -// isValid returns true if a prefix falls within the range usable by the network. +// IsValid returns true if a prefix falls within the range usable by the network. func (s *Subnet) IsValid() bool { prefix := GetPrefix() l := len(prefix) @@ -39,8 +41,8 @@ func (s *Subnet) IsValid() bool { return (*s)[l-1] == prefix[l-1]|0x01 } -// address_addrForNodeID takes a *NodeID as an argument and returns an *address. -// This subnet begins with the address prefix, with the last bit set to 0 to indicate an address. +// AddrForNodeID takes a *NodeID as an argument and returns an *Address. +// This address begins with the contents of GetPrefix(), with the last bit set to 0 to indicate an address. // The following 8 bits are set to the number of leading 1 bits in the NodeID. // The NodeID, excluding the leading 1 bits and the first leading 0 bit, is truncated to the appropriate length and makes up the remainder of the address. func AddrForNodeID(nid *crypto.NodeID) *Address { @@ -80,7 +82,7 @@ func AddrForNodeID(nid *crypto.NodeID) *Address { return &addr } -// address_subnetForNodeID takes a *NodeID as an argument and returns a *subnet. +// SubnetForNodeID takes a *NodeID as an argument and returns a *Subnet. // This subnet begins with the address prefix, with the last bit set to 1 to indicate a prefix. // The following 8 bits are set to the number of leading 1 bits in the NodeID. // The NodeID, excluding the leading 1 bits and the first leading 0 bit, is truncated to the appropriate length and makes up the remainder of the subnet. @@ -96,10 +98,10 @@ func SubnetForNodeID(nid *crypto.NodeID) *Subnet { return &snet } -// getNodeIDandMask returns two *NodeID. -// The first is a NodeID with all the bits known from the address set to their correct values. -// The second is a bitmask with 1 bit set for each bit that was known from the address. -// This is used to look up NodeIDs in the DHT and tell if they match an address. +// GetNodeIDandMask returns two *NodeID. +// The first is a NodeID with all the bits known from the Address set to their correct values. +// The second is a bitmask with 1 bit set for each bit that was known from the Address. +// This is used to look up NodeIDs in the DHT and tell if they match an Address.
func (a *Address) GetNodeIDandMask() (*crypto.NodeID, *crypto.NodeID) { // Mask is a bitmask to mark the bits visible from the address // This means truncated leading 1s, first leading 0, and visible part of addr @@ -126,10 +128,10 @@ func (a *Address) GetNodeIDandMask() (*crypto.NodeID, *crypto.NodeID) { return &nid, &mask } -// getNodeIDandMask returns two *NodeID. -// The first is a NodeID with all the bits known from the address set to their correct values. -// The second is a bitmask with 1 bit set for each bit that was known from the subnet. -// This is used to look up NodeIDs in the DHT and tell if they match a subnet. +// GetNodeIDandMask returns two *NodeID. +// The first is a NodeID with all the bits known from the Subnet set to their correct values. +// The second is a bitmask with 1 bit set for each bit that was known from the Subnet. +// This is used to look up NodeIDs in the DHT and tell if they match a Subnet. func (s *Subnet) GetNodeIDandMask() (*crypto.NodeID, *crypto.NodeID) { // As with the address version, but visible parts of the subnet prefix instead var nid crypto.NodeID diff --git a/src/config/config.go b/src/config/config.go index 6c127552..ac88bfc5 100644 --- a/src/config/config.go +++ b/src/config/config.go @@ -1,3 +1,19 @@ +/* +The config package contains structures related to the configuration of an +Yggdrasil node. + +The configuration contains, amongst other things, encryption keys which are used +to derive a node's identity, information about peerings and node information +that is shared with the network. There are also some module-specific options +related to TUN/TAP, multicast and the admin socket. + +In order for a node to maintain the same identity across restarts, you should +persist the configuration onto the filesystem or into some configuration storage +so that the encryption keys (and therefore the node ID) do not change. + +Note that Yggdrasil will automatically populate sane defaults for any +configuration option that is not provided. +*/ package config import ( @@ -8,30 +24,30 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/defaults" ) -// NodeState represents the active and previous configuration of the node and -// protects it with a mutex +// NodeState represents the active and previous configuration of an Yggdrasil +// node. A NodeState object is returned when starting an Yggdrasil node. Note +// that this structure and related functions are likely to disappear soon. type NodeState struct { Current NodeConfig Previous NodeConfig Mutex sync.RWMutex } -// Current returns the current node config +// Current returns the active node configuration. func (s *NodeState) GetCurrent() NodeConfig { s.Mutex.RLock() defer s.Mutex.RUnlock() return s.Current } -// Previous returns the previous node config +// Previous returns the previous node configuration. func (s *NodeState) GetPrevious() NodeConfig { s.Mutex.RLock() defer s.Mutex.RUnlock() return s.Previous } -// Replace the node configuration with new configuration. This method returns -// both the new and the previous node configs +// Replace the node configuration with new configuration. func (s *NodeState) Replace(n NodeConfig) { s.Mutex.Lock() defer s.Mutex.Unlock() @@ -39,7 +55,9 @@ func (s *NodeState) Replace(n NodeConfig) { s.Current = n } -// NodeConfig defines all configuration values needed to run a signle yggdrasil node +// NodeConfig is the main configuration structure, containing configuration +// options that are necessary for an Yggdrasil node to run. 
You will need to +// supply one of these structs to the Yggdrasil core when starting a node. type NodeConfig struct { Peers []string `comment:"List of connection strings for outbound peer connections in URI format,\ne.g. tcp://a.b.c.d:e or socks://a.b.c.d:e/f.g.h.i:j. These connections\nwill obey the operating system routing table, therefore you should\nuse this section when you may connect via different interfaces."` InterfacePeers map[string][]string `comment:"List of connection strings for outbound peer connections in URI format,\narranged by source interface, e.g. { \"eth0\": [ tcp://a.b.c.d:e ] }.\nNote that SOCKS peerings will NOT be affected by this option and should\ngo in the \"Peers\" section instead."` @@ -62,7 +80,7 @@ type NodeConfig struct { NodeInfo map[string]interface{} `comment:"Optional node info. This must be a { \"key\": \"value\", ... } map\nor set as null. This is entirely optional but, if set, is visible\nto the whole network on request."` } -// SessionFirewall controls the session firewall configuration +// SessionFirewall controls the session firewall configuration. type SessionFirewall struct { Enable bool `comment:"Enable or disable the session firewall. If disabled, network traffic\nfrom any node will be allowed. If enabled, the below rules apply."` AllowFromDirect bool `comment:"Allow network traffic from directly connected peers."` @@ -72,7 +90,8 @@ type SessionFirewall struct { BlacklistEncryptionPublicKeys []string `comment:"List of public keys from which network traffic is always rejected,\nregardless of the whitelist, AllowFromDirect or AllowFromRemote."` } -// TunnelRouting contains the crypto-key routing tables for tunneling +// TunnelRouting contains the crypto-key routing tables for tunneling regular +// IPv4 or IPv6 subnets across the Yggdrasil network. type TunnelRouting struct { Enable bool `comment:"Enable or disable tunnel routing."` IPv6RemoteSubnets map[string]string `comment:"IPv6 subnets belonging to remote nodes, mapped to the node's public\nkey, e.g. { \"aaaa:bbbb:cccc::/e\": \"boxpubkey\", ... }"` @@ -81,18 +100,15 @@ type TunnelRouting struct { IPv4LocalSubnets []string `comment:"IPv4 subnets belonging to this node's end of the tunnels. Only traffic\nfrom these ranges will be tunnelled."` } -// SwitchOptions contains tuning options for the switch +// SwitchOptions contains tuning options for the switch. These are advanced +// options and shouldn't be changed unless necessary. type SwitchOptions struct { MaxTotalQueueSize uint64 `comment:"Maximum size of all switch queues combined (in bytes)."` } -// Generates default configuration. This is used when outputting the -genconf -// parameter and also when using -autoconf. The isAutoconf flag is used to -// determine whether the operating system should select a free port by itself -// (which guarantees that there will not be a conflict with any other services) -// or whether to generate a random port number. The only side effect of setting -// isAutoconf is that the TCP and UDP ports will likely end up with different -// port numbers. +// Generates default configuration and returns a pointer to the resulting +// NodeConfig. This is used when outputting the -genconf parameter and also when +// using -autoconf. func GenerateConfig() *NodeConfig { // Generate encryption keys. bpub, bpriv := crypto.NewBoxKeys() @@ -122,16 +138,19 @@ func GenerateConfig() *NodeConfig { return &cfg } -// NewEncryptionKeys generates a new encryption keypair. 
The encryption keys are -// used to encrypt traffic and to derive the IPv6 address/subnet of the node. +// NewEncryptionKeys replaces the encryption keypair in the NodeConfig with a +// new encryption keypair. The encryption keys are used by the router to encrypt +// traffic and to derive the node ID and IPv6 address/subnet of the node, so +// this is equivalent to discarding the node's identity on the network. func (cfg *NodeConfig) NewEncryptionKeys() { bpub, bpriv := crypto.NewBoxKeys() cfg.EncryptionPublicKey = hex.EncodeToString(bpub[:]) cfg.EncryptionPrivateKey = hex.EncodeToString(bpriv[:]) } -// NewSigningKeys generates a new signing keypair. The signing keys are used to -// derive the structure of the spanning tree. +// NewSigningKeys replaces the signing keypair in the NodeConfig with a new +// signing keypair. The signing keys are used by the switch to derive the +// structure of the spanning tree. func (cfg *NodeConfig) NewSigningKeys() { spub, spriv := crypto.NewSigKeys() cfg.SigningPublicKey = hex.EncodeToString(spub[:]) diff --git a/src/crypto/crypto.go index 75736ba7..6c10a2ef 100644 --- a/src/crypto/crypto.go +++ b/src/crypto/crypto.go @@ -1,3 +1,6 @@ +// Package crypto is a wrapper around packages under golang.org/x/crypto/, particularly curve25519, ed25519, and nacl/box. +// This is used to avoid explicitly importing and using these packages throughout yggdrasil. +// It also includes the all-important NodeID and TreeID types, which are used to identify nodes in the DHT and in the spanning tree's root selection algorithm, respectively. package crypto /* @@ -26,12 +29,21 @@ import ( // NodeID and TreeID +// NodeIDLen is the length (in bytes) of a NodeID. const NodeIDLen = sha512.Size + +// TreeIDLen is the length (in bytes) of a TreeID. const TreeIDLen = sha512.Size + +// handleLen is the length (in bytes) of a Handle. const handleLen = 8 +// NodeID is how a yggdrasil node is identified in the DHT, and is used to derive IPv6 addresses and subnets in the main executable. It is a sha512sum hash of the node's BoxPubKey. type NodeID [NodeIDLen]byte + +// TreeID is how a yggdrasil node is identified in the root selection algorithm used to construct the spanning tree. type TreeID [TreeIDLen]byte + type Handle [handleLen]byte func (n *NodeID) String() string { @@ -69,16 +81,19 @@ func (n *NodeID) PrefixLength() int { return len } +// GetNodeID returns the NodeID associated with a BoxPubKey. func GetNodeID(pub *BoxPubKey) *NodeID { h := sha512.Sum512(pub[:]) return (*NodeID)(&h) } +// GetTreeID returns the TreeID associated with a SigPubKey. func GetTreeID(pub *SigPubKey) *TreeID { h := sha512.Sum512(pub[:]) return (*TreeID)(&h) } +// NewHandle returns a new (cryptographically random) Handle, used by the session code to identify which session an incoming packet is associated with. func NewHandle() *Handle { var h Handle _, err := rand.Read(h[:]) @@ -92,14 +107,25 @@ // Signatures +// SigPubKeyLen is the length of a SigPubKey in bytes. const SigPubKeyLen = ed25519.PublicKeySize + +// SigPrivKeyLen is the length of a SigPrivKey in bytes. const SigPrivKeyLen = ed25519.PrivateKeySize + +// SigLen is the length of SigBytes. const SigLen = ed25519.SignatureSize +// SigPubKey is a public ed25519 signing key. type SigPubKey [SigPubKeyLen]byte + +// SigPrivKey is a private ed25519 signing key. type SigPrivKey [SigPrivKeyLen]byte + +// SigBytes is an ed25519 signature.
type SigBytes [SigLen]byte +// NewSigKeys generates a public/private ed25519 key pair. func NewSigKeys() (*SigPubKey, *SigPrivKey) { var pub SigPubKey var priv SigPrivKey @@ -112,6 +138,7 @@ return &pub, &priv } +// Sign returns the SigBytes signing a message. func Sign(priv *SigPrivKey, msg []byte) *SigBytes { var sig SigBytes sigSlice := ed25519.Sign(priv[:], msg) @@ -119,12 +146,14 @@ return &sig } +// Verify returns true if the provided signature matches the key and message. func Verify(pub *SigPubKey, msg []byte, sig *SigBytes) bool { // Should sig be an array instead of a slice?... // It's fixed size, but return ed25519.Verify(pub[:], msg, sig[:]) } +// Public returns the SigPubKey associated with this SigPrivKey. func (p SigPrivKey) Public() SigPubKey { priv := make(ed25519.PrivateKey, ed25519.PrivateKeySize) copy(priv[:], p[:]) @@ -138,17 +167,34 @@ // NaCl-like crypto "box" (curve25519+xsalsa20+poly1305) +// BoxPubKeyLen is the length of a BoxPubKey in bytes. const BoxPubKeyLen = 32 + +// BoxPrivKeyLen is the length of a BoxPrivKey in bytes. const BoxPrivKeyLen = 32 + +// BoxSharedKeyLen is the length of a BoxSharedKey in bytes. const BoxSharedKeyLen = 32 + +// BoxNonceLen is the length of a BoxNonce in bytes. const BoxNonceLen = 24 + +// BoxOverhead is the length of the overhead from boxing something. const BoxOverhead = box.Overhead +// BoxPubKey is a NaCl-like "box" public key (curve25519+xsalsa20+poly1305). type BoxPubKey [BoxPubKeyLen]byte + +// BoxPrivKey is a NaCl-like "box" private key (curve25519+xsalsa20+poly1305). type BoxPrivKey [BoxPrivKeyLen]byte + +// BoxSharedKey is a NaCl-like "box" shared key (curve25519+xsalsa20+poly1305). type BoxSharedKey [BoxSharedKeyLen]byte + +// BoxNonce is the nonce used in NaCl-like crypto "box" operations (curve25519+xsalsa20+poly1305), and must not be reused for different messages encrypted using the same BoxSharedKey. type BoxNonce [BoxNonceLen]byte +// NewBoxKeys generates a new pair of public/private crypto box keys. func NewBoxKeys() (*BoxPubKey, *BoxPrivKey) { pubBytes, privBytes, err := box.GenerateKey(rand.Reader) if err != nil { @@ -159,6 +205,7 @@ return pub, priv } +// GetSharedKey returns the shared key derived from your private key and the destination's public key. func GetSharedKey(myPrivKey *BoxPrivKey, othersPubKey *BoxPubKey) *BoxSharedKey { var shared [BoxSharedKeyLen]byte @@ -168,6 +215,7 @@ return (*BoxSharedKey)(&shared) } +// BoxOpen returns a message and true if it successfully opens a crypto box using the provided shared key and nonce. func BoxOpen(shared *BoxSharedKey, boxed []byte, nonce *BoxNonce) ([]byte, bool) { @@ -178,6 +226,9 @@ return unboxed, success } +// BoxSeal seals a crypto box using the provided shared key, returning the box and the nonce needed to decrypt it. +// If nonce is nil, a random BoxNonce will be used and returned. +// If nonce is non-nil, then nonce.Increment() will be called before using it, and the incremented BoxNonce is what is returned.
func BoxSeal(shared *BoxSharedKey, unboxed []byte, nonce *BoxNonce) ([]byte, *BoxNonce) { if nonce == nil { nonce = NewBoxNonce() @@ -190,6 +241,7 @@ func BoxSeal(shared *BoxSharedKey, unboxed []byte, nonce *BoxNonce) ([]byte, *Bo return boxed, nonce } +// NewBoxNonce generates a (cryptographically) random BoxNonce. func NewBoxNonce() *BoxNonce { var nonce BoxNonce _, err := rand.Read(nonce[:]) @@ -204,6 +256,7 @@ return &nonce } +// Increment adds 2 to a BoxNonce, which is useful if one node intends to send only with odd BoxNonce values, and the other only with even BoxNonce values. func (n *BoxNonce) Increment() { oldNonce := *n n[len(n)-1] += 2 @@ -214,6 +267,7 @@ } } +// Public returns the BoxPubKey associated with this BoxPrivKey. func (p BoxPrivKey) Public() BoxPubKey { var boxPub [BoxPubKeyLen]byte var boxPriv [BoxPrivKeyLen]byte @@ -222,9 +276,9 @@ func (p BoxPrivKey) Public() BoxPubKey { return boxPub } -// Used to subtract one nonce from another, staying in the range +- 64. -// This is used by the nonce progression machinery to advance the bitmask of recently received packets (indexed by nonce), or to check the appropriate bit of the bitmask. -// It's basically part of the machinery that prevents replays and duplicate packets. +// Minus is the result of subtracting the provided BoxNonce from this BoxNonce, bounded at +- 64. +// It's primarily used to determine if a new BoxNonce is higher than the last known BoxNonce from a crypto session, and by how much. +// This is used in the machinery that makes sure replayed packets can't keep a session open indefinitely or stuck using old/bad information about a node. func (n *BoxNonce) Minus(m *BoxNonce) int64 { diff := int64(0) for idx := range n { diff --git a/src/multicast/multicast.go b/src/multicast/multicast.go index 5ab9673e..7044eaa2 100644 --- a/src/multicast/multicast.go +++ b/src/multicast/multicast.go @@ -7,6 +7,7 @@ import ( "regexp" "time" + "github.com/Arceliar/phony" "github.com/gologme/log" "github.com/yggdrasil-network/yggdrasil-go/src/config" @@ -19,14 +20,23 @@ import ( // configured multicast interface, Yggdrasil will attempt to peer with that node // automatically. type Multicast struct { - core *yggdrasil.Core - config *config.NodeState - log *log.Logger - sock *ipv6.PacketConn - groupAddr string - listeners map[string]*yggdrasil.TcpListener - listenPort uint16 - isOpen bool + phony.Inbox + core *yggdrasil.Core + config *config.NodeState + log *log.Logger + sock *ipv6.PacketConn + groupAddr string + listeners map[string]*listenerInfo + listenPort uint16 + isOpen bool + announcer *time.Timer + platformhandler *time.Timer +} + +type listenerInfo struct { + listener *yggdrasil.TcpListener + time time.Time + interval time.Duration } // Init prepares the multicast interface for use. @@ -34,7 +44,7 @@ func (m *Multicast) Init(core *yggdrasil.Core, state *config.NodeState, log *log m.core = core m.config = state m.log = log - m.listeners = make(map[string]*yggdrasil.TcpListener) + m.listeners = make(map[string]*listenerInfo) current := m.config.GetCurrent() m.listenPort = current.LinkLocalTCPPort m.groupAddr = "[ff02::114]:9001" @@ -63,9 +73,9 @@ func (m *Multicast) Start() error { } m.isOpen = true - go m.multicastStarted() go m.listen() - go m.announce() + m.Act(m, m.multicastStarted) + m.Act(m, m.announce) return nil } @@ -73,6 +83,12 @@ // Stop is not implemented for multicast yet.
func (m *Multicast) Stop() error { m.isOpen = false + if m.announcer != nil { + m.announcer.Stop() + } + if m.platformhandler != nil { + m.platformhandler.Stop() + } m.sock.Close() return nil } @@ -83,7 +99,6 @@ func (m *Multicast) Stop() error { func (m *Multicast) UpdateConfig(config *config.NodeConfig) { m.log.Debugln("Reloading multicast configuration...") m.config.Replace(*config) - m.log.Infoln("Multicast configuration reloaded successfully") } // GetInterfaces returns the currently known/enabled multicast interfaces. It is @@ -137,108 +152,114 @@ func (m *Multicast) announce() { if err != nil { panic(err) } - for { - interfaces := m.Interfaces() - // There might be interfaces that we configured listeners for but are no - // longer up - if that's the case then we should stop the listeners - for name, listener := range m.listeners { - // Prepare our stop function! - stop := func() { - listener.Stop <- true - delete(m.listeners, name) - m.log.Debugln("No longer multicasting on", name) - } - // If the interface is no longer visible on the system then stop the - // listener, as another one will be started further down - if _, ok := interfaces[name]; !ok { - stop() - continue - } - // It's possible that the link-local listener address has changed so if - // that is the case then we should clean up the interface listener - found := false - listenaddr, err := net.ResolveTCPAddr("tcp6", listener.Listener.Addr().String()) - if err != nil { - stop() - continue - } - // Find the interface that matches the listener - if intf, err := net.InterfaceByName(name); err == nil { - if addrs, err := intf.Addrs(); err == nil { - // Loop through the addresses attached to that listener and see if any - // of them match the current address of the listener - for _, addr := range addrs { - if ip, _, err := net.ParseCIDR(addr.String()); err == nil { - // Does the interface address match our listener address? - if ip.Equal(listenaddr.IP) { - found = true - break - } + interfaces := m.Interfaces() + // There might be interfaces that we configured listeners for but are no + // longer up - if that's the case then we should stop the listeners + for name, info := range m.listeners { + // Prepare our stop function! + stop := func() { + info.listener.Stop() + delete(m.listeners, name) + m.log.Debugln("No longer multicasting on", name) + } + // If the interface is no longer visible on the system then stop the + // listener, as another one will be started further down + if _, ok := interfaces[name]; !ok { + stop() + continue + } + // It's possible that the link-local listener address has changed so if + // that is the case then we should clean up the interface listener + found := false + listenaddr, err := net.ResolveTCPAddr("tcp6", info.listener.Listener.Addr().String()) + if err != nil { + stop() + continue + } + // Find the interface that matches the listener + if intf, err := net.InterfaceByName(name); err == nil { + if addrs, err := intf.Addrs(); err == nil { + // Loop through the addresses attached to that listener and see if any + // of them match the current address of the listener + for _, addr := range addrs { + if ip, _, err := net.ParseCIDR(addr.String()); err == nil { + // Does the interface address match our listener address? + if ip.Equal(listenaddr.IP) { + found = true + break } } } } - // If the address has not been found on the adapter then we should stop - // and clean up the TCP listener. 
A new one will be created below if a - // suitable link-local address is found - if !found { - stop() - } } - // Now that we have a list of valid interfaces from the operating system, - // we can start checking if we can send multicasts on them - for _, iface := range interfaces { - // Find interface addresses - addrs, err := iface.Addrs() - if err != nil { - panic(err) - } - for _, addr := range addrs { - addrIP, _, _ := net.ParseCIDR(addr.String()) - // Ignore IPv4 addresses - if addrIP.To4() != nil { - continue - } - // Ignore non-link-local addresses - if !addrIP.IsLinkLocalUnicast() { - continue - } - // Join the multicast group - m.sock.JoinGroup(&iface, groupAddr) - // Try and see if we already have a TCP listener for this interface - var listener *yggdrasil.TcpListener - if l, ok := m.listeners[iface.Name]; !ok || l.Listener == nil { - // No listener was found - let's create one - listenaddr := fmt.Sprintf("[%s%%%s]:%d", addrIP, iface.Name, m.listenPort) - if li, err := m.core.ListenTCP(listenaddr); err == nil { - m.log.Debugln("Started multicasting on", iface.Name) - // Store the listener so that we can stop it later if needed - m.listeners[iface.Name] = li - listener = li - } else { - m.log.Warnln("Not multicasting on", iface.Name, "due to error:", err) - } - } else { - // An existing listener was found - listener = m.listeners[iface.Name] - } - // Make sure nothing above failed for some reason - if listener == nil { - continue - } - // Get the listener details and construct the multicast beacon - lladdr := listener.Listener.Addr().String() - if a, err := net.ResolveTCPAddr("tcp6", lladdr); err == nil { - a.Zone = "" - destAddr.Zone = iface.Name - msg := []byte(a.String()) - m.sock.WriteTo(msg, nil, destAddr) - } - break - } + // If the address has not been found on the adapter then we should stop + // and clean up the TCP listener. 
A new one will be created below if a + // suitable link-local address is found + if !found { + stop() } - time.Sleep(time.Second * 15) } + // Now that we have a list of valid interfaces from the operating system, + // we can start checking if we can send multicasts on them + for _, iface := range interfaces { + // Find interface addresses + addrs, err := iface.Addrs() + if err != nil { + panic(err) + } + for _, addr := range addrs { + addrIP, _, _ := net.ParseCIDR(addr.String()) + // Ignore IPv4 addresses + if addrIP.To4() != nil { + continue + } + // Ignore non-link-local addresses + if !addrIP.IsLinkLocalUnicast() { + continue + } + // Join the multicast group + m.sock.JoinGroup(&iface, groupAddr) + // Try and see if we already have a TCP listener for this interface + var info *listenerInfo + if nfo, ok := m.listeners[iface.Name]; !ok || nfo.listener.Listener == nil { + // No listener was found - let's create one + listenaddr := fmt.Sprintf("[%s%%%s]:%d", addrIP, iface.Name, m.listenPort) + if li, err := m.core.ListenTCP(listenaddr); err == nil { + m.log.Debugln("Started multicasting on", iface.Name) + // Store the listener so that we can stop it later if needed + info = &listenerInfo{listener: li, time: time.Now()} + m.listeners[iface.Name] = info + } else { + m.log.Warnln("Not multicasting on", iface.Name, "due to error:", err) + } + } else { + // An existing listener was found + info = m.listeners[iface.Name] + } + // Make sure nothing above failed for some reason + if info == nil { + continue + } + if time.Since(info.time) < info.interval { + continue + } + // Get the listener details and construct the multicast beacon + lladdr := info.listener.Listener.Addr().String() + if a, err := net.ResolveTCPAddr("tcp6", lladdr); err == nil { + a.Zone = "" + destAddr.Zone = iface.Name + msg := []byte(a.String()) + m.sock.WriteTo(msg, nil, destAddr) + } + if info.interval.Seconds() < 15 { + info.interval += time.Second + } + break + } + } + m.announcer = time.AfterFunc(time.Second, func() { + m.Act(m, m.announce) + }) } func (m *Multicast) listen() { diff --git a/src/multicast/multicast_darwin.go b/src/multicast/multicast_darwin.go index c88b4a81..4cfef9e9 100644 --- a/src/multicast/multicast_darwin.go +++ b/src/multicast/multicast_darwin.go @@ -32,21 +32,16 @@ import ( var awdlGoroutineStarted bool func (m *Multicast) multicastStarted() { - if awdlGoroutineStarted { - return - } - awdlGoroutineStarted = true - for { - C.StopAWDLBrowsing() - for intf := range m.Interfaces() { - if intf == "awdl0" { - m.log.Infoln("Multicast discovery is using AWDL discovery") - C.StartAWDLBrowsing() - break - } + C.StopAWDLBrowsing() + for intf := range m.Interfaces() { + if intf == "awdl0" { + C.StartAWDLBrowsing() + break } - time.Sleep(time.Minute) } + m.platformhandler = time.AfterFunc(time.Minute, func() { + m.Act(m, m.multicastStarted) + }) } func (m *Multicast) multicastReuse(network string, address string, c syscall.RawConn) error { diff --git a/src/tuntap/ckr.go b/src/tuntap/ckr.go index ad8c89d4..9af3564a 100644 --- a/src/tuntap/ckr.go +++ b/src/tuntap/ckr.go @@ -20,7 +20,6 @@ import ( type cryptokey struct { tun *TunAdapter enabled atomic.Value // bool - reconfigure chan chan error ipv4remotes []cryptokey_route ipv6remotes []cryptokey_route ipv4cache map[address.Address]cryptokey_route @@ -40,25 +39,11 @@ type cryptokey_route struct { // Initialise crypto-key routing. This must be done before any other CKR calls. 
func (c *cryptokey) init(tun *TunAdapter) { c.tun = tun - c.reconfigure = make(chan chan error, 1) - go func() { - for { - e := <-c.reconfigure - e <- nil - } - }() - - c.tun.log.Debugln("Configuring CKR...") - if err := c.configure(); err != nil { - c.tun.log.Errorln("CKR configuration failed:", err) - } else { - c.tun.log.Debugln("CKR configured") - } + c.configure() } -// Configure the CKR routes - this must only ever be called from the router -// goroutine, e.g. through router.doAdmin -func (c *cryptokey) configure() error { +// Configure the CKR routes. This should only ever be run by the TUN/TAP actor. +func (c *cryptokey) configure() { current := c.tun.config.GetCurrent() // Set enabled/disabled state @@ -73,14 +58,14 @@ func (c *cryptokey) configure() error { // Add IPv6 routes for ipv6, pubkey := range current.TunnelRouting.IPv6RemoteSubnets { if err := c.addRemoteSubnet(ipv6, pubkey); err != nil { - return err + c.tun.log.Errorln("Error adding CKR IPv6 remote subnet:", err) } } // Add IPv4 routes for ipv4, pubkey := range current.TunnelRouting.IPv4RemoteSubnets { if err := c.addRemoteSubnet(ipv4, pubkey); err != nil { - return err + c.tun.log.Errorln("Error adding CKR IPv4 remote subnet:", err) } } @@ -94,7 +79,7 @@ func (c *cryptokey) configure() error { c.ipv6locals = make([]net.IPNet, 0) for _, source := range current.TunnelRouting.IPv6LocalSubnets { if err := c.addLocalSubnet(source); err != nil { - return err + c.tun.log.Errorln("Error adding CKR IPv6 local subnet:", err) } } @@ -102,7 +87,7 @@ func (c *cryptokey) configure() error { c.ipv4locals = make([]net.IPNet, 0) for _, source := range current.TunnelRouting.IPv4LocalSubnets { if err := c.addLocalSubnet(source); err != nil { - return err + c.tun.log.Errorln("Error adding CKR IPv4 local subnet:", err) } } @@ -111,8 +96,6 @@ func (c *cryptokey) configure() error { c.ipv4cache = make(map[address.Address]cryptokey_route, 0) c.ipv6cache = make(map[address.Address]cryptokey_route, 0) c.mutexcaches.Unlock() - - return nil } // Enable or disable crypto-key routing. @@ -182,19 +165,19 @@ func (c *cryptokey) addLocalSubnet(cidr string) error { } else if prefixsize == net.IPv4len*8 { routingsources = &c.ipv4locals } else { - return errors.New("Unexpected prefix size") + return errors.New("unexpected prefix size") } // Check if we already have this CIDR for _, subnet := range *routingsources { if subnet.String() == ipnet.String() { - return errors.New("Source subnet already configured") + return errors.New("local subnet already configured") } } // Add the source subnet *routingsources = append(*routingsources, *ipnet) - c.tun.log.Infoln("Added CKR source subnet", cidr) + c.tun.log.Infoln("Added CKR local subnet", cidr) return nil } @@ -227,7 +210,7 @@ func (c *cryptokey) addRemoteSubnet(cidr string, dest string) error { routingtable = &c.ipv4remotes routingcache = &c.ipv4cache } else { - return errors.New("Unexpected prefix size") + return errors.New("unexpected prefix size") } // Is the route an Yggdrasil destination? @@ -236,19 +219,19 @@ func (c *cryptokey) addRemoteSubnet(cidr string, dest string) error { copy(addr[:], ipaddr) copy(snet[:], ipnet.IP) if addr.IsValid() || snet.IsValid() { - return errors.New("Can't specify Yggdrasil destination as crypto-key route") + return errors.New("can't specify Yggdrasil destination as crypto-key route") } // Do we already have a route for this subnet?
for _, route := range *routingtable { if route.subnet.String() == ipnet.String() { - return errors.New(fmt.Sprintf("Route already exists for %s", cidr)) + return fmt.Errorf("remote subnet already exists for %s", cidr) } } // Decode the public key if bpk, err := hex.DecodeString(dest); err != nil { return err } else if len(bpk) != crypto.BoxPubKeyLen { - return errors.New(fmt.Sprintf("Incorrect key length for %s", dest)) + return fmt.Errorf("incorrect key length for %s", dest) } else { // Add the new crypto-key route var key crypto.BoxPubKey @@ -271,7 +254,7 @@ func (c *cryptokey) addRemoteSubnet(cidr string, dest string) error { delete(*routingcache, k) } - c.tun.log.Infoln("Added CKR destination subnet", cidr) + c.tun.log.Infoln("Added CKR remote subnet", cidr) return nil } } @@ -285,7 +268,7 @@ func (c *cryptokey) getPublicKeyForAddress(addr address.Address, addrlen int) (c // Check if the address is a valid Yggdrasil address - if so it // is exempt from all CKR checking if addr.IsValid() { - return crypto.BoxPubKey{}, errors.New("Cannot look up CKR for Yggdrasil addresses") + return crypto.BoxPubKey{}, errors.New("cannot look up CKR for Yggdrasil addresses") } // Build our references to the routing table and cache @@ -298,7 +281,7 @@ func (c *cryptokey) getPublicKeyForAddress(addr address.Address, addrlen int) (c } else if addrlen == net.IPv4len { routingcache = &c.ipv4cache } else { - return crypto.BoxPubKey{}, errors.New("Unexpected prefix size") + return crypto.BoxPubKey{}, errors.New("unexpected prefix size") } // Check if there's a cache entry for this addr @@ -318,7 +301,7 @@ func (c *cryptokey) getPublicKeyForAddress(addr address.Address, addrlen int) (c } else if addrlen == net.IPv4len { routingtable = &c.ipv4remotes } else { - return crypto.BoxPubKey{}, errors.New("Unexpected prefix size") + return crypto.BoxPubKey{}, errors.New("unexpected prefix size") } // No cache was found - start by converting the address into a net.IP @@ -379,18 +362,18 @@ func (c *cryptokey) removeLocalSubnet(cidr string) error { } else if prefixsize == net.IPv4len*8 { routingsources = &c.ipv4locals } else { - return errors.New("Unexpected prefix size") + return errors.New("unexpected prefix size") } // Check if we already have this CIDR for idx, subnet := range *routingsources { if subnet.String() == ipnet.String() { *routingsources = append((*routingsources)[:idx], (*routingsources)[idx+1:]...) 
- c.tun.log.Infoln("Removed CKR source subnet", cidr) + c.tun.log.Infoln("Removed CKR local subnet", cidr) return nil } } - return errors.New("Source subnet not found") + return errors.New("local subnet not found") } // Removes a destination route for the given CIDR to be tunnelled to the node @@ -422,7 +405,7 @@ func (c *cryptokey) removeRemoteSubnet(cidr string, dest string) error { routingtable = &c.ipv4remotes routingcache = &c.ipv4cache } else { - return errors.New("Unexpected prefix size") + return errors.New("unexpected prefix size") } // Decode the public key @@ -430,7 +413,7 @@ func (c *cryptokey) removeRemoteSubnet(cidr string, dest string) error { if err != nil { return err } else if len(bpk) != crypto.BoxPubKeyLen { - return errors.New(fmt.Sprintf("Incorrect key length for %s", dest)) + return fmt.Errorf("incorrect key length for %s", dest) } netStr := ipnet.String() @@ -440,9 +423,9 @@ func (c *cryptokey) removeRemoteSubnet(cidr string, dest string) error { for k := range *routingcache { delete(*routingcache, k) } - c.tun.log.Infof("Removed CKR destination subnet %s via %s\n", cidr, dest) + c.tun.log.Infof("Removed CKR remote subnet %s via %s\n", cidr, dest) return nil } } - return errors.New(fmt.Sprintf("Route does not exists for %s", cidr)) + return fmt.Errorf("route does not exist for %s", cidr) } diff --git a/src/tuntap/conn.go b/src/tuntap/conn.go index 3fb7a544..31875d94 100644 --- a/src/tuntap/conn.go +++ b/src/tuntap/conn.go @@ -5,6 +5,7 @@ import ( "errors" "time" + "github.com/Arceliar/phony" "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" @@ -16,22 +17,20 @@ import ( const tunConnTimeout = 2 * time.Minute type tunConn struct { + phony.Inbox tun *TunAdapter conn *yggdrasil.Conn addr address.Address snet address.Subnet - send chan []byte stop chan struct{} - alive chan struct{} + alive *time.Timer // From calling time.AfterFunc } func (s *tunConn) close() { - s.tun.mutex.Lock() - defer s.tun.mutex.Unlock() - s._close_nomutex() + s.tun.Act(s, s._close_from_tun) } -func (s *tunConn) _close_nomutex() { +func (s *tunConn) _close_from_tun() { s.conn.Close() delete(s.tun.addrToConn, s.addr) delete(s.tun.subnetToConn, s.snet) @@ -39,235 +38,197 @@ func (s *tunConn) close() { defer func() { recover() }() close(s.stop) // Closes reader/writer goroutines }() - func() { - defer func() { recover() }() - close(s.alive) // Closes timeout goroutine - }() } -func (s *tunConn) reader() (err error) { +func (s *tunConn) _read(bs []byte) (err error) { select { - case _, ok := <-s.stop: - if !ok { - return errors.New("session was already closed") - } + case <-s.stop: + err = errors.New("session was already closed") + util.PutBytes(bs) + return default: } - s.tun.log.Debugln("Starting conn reader for", s.conn.String()) - defer s.tun.log.Debugln("Stopping conn reader for", s.conn.String()) - for { - select { - case <-s.stop: - return nil - default: + if len(bs) == 0 { + err = errors.New("read packet with 0 size") + util.PutBytes(bs) + return + } + ipv4 := len(bs) > 20 && bs[0]&0xf0 == 0x40 + ipv6 := len(bs) > 40 && bs[0]&0xf0 == 0x60 + isCGA := true + // Check source addresses + switch { + case ipv6 && bs[8] == 0x02 && bytes.Equal(s.addr[:16], bs[8:24]): // source + case ipv6 && bs[8] == 0x03 && bytes.Equal(s.snet[:8], bs[8:16]): // source + default: + isCGA = false + } + // Check destination addresses + switch { + case ipv6 && bs[24] == 0x02 && 
bytes.Equal(s.tun.addr[:16], bs[24:40]): // destination + case ipv6 && bs[24] == 0x03 && bytes.Equal(s.tun.subnet[:8], bs[24:32]): // destination + default: + isCGA = false + } + // Decide how to handle the packet + var skip bool + switch { + case isCGA: // Allowed + case s.tun.ckr.isEnabled() && (ipv4 || ipv6): + var srcAddr address.Address + var dstAddr address.Address + var addrlen int + if ipv4 { + copy(srcAddr[:], bs[12:16]) + copy(dstAddr[:], bs[16:20]) + addrlen = 4 } - var bs []byte - if bs, err = s.conn.ReadNoCopy(); err != nil { - if e, eok := err.(yggdrasil.ConnError); eok && !e.Temporary() { - if e.Closed() { - s.tun.log.Debugln(s.conn.String(), "TUN/TAP conn read debug:", err) - } else { - s.tun.log.Errorln(s.conn.String(), "TUN/TAP conn read error:", err) - } - return e - } - } else if len(bs) > 0 { - ipv4 := len(bs) > 20 && bs[0]&0xf0 == 0x40 - ipv6 := len(bs) > 40 && bs[0]&0xf0 == 0x60 - isCGA := true - // Check source addresses - switch { - case ipv6 && bs[8] == 0x02 && bytes.Equal(s.addr[:16], bs[8:24]): // source - case ipv6 && bs[8] == 0x03 && bytes.Equal(s.snet[:8], bs[8:16]): // source - default: - isCGA = false - } - // Check destiantion addresses - switch { - case ipv6 && bs[24] == 0x02 && bytes.Equal(s.tun.addr[:16], bs[24:40]): // destination - case ipv6 && bs[24] == 0x03 && bytes.Equal(s.tun.subnet[:8], bs[24:32]): // destination - default: - isCGA = false - } - // Decide how to handle the packet - var skip bool - switch { - case isCGA: // Allowed - case s.tun.ckr.isEnabled() && (ipv4 || ipv6): - var srcAddr address.Address - var dstAddr address.Address - var addrlen int - if ipv4 { - copy(srcAddr[:], bs[12:16]) - copy(dstAddr[:], bs[16:20]) - addrlen = 4 - } - if ipv6 { - copy(srcAddr[:], bs[8:24]) - copy(dstAddr[:], bs[24:40]) - addrlen = 16 - } - if !s.tun.ckr.isValidLocalAddress(dstAddr, addrlen) { - // The destination address isn't in our CKR allowed range - skip = true - } else if key, err := s.tun.ckr.getPublicKeyForAddress(srcAddr, addrlen); err == nil { - srcNodeID := crypto.GetNodeID(&key) - if s.conn.RemoteAddr() == *srcNodeID { - // This is the one allowed CKR case, where source and destination addresses are both good - } else { - // The CKR key associated with this address doesn't match the sender's NodeID - skip = true - } - } else { - // We have no CKR route for this source address - skip = true - } - default: + if ipv6 { + copy(srcAddr[:], bs[8:24]) + copy(dstAddr[:], bs[24:40]) + addrlen = 16 + } + if !s.tun.ckr.isValidLocalAddress(dstAddr, addrlen) { + // The destination address isn't in our CKR allowed range + skip = true + } else if key, err := s.tun.ckr.getPublicKeyForAddress(srcAddr, addrlen); err == nil { + srcNodeID := crypto.GetNodeID(&key) + if s.conn.RemoteAddr() == *srcNodeID { + // This is the one allowed CKR case, where source and destination addresses are both good + } else { + // The CKR key associated with this address doesn't match the sender's NodeID skip = true } - if skip { - util.PutBytes(bs) - continue - } - s.tun.send <- bs - s.stillAlive() } else { - util.PutBytes(bs) - } - } -} - -func (s *tunConn) writer() error { - select { - case _, ok := <-s.stop: - if !ok { - return errors.New("session was already closed") + // We have no CKR route for this source address + skip = true } default: + skip = true } - s.tun.log.Debugln("Starting conn writer for", s.conn.String()) - defer s.tun.log.Debugln("Stopping conn writer for", s.conn.String()) - for { - select { - case <-s.stop: - return nil - case bs, ok := <-s.send: - if !ok { - 
return errors.New("send closed") - } - v4 := len(bs) > 20 && bs[0]&0xf0 == 0x40 - v6 := len(bs) > 40 && bs[0]&0xf0 == 0x60 - isCGA := true - // Check source addresses - switch { - case v6 && bs[8] == 0x02 && bytes.Equal(s.tun.addr[:16], bs[8:24]): // source - case v6 && bs[8] == 0x03 && bytes.Equal(s.tun.subnet[:8], bs[8:16]): // source - default: - isCGA = false - } - // Check destiantion addresses - switch { - case v6 && bs[24] == 0x02 && bytes.Equal(s.addr[:16], bs[24:40]): // destination - case v6 && bs[24] == 0x03 && bytes.Equal(s.snet[:8], bs[24:32]): // destination - default: - isCGA = false - } - // Decide how to handle the packet - var skip bool - switch { - case isCGA: // Allowed - case s.tun.ckr.isEnabled() && (v4 || v6): - var srcAddr address.Address - var dstAddr address.Address - var addrlen int - if v4 { - copy(srcAddr[:], bs[12:16]) - copy(dstAddr[:], bs[16:20]) - addrlen = 4 - } - if v6 { - copy(srcAddr[:], bs[8:24]) - copy(dstAddr[:], bs[24:40]) - addrlen = 16 - } - if !s.tun.ckr.isValidLocalAddress(srcAddr, addrlen) { - // The source address isn't in our CKR allowed range - skip = true - } else if key, err := s.tun.ckr.getPublicKeyForAddress(dstAddr, addrlen); err == nil { - dstNodeID := crypto.GetNodeID(&key) - if s.conn.RemoteAddr() == *dstNodeID { - // This is the one allowed CKR case, where source and destination addresses are both good - } else { - // The CKR key associated with this address doesn't match the sender's NodeID - skip = true - } - } else { - // We have no CKR route for this destination address... why do we have the packet in the first place? - skip = true - } - default: + if skip { + err = errors.New("address not allowed") + util.PutBytes(bs) + return + } + s.tun.writer.writeFrom(s, bs) + s.stillAlive() + return +} + +func (s *tunConn) writeFrom(from phony.Actor, bs []byte) { + s.Act(from, func() { + s._write(bs) + }) +} + +func (s *tunConn) _write(bs []byte) (err error) { + select { + case <-s.stop: + err = errors.New("session was already closed") + util.PutBytes(bs) + return + default: + } + v4 := len(bs) > 20 && bs[0]&0xf0 == 0x40 + v6 := len(bs) > 40 && bs[0]&0xf0 == 0x60 + isCGA := true + // Check source addresses + switch { + case v6 && bs[8] == 0x02 && bytes.Equal(s.tun.addr[:16], bs[8:24]): // source + case v6 && bs[8] == 0x03 && bytes.Equal(s.tun.subnet[:8], bs[8:16]): // source + default: + isCGA = false + } + // Check destiantion addresses + switch { + case v6 && bs[24] == 0x02 && bytes.Equal(s.addr[:16], bs[24:40]): // destination + case v6 && bs[24] == 0x03 && bytes.Equal(s.snet[:8], bs[24:32]): // destination + default: + isCGA = false + } + // Decide how to handle the packet + var skip bool + switch { + case isCGA: // Allowed + case s.tun.ckr.isEnabled() && (v4 || v6): + var srcAddr address.Address + var dstAddr address.Address + var addrlen int + if v4 { + copy(srcAddr[:], bs[12:16]) + copy(dstAddr[:], bs[16:20]) + addrlen = 4 + } + if v6 { + copy(srcAddr[:], bs[8:24]) + copy(dstAddr[:], bs[24:40]) + addrlen = 16 + } + if !s.tun.ckr.isValidLocalAddress(srcAddr, addrlen) { + // The source address isn't in our CKR allowed range + skip = true + } else if key, err := s.tun.ckr.getPublicKeyForAddress(dstAddr, addrlen); err == nil { + dstNodeID := crypto.GetNodeID(&key) + if s.conn.RemoteAddr() == *dstNodeID { + // This is the one allowed CKR case, where source and destination addresses are both good + } else { + // The CKR key associated with this address doesn't match the sender's NodeID skip = true } - if skip { - util.PutBytes(bs) - 
continue - } - msg := yggdrasil.FlowKeyMessage{ - FlowKey: util.GetFlowKey(bs), - Message: bs, - } - if err := s.conn.WriteNoCopy(msg); err != nil { - if e, eok := err.(yggdrasil.ConnError); !eok { - if e.Closed() { - s.tun.log.Debugln(s.conn.String(), "TUN/TAP generic write debug:", err) - } else { - s.tun.log.Errorln(s.conn.String(), "TUN/TAP generic write error:", err) - } - } else if e.PacketTooBig() { - // TODO: This currently isn't aware of IPv4 for CKR - ptb := &icmp.PacketTooBig{ - MTU: int(e.PacketMaximumSize()), - Data: bs[:900], - } - if packet, err := CreateICMPv6(bs[8:24], bs[24:40], ipv6.ICMPTypePacketTooBig, 0, ptb); err == nil { - s.tun.send <- packet - } + } else { + // We have no CKR route for this destination address... why do we have the packet in the first place? + skip = true + } + default: + skip = true + } + if skip { + err = errors.New("address not allowed") + util.PutBytes(bs) + return + } + msg := yggdrasil.FlowKeyMessage{ + FlowKey: util.GetFlowKey(bs), + Message: bs, + } + s.conn.WriteFrom(s, msg, func(err error) { + if err == nil { + // No point in wasting resources to send back an error if there was none + return + } + s.Act(s.conn, func() { + if e, eok := err.(yggdrasil.ConnError); !eok { + if e.Closed() { + s.tun.log.Debugln(s.conn.String(), "TUN/TAP generic write debug:", err) } else { - if e.Closed() { - s.tun.log.Debugln(s.conn.String(), "TUN/TAP conn write debug:", err) - } else { - s.tun.log.Errorln(s.conn.String(), "TUN/TAP conn write error:", err) - } + s.tun.log.Errorln(s.conn.String(), "TUN/TAP generic write error:", err) + } + } else if e.PacketTooBig() { + // TODO: This currently isn't aware of IPv4 for CKR + ptb := &icmp.PacketTooBig{ + MTU: int(e.PacketMaximumSize()), + Data: bs[:900], + } + if packet, err := CreateICMPv6(bs[8:24], bs[24:40], ipv6.ICMPTypePacketTooBig, 0, ptb); err == nil { + s.tun.writer.writeFrom(s, packet) } } else { - s.stillAlive() + if e.Closed() { + s.tun.log.Debugln(s.conn.String(), "TUN/TAP conn write debug:", err) + } else { + s.tun.log.Errorln(s.conn.String(), "TUN/TAP conn write error:", err) + } } - } - } + }) + }) + s.stillAlive() + return } func (s *tunConn) stillAlive() { - defer func() { recover() }() - select { - case s.alive <- struct{}{}: - default: - } -} - -func (s *tunConn) checkForTimeouts() error { - timer := time.NewTimer(tunConnTimeout) - defer util.TimerStop(timer) - defer s.close() - for { - select { - case _, ok := <-s.alive: - if !ok { - return errors.New("connection closed") - } - util.TimerStop(timer) - timer.Reset(tunConnTimeout) - case <-timer.C: - return errors.New("timed out") - } + if s.alive != nil { + s.alive.Stop() } + s.alive = time.AfterFunc(tunConnTimeout, s.close) } diff --git a/src/tuntap/iface.go b/src/tuntap/iface.go index 35657a9f..0da99631 100644 --- a/src/tuntap/iface.go +++ b/src/tuntap/iface.go @@ -9,264 +9,276 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) -func (tun *TunAdapter) writer() error { - var w int +type tunWriter struct { + phony.Inbox + tun *TunAdapter +} + +func (w *tunWriter) writeFrom(from phony.Actor, b []byte) { + w.Act(from, func() { + w._write(b) + }) +} + +// write is pretty loose with the memory safety rules, e.g. 
it assumes it can read w.tun.iface.IsTap() safely +func (w *tunWriter) _write(b []byte) { + var written int var err error - for { - b := <-tun.send - n := len(b) - if n == 0 { - continue + n := len(b) + if n == 0 { + return + } + if w.tun.iface.IsTAP() { + sendndp := func(dstAddr address.Address) { + neigh, known := w.tun.icmpv6.getNeighbor(dstAddr) + known = known && (time.Since(neigh.lastsolicitation).Seconds() < 30) + if !known { + w.tun.icmpv6.Solicit(dstAddr) + } } - if tun.iface.IsTAP() { - sendndp := func(dstAddr address.Address) { - neigh, known := tun.icmpv6.getNeighbor(dstAddr) - known = known && (time.Since(neigh.lastsolicitation).Seconds() < 30) - if !known { - tun.icmpv6.Solicit(dstAddr) - } - } - peermac := net.HardwareAddr{0x00, 0x00, 0x00, 0x00, 0x00, 0x00} - var dstAddr address.Address - var peerknown bool - if b[0]&0xf0 == 0x40 { - dstAddr = tun.addr - } else if b[0]&0xf0 == 0x60 { - if !bytes.Equal(tun.addr[:16], dstAddr[:16]) && !bytes.Equal(tun.subnet[:8], dstAddr[:8]) { - dstAddr = tun.addr - } - } - if neighbor, ok := tun.icmpv6.getNeighbor(dstAddr); ok && neighbor.learned { - // If we've learned the MAC of a 300::/7 address, for example, or a CKR - // address, use the MAC address of that - peermac = neighbor.mac - peerknown = true - } else if neighbor, ok := tun.icmpv6.getNeighbor(tun.addr); ok && neighbor.learned { - // Otherwise send directly to the MAC address of the host if that's - // known instead - peermac = neighbor.mac - peerknown = true - } else { - // Nothing has been discovered, try to discover the destination - sendndp(tun.addr) - } - if peerknown { - var proto ethernet.Ethertype - switch { - case b[0]&0xf0 == 0x60: - proto = ethernet.IPv6 - case b[0]&0xf0 == 0x40: - proto = ethernet.IPv4 - } - var frame ethernet.Frame - frame.Prepare( - peermac[:6], // Destination MAC address - tun.icmpv6.mymac[:6], // Source MAC address - ethernet.NotTagged, // VLAN tagging - proto, // Ethertype - len(b)) // Payload length - copy(frame[tun_ETHER_HEADER_LENGTH:], b[:n]) - n += tun_ETHER_HEADER_LENGTH - w, err = tun.iface.Write(frame[:n]) - } else { - tun.log.Errorln("TUN/TAP iface write error: no peer MAC known for", net.IP(dstAddr[:]).String(), "- dropping packet") + peermac := net.HardwareAddr{0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + var dstAddr address.Address + var peerknown bool + if b[0]&0xf0 == 0x40 { + dstAddr = w.tun.addr + } else if b[0]&0xf0 == 0x60 { + if !bytes.Equal(w.tun.addr[:16], dstAddr[:16]) && !bytes.Equal(w.tun.subnet[:8], dstAddr[:8]) { + dstAddr = w.tun.addr } + } + if neighbor, ok := w.tun.icmpv6.getNeighbor(dstAddr); ok && neighbor.learned { + // If we've learned the MAC of a 300::/7 address, for example, or a CKR + // address, use the MAC address of that + peermac = neighbor.mac + peerknown = true + } else if neighbor, ok := w.tun.icmpv6.getNeighbor(w.tun.addr); ok && neighbor.learned { + // Otherwise send directly to the MAC address of the host if that's + // known instead + peermac = neighbor.mac + peerknown = true } else { - w, err = tun.iface.Write(b[:n]) - util.PutBytes(b) + // Nothing has been discovered, try to discover the destination + sendndp(w.tun.addr) } - if err != nil { - if !tun.isOpen { - return err + if peerknown { + var proto ethernet.Ethertype + switch { + case b[0]&0xf0 == 0x60: + proto = ethernet.IPv6 + case b[0]&0xf0 == 0x40: + proto = ethernet.IPv4 } - tun.log.Errorln("TUN/TAP iface write error:", err) - continue - } - if w != n { - tun.log.Errorln("TUN/TAP iface write mismatch:", w, "bytes written vs", n, "bytes given") - 
continue + var frame ethernet.Frame + frame.Prepare( + peermac[:6], // Destination MAC address + w.tun.icmpv6.mymac[:6], // Source MAC address + ethernet.NotTagged, // VLAN tagging + proto, // Ethertype + len(b)) // Payload length + copy(frame[tun_ETHER_HEADER_LENGTH:], b[:n]) + n += tun_ETHER_HEADER_LENGTH + written, err = w.tun.iface.Write(frame[:n]) + } else { + w.tun.log.Errorln("TUN/TAP iface write error: no peer MAC known for", net.IP(dstAddr[:]).String(), "- dropping packet") } + } else { + written, err = w.tun.iface.Write(b[:n]) + util.PutBytes(b) + } + if err != nil { + w.tun.Act(w, func() { + if !w.tun.isOpen { + w.tun.log.Errorln("TUN/TAP iface write error:", err) + } + }) + } + if written != n { + w.tun.log.Errorln("TUN/TAP iface write mismatch:", written, "bytes written vs", n, "bytes given") } } -// Run in a separate goroutine by the reader -// Does all of the per-packet ICMP checks, passes packets to the right Conn worker -func (tun *TunAdapter) readerPacketHandler(ch chan []byte) { - for recvd := range ch { - // If it's a TAP adapter, update the buffer slice so that we no longer - // include the ethernet headers - offset := 0 - if tun.iface.IsTAP() { - // Set our offset to beyond the ethernet headers - offset = tun_ETHER_HEADER_LENGTH - // Check first of all that we can go beyond the ethernet headers - if len(recvd) <= offset { - continue - } +type tunReader struct { + phony.Inbox + tun *TunAdapter +} + +func (r *tunReader) _read() { + // Get a slice to store the packet in + recvd := util.ResizeBytes(util.GetBytes(), 65535+tun_ETHER_HEADER_LENGTH) + // Wait for a packet to be delivered to us through the TUN/TAP adapter + n, err := r.tun.iface.Read(recvd) + if n <= 0 { + util.PutBytes(recvd) + } else { + r.tun.handlePacketFrom(r, recvd[:n], err) + } + if err == nil { + // Now read again + r.Act(nil, r._read) + } +} + +func (tun *TunAdapter) handlePacketFrom(from phony.Actor, packet []byte, err error) { + tun.Act(from, func() { + tun._handlePacket(packet, err) + }) +} + +// does the work of reading a packet and sending it to the correct tunConn +func (tun *TunAdapter) _handlePacket(recvd []byte, err error) { + if err != nil { + tun.log.Errorln("TUN/TAP iface read error:", err) + return + } + // If it's a TAP adapter, update the buffer slice so that we no longer + // include the ethernet headers + offset := 0 + if tun.iface.IsTAP() { + // Set our offset to beyond the ethernet headers + offset = tun_ETHER_HEADER_LENGTH + // Check first of all that we can go beyond the ethernet headers + if len(recvd) <= offset { + return } - // Offset the buffer from now on so that we can ignore ethernet frames if - // they are present - bs := recvd[offset:] - // If we detect an ICMP packet then hand it to the ICMPv6 module - if bs[6] == 58 { - // Found an ICMPv6 packet - we need to make sure to give ICMPv6 the full - // Ethernet frame rather than just the IPv6 packet as this is needed for - // NDP to work correctly - if err := tun.icmpv6.ParsePacket(recvd); err == nil { - // We acted on the packet in the ICMPv6 module so don't forward or do - // anything else with it - continue - } + } + // Offset the buffer from now on so that we can ignore ethernet frames if + // they are present + bs := recvd[offset:] + // Check if the packet is long enough to detect if it's an ICMP packet or not + if len(bs) < 7 { + tun.log.Traceln("TUN/TAP iface read undersized unknown packet, length:", len(bs)) + return + } + // If we detect an ICMP packet then hand it to the ICMPv6 module + if bs[6] == 58 { + // Found 
an ICMPv6 packet - we need to make sure to give ICMPv6 the full + // Ethernet frame rather than just the IPv6 packet as this is needed for + // NDP to work correctly + if err := tun.icmpv6.ParsePacket(recvd); err == nil { + // We acted on the packet in the ICMPv6 module so don't forward or do + // anything else with it + return } - if offset != 0 { - // Shift forward to avoid leaking bytes off the front of the slice when we eventually store it - bs = append(recvd[:0], bs...) + } + if offset != 0 { + // Shift forward to avoid leaking bytes off the front of the slice when we eventually store it + bs = append(recvd[:0], bs...) + } + // From the IP header, work out what our source and destination addresses + // and node IDs are. We will need these in order to work out where to send + // the packet + var dstAddr address.Address + var dstSnet address.Subnet + var addrlen int + n := len(bs) + // Check the IP protocol - if it doesn't match then we drop the packet and + // do nothing with it + if bs[0]&0xf0 == 0x60 { + // Check if we have a fully-sized IPv6 header + if len(bs) < 40 { + tun.log.Traceln("TUN/TAP iface read undersized ipv6 packet, length:", len(bs)) + return } - // From the IP header, work out what our source and destination addresses - // and node IDs are. We will need these in order to work out where to send - // the packet - var dstAddr address.Address - var dstSnet address.Subnet - var addrlen int - n := len(bs) - // Check the IP protocol - if it doesn't match then we drop the packet and - // do nothing with it - if bs[0]&0xf0 == 0x60 { - // Check if we have a fully-sized IPv6 header - if len(bs) < 40 { - continue - } - // Check the packet size - if n-tun_IPv6_HEADER_LENGTH != 256*int(bs[4])+int(bs[5]) { - continue - } - // IPv6 address - addrlen = 16 - copy(dstAddr[:addrlen], bs[24:]) - copy(dstSnet[:addrlen/2], bs[24:]) - } else if bs[0]&0xf0 == 0x40 { - // Check if we have a fully-sized IPv4 header - if len(bs) < 20 { - continue - } - // Check the packet size - if n != 256*int(bs[2])+int(bs[3]) { - continue - } - // IPv4 address - addrlen = 4 - copy(dstAddr[:addrlen], bs[16:]) - } else { - // Unknown address length or protocol, so drop the packet and ignore it - tun.log.Traceln("Unknown packet type, dropping") - continue + // Check the packet size + if n-tun_IPv6_HEADER_LENGTH != 256*int(bs[4])+int(bs[5]) { + return } - if tun.ckr.isEnabled() { - if addrlen != 16 || (!dstAddr.IsValid() && !dstSnet.IsValid()) { - if key, err := tun.ckr.getPublicKeyForAddress(dstAddr, addrlen); err == nil { - // A public key was found, get the node ID for the search - dstNodeID := crypto.GetNodeID(&key) - dstAddr = *address.AddrForNodeID(dstNodeID) - dstSnet = *address.SubnetForNodeID(dstNodeID) - addrlen = 16 - } - } + // IPv6 address + addrlen = 16 + copy(dstAddr[:addrlen], bs[24:]) + copy(dstSnet[:addrlen/2], bs[24:]) + } else if bs[0]&0xf0 == 0x40 { + // Check if we have a fully-sized IPv4 header + if len(bs) < 20 { + tun.log.Traceln("TUN/TAP iface read undersized ipv4 packet, length:", len(bs)) + return } + // Check the packet size + if n != 256*int(bs[2])+int(bs[3]) { + return + } + // IPv4 address + addrlen = 4 + copy(dstAddr[:addrlen], bs[16:]) + } else { + // Unknown address length or protocol, so drop the packet and ignore it + tun.log.Traceln("Unknown packet type, dropping") + return + } + if tun.ckr.isEnabled() { if addrlen != 16 || (!dstAddr.IsValid() && !dstSnet.IsValid()) { - // Couldn't find this node's ygg IP - continue - } - // Do we have an active connection for this node 
address? - var dstNodeID, dstNodeIDMask *crypto.NodeID - tun.mutex.RLock() - session, isIn := tun.addrToConn[dstAddr] - if !isIn || session == nil { - session, isIn = tun.subnetToConn[dstSnet] - if !isIn || session == nil { - // Neither an address nor a subnet mapping matched, therefore populate - // the node ID and mask to commence a search - if dstAddr.IsValid() { - dstNodeID, dstNodeIDMask = dstAddr.GetNodeIDandMask() - } else { - dstNodeID, dstNodeIDMask = dstSnet.GetNodeIDandMask() - } + if key, err := tun.ckr.getPublicKeyForAddress(dstAddr, addrlen); err == nil { + // A public key was found, get the node ID for the search + dstNodeID := crypto.GetNodeID(&key) + dstAddr = *address.AddrForNodeID(dstNodeID) + dstSnet = *address.SubnetForNodeID(dstNodeID) + addrlen = 16 } } - tun.mutex.RUnlock() - // If we don't have a connection then we should open one + } + if addrlen != 16 || (!dstAddr.IsValid() && !dstSnet.IsValid()) { + // Couldn't find this node's ygg IP + return + } + // Do we have an active connection for this node address? + var dstNodeID, dstNodeIDMask *crypto.NodeID + session, isIn := tun.addrToConn[dstAddr] + if !isIn || session == nil { + session, isIn = tun.subnetToConn[dstSnet] if !isIn || session == nil { - // Check we haven't been given empty node ID, really this shouldn't ever - // happen but just to be sure... - if dstNodeID == nil || dstNodeIDMask == nil { - panic("Given empty dstNodeID and dstNodeIDMask - this shouldn't happen") + // Neither an address nor a subnet mapping matched, therefore populate + // the node ID and mask to commence a search + if dstAddr.IsValid() { + dstNodeID, dstNodeIDMask = dstAddr.GetNodeIDandMask() + } else { + dstNodeID, dstNodeIDMask = dstSnet.GetNodeIDandMask() } - // Dial to the remote node + } + } + // If we don't have a connection then we should open one + if !isIn || session == nil { + // Check we haven't been given empty node ID, really this shouldn't ever + // happen but just to be sure... 
+ if dstNodeID == nil || dstNodeIDMask == nil { + panic("Given empty dstNodeID and dstNodeIDMask - this shouldn't happen") + } + _, known := tun.dials[*dstNodeID] + tun.dials[*dstNodeID] = append(tun.dials[*dstNodeID], bs) + for len(tun.dials[*dstNodeID]) > 32 { + util.PutBytes(tun.dials[*dstNodeID][0]) + tun.dials[*dstNodeID] = tun.dials[*dstNodeID][1:] + } + if !known { go func() { - // FIXME just spitting out a goroutine to do this is kind of ugly and means we drop packets until the dial finishes - tun.mutex.Lock() - _, known := tun.dials[*dstNodeID] - tun.dials[*dstNodeID] = append(tun.dials[*dstNodeID], bs) - for len(tun.dials[*dstNodeID]) > 32 { - util.PutBytes(tun.dials[*dstNodeID][0]) - tun.dials[*dstNodeID] = tun.dials[*dstNodeID][1:] - } - tun.mutex.Unlock() - if known { - return - } - var tc *tunConn - if conn, err := tun.dialer.DialByNodeIDandMask(dstNodeID, dstNodeIDMask); err == nil { + conn, err := tun.dialer.DialByNodeIDandMask(dstNodeID, dstNodeIDMask) + tun.Act(nil, func() { + packets := tun.dials[*dstNodeID] + delete(tun.dials, *dstNodeID) + if err != nil { + return + } // We've been given a connection so prepare the session wrapper - if tc, err = tun.wrap(conn); err != nil { + var tc *tunConn + if tc, err = tun._wrap(conn); err != nil { // Something went wrong when storing the connection, typically that // something already exists for this address or subnet tun.log.Debugln("TUN/TAP iface wrap:", err) + return } - } - tun.mutex.Lock() - packets := tun.dials[*dstNodeID] - delete(tun.dials, *dstNodeID) - tun.mutex.Unlock() - if tc != nil { for _, packet := range packets { - p := packet // Possibly required because of how range - tc.send <- p + tc.writeFrom(nil, packet) } - } + }) + return }() - // While the dial is going on we can't do much else - // continuing this iteration - skip to the next one - continue - } - // If we have a connection now, try writing to it - if isIn && session != nil { - session.send <- bs } } -} - -func (tun *TunAdapter) reader() error { - toWorker := make(chan []byte, 32) - defer close(toWorker) - go tun.readerPacketHandler(toWorker) - for { - // Get a slice to store the packet in - recvd := util.ResizeBytes(util.GetBytes(), 65535+tun_ETHER_HEADER_LENGTH) - // Wait for a packet to be delivered to us through the TUN/TAP adapter - n, err := tun.iface.Read(recvd) - if err != nil { - if !tun.isOpen { - return err - } - panic(err) - } - if n == 0 { - util.PutBytes(recvd) - continue - } - // Send the packet to the worker - toWorker <- recvd[:n] + // If we have a connection now, try writing to it + if isIn && session != nil { + session.writeFrom(tun, bs) } } diff --git a/src/tuntap/tun.go b/src/tuntap/tun.go index eef05b87..74d055ee 100644 --- a/src/tuntap/tun.go +++ b/src/tuntap/tun.go @@ -13,8 +13,10 @@ import ( "errors" "fmt" "net" - "sync" + //"sync" + + "github.com/Arceliar/phony" "github.com/gologme/log" "github.com/yggdrasil-network/water" @@ -33,19 +35,21 @@ const tun_ETHER_HEADER_LENGTH = 14 // you should pass this object to the yggdrasil.SetRouterAdapter() function // before calling yggdrasil.Start(). 
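Wiring the adapter up looks roughly like the following sketch. It assumes a running yggdrasil.Core (`core`), a populated config.NodeState (`state`) and a gologme logger; the tail of the Init signature is inferred from the hunk header below, so treat this as illustrative rather than definitive.

package example

import (
	"github.com/gologme/log"

	"github.com/yggdrasil-network/yggdrasil-go/src/config"
	"github.com/yggdrasil-network/yggdrasil-go/src/tuntap"
	"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil"
)

// startTun wires a TunAdapter to an already-running Core using the core's
// session listener and dialer, then starts it (which kicks off the reader actor).
func startTun(core *yggdrasil.Core, state *config.NodeState, logger *log.Logger) (*tuntap.TunAdapter, error) {
	listener, err := core.ConnListen() // only one ConnListener is allowed per node
	if err != nil {
		return nil, err
	}
	dialer, err := core.ConnDialer() // dialers are stateless and reusable
	if err != nil {
		return nil, err
	}
	tun := &tuntap.TunAdapter{}
	tun.Init(state, logger, listener, dialer)
	if err := tun.Start(); err != nil {
		return nil, err
	}
	return tun, nil // call tun.Stop() on shutdown
}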
type TunAdapter struct { - config *config.NodeState - log *log.Logger - reconfigure chan chan error - listener *yggdrasil.Listener - dialer *yggdrasil.Dialer - addr address.Address - subnet address.Subnet - ckr cryptokey - icmpv6 ICMPv6 - mtu int - iface *water.Interface - send chan []byte - mutex sync.RWMutex // Protects the below + writer tunWriter + reader tunReader + config *config.NodeState + log *log.Logger + reconfigure chan chan error + listener *yggdrasil.Listener + dialer *yggdrasil.Dialer + addr address.Address + subnet address.Subnet + ckr cryptokey + icmpv6 ICMPv6 + mtu int + iface *water.Interface + phony.Inbox // Currently only used for _handlePacket from the reader, TODO: all the stuff that currently needs a mutex below + //mutex sync.RWMutex // Protects the below addrToConn map[address.Address]*tunConn subnetToConn map[address.Subnet]*tunConn dials map[crypto.NodeID][][]byte // Buffer of packets to send after dialing finishes @@ -114,11 +118,21 @@ func (tun *TunAdapter) Init(config *config.NodeState, log *log.Logger, listener tun.addrToConn = make(map[address.Address]*tunConn) tun.subnetToConn = make(map[address.Subnet]*tunConn) tun.dials = make(map[crypto.NodeID][][]byte) + tun.writer.tun = tun + tun.reader.tun = tun } // Start the setup process for the TUN/TAP adapter. If successful, starts the -// read/write goroutines to handle packets on that interface. +// reader actor to handle packets on that interface. func (tun *TunAdapter) Start() error { + var err error + phony.Block(tun, func() { + err = tun._start() + }) + return err +} + +func (tun *TunAdapter) _start() error { current := tun.config.GetCurrent() if tun.config == nil || tun.listener == nil || tun.dialer == nil { return errors.New("No configuration available to TUN/TAP") @@ -145,11 +159,8 @@ func (tun *TunAdapter) Start() error { tun.log.Debugln("Not starting TUN/TAP as ifname is none or dummy") return nil } - tun.mutex.Lock() tun.isOpen = true - tun.send = make(chan []byte, 32) // TODO: is this a sensible value? tun.reconfigure = make(chan chan error) - tun.mutex.Unlock() go func() { for { e := <-tun.reconfigure @@ -157,8 +168,7 @@ func (tun *TunAdapter) Start() error { } }() go tun.handler() - go tun.reader() - go tun.writer() + tun.reader.Act(nil, tun.reader._read) // Start the reader tun.icmpv6.Init(tun) if iftapmode { go tun.icmpv6.Solicit(tun.addr) @@ -170,10 +180,20 @@ func (tun *TunAdapter) Start() error { // Start the setup process for the TUN/TAP adapter. If successful, starts the // read/write goroutines to handle packets on that interface. func (tun *TunAdapter) Stop() error { + var err error + phony.Block(tun, func() { + err = tun._stop() + }) + return err +} + +func (tun *TunAdapter) _stop() error { tun.isOpen = false - // TODO: we have nothing that cleanly stops all the various goroutines opened // by TUN/TAP, e.g. 
readers/writers, sessions - tun.iface.Close() + if tun.iface != nil { + // Just in case we failed to start up the iface for some reason, this can apparently happen on Windows + tun.iface.Close() + } return nil } @@ -183,29 +203,11 @@ func (tun *TunAdapter) Stop() error { func (tun *TunAdapter) UpdateConfig(config *config.NodeConfig) { tun.log.Debugln("Reloading TUN/TAP configuration...") + // Replace the active configuration with the supplied one tun.config.Replace(*config) - errors := 0 - - components := []chan chan error{ - tun.reconfigure, - tun.ckr.reconfigure, - } - - for _, component := range components { - response := make(chan error) - component <- response - if err := <-response; err != nil { - tun.log.Errorln(err) - errors++ - } - } - - if errors > 0 { - tun.log.Warnln(errors, "TUN/TAP module(s) reported errors during configuration reload") - } else { - tun.log.Infoln("TUN/TAP configuration reloaded successfully") - } + // Notify children about the configuration change + tun.Act(nil, tun.ckr.configure) } func (tun *TunAdapter) handler() error { @@ -216,22 +218,22 @@ func (tun *TunAdapter) handler() error { tun.log.Errorln("TUN/TAP connection accept error:", err) return err } - if _, err := tun.wrap(conn); err != nil { - // Something went wrong when storing the connection, typically that - // something already exists for this address or subnet - tun.log.Debugln("TUN/TAP handler wrap:", err) - } + phony.Block(tun, func() { + if _, err := tun._wrap(conn); err != nil { + // Something went wrong when storing the connection, typically that + // something already exists for this address or subnet + tun.log.Debugln("TUN/TAP handler wrap:", err) + } + }) } } -func (tun *TunAdapter) wrap(conn *yggdrasil.Conn) (c *tunConn, err error) { +func (tun *TunAdapter) _wrap(conn *yggdrasil.Conn) (c *tunConn, err error) { // Prepare a session wrapper for the given connection s := tunConn{ - tun: tun, - conn: conn, - send: make(chan []byte, 32), // TODO: is this a sensible value? 
- stop: make(chan struct{}), - alive: make(chan struct{}, 1), + tun: tun, + conn: conn, + stop: make(chan struct{}), } c = &s // Get the remote address and subnet of the other side @@ -239,27 +241,28 @@ func (tun *TunAdapter) wrap(conn *yggdrasil.Conn) (c *tunConn, err error) { s.addr = *address.AddrForNodeID(&remoteNodeID) s.snet = *address.SubnetForNodeID(&remoteNodeID) // Work out if this is already a destination we already know about - tun.mutex.Lock() - defer tun.mutex.Unlock() atc, aok := tun.addrToConn[s.addr] stc, sok := tun.subnetToConn[s.snet] // If we know about a connection for this destination already then assume it // is no longer valid and close it if aok { - atc._close_nomutex() + atc._close_from_tun() err = errors.New("replaced connection for address") } else if sok { - stc._close_nomutex() + stc._close_from_tun() err = errors.New("replaced connection for subnet") } // Save the session wrapper so that we can look it up quickly next time // we receive a packet through the interface for this address tun.addrToConn[s.addr] = &s tun.subnetToConn[s.snet] = &s - // Start the connection goroutines - go s.reader() - go s.writer() - go s.checkForTimeouts() + // Set the read callback and start the timeout + conn.SetReadCallback(func(bs []byte) { + s.Act(conn, func() { + s._read(bs) + }) + }) + s.Act(nil, s.stillAlive) // Return return c, err } diff --git a/src/tuntap/tun_windows.go b/src/tuntap/tun_windows.go index a826c7ad..ea1515ff 100644 --- a/src/tuntap/tun_windows.go +++ b/src/tuntap/tun_windows.go @@ -5,6 +5,7 @@ import ( "fmt" "os/exec" "strings" + "time" water "github.com/yggdrasil-network/water" ) @@ -42,6 +43,7 @@ func (tun *TunAdapter) setup(ifname string, iftapmode bool, addr string, mtu int tun.log.Traceln(string(output)) return err } + time.Sleep(time.Second) // FIXME artifical delay to give netsh time to take effect // Bring the interface back up cmd = exec.Command("netsh", "interface", "set", "interface", iface.Name(), "admin=ENABLED") tun.log.Debugln("netsh command:", strings.Join(cmd.Args, " ")) @@ -51,6 +53,7 @@ func (tun *TunAdapter) setup(ifname string, iftapmode bool, addr string, mtu int tun.log.Traceln(string(output)) return err } + time.Sleep(time.Second) // FIXME artifical delay to give netsh time to take effect // Get a new iface iface, err = water.New(config) if err != nil { @@ -86,6 +89,7 @@ func (tun *TunAdapter) setupMTU(mtu int) error { tun.log.Traceln(string(output)) return err } + time.Sleep(time.Second) // FIXME artifical delay to give netsh time to take effect return nil } @@ -106,5 +110,6 @@ func (tun *TunAdapter) setupAddress(addr string) error { tun.log.Traceln(string(output)) return err } + time.Sleep(time.Second) // FIXME artifical delay to give netsh time to take effect return nil } diff --git a/src/util/bytes_mobile.go b/src/util/bytes_mobile.go new file mode 100644 index 00000000..f862c0cd --- /dev/null +++ b/src/util/bytes_mobile.go @@ -0,0 +1,21 @@ +//+build mobile + +package util + +import "runtime/debug" + +func init() { + debug.SetGCPercent(25) +} + +// GetBytes always returns a nil slice on mobile platforms. +func GetBytes() []byte { + return nil +} + +// PutBytes does literally nothing on mobile platforms. +// This is done rather than keeping a free list of bytes on platforms with memory constraints. +// It's needed to help keep memory usage low enough to fall under the limits set for e.g. iOS NEPacketTunnelProvider apps. 
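On non-mobile builds (see bytes_other.go below) these functions are backed by a sync.Pool, but the calling convention is identical on both build types. A minimal sketch of that convention, using only GetBytes, ResizeBytes and PutBytes from this package; the reader source and handler are placeholders.

package example

import (
	"io"

	"github.com/yggdrasil-network/yggdrasil-go/src/util"
)

// readOnePacket borrows a slice from the free list, sizes it for a read and
// hands it back once the (synchronous) handler is done with it.
func readOnePacket(src io.Reader, handle func([]byte)) error {
	buf := util.ResizeBytes(util.GetBytes(), 65535)
	n, err := src.Read(buf)
	if n <= 0 {
		util.PutBytes(buf) // nothing useful was read, return the slice immediately
		return err
	}
	handle(buf[:n])    // the handler must not retain buf after returning
	util.PutBytes(buf) // reclaim the slice for reuse
	return nil
}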
+func PutBytes(bs []byte) { + return +} diff --git a/src/util/bytes_other.go b/src/util/bytes_other.go new file mode 100644 index 00000000..7c966087 --- /dev/null +++ b/src/util/bytes_other.go @@ -0,0 +1,18 @@ +//+build !mobile + +package util + +import "sync" + +// This is used to buffer recently used slices of bytes, to prevent allocations in the hot loops. +var byteStore = sync.Pool{New: func() interface{} { return []byte(nil) }} + +// GetBytes returns a 0-length (possibly nil) slice of bytes from a free list, so it may have a larger capacity. +func GetBytes() []byte { + return byteStore.Get().([]byte)[:0] +} + +// PutBytes stores a slice in a free list, where it can potentially be reused to prevent future allocations. +func PutBytes(bs []byte) { + byteStore.Put(bs) +} diff --git a/src/util/cancellation.go b/src/util/cancellation.go index af4721bb..1f6d1658 100644 --- a/src/util/cancellation.go +++ b/src/util/cancellation.go @@ -7,15 +7,22 @@ import ( "time" ) +// Cancellation is used to signal when things should shut down, such as signaling anything associated with a Conn to exit. +// This is and is similar to a context, but with an error to specify the reason for the cancellation. type Cancellation interface { - Finished() <-chan struct{} - Cancel(error) error - Error() error + Finished() <-chan struct{} // Finished returns a channel which will be closed when Cancellation.Cancel is first called. + Cancel(error) error // Cancel closes the channel returned by Finished and sets the error returned by error, or else returns the existing error if the Cancellation has already run. + Error() error // Error returns the error provided to Cancel, or nil if no error has been provided. } +// CancellationFinalized is an error returned if a cancellation object was garbage collected and the finalizer was run. +// If you ever see this, then you're probably doing something wrong with your code. var CancellationFinalized = errors.New("finalizer called") + +// CancellationTimeoutError is used when a CancellationWithTimeout or CancellationWithDeadline is cancelled due to said timeout. var CancellationTimeoutError = errors.New("timeout") +// CancellationFinalizer is set as a finalizer when creating a new cancellation with NewCancellation(), and generally shouldn't be needed by the user, but is included in case other implementations of the same interface want to make use of it. func CancellationFinalizer(c Cancellation) { c.Cancel(CancellationFinalized) } @@ -27,6 +34,7 @@ type cancellation struct { done bool } +// NewCancellation returns a pointer to a struct satisfying the Cancellation interface. func NewCancellation() Cancellation { c := cancellation{ cancel: make(chan struct{}), @@ -35,10 +43,12 @@ func NewCancellation() Cancellation { return &c } +// Finished returns a channel which will be closed when Cancellation.Cancel is first called. func (c *cancellation) Finished() <-chan struct{} { return c.cancel } +// Cancel closes the channel returned by Finished and sets the error returned by error, or else returns the existing error if the Cancellation has already run. func (c *cancellation) Cancel(err error) error { c.mutex.Lock() defer c.mutex.Unlock() @@ -52,6 +62,7 @@ func (c *cancellation) Cancel(err error) error { } } +// Error returns the error provided to Cancel, or nil if no error has been provided. 
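A short, self-contained sketch of how these pieces fit together: a child Cancellation with a timeout is derived from a parent, a worker waits on Finished(), and Error() reports why the cancellation fired (either the error passed to Cancel, or CancellationTimeoutError if the timer won the race).

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/yggdrasil-network/yggdrasil-go/src/util"
)

func main() {
	parent := util.NewCancellation()
	child := util.CancellationWithTimeout(parent, 5*time.Second)
	done := make(chan struct{})
	go func() {
		<-child.Finished() // closed when the parent is cancelled or the timeout fires
		fmt.Println("worker stopping:", child.Error())
		close(done)
	}()
	// Cancelling the parent propagates to the child; only the first Cancel takes effect.
	parent.Cancel(errors.New("shutting down"))
	<-done
}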
func (c *cancellation) Error() error { c.mutex.RLock() err := c.err @@ -59,6 +70,7 @@ func (c *cancellation) Error() error { return err } +// CancellationChild returns a new Cancellation which can be Cancelled independently of the parent, but which will also be Cancelled if the parent is Cancelled first. func CancellationChild(parent Cancellation) Cancellation { child := NewCancellation() go func() { @@ -71,6 +83,7 @@ func CancellationChild(parent Cancellation) Cancellation { return child } +// CancellationWithTimeout returns a ChildCancellation that will automatically be Cancelled with a CancellationTimeoutError after the timeout. func CancellationWithTimeout(parent Cancellation, timeout time.Duration) Cancellation { child := CancellationChild(parent) go func() { @@ -85,6 +98,7 @@ func CancellationWithTimeout(parent Cancellation, timeout time.Duration) Cancell return child } +// CancellationWithTimeout returns a ChildCancellation that will automatically be Cancelled with a CancellationTimeoutError after the specified deadline. func CancellationWithDeadline(parent Cancellation, deadline time.Time) Cancellation { return CancellationWithTimeout(parent, deadline.Sub(time.Now())) } diff --git a/src/util/util.go b/src/util/util.go index a588a35c..a7a54562 100644 --- a/src/util/util.go +++ b/src/util/util.go @@ -1,3 +1,5 @@ +// Package util contains miscellaneous utilities used by yggdrasil. +// In particular, this includes a crypto worker pool, Cancellation machinery, and a sync.Pool used to reuse []byte. package util // These are misc. utility functions that didn't really fit anywhere else @@ -6,39 +8,25 @@ import ( "runtime" "strconv" "strings" - "sync" "time" ) -// A wrapper around runtime.Gosched() so it doesn't need to be imported elsewhere. +// Yield just executes runtime.Gosched(), and is included so we don't need to explicitly import runtime elsewhere. func Yield() { runtime.Gosched() } -// A wrapper around runtime.LockOSThread() so it doesn't need to be imported elsewhere. +// LockThread executes runtime.LockOSThread(), and is included so we don't need to explicitly import runtime elsewhere. func LockThread() { runtime.LockOSThread() } -// A wrapper around runtime.UnlockOSThread() so it doesn't need to be imported elsewhere. +// UnlockThread executes runtime.UnlockOSThread(), and is included so we don't need to explicitly import runtime elsewhere. func UnlockThread() { runtime.UnlockOSThread() } -// This is used to buffer recently used slices of bytes, to prevent allocations in the hot loops. -var byteStore = sync.Pool{New: func() interface{} { return []byte(nil) }} - -// Gets an empty slice from the byte store. -func GetBytes() []byte { - return byteStore.Get().([]byte)[:0] -} - -// Puts a slice in the store. -func PutBytes(bs []byte) { - byteStore.Put(bs) -} - -// Gets a slice of the appropriate length, reusing existing slice capacity when possible +// ResizeBytes returns a slice of the specified length. If the provided slice has sufficient capacity, it will be resized and returned rather than allocating a new slice. func ResizeBytes(bs []byte, length int) []byte { if cap(bs) >= length { return bs[:length] @@ -47,7 +35,7 @@ func ResizeBytes(bs []byte, length int) []byte { } } -// This is a workaround to go's broken timer implementation +// TimerStop stops a timer and makes sure the channel is drained, returns true if the timer was stopped before firing. 
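The stop/drain/reset dance that TimerStop exists for is easy to get wrong, so here is a small sketch of the pattern as it is used elsewhere in this patch (compare the removed checkForTimeouts loop above); the kick channel and timeout handler are placeholders.

package example

import (
	"time"

	"github.com/yggdrasil-network/yggdrasil-go/src/util"
)

// watchdog resets its timer whenever kick receives, and calls onTimeout if
// the timer fires first. TimerStop makes the Stop+drain step safe to repeat.
func watchdog(kick <-chan struct{}, timeout time.Duration, onTimeout func()) {
	timer := time.NewTimer(timeout)
	defer util.TimerStop(timer)
	for {
		select {
		case _, ok := <-kick:
			if !ok {
				return // kick channel closed, stop watching
			}
			util.TimerStop(timer) // safe even if the timer has already fired
			timer.Reset(timeout)
		case <-timer.C:
			onTimeout()
			return
		}
	}
}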
func TimerStop(t *time.Timer) bool { stopped := t.Stop() select { @@ -57,10 +45,8 @@ func TimerStop(t *time.Timer) bool { return stopped } -// Run a blocking function with a timeout. -// Returns true if the function returns. -// Returns false if the timer fires. -// The blocked function remains blocked--the caller is responsible for somehow killing it. +// FuncTimeout runs the provided function in a separate goroutine, and returns true if the function finishes executing before the timeout passes, or false if the timeout passes. +// It includes no mechanism to stop the function if the timeout fires, so the user is expected to do so on their own (such as with a Cancellation or a context). func FuncTimeout(f func(), timeout time.Duration) bool { success := make(chan struct{}) go func() { @@ -77,9 +63,8 @@ func FuncTimeout(f func(), timeout time.Duration) bool { } } -// This calculates the difference between two arrays and returns items -// that appear in A but not in B - useful somewhat when reconfiguring -// and working out what configuration items changed +// Difference loops over two strings and returns the elements of A which do not appear in B. +// This is somewhat useful when needing to determine which elements of a configuration file have changed. func Difference(a, b []string) []string { ab := []string{} mb := map[string]bool{} @@ -107,7 +92,7 @@ func DecodeCoordString(in string) (out []uint64) { return out } -// GetFlowLabel takes an IP packet as an argument and returns some information about the traffic flow. +// GetFlowKey takes an IP packet as an argument and returns some information about the traffic flow. // For IPv4 packets, this is derived from the source and destination protocol and port numbers. // For IPv6 packets, this is derived from the FlowLabel field of the packet if this was set, otherwise it's handled like IPv4. // The FlowKey is then used internally by Yggdrasil for congestion control. diff --git a/src/yggdrasil/api.go b/src/yggdrasil/api.go index 57ee5c65..4245f439 100644 --- a/src/yggdrasil/api.go +++ b/src/yggdrasil/api.go @@ -6,38 +6,47 @@ import ( "fmt" "net" "sort" - "sync/atomic" "time" "github.com/gologme/log" "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" + + "github.com/Arceliar/phony" ) // Peer represents a single peer object. This contains information from the -// preferred switch port for this peer, although there may be more than one in -// reality. +// preferred switch port for this peer, although there may be more than one +// active switch port connection to the peer in reality. +// +// This struct is informational only - you cannot manipulate peer connections +// using instances of this struct. You should use the AddPeer or RemovePeer +// functions instead. type Peer struct { - PublicKey crypto.BoxPubKey - Endpoint string - BytesSent uint64 - BytesRecvd uint64 - Protocol string - Port uint64 - Uptime time.Duration + PublicKey crypto.BoxPubKey // The public key of the remote node + Endpoint string // The connection string used to connect to the peer + BytesSent uint64 // Number of bytes sent to this peer + BytesRecvd uint64 // Number of bytes received from this peer + Protocol string // The transport protocol that this peer is connected with, typically "tcp" + Port uint64 // Switch port number for this peer connection + Uptime time.Duration // How long this peering has been active for } // SwitchPeer represents a switch connection to a peer. 
Note that there may be // multiple switch peers per actual peer, e.g. if there are multiple connections // to a given node. +// +// This struct is informational only - you cannot manipulate switch peer +// connections using instances of this struct. You should use the AddPeer or +// RemovePeer functions instead. type SwitchPeer struct { - PublicKey crypto.BoxPubKey - Coords []uint64 - BytesSent uint64 - BytesRecvd uint64 - Port uint64 - Protocol string - Endpoint string + PublicKey crypto.BoxPubKey // The public key of the remote node + Coords []uint64 // The coordinates of the remote node + BytesSent uint64 // Number of bytes sent via this switch port + BytesRecvd uint64 // Number of bytes received via this switch port + Port uint64 // Switch port number for this switch peer + Protocol string // The transport protocol that this switch port is connected with, typically "tcp" + Endpoint string // The connection string used to connect to the switch peer } // DHTEntry represents a single DHT entry that has been learned or cached from @@ -63,32 +72,36 @@ type NodeInfoPayload []byte // congestion and a list of switch queues created in response to congestion on a // given link. type SwitchQueues struct { - Queues []SwitchQueue - Count uint64 - Size uint64 - HighestCount uint64 - HighestSize uint64 - MaximumSize uint64 + Queues []SwitchQueue // An array of SwitchQueue objects containing information about individual queues + Count uint64 // The current number of active switch queues + Size uint64 // The current total size of active switch queues + HighestCount uint64 // The highest recorded number of switch queues so far + HighestSize uint64 // The highest recorded total size of switch queues so far + MaximumSize uint64 // The maximum allowed total size of switch queues, as specified by config } -// SwitchQueue represents a single switch queue, which is created in response -// to congestion on a given link. +// SwitchQueue represents a single switch queue. Switch queues are only created +// in response to congestion on a given link and represent how much data has +// been temporarily cached for sending once the congestion has cleared. type SwitchQueue struct { - ID string - Size uint64 - Packets uint64 - Port uint64 + ID string // The ID of the switch queue + Size uint64 // The total size, in bytes, of the queue + Packets uint64 // The number of packets in the queue + Port uint64 // The switch port to which the queue applies } -// Session represents an open session with another node. +// Session represents an open session with another node. Sessions are opened in +// response to traffic being exchanged between two nodes using Conn objects. +// Note that sessions will automatically be closed by Yggdrasil if no traffic is +// exchanged for around two minutes. 
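Since Peer, SwitchPeer and Session are plain informational snapshots, consuming them is just a matter of ranging over the slices returned by the Core getters, as in this sketch (the output formatting is arbitrary).

package example

import (
	"fmt"

	"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil"
)

// dumpState prints a one-line summary for each active peer and session.
func dumpState(core *yggdrasil.Core) {
	for _, p := range core.GetPeers() {
		fmt.Printf("peer %x via %s port %d: %d bytes sent, %d received\n",
			p.PublicKey, p.Protocol, p.Port, p.BytesSent, p.BytesRecvd)
	}
	for _, s := range core.GetSessions() {
		fmt.Printf("session %x: mtu %d, up %s\n", s.PublicKey, s.MTU, s.Uptime)
	}
}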
type Session struct { - PublicKey crypto.BoxPubKey - Coords []uint64 - BytesSent uint64 - BytesRecvd uint64 - MTU uint16 - Uptime time.Duration - WasMTUFixed bool + PublicKey crypto.BoxPubKey // The public key of the remote node + Coords []uint64 // The coordinates of the remote node + BytesSent uint64 // Bytes sent to the session + BytesRecvd uint64 // Bytes received from the session + MTU uint16 // The maximum supported message size of the session + Uptime time.Duration // How long this session has been active for + WasMTUFixed bool // This field is no longer used } // GetPeers returns one or more Peer objects containing information about active @@ -106,15 +119,18 @@ func (c *Core) GetPeers() []Peer { sort.Slice(ps, func(i, j int) bool { return ps[i] < ps[j] }) for _, port := range ps { p := ports[port] - info := Peer{ - Endpoint: p.intf.name, - BytesSent: atomic.LoadUint64(&p.bytesSent), - BytesRecvd: atomic.LoadUint64(&p.bytesRecvd), - Protocol: p.intf.info.linkType, - Port: uint64(port), - Uptime: time.Since(p.firstSeen), - } - copy(info.PublicKey[:], p.box[:]) + var info Peer + phony.Block(p, func() { + info = Peer{ + Endpoint: p.intf.name, + BytesSent: p.bytesSent, + BytesRecvd: p.bytesRecvd, + Protocol: p.intf.info.linkType, + Port: uint64(port), + Uptime: time.Since(p.firstSeen), + } + copy(info.PublicKey[:], p.box[:]) + }) peers = append(peers, info) } return peers @@ -135,15 +151,18 @@ func (c *Core) GetSwitchPeers() []SwitchPeer { continue } coords := elem.locator.getCoords() - info := SwitchPeer{ - Coords: append([]uint64{}, wire_coordsBytestoUint64s(coords)...), - BytesSent: atomic.LoadUint64(&peer.bytesSent), - BytesRecvd: atomic.LoadUint64(&peer.bytesRecvd), - Port: uint64(elem.port), - Protocol: peer.intf.info.linkType, - Endpoint: peer.intf.info.remote, - } - copy(info.PublicKey[:], peer.box[:]) + var info SwitchPeer + phony.Block(peer, func() { + info = SwitchPeer{ + Coords: append([]uint64{}, wire_coordsBytestoUint64s(coords)...), + BytesSent: peer.bytesSent, + BytesRecvd: peer.bytesRecvd, + Port: uint64(elem.port), + Protocol: peer.intf.info.linkType, + Endpoint: peer.intf.info.remote, + } + copy(info.PublicKey[:], peer.box[:]) + }) switchpeers = append(switchpeers, info) } return switchpeers @@ -156,11 +175,11 @@ func (c *Core) GetDHT() []DHTEntry { getDHT := func() { now := time.Now() var dhtentry []*dhtInfo - for _, v := range c.dht.table { + for _, v := range c.router.dht.table { dhtentry = append(dhtentry, v) } sort.SliceStable(dhtentry, func(i, j int) bool { - return dht_ordered(&c.dht.nodeID, dhtentry[i].getNodeID(), dhtentry[j].getNodeID()) + return dht_ordered(&c.router.dht.nodeID, dhtentry[i].getNodeID(), dhtentry[j].getNodeID()) }) for _, v := range dhtentry { info := DHTEntry{ @@ -171,7 +190,7 @@ func (c *Core) GetDHT() []DHTEntry { dhtentries = append(dhtentries, info) } } - c.router.doAdmin(getDHT) + phony.Block(&c.router, getDHT) return dhtentries } @@ -186,7 +205,7 @@ func (c *Core) GetSwitchQueues() SwitchQueues { Size: switchTable.queues.size, HighestCount: uint64(switchTable.queues.maxbufs), HighestSize: switchTable.queues.maxsize, - MaximumSize: switchTable.queueTotalMaxSize, + MaximumSize: switchTable.queues.totalMaxSize, } for k, v := range switchTable.queues.bufs { nexthop := switchTable.bestPortForCoords([]byte(k)) @@ -198,9 +217,8 @@ func (c *Core) GetSwitchQueues() SwitchQueues { } switchqueues.Queues = append(switchqueues.Queues, queue) } - } - c.switchTable.doAdmin(getSwitchQueues) + phony.Block(&c.switchTable, getSwitchQueues) return 
switchqueues } @@ -208,12 +226,12 @@ func (c *Core) GetSwitchQueues() SwitchQueues { func (c *Core) GetSessions() []Session { var sessions []Session getSessions := func() { - for _, sinfo := range c.sessions.sinfos { + for _, sinfo := range c.router.sessions.sinfos { var session Session workerFunc := func() { session = Session{ Coords: append([]uint64{}, wire_coordsBytestoUint64s(sinfo.coords)...), - MTU: sinfo.getMTU(), + MTU: sinfo._getMTU(), BytesSent: sinfo.bytesSent, BytesRecvd: sinfo.bytesRecvd, Uptime: time.Now().Sub(sinfo.timeOpened), @@ -221,42 +239,37 @@ func (c *Core) GetSessions() []Session { } copy(session.PublicKey[:], sinfo.theirPermPub[:]) } - var skip bool - func() { - defer func() { - if recover() != nil { - skip = true - } - }() - sinfo.doFunc(workerFunc) - }() - if skip { - continue - } + phony.Block(sinfo, workerFunc) // TODO? skipped known but timed out sessions? sessions = append(sessions, session) } } - c.router.doAdmin(getSessions) + phony.Block(&c.router, getSessions) return sessions } -// ConnListen returns a listener for Yggdrasil session connections. +// ConnListen returns a listener for Yggdrasil session connections. You can only +// call this function once as each Yggdrasil node can only have a single +// ConnListener. Make sure to keep the reference to this for as long as it is +// needed. func (c *Core) ConnListen() (*Listener, error) { - c.sessions.listenerMutex.Lock() - defer c.sessions.listenerMutex.Unlock() - if c.sessions.listener != nil { + c.router.sessions.listenerMutex.Lock() + defer c.router.sessions.listenerMutex.Unlock() + if c.router.sessions.listener != nil { return nil, errors.New("a listener already exists") } - c.sessions.listener = &Listener{ + c.router.sessions.listener = &Listener{ core: c, conn: make(chan *Conn), close: make(chan interface{}), } - return c.sessions.listener, nil + return c.router.sessions.listener, nil } -// ConnDialer returns a dialer for Yggdrasil session connections. +// ConnDialer returns a dialer for Yggdrasil session connections. Since +// ConnDialers are stateless, you can request as many dialers as you like, +// although ideally you should request only one and keep the reference to it for +// as long as it is needed. func (c *Core) ConnDialer() (*Dialer, error) { return &Dialer{ core: c, @@ -270,48 +283,69 @@ func (c *Core) ListenTCP(uri string) (*TcpListener, error) { return c.link.tcp.listen(uri) } -// NodeID gets the node ID. +// NodeID gets the node ID. This is derived from your router encryption keys. +// Remote nodes wanting to open connections to your node will need to know your +// node ID. func (c *Core) NodeID() *crypto.NodeID { return crypto.GetNodeID(&c.boxPub) } -// TreeID gets the tree ID. +// TreeID gets the tree ID. This is derived from your switch signing keys. There +// is typically no need to share this key. func (c *Core) TreeID() *crypto.TreeID { return crypto.GetTreeID(&c.sigPub) } -// SigningPublicKey gets the node's signing public key. +// SigningPublicKey gets the node's signing public key, as used by the switch. func (c *Core) SigningPublicKey() string { return hex.EncodeToString(c.sigPub[:]) } -// EncryptionPublicKey gets the node's encryption public key. +// EncryptionPublicKey gets the node's encryption public key, as used by the +// router. func (c *Core) EncryptionPublicKey() string { return hex.EncodeToString(c.boxPub[:]) } -// Coords returns the current coordinates of the node. +// Coords returns the current coordinates of the node. 
Note that these can +// change at any time for a number of reasons, not limited to but including +// changes to peerings (either yours or a parent nodes) or changes to the network +// root. +// +// This function may return an empty array - this is normal behaviour if either +// you are the root of the network that you are connected to, or you are not +// connected to any other nodes (effectively making you the root of a +// single-node network). func (c *Core) Coords() []uint64 { table := c.switchTable.table.Load().(lookupTable) return wire_coordsBytestoUint64s(table.self.getCoords()) } // Address gets the IPv6 address of the Yggdrasil node. This is always a /128 -// address. +// address. The IPv6 address is only relevant when the node is operating as an +// IP router and often is meaningless when embedded into an application, unless +// that application also implements either VPN functionality or deals with IP +// packets specifically. func (c *Core) Address() net.IP { address := net.IP(address.AddrForNodeID(c.NodeID())[:]) return address } // Subnet gets the routed IPv6 subnet of the Yggdrasil node. This is always a -// /64 subnet. +// /64 subnet. The IPv6 subnet is only relevant when the node is operating as an +// IP router and often is meaningless when embedded into an application, unless +// that application also implements either VPN functionality or deals with IP +// packets specifically. func (c *Core) Subnet() net.IPNet { subnet := address.SubnetForNodeID(c.NodeID())[:] subnet = append(subnet, 0, 0, 0, 0, 0, 0, 0, 0) return net.IPNet{IP: subnet, Mask: net.CIDRMask(64, 128)} } -// MyNodeInfo gets the currently configured nodeinfo. +// MyNodeInfo gets the currently configured nodeinfo. NodeInfo is typically +// specified through the "NodeInfo" option in the node configuration or using +// the SetNodeInfo function, although it may also contain other built-in values +// such as "buildname", "buildversion" etc. func (c *Core) MyNodeInfo() NodeInfoPayload { return c.router.nodeinfo.getNodeInfo() } @@ -338,11 +372,9 @@ func (c *Core) GetNodeInfo(key crypto.BoxPubKey, coords []uint64, nocache bool) }) c.router.nodeinfo.sendNodeInfo(key, wire_coordsUint64stoBytes(coords), false) } - c.router.doAdmin(sendNodeInfoRequest) - go func() { - time.Sleep(6 * time.Second) - close(response) - }() + phony.Block(&c.router, sendNodeInfoRequest) + timer := time.AfterFunc(6*time.Second, func() { close(response) }) + defer timer.Stop() for res := range response { return *res, nil } @@ -356,14 +388,16 @@ func (c *Core) GetNodeInfo(key crypto.BoxPubKey, coords []uint64, nocache bool) // received an incoming session request. The function should return true to // allow the session or false to reject it. func (c *Core) SetSessionGatekeeper(f func(pubkey *crypto.BoxPubKey, initiator bool) bool) { - c.sessions.isAllowedMutex.Lock() - defer c.sessions.isAllowedMutex.Unlock() + c.router.sessions.isAllowedMutex.Lock() + defer c.router.sessions.isAllowedMutex.Unlock() - c.sessions.isAllowedHandler = f + c.router.sessions.isAllowedHandler = f } // SetLogger sets the output logger of the Yggdrasil node after startup. This -// may be useful if you want to redirect the output later. +// may be useful if you want to redirect the output later. Note that this +// expects a Logger from the github.com/gologme/log package and not from Go's +// built-in log package. func (c *Core) SetLogger(log *log.Logger) { c.log = log } @@ -375,13 +409,34 @@ func (c *Core) SetLogger(log *log.Logger) { // connection drops. 
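A sketch of typical AddPeer usage, once with no source interface and once pinned to a specific interface; the peer URIs and interface name are placeholders.

package example

import (
	"github.com/gologme/log"

	"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil"
)

// addPeers adds two example peerings; duplicates are rejected with
// "peer already added" by the checks in AddPeer below.
func addPeers(core *yggdrasil.Core, logger *log.Logger) {
	if err := core.AddPeer("tcp://203.0.113.1:54321", ""); err != nil {
		logger.Warnln("AddPeer:", err)
	}
	if err := core.AddPeer("tcp://[2001:db8::1]:54321", "eth0"); err != nil {
		logger.Warnln("AddPeer:", err)
	}
}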
func (c *Core) AddPeer(addr string, sintf string) error { if err := c.CallPeer(addr, sintf); err != nil { + // TODO: We maybe want this to write the peer to the persistent + // configuration even if a connection attempt fails, but first we'll need to + // move the code to check the peer URI so that we don't deliberately save a + // peer with a known bad URI. Loading peers from config should really do the + // same thing too but I don't think that happens today return err } c.config.Mutex.Lock() if sintf == "" { + for _, peer := range c.config.Current.Peers { + if peer == addr { + return errors.New("peer already added") + } + } c.config.Current.Peers = append(c.config.Current.Peers, addr) } else { - c.config.Current.InterfacePeers[sintf] = append(c.config.Current.InterfacePeers[sintf], addr) + if _, ok := c.config.Current.InterfacePeers[sintf]; ok { + for _, peer := range c.config.Current.InterfacePeers[sintf] { + if peer == addr { + return errors.New("peer already added") + } + } + } + if _, ok := c.config.Current.InterfacePeers[sintf]; !ok { + c.config.Current.InterfacePeers[sintf] = []string{addr} + } else { + c.config.Current.InterfacePeers[sintf] = append(c.config.Current.InterfacePeers[sintf], addr) + } } c.config.Mutex.Unlock() return nil @@ -413,12 +468,17 @@ func (c *Core) DisconnectPeer(port uint64) error { } // GetAllowedEncryptionPublicKeys returns the public keys permitted for incoming -// peer connections. +// peer connections. If this list is empty then all incoming peer connections +// are accepted by default. func (c *Core) GetAllowedEncryptionPublicKeys() []string { return c.peers.getAllowedEncryptionPublicKeys() } // AddAllowedEncryptionPublicKey whitelists a key for incoming peer connections. +// By default all incoming peer connections are accepted, but adding public keys +// to the whitelist using this function enables strict checking from that point +// forward. Once the whitelist is enabled, only peer connections from +// whitelisted public keys will be accepted. func (c *Core) AddAllowedEncryptionPublicKey(bstr string) (err error) { c.peers.addAllowedEncryptionPublicKey(bstr) return nil @@ -445,12 +505,12 @@ func (c *Core) DHTPing(key crypto.BoxPubKey, coords []uint64, target *crypto.Nod } rq := dhtReqKey{info.key, *target} sendPing := func() { - c.dht.addCallback(&rq, func(res *dhtRes) { + c.router.dht.addCallback(&rq, func(res *dhtRes) { resCh <- res }) - c.dht.ping(&info, &rq.dest) + c.router.dht.ping(&info, &rq.dest) } - c.router.doAdmin(sendPing) + phony.Block(&c.router, sendPing) // TODO: do something better than the below... res := <-resCh if res != nil { diff --git a/src/yggdrasil/conn.go b/src/yggdrasil/conn.go index 25330aae..eb2bb74b 100644 --- a/src/yggdrasil/conn.go +++ b/src/yggdrasil/conn.go @@ -3,12 +3,12 @@ package yggdrasil import ( "errors" "fmt" - "sync" - "sync/atomic" "time" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) // ConnError implements the net.Error interface @@ -53,94 +53,116 @@ func (e *ConnError) Closed() bool { return e.closed } +// The Conn struct is a reference to an active connection session between the +// local node and a remote node. Conn implements the io.ReadWriteCloser +// interface and is used to send and receive traffic with a remote node. 
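A sketch of the synchronous side of that interface: dial a remote node by node ID, write a payload and wait for a reply. The node ID and mask are assumed to come from elsewhere (for example via GetNodeIDandMask on an address), and Close comes from the io.ReadWriteCloser contract.

package example

import (
	"fmt"

	"github.com/yggdrasil-network/yggdrasil-go/src/crypto"
	"github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil"
)

// echoOnce opens a Conn to the given node, sends a payload and reads one reply.
func echoOnce(dialer *yggdrasil.Dialer, nodeID, nodeMask *crypto.NodeID) error {
	conn, err := dialer.DialByNodeIDandMask(nodeID, nodeMask)
	if err != nil {
		return err
	}
	defer conn.Close()
	if _, err := conn.Write([]byte("ping")); err != nil {
		return err
	}
	buf := make([]byte, 65535)
	n, err := conn.Read(buf)
	if err != nil {
		return err
	}
	fmt.Printf("received %d bytes on %s\n", n, conn.String())
	return nil
}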
type Conn struct { + phony.Inbox core *Core - readDeadline atomic.Value // time.Time // TODO timer - writeDeadline atomic.Value // time.Time // TODO timer - mutex sync.RWMutex // protects the below + readDeadline *time.Time + writeDeadline *time.Time nodeID *crypto.NodeID nodeMask *crypto.NodeID session *sessionInfo + mtu uint16 + readCallback func([]byte) + readBuffer chan []byte } // TODO func NewConn() that initializes additional fields as needed func newConn(core *Core, nodeID *crypto.NodeID, nodeMask *crypto.NodeID, session *sessionInfo) *Conn { conn := Conn{ - core: core, - nodeID: nodeID, - nodeMask: nodeMask, - session: session, + core: core, + nodeID: nodeID, + nodeMask: nodeMask, + session: session, + readBuffer: make(chan []byte, 1024), } return &conn } +// String returns a string that uniquely identifies a connection. Currently this +// takes a form similar to "conn=0x0000000", which contains a memory reference +// to the Conn object. While this value should always be unique for each Conn +// object, the format of this is not strictly defined and may change in the +// future. func (c *Conn) String() string { - c.mutex.RLock() - defer c.mutex.RUnlock() - return fmt.Sprintf("conn=%p", c) + var s string + phony.Block(c, func() { s = fmt.Sprintf("conn=%p", c) }) + return s +} + +func (c *Conn) setMTU(from phony.Actor, mtu uint16) { + c.Act(from, func() { c.mtu = mtu }) } // This should never be called from the router goroutine, used in the dial functions func (c *Conn) search() error { - var sinfo *searchInfo - var isIn bool - c.core.router.doAdmin(func() { sinfo, isIn = c.core.searches.searches[*c.nodeID] }) - if !isIn { - done := make(chan struct{}, 1) - var sess *sessionInfo - var err error - searchCompleted := func(sinfo *sessionInfo, e error) { - sess = sinfo - err = e - // FIXME close can be called multiple times, do a non-blocking send instead - select { - case done <- struct{}{}: - default: + var err error + done := make(chan struct{}) + phony.Block(&c.core.router, func() { + _, isIn := c.core.router.searches.searches[*c.nodeID] + if !isIn { + searchCompleted := func(sinfo *sessionInfo, e error) { + select { + case <-done: + // Somehow this was called multiple times, TODO don't let that happen + if sinfo != nil { + // Need to clean up to avoid a session leak + sinfo.cancel.Cancel(nil) + sinfo.sessions.removeSession(sinfo) + } + default: + if sinfo != nil { + // Finish initializing the session + sinfo.setConn(nil, c) + } + c.session = sinfo + err = e + close(done) + } } - } - c.core.router.doAdmin(func() { - sinfo = c.core.searches.newIterSearch(c.nodeID, c.nodeMask, searchCompleted) + sinfo := c.core.router.searches.newIterSearch(c.nodeID, c.nodeMask, searchCompleted) sinfo.continueSearch() - }) - <-done - c.session = sess - if c.session == nil && err == nil { - panic("search failed but returned no error") + } else { + err = errors.New("search already exists") + close(done) } - if c.session != nil { - c.nodeID = crypto.GetNodeID(&c.session.theirPermPub) - for i := range c.nodeMask { - c.nodeMask[i] = 0xFF - } - } - return err - } else { - return errors.New("search already exists") + }) + <-done + if c.session == nil && err == nil { + panic("search failed but returned no error") } - return nil + if c.session != nil { + c.nodeID = crypto.GetNodeID(&c.session.theirPermPub) + for i := range c.nodeMask { + c.nodeMask[i] = 0xFF + } + } + return err } -// Used in session keep-alive traffic in Conn.Write +// Used in session keep-alive traffic func (c *Conn) doSearch() { routerWork 
:= func() { // Check to see if there is a search already matching the destination - sinfo, isIn := c.core.searches.searches[*c.nodeID] + sinfo, isIn := c.core.router.searches.searches[*c.nodeID] if !isIn { // Nothing was found, so create a new search searchCompleted := func(sinfo *sessionInfo, e error) {} - sinfo = c.core.searches.newIterSearch(c.nodeID, c.nodeMask, searchCompleted) + sinfo = c.core.router.searches.newIterSearch(c.nodeID, c.nodeMask, searchCompleted) c.core.log.Debugf("%s DHT search started: %p", c.String(), sinfo) // Start the search sinfo.continueSearch() } } - go func() { c.core.router.admin <- routerWork }() + c.core.router.Act(c.session, routerWork) } -func (c *Conn) getDeadlineCancellation(value *atomic.Value) (util.Cancellation, bool) { - if deadline, ok := value.Load().(time.Time); ok { +func (c *Conn) _getDeadlineCancellation(t *time.Time) (util.Cancellation, bool) { + if t != nil { // A deadline is set, so return a Cancellation that uses it - c := util.CancellationWithDeadline(c.session.cancel, deadline) + c := util.CancellationWithDeadline(c.session.cancel, *t) return c, true } else { // No deadline was set, so just return the existinc cancellation and a dummy value @@ -148,9 +170,50 @@ func (c *Conn) getDeadlineCancellation(value *atomic.Value) (util.Cancellation, } } +// SetReadCallback allows you to specify a function that will be called whenever +// a packet is received. This should be used if you wish to implement +// asynchronous patterns for receiving data from the remote node. +// +// Note that if a read callback has been supplied, you should no longer attempt +// to use the synchronous Read function. +func (c *Conn) SetReadCallback(callback func([]byte)) { + c.Act(nil, func() { + c.readCallback = callback + c._drainReadBuffer() + }) +} + +func (c *Conn) _drainReadBuffer() { + if c.readCallback == nil { + return + } + select { + case bs := <-c.readBuffer: + c.readCallback(bs) + c.Act(nil, c._drainReadBuffer) // In case there's more + default: + } +} + +// Called by the session to pass a new message to the Conn +func (c *Conn) recvMsg(from phony.Actor, msg []byte) { + c.Act(from, func() { + if c.readCallback != nil { + c.readCallback(msg) + } else { + select { + case c.readBuffer <- msg: + default: + } + } + }) +} + // Used internally by Read, the caller is responsible for util.PutBytes when they're done. -func (c *Conn) ReadNoCopy() ([]byte, error) { - cancel, doCancel := c.getDeadlineCancellation(&c.readDeadline) +func (c *Conn) readNoCopy() ([]byte, error) { + var cancel util.Cancellation + var doCancel bool + phony.Block(c, func() { cancel, doCancel = c._getDeadlineCancellation(c.readDeadline) }) if doCancel { defer cancel.Cancel(nil) } @@ -162,14 +225,21 @@ func (c *Conn) ReadNoCopy() ([]byte, error) { } else { return nil, ConnError{errors.New("session closed"), false, false, true, 0} } - case bs := <-c.session.recv: + case bs := <-c.readBuffer: return bs, nil } } -// Implements net.Conn.Read +// Read allows you to read from the connection in a synchronous fashion. The +// function will block up until the point that either new data is available, the +// connection has been closed or the read deadline has been reached. If the +// function succeeds, the number of bytes read from the connection will be +// returned. Otherwise, an error condition will be returned. +// +// Note that you can also implement asynchronous reads by using SetReadCallback. +// If you do that, you should no longer attempt to use the Read function. 
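As a usage sketch of the two read patterns described in the comment above (assuming conn is a *Conn obtained from a Listener or Dialer, and handle is a hypothetical consumer of received packets, neither of which is part of this change), a caller either reads synchronously or registers a callback, but not both:

    // Synchronous, net.Conn-style reads:
    buf := make([]byte, 65535)
    n, err := conn.Read(buf)
    if err == nil {
        handle(buf[:n])
    }

    // Or asynchronous delivery; once a callback is set, stop calling Read:
    conn.SetReadCallback(func(msg []byte) {
        handle(msg)
    })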
func (c *Conn) Read(b []byte) (int, error) { - bs, err := c.ReadNoCopy() + bs, err := c.readNoCopy() if err != nil { return 0, err } @@ -185,53 +255,71 @@ func (c *Conn) Read(b []byte) (int, error) { return n, err } -// Used internally by Write, the caller must not reuse the argument bytes when no error occurs -func (c *Conn) WriteNoCopy(msg FlowKeyMessage) error { - var err error - sessionFunc := func() { - // Does the packet exceed the permitted size for the session? - if uint16(len(msg.Message)) > c.session.getMTU() { - err = ConnError{errors.New("packet too big"), true, false, false, int(c.session.getMTU())} - return - } - // The rest of this work is session keep-alive traffic +func (c *Conn) _write(msg FlowKeyMessage) error { + if len(msg.Message) > int(c.mtu) { + return ConnError{errors.New("packet too big"), true, false, false, int(c.mtu)} + } + c.session.Act(c, func() { + // Send the packet + c.session._send(msg) + // Session keep-alive, while we wait for the crypto workers from send switch { case time.Since(c.session.time) > 6*time.Second: if c.session.time.Before(c.session.pingTime) && time.Since(c.session.pingTime) > 6*time.Second { // TODO double check that the above condition is correct c.doSearch() } else { - c.core.sessions.ping(c.session) + c.session.ping(c.session) // TODO send from self if this becomes an actor } case c.session.reset && c.session.pingTime.Before(c.session.time): - c.core.sessions.ping(c.session) + c.session.ping(c.session) // TODO send from self if this becomes an actor default: // Don't do anything, to keep traffic throttled } - } - c.session.doFunc(sessionFunc) - if err == nil { - cancel, doCancel := c.getDeadlineCancellation(&c.writeDeadline) - if doCancel { - defer cancel.Cancel(nil) - } - select { - case <-cancel.Finished(): - if cancel.Error() == util.CancellationTimeoutError { - err = ConnError{errors.New("write timeout"), true, false, false, 0} - } else { - err = ConnError{errors.New("session closed"), false, false, true, 0} - } - case c.session.send <- msg: + }) + return nil +} + +// WriteFrom should be called by a phony.Actor, and tells the Conn to send a +// message. This is used internaly by Write. If the callback is called with a +// non-nil value, then it is safe to reuse the argument FlowKeyMessage. +func (c *Conn) WriteFrom(from phony.Actor, msg FlowKeyMessage, callback func(error)) { + c.Act(from, func() { + callback(c._write(msg)) + }) +} + +// writeNoCopy is used internally by Write and makes use of WriteFrom under the hood. +// The caller must not reuse the argument FlowKeyMessage when a nil error is returned. +func (c *Conn) writeNoCopy(msg FlowKeyMessage) error { + var cancel util.Cancellation + var doCancel bool + phony.Block(c, func() { cancel, doCancel = c._getDeadlineCancellation(c.writeDeadline) }) + var err error + select { + case <-cancel.Finished(): + if cancel.Error() == util.CancellationTimeoutError { + err = ConnError{errors.New("write timeout"), true, false, false, 0} + } else { + err = ConnError{errors.New("session closed"), false, false, true, 0} } + default: + done := make(chan struct{}) + callback := func(e error) { err = e; close(done) } + c.WriteFrom(nil, msg, callback) + <-done } return err } -// Implements net.Conn.Write +// Write allows you to write to the connection in a synchronous fashion. This +// function may block until either the write has completed, the connection has +// been closed or the write deadline has been reached. If the function succeeds, +// the number of written bytes is returned. 
Otherwise, an error condition is +// returned. func (c *Conn) Write(b []byte) (int, error) { written := len(b) msg := FlowKeyMessage{Message: append(util.GetBytes(), b...)} - err := c.WriteNoCopy(msg) + err := c.writeNoCopy(msg) if err != nil { util.PutBytes(msg.Message) written = 0 @@ -239,40 +327,66 @@ func (c *Conn) Write(b []byte) (int, error) { return written, err } +// Close will close an open connection and any blocking operations on the +// connection will unblock and return. From this point forward, the connection +// can no longer be used and you should no longer attempt to Read or Write to +// the connection. func (c *Conn) Close() (err error) { - c.mutex.Lock() - defer c.mutex.Unlock() - if c.session != nil { - // Close the session, if it hasn't been closed already - if e := c.session.cancel.Cancel(errors.New("connection closed")); e != nil { - err = ConnError{errors.New("close failed, session already closed"), false, false, true, 0} + phony.Block(c, func() { + if c.session != nil { + // Close the session, if it hasn't been closed already + if e := c.session.cancel.Cancel(errors.New("connection closed")); e != nil { + err = ConnError{errors.New("close failed, session already closed"), false, false, true, 0} + } else { + c.session.doRemove() + } } - } + }) return } +// LocalAddr returns the complete node ID of the local side of the connection. +// This is always going to return your own node's node ID. func (c *Conn) LocalAddr() crypto.NodeID { - return *crypto.GetNodeID(&c.session.core.boxPub) + return *crypto.GetNodeID(&c.core.boxPub) } +// RemoteAddr returns the complete node ID of the remote side of the connection. func (c *Conn) RemoteAddr() crypto.NodeID { - c.mutex.RLock() - defer c.mutex.RUnlock() - return *c.nodeID + // TODO warn that this can block while waiting for the Conn actor to run, so don't call it from other actors... + var n crypto.NodeID + phony.Block(c, func() { n = *c.nodeID }) + return n } +// SetDeadline is equivalent to calling both SetReadDeadline and +// SetWriteDeadline with the same value, configuring the maximum amount of time +// that synchronous Read and Write operations can block for. If no deadline is +// configured, Read and Write operations can potentially block indefinitely. func (c *Conn) SetDeadline(t time.Time) error { c.SetReadDeadline(t) c.SetWriteDeadline(t) return nil } +// SetReadDeadline configures the maximum amount of time that a synchronous Read +// operation can block for. A Read operation will unblock at the point that the +// read deadline is reached if no other condition (such as data arrival or +// connection closure) happens first. If no deadline is configured, Read +// operations can potentially block indefinitely. func (c *Conn) SetReadDeadline(t time.Time) error { - c.readDeadline.Store(t) + // TODO warn that this can block while waiting for the Conn actor to run, so don't call it from other actors... + phony.Block(c, func() { c.readDeadline = &t }) return nil } +// SetWriteDeadline configures the maximum amount of time that a synchronous +// Write operation can block for. A Write operation will unblock at the point +// that the read deadline is reached if no other condition (such as data sending +// or connection closure) happens first. If no deadline is configured, Write +// operations can potentially block indefinitely. func (c *Conn) SetWriteDeadline(t time.Time) error { - c.writeDeadline.Store(t) + // TODO warn that this can block while waiting for the Conn actor to run, so don't call it from other actors... 
+ phony.Block(c, func() { c.writeDeadline = &t }) return nil } diff --git a/src/yggdrasil/core.go b/src/yggdrasil/core.go index 0921ab9f..98a5c6e1 100644 --- a/src/yggdrasil/core.go +++ b/src/yggdrasil/core.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "time" + "github.com/Arceliar/phony" "github.com/gologme/log" "github.com/yggdrasil-network/yggdrasil-go/src/config" @@ -19,22 +20,21 @@ type Core struct { // This is the main data structure that holds everything else for a node // We're going to keep our own copy of the provided config - that way we can // guarantee that it will be covered by the mutex - config config.NodeState // Config - boxPub crypto.BoxPubKey - boxPriv crypto.BoxPrivKey - sigPub crypto.SigPubKey - sigPriv crypto.SigPrivKey - switchTable switchTable - peers peers - sessions sessions - router router - dht dht - searches searches - link link - log *log.Logger + phony.Inbox + config config.NodeState // Config + boxPub crypto.BoxPubKey + boxPriv crypto.BoxPrivKey + sigPub crypto.SigPubKey + sigPriv crypto.SigPrivKey + switchTable switchTable + peers peers + router router + link link + log *log.Logger + addPeerTimer *time.Timer } -func (c *Core) init() error { +func (c *Core) _init() error { // TODO separate init and start functions // Init sets up structs // Start launches goroutines that depend on structs being set up @@ -76,9 +76,6 @@ func (c *Core) init() error { c.log.Warnln("SigningPublicKey in config is incorrect, should be", sp) } - c.searches.init(c) - c.dht.init(c) - c.sessions.init(c) c.peers.init(c) c.router.init(c) c.switchTable.init(c) // TODO move before peers? before router? @@ -89,64 +86,49 @@ func (c *Core) init() error { // If any static peers were provided in the configuration above then we should // configure them. The loop ensures that disconnected peers will eventually // be reconnected with. -func (c *Core) addPeerLoop() { - for { - // the peers from the config - these could change! - current := c.config.GetCurrent() +func (c *Core) _addPeerLoop() { + // Get the peers from the config - these could change! + current := c.config.GetCurrent() - // Add peers from the Peers section - for _, peer := range current.Peers { - go c.AddPeer(peer, "") - time.Sleep(time.Second) - } - - // Add peers from the InterfacePeers section - for intf, intfpeers := range current.InterfacePeers { - for _, peer := range intfpeers { - go c.AddPeer(peer, intf) - time.Sleep(time.Second) + // Add peers from the Peers section + for _, peer := range current.Peers { + go func(peer, intf string) { + if err := c.CallPeer(peer, intf); err != nil { + c.log.Errorln("Failed to add peer:", err) } - } - - // Sit for a while - time.Sleep(time.Minute) + }(peer, "") // TODO: this should be acted and not in a goroutine? } + + // Add peers from the InterfacePeers section + for intf, intfpeers := range current.InterfacePeers { + for _, peer := range intfpeers { + go func(peer, intf string) { + if err := c.CallPeer(peer, intf); err != nil { + c.log.Errorln("Failed to add peer:", err) + } + }(peer, intf) // TODO: this should be acted and not in a goroutine? + } + } + + c.addPeerTimer = time.AfterFunc(time.Minute, func() { + c.Act(nil, c._addPeerLoop) + }) } // UpdateConfig updates the configuration in Core with the provided // config.NodeConfig and then signals the various module goroutines to // reconfigure themselves if needed. 
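The timer pattern used by _addPeerLoop above, where the actor re-arms a time.AfterFunc whose callback re-enters the inbox via Act, can be reduced to a minimal standalone sketch. The pinger type and the interval are illustrative only, and the time and github.com/Arceliar/phony imports are assumed:

    type pinger struct {
        phony.Inbox
        timer *time.Timer
    }

    // _tick always runs on the pinger's own goroutine because it is only
    // ever queued via Act.
    func (p *pinger) _tick() {
        // ... do the periodic work here ...
        p.timer = time.AfterFunc(time.Minute, func() {
            p.Act(nil, p._tick) // re-enter the inbox rather than running inline
        })
    }

    // Stop cancels the pending timer, mirroring how Core._stop stops
    // addPeerTimer.
    func (p *pinger) Stop() {
        phony.Block(p, func() {
            if p.timer != nil {
                p.timer.Stop()
            }
        })
    }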
func (c *Core) UpdateConfig(config *config.NodeConfig) { - c.log.Debugln("Reloading node configuration...") + c.Act(nil, func() { + c.log.Debugln("Reloading node configuration...") - c.config.Replace(*config) + // Replace the active configuration with the supplied one + c.config.Replace(*config) - errors := 0 - - components := []chan chan error{ - c.searches.reconfigure, - c.dht.reconfigure, - c.sessions.reconfigure, - c.peers.reconfigure, - c.router.reconfigure, - c.switchTable.reconfigure, - c.link.reconfigure, - } - - for _, component := range components { - response := make(chan error) - component <- response - if err := <-response; err != nil { - c.log.Errorln(err) - errors++ - } - } - - if errors > 0 { - c.log.Warnln(errors, "node module(s) reported errors during configuration reload") - } else { - c.log.Infoln("Node configuration reloaded successfully") - } + // Notify the router and switch about the new configuration + c.router.Act(c, c.router.reconfigure) + c.switchTable.Act(c, c.switchTable.reconfigure) + }) } // Start starts up Yggdrasil using the provided config.NodeConfig, and outputs @@ -154,7 +136,15 @@ func (c *Core) UpdateConfig(config *config.NodeConfig) { // TCP and UDP sockets, a multicast discovery socket, an admin socket, router, // switch and DHT node. A config.NodeState is returned which contains both the // current and previous configurations (from reconfigures). -func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState, error) { +func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) (conf *config.NodeState, err error) { + phony.Block(c, func() { + conf, err = c._start(nc, log) + }) + return +} + +// This function is unsafe and should only be ran by the core actor. +func (c *Core) _start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState, error) { c.log = log c.config = config.NodeState{ @@ -170,20 +160,13 @@ func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState, } c.log.Infoln("Starting up...") - - c.init() + c._init() if err := c.link.init(c); err != nil { c.log.Errorln("Failed to start link interfaces") return nil, err } - c.config.Mutex.RLock() - if c.config.Current.SwitchOptions.MaxTotalQueueSize >= SwitchQueueTotalMinSize { - c.switchTable.queueTotalMaxSize = c.config.Current.SwitchOptions.MaxTotalQueueSize - } - c.config.Mutex.RUnlock() - if err := c.switchTable.start(); err != nil { c.log.Errorln("Failed to start switch") return nil, err @@ -194,7 +177,7 @@ func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState, return nil, err } - go c.addPeerLoop() + c.Act(c, c._addPeerLoop) c.log.Infoln("Startup complete") return &c.config, nil @@ -202,5 +185,18 @@ func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) (*config.NodeState, // Stop shuts down the Yggdrasil node. func (c *Core) Stop() { - c.log.Infoln("Stopping...") + phony.Block(c, c._stop) +} + +// This function is unsafe and should only be ran by the core actor. 
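Start/_start and Stop/_stop above follow the convention used throughout this change: underscore-prefixed methods must only run on the actor's own goroutine, and the exported wrapper gets there with phony.Block. A minimal sketch of that convention, using an illustrative counter type (not part of this change) and assuming the github.com/Arceliar/phony import:

    type counter struct {
        phony.Inbox
        n int
    }

    // _add is unsafe and should only be run by the counter actor.
    func (c *counter) _add(delta int) {
        c.n += delta
    }

    // Add can be called from any goroutine; it blocks until the actor has
    // processed the message, much like Core.Start and Core.Stop do.
    func (c *counter) Add(delta int) {
        phony.Block(c, func() { c._add(delta) })
    }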
+func (c *Core) _stop() { + c.log.Infoln("Stopping...") + if c.addPeerTimer != nil { + c.addPeerTimer.Stop() + } + c.link.stop() + for _, peer := range c.GetPeers() { + c.DisconnectPeer(peer.Port) + } + c.log.Infoln("Stopped") } diff --git a/src/yggdrasil/debug.go b/src/yggdrasil/debug.go index 9f7707e2..b9bd5cfb 100644 --- a/src/yggdrasil/debug.go +++ b/src/yggdrasil/debug.go @@ -2,20 +2,7 @@ package yggdrasil -// These are functions that should not exist -// They are (or were) used during development, to work around missing features -// They're also used to configure things from the outside -// It would be better to define and export a few config functions elsewhere -// Or define some remote API and call it to send/request configuration info - -import _ "golang.org/x/net/ipv6" // TODO put this somewhere better - -//import "golang.org/x/net/proxy" - import "fmt" -import "net" -import "regexp" -import "encoding/hex" import _ "net/http/pprof" import "net/http" @@ -24,11 +11,6 @@ import "os" import "github.com/gologme/log" -import "github.com/yggdrasil-network/yggdrasil-go/src/address" -import "github.com/yggdrasil-network/yggdrasil-go/src/config" -import "github.com/yggdrasil-network/yggdrasil-go/src/crypto" -import "github.com/yggdrasil-network/yggdrasil-go/src/defaults" - // Start the profiler in debug builds, if the required environment variable is set. func init() { envVarName := "PPROFLISTEN" @@ -49,580 +31,3 @@ func StartProfiler(log *log.Logger) error { go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() return nil } - -// This function is only called by the simulator to set up a node with random -// keys. It should not be used and may be removed in the future. -func (c *Core) Init() { - bpub, bpriv := crypto.NewBoxKeys() - spub, spriv := crypto.NewSigKeys() - hbpub := hex.EncodeToString(bpub[:]) - hbpriv := hex.EncodeToString(bpriv[:]) - hspub := hex.EncodeToString(spub[:]) - hspriv := hex.EncodeToString(spriv[:]) - cfg := config.NodeConfig{ - EncryptionPublicKey: hbpub, - EncryptionPrivateKey: hbpriv, - SigningPublicKey: hspub, - SigningPrivateKey: hspriv, - } - c.config = config.NodeState{ - Current: cfg, - Previous: cfg, - } - c.init() - c.switchTable.start() - c.router.start() -} - -//////////////////////////////////////////////////////////////////////////////// - -// Core - -func (c *Core) DEBUG_getSigningPublicKey() crypto.SigPubKey { - return (crypto.SigPubKey)(c.sigPub) -} - -func (c *Core) DEBUG_getEncryptionPublicKey() crypto.BoxPubKey { - return (crypto.BoxPubKey)(c.boxPub) -} - -/* -func (c *Core) DEBUG_getSend() chan<- []byte { - return c.router.tun.send -} - -func (c *Core) DEBUG_getRecv() <-chan []byte { - return c.router.tun.recv -} -*/ - -// Peer - -func (c *Core) DEBUG_getPeers() *peers { - return &c.peers -} - -func (ps *peers) DEBUG_newPeer(box crypto.BoxPubKey, sig crypto.SigPubKey, link crypto.BoxSharedKey) *peer { - sim := linkInterface{ - name: "(simulator)", - info: linkInfo{ - local: "(simulator)", - remote: "(simulator)", - linkType: "sim", - }, - } - return ps.newPeer(&box, &sig, &link, &sim, nil) -} - -/* -func (ps *peers) DEBUG_startPeers() { - ps.mutex.RLock() - defer ps.mutex.RUnlock() - for _, p := range ps.ports { - if p == nil { continue } - go p.MainLoop() - } -} -*/ - -func (ps *peers) DEBUG_hasPeer(key crypto.SigPubKey) bool { - ports := ps.ports.Load().(map[switchPort]*peer) - for _, p := range ports { - if p == nil { - continue - } - if p.sig == key { - return true - } - } - return false -} - -func (ps *peers) 
DEBUG_getPorts() map[switchPort]*peer { - ports := ps.ports.Load().(map[switchPort]*peer) - newPeers := make(map[switchPort]*peer) - for port, p := range ports { - newPeers[port] = p - } - return newPeers -} - -func (p *peer) DEBUG_getSigKey() crypto.SigPubKey { - return p.sig -} - -func (p *peer) DEEBUG_getPort() switchPort { - return p.port -} - -// Router - -func (c *Core) DEBUG_getSwitchTable() *switchTable { - return &c.switchTable -} - -func (c *Core) DEBUG_getLocator() switchLocator { - return c.switchTable.getLocator() -} - -func (l *switchLocator) DEBUG_getCoords() []byte { - return l.getCoords() -} - -func (c *Core) DEBUG_switchLookup(dest []byte) switchPort { - return c.switchTable.DEBUG_lookup(dest) -} - -// This does the switch layer lookups that decide how to route traffic. -// Traffic uses greedy routing in a metric space, where the metric distance between nodes is equal to the distance between them on the tree. -// Traffic must be routed to a node that is closer to the destination via the metric space distance. -// In the event that two nodes are equally close, it gets routed to the one with the longest uptime (due to the order that things are iterated over). -// The size of the outgoing packet queue is added to a node's tree distance when the cost of forwarding to a node, subject to the constraint that the real tree distance puts them closer to the destination than ourself. -// Doing so adds a limited form of backpressure routing, based on local information, which allows us to forward traffic around *local* bottlenecks, provided that another greedy path exists. -func (t *switchTable) DEBUG_lookup(dest []byte) switchPort { - table := t.getTable() - myDist := table.self.dist(dest) - if myDist == 0 { - return 0 - } - // cost is in units of (expected distance) + (expected queue size), where expected distance is used as an approximation of the minimum backpressure gradient needed for packets to flow - ports := t.core.peers.getPorts() - var best switchPort - bestCost := int64(^uint64(0) >> 1) - for _, info := range table.elems { - dist := info.locator.dist(dest) - if !(dist < myDist) { - continue - } - //p, isIn := ports[info.port] - _, isIn := ports[info.port] - if !isIn { - continue - } - cost := int64(dist) // + p.getQueueSize() - if cost < bestCost { - best = info.port - bestCost = cost - } - } - return best -} - -/* -func (t *switchTable) DEBUG_isDirty() bool { - //data := t.data.Load().(*tabledata) - t.mutex.RLock() - defer t.mutex.RUnlock() - data := t.data - return data.dirty -} -*/ - -func (t *switchTable) DEBUG_dumpTable() { - //data := t.data.Load().(*tabledata) - t.mutex.RLock() - defer t.mutex.RUnlock() - data := t.data - for _, peer := range data.peers { - //fmt.Println("DUMPTABLE:", t.treeID, peer.treeID, peer.port, - // peer.locator.Root, peer.coords, - // peer.reverse.Root, peer.reverse.Coords, peer.forward) - fmt.Println("DUMPTABLE:", t.key, peer.key, peer.locator.coords, peer.port /*, peer.forward*/) - } -} - -func (t *switchTable) DEBUG_getReversePort(port switchPort) switchPort { - // Returns Port(0) if it cannot get the reverse peer for any reason - //data := t.data.Load().(*tabledata) - t.mutex.RLock() - defer t.mutex.RUnlock() - data := t.data - if port >= switchPort(len(data.peers)) { - return switchPort(0) - } - pinfo := data.peers[port] - if len(pinfo.locator.coords) < 1 { - return switchPort(0) - } - return pinfo.locator.coords[len(pinfo.locator.coords)-1] -} - -// Wire - -func DEBUG_wire_encode_coords(coords []byte) []byte { - return 
wire_encode_coords(coords) -} - -// DHT, via core - -func (c *Core) DEBUG_getDHTSize() int { - var total int - c.router.doAdmin(func() { - total = len(c.dht.table) - }) - return total -} - -// TUN defaults - -func (c *Core) DEBUG_GetTUNDefaultIfName() string { - return defaults.GetDefaults().DefaultIfName -} - -func (c *Core) DEBUG_GetTUNDefaultIfMTU() int { - return defaults.GetDefaults().DefaultIfMTU -} - -func (c *Core) DEBUG_GetTUNDefaultIfTAPMode() bool { - return defaults.GetDefaults().DefaultIfTAPMode -} - -// udpInterface -// FIXME udpInterface isn't exported -// So debug functions need to work differently... - -/* -func (c *Core) DEBUG_setupLoopbackUDPInterface() { - iface := udpInterface{} - iface.init(c, "[::1]:0") - c.ifaces = append(c.ifaces[:0], &iface) -} -*/ - -/* -func (c *Core) DEBUG_getLoopbackAddr() net.Addr { - iface := c.ifaces[0] - return iface.sock.LocalAddr() -} -*/ - -/* -func (c *Core) DEBUG_addLoopbackPeer(addr *net.UDPAddr, - in (chan<- []byte), - out (<-chan []byte)) { - iface := c.ifaces[0] - iface.addPeer(addr, in, out) -} -*/ - -/* -func (c *Core) DEBUG_startLoopbackUDPInterface() { - iface := c.ifaces[0] - go iface.reader() - for addr, chs := range iface.peers { - udpAddr, err := net.ResolveUDPAddr("udp6", addr) - if err != nil { panic(err) } - go iface.writer(udpAddr, chs.out) - } -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -func (c *Core) DEBUG_getAddr() *address.Address { - return address.AddrForNodeID(&c.dht.nodeID) -} - -/* -func (c *Core) DEBUG_startTun(ifname string, iftapmode bool) { - c.DEBUG_startTunWithMTU(ifname, iftapmode, 1280) -} - -func (c *Core) DEBUG_startTunWithMTU(ifname string, iftapmode bool, mtu int) { - addr := c.DEBUG_getAddr() - straddr := fmt.Sprintf("%s/%v", net.IP(addr[:]).String(), 8*len(address.GetPrefix())) - if ifname != "none" { - err := c.router.tun.setup(ifname, iftapmode, straddr, mtu) - if err != nil { - panic(err) - } - c.log.Println("Setup TUN/TAP:", c.router.tun.iface.Name(), straddr) - go func() { panic(c.router.tun.read()) }() - } - go func() { panic(c.router.tun.write()) }() -} - -func (c *Core) DEBUG_stopTun() { - c.router.tun.close() -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -func (c *Core) DEBUG_newBoxKeys() (*crypto.BoxPubKey, *crypto.BoxPrivKey) { - return crypto.NewBoxKeys() -} - -func (c *Core) DEBUG_getSharedKey(myPrivKey *crypto.BoxPrivKey, othersPubKey *crypto.BoxPubKey) *crypto.BoxSharedKey { - return crypto.GetSharedKey(myPrivKey, othersPubKey) -} - -func (c *Core) DEBUG_newSigKeys() (*crypto.SigPubKey, *crypto.SigPrivKey) { - return crypto.NewSigKeys() -} - -func (c *Core) DEBUG_getNodeID(pub *crypto.BoxPubKey) *crypto.NodeID { - return crypto.GetNodeID(pub) -} - -func (c *Core) DEBUG_getTreeID(pub *crypto.SigPubKey) *crypto.TreeID { - return crypto.GetTreeID(pub) -} - -func (c *Core) DEBUG_addrForNodeID(nodeID *crypto.NodeID) string { - return net.IP(address.AddrForNodeID(nodeID)[:]).String() -} - -func (c *Core) DEBUG_init(bpub []byte, - bpriv []byte, - spub []byte, - spriv []byte) { - /*var boxPub crypto.BoxPubKey - var boxPriv crypto.BoxPrivKey - var sigPub crypto.SigPubKey - var sigPriv crypto.SigPrivKey - copy(boxPub[:], bpub) - copy(boxPriv[:], bpriv) - copy(sigPub[:], spub) - copy(sigPriv[:], spriv) - c.init(&boxPub, &boxPriv, &sigPub, &sigPriv)*/ - hbpub := hex.EncodeToString(bpub[:]) - hbpriv := hex.EncodeToString(bpriv[:]) - hspub := hex.EncodeToString(spub[:]) - hspriv := 
hex.EncodeToString(spriv[:]) - cfg := config.NodeConfig{ - EncryptionPublicKey: hbpub, - EncryptionPrivateKey: hbpriv, - SigningPublicKey: hspub, - SigningPrivateKey: hspriv, - } - c.config = config.NodeState{ - Current: cfg, - Previous: cfg, - } - c.init() - - if err := c.router.start(); err != nil { - panic(err) - } - -} - -//////////////////////////////////////////////////////////////////////////////// - -/* -func (c *Core) DEBUG_setupAndStartGlobalUDPInterface(addrport string) { - if err := c.udp.init(c, addrport); err != nil { - c.log.Println("Failed to start UDP interface:", err) - panic(err) - } -} - -func (c *Core) DEBUG_getGlobalUDPAddr() *net.UDPAddr { - return c.udp.sock.LocalAddr().(*net.UDPAddr) -} - -func (c *Core) DEBUG_maybeSendUDPKeys(saddr string) { - udpAddr, err := net.ResolveUDPAddr("udp", saddr) - if err != nil { - panic(err) - } - var addr connAddr - addr.fromUDPAddr(udpAddr) - c.udp.mutex.RLock() - _, isIn := c.udp.conns[addr] - c.udp.mutex.RUnlock() - if !isIn { - c.udp.sendKeys(addr) - } -} -*/ - -//////////////////////////////////////////////////////////////////////////////// -/* -func (c *Core) DEBUG_addPeer(addr string) { - err := c.admin.addPeer(addr, "") - if err != nil { - panic(err) - } -} -*/ -/* -func (c *Core) DEBUG_addSOCKSConn(socksaddr, peeraddr string) { - go func() { - dialer, err := proxy.SOCKS5("tcp", socksaddr, nil, proxy.Direct) - if err == nil { - conn, err := dialer.Dial("tcp", peeraddr) - if err == nil { - c.tcp.callWithConn(&wrappedConn{ - c: conn, - raddr: &wrappedAddr{ - network: "tcp", - addr: peeraddr, - }, - }) - } - } - }() -} -*/ - -/* -func (c *Core) DEBUG_setupAndStartGlobalTCPInterface(addrport string) { - c.config.Listen = []string{addrport} - if err := c.link.init(c); err != nil { - c.log.Println("Failed to start interfaces:", err) - panic(err) - } -} - -func (c *Core) DEBUG_getGlobalTCPAddr() *net.TCPAddr { - return c.link.tcp.getAddr() -} - -func (c *Core) DEBUG_addTCPConn(saddr string) { - c.link.tcp.call(saddr, nil, "") -} - -//*/ - -/* -func (c *Core) DEBUG_startSelfPeer() { - c.Peers.mutex.RLock() - defer c.Peers.mutex.RUnlock() - p := c.Peers.ports[0] - go p.MainLoop() -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -/* -func (c *Core) DEBUG_setupAndStartGlobalKCPInterface(addrport string) { - iface := kcpInterface{} - iface.init(c, addrport) - c.kcp = &iface -} - -func (c *Core) DEBUG_getGlobalKCPAddr() net.Addr { - return c.kcp.serv.Addr() -} - -func (c *Core) DEBUG_addKCPConn(saddr string) { - c.kcp.call(saddr) -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -/* -func (c *Core) DEBUG_setupAndStartAdminInterface(addrport string) { - a := admin{} - c.config.AdminListen = addrport - a.init() - c.admin = a -} - -func (c *Core) DEBUG_setupAndStartMulticastInterface() { - m := multicast{} - m.init(c) - c.multicast = m - m.start() -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -func (c *Core) DEBUG_setLogger(log *log.Logger) { - c.log = log -} - -func (c *Core) DEBUG_setIfceExpr(expr *regexp.Regexp) { - c.log.Println("DEBUG_setIfceExpr no longer implemented") -} - -/* -func (c *Core) DEBUG_addAllowedEncryptionPublicKey(boxStr string) { - err := c.admin.addAllowedEncryptionPublicKey(boxStr) - if err != nil { - panic(err) - } -} -*/ -//////////////////////////////////////////////////////////////////////////////// - -func DEBUG_simLinkPeers(p, q *peer) { - // Sets q.out() to point to p and starts 
p.linkLoop() - goWorkers := func(source, dest *peer) { - source.linkOut = make(chan []byte, 1) - send := make(chan []byte, 1) - source.out = func(bss [][]byte) { - for _, bs := range bss { - send <- bs - } - } - go source.linkLoop() - go func() { - var packets [][]byte - for { - select { - case packet := <-source.linkOut: - packets = append(packets, packet) - continue - case packet := <-send: - packets = append(packets, packet) - source.core.switchTable.idleIn <- source.port - continue - default: - } - if len(packets) > 0 { - dest.handlePacket(packets[0]) - packets = packets[1:] - continue - } - select { - case packet := <-source.linkOut: - packets = append(packets, packet) - case packet := <-send: - packets = append(packets, packet) - source.core.switchTable.idleIn <- source.port - } - } - }() - } - goWorkers(p, q) - goWorkers(q, p) - p.core.switchTable.idleIn <- p.port - q.core.switchTable.idleIn <- q.port -} - -/* -func (c *Core) DEBUG_simFixMTU() { - c.router.tun.mtu = 65535 -} -*/ - -//////////////////////////////////////////////////////////////////////////////// - -func Util_testAddrIDMask() { - for idx := 0; idx < 16; idx++ { - var orig crypto.NodeID - orig[8] = 42 - for bidx := 0; bidx < idx; bidx++ { - orig[bidx/8] |= (0x80 >> uint8(bidx%8)) - } - addr := address.AddrForNodeID(&orig) - nid, mask := addr.GetNodeIDandMask() - for b := 0; b < len(mask); b++ { - nid[b] &= mask[b] - orig[b] &= mask[b] - } - if *nid != orig { - fmt.Println(orig) - fmt.Println(*addr) - fmt.Println(*nid) - fmt.Println(*mask) - panic(idx) - } - } -} diff --git a/src/yggdrasil/dht.go b/src/yggdrasil/dht.go index b53e29c9..575c8b1a 100644 --- a/src/yggdrasil/dht.go +++ b/src/yggdrasil/dht.go @@ -65,33 +65,27 @@ type dhtReqKey struct { // The main DHT struct. type dht struct { - core *Core - reconfigure chan chan error - nodeID crypto.NodeID - peers chan *dhtInfo // other goroutines put incoming dht updates here - reqs map[dhtReqKey]time.Time // Keeps track of recent outstanding requests - callbacks map[dhtReqKey][]dht_callbackInfo // Search and admin lookup callbacks + router *router + nodeID crypto.NodeID + reqs map[dhtReqKey]time.Time // Keeps track of recent outstanding requests + callbacks map[dhtReqKey][]dht_callbackInfo // Search and admin lookup callbacks // These next two could be replaced by a single linked list or similar... table map[crypto.NodeID]*dhtInfo imp []*dhtInfo } // Initializes the DHT. -func (t *dht) init(c *Core) { - t.core = c - t.reconfigure = make(chan chan error, 1) - go func() { - for { - e := <-t.reconfigure - e <- nil - } - }() - t.nodeID = *t.core.NodeID() - t.peers = make(chan *dhtInfo, 1024) +func (t *dht) init(r *router) { + t.router = r + t.nodeID = *t.router.core.NodeID() t.callbacks = make(map[dhtReqKey][]dht_callbackInfo) t.reset() } +func (t *dht) reconfigure() { + // This is where reconfiguration would go, if we had anything to do +} + // Resets the DHT in response to coord changes. // This empties all info from the DHT and drops outstanding requests. func (t *dht) reset() { @@ -192,10 +186,10 @@ func dht_ordered(first, second, third *crypto.NodeID) bool { // Update info about the node that sent the request. 
func (t *dht) handleReq(req *dhtReq) { // Send them what they asked for - loc := t.core.switchTable.getLocator() + loc := t.router.core.switchTable.getLocator() coords := loc.getCoords() res := dhtRes{ - Key: t.core.boxPub, + Key: t.router.core.boxPub, Coords: coords, Dest: req.Dest, Infos: t.lookup(&req.Dest, false), @@ -223,17 +217,17 @@ func (t *dht) handleReq(req *dhtReq) { func (t *dht) sendRes(res *dhtRes, req *dhtReq) { // Send a reply for a dhtReq bs := res.encode() - shared := t.core.sessions.getSharedKey(&t.core.boxPriv, &req.Key) + shared := t.router.sessions.getSharedKey(&t.router.core.boxPriv, &req.Key) payload, nonce := crypto.BoxSeal(shared, bs, nil) p := wire_protoTrafficPacket{ Coords: req.Coords, ToKey: req.Key, - FromKey: t.core.boxPub, + FromKey: t.router.core.boxPub, Nonce: *nonce, Payload: payload, } packet := p.encode() - t.core.router.out(packet) + t.router.out(packet) } type dht_callbackInfo struct { @@ -287,17 +281,17 @@ func (t *dht) handleRes(res *dhtRes) { func (t *dht) sendReq(req *dhtReq, dest *dhtInfo) { // Send a dhtReq to the node in dhtInfo bs := req.encode() - shared := t.core.sessions.getSharedKey(&t.core.boxPriv, &dest.key) + shared := t.router.sessions.getSharedKey(&t.router.core.boxPriv, &dest.key) payload, nonce := crypto.BoxSeal(shared, bs, nil) p := wire_protoTrafficPacket{ Coords: dest.coords, ToKey: dest.key, - FromKey: t.core.boxPub, + FromKey: t.router.core.boxPub, Nonce: *nonce, Payload: payload, } packet := p.encode() - t.core.router.out(packet) + t.router.out(packet) rq := dhtReqKey{dest.key, req.Dest} t.reqs[rq] = time.Now() } @@ -308,10 +302,10 @@ func (t *dht) ping(info *dhtInfo, target *crypto.NodeID) { if target == nil { target = &t.nodeID } - loc := t.core.switchTable.getLocator() + loc := t.router.core.switchTable.getLocator() coords := loc.getCoords() req := dhtReq{ - Key: t.core.boxPub, + Key: t.router.core.boxPub, Coords: coords, Dest: *target, } @@ -386,7 +380,7 @@ func (t *dht) getImportant() []*dhtInfo { }) // Keep the ones that are no further than the closest seen so far minDist := ^uint64(0) - loc := t.core.switchTable.getLocator() + loc := t.router.core.switchTable.getLocator() important := infos[:0] for _, info := range infos { dist := uint64(loc.dist(info.coords)) @@ -415,12 +409,12 @@ func (t *dht) getImportant() []*dhtInfo { // Returns true if this is a node we need to keep track of for the DHT to work. func (t *dht) isImportant(ninfo *dhtInfo) bool { - if ninfo.key == t.core.boxPub { + if ninfo.key == t.router.core.boxPub { return false } important := t.getImportant() // Check if ninfo is of equal or greater importance to what we already know - loc := t.core.switchTable.getLocator() + loc := t.router.core.switchTable.getLocator() ndist := uint64(loc.dist(ninfo.coords)) minDist := ^uint64(0) for _, info := range important { diff --git a/src/yggdrasil/dialer.go b/src/yggdrasil/dialer.go index 6b24cfb4..04410855 100644 --- a/src/yggdrasil/dialer.go +++ b/src/yggdrasil/dialer.go @@ -65,6 +65,7 @@ func (d *Dialer) DialByNodeIDandMask(nodeID, nodeMask *crypto.NodeID) (*Conn, er conn.Close() return nil, err } + conn.session.setConn(nil, conn) t := time.NewTimer(6 * time.Second) // TODO use a context instead defer t.Stop() select { diff --git a/src/yggdrasil/doc.go b/src/yggdrasil/doc.go new file mode 100644 index 00000000..44d39b66 --- /dev/null +++ b/src/yggdrasil/doc.go @@ -0,0 +1,176 @@ +/* +Package yggdrasil implements the core functionality of the Yggdrasil Network. 
+ +Introduction + +Yggdrasil is a proof-of-concept mesh network which provides end-to-end encrypted +communication between nodes in a decentralised fashion. The network is arranged +using a globally-agreed spanning tree which provides each node with a locator +(coordinates relative to the root) and a distributed hash table (DHT) mechanism +for finding other nodes. + +Each node also implements a router, which is responsible for encryption of +traffic, searches and connections, and a switch, which is responsible ultimately +for forwarding traffic across the network. + +While many Yggdrasil nodes in existence today are IP nodes - that is, they are +transporting IPv6 packets, like a kind of mesh VPN - it is also possible to +integrate Yggdrasil into your own applications and use it as a generic data +transport, similar to UDP. + +This library is what you need to integrate and use Yggdrasil in your own +application. + +Basics + +In order to start an Yggdrasil node, you should start by generating node +configuration, which amongst other things, includes encryption keypairs which +are used to generate the node's identity, and supply a logger which Yggdrasil's +output will be written to. + +This may look something like this: + + import ( + "os" + "github.com/gologme/log" + "github.com/yggdrasil-network/yggdrasil-go/src/config" + "github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil" + ) + + type node struct { + core yggdrasil.Core + config *config.NodeConfig + log *log.Logger + } + +You then can supply node configuration and a logger: + + n := node{} + n.log = log.New(os.Stdout, "", log.Flags()) + n.config = config.GenerateConfig() + +In the above example, we ask the config package to supply new configuration each +time, which results in fresh encryption keys and therefore a new identity. It is +normally preferable in most cases to persist node configuration onto the +filesystem or into some configuration store so that the node's identity does not +change each time that the program starts. Note that Yggdrasil will automatically +fill in any missing configuration items with sane defaults. + +Once you have supplied a logger and some node configuration, you can then start +the node: + + n.core.Start(n.config, n.log) + +Add some peers to connect to the network: + + n.core.AddPeer("tcp://some-host.net:54321", "") + n.core.AddPeer("tcp://[2001::1:2:3]:54321", "") + n.core.AddPeer("tcp://1.2.3.4:54321", "") + +You can also ask the API for information about our node: + + n.log.Println("My node ID is", n.core.NodeID()) + n.log.Println("My public key is", n.core.EncryptionPublicKey()) + n.log.Println("My coords are", n.core.Coords()) + +Incoming Connections + +Once your node is started, you can then listen for connections from other nodes +by asking the API for a Listener: + + listener, err := n.core.ConnListen() + if err != nil { + // ... + } + +The Listener has a blocking Accept function which will wait for incoming +connections from remote nodes. It will return a Conn when a connection is +received. If the node never receives any incoming connections then this function +can block forever, so be prepared for that, perhaps by listening in a separate +goroutine. + +Assuming that you have defined a myConnectionHandler function to deal with +incoming connections: + + for { + conn, err := listener.Accept() + if err != nil { + // ... 
+ } + + // We've got a new connection + go myConnectionHandler(conn) + } + +Outgoing Connections + +If you know the node ID of the remote node that you want to talk to, you can +dial an outbound connection to it. To do this, you should first ask the API for +a Dialer: + + dialer, err := n.core.ConnDialer() + if err != nil { + // ... + } + +You can then dial using the 16-byte node ID in hexadecimal format, for example: + + conn, err := dialer.Dial("nodeid", "24a58cfce691ec016b0f698f7be1bee983cea263781017e99ad3ef62b4ef710a45d6c1a072c5ce46131bd574b78818c9957042cafeeed13966f349e94eb771bf") + if err != nil { + // ... + } + +Using Connections + +Conn objects are implementations of io.ReadWriteCloser, and as such, you can +Read, Write and Close them as necessary. + +Each Read or Write operation can deal with a buffer with a maximum size of 65535 +bytes - any bigger than this and the operation will return an error. + +For example, to write to the Conn from the supplied buffer: + + buf := []byte{1, 2, 3, 4, 5} + w, err := conn.Write(buf) + if err != nil { + // ... + } else { + // written w bytes + } + +Reading from the Conn into the supplied buffer: + + buf := make([]byte, 65535) + r, err := conn.Read(buf) + if err != nil { + // ... + } else { + // read r bytes + } + +When you are happy that a connection is no longer required, you can discard it: + + err := conn.Close() + if err != nil { + // ... + } + +Limitations + +You should be aware of the following limitations when working with the Yggdrasil +library: + +Individual messages written through Yggdrasil connections can not exceed 65535 +bytes in size. Yggdrasil has no concept of fragmentation, so if you try to send +a message that exceeds 65535 bytes in size, it will be dropped altogether and +an error will be returned. + +Yggdrasil connections are unreliable by nature. Messages are delivered on a +best-effort basis, and employs congestion control where appropriate to ensure +that congestion does not affect message transport, but Yggdrasil will not +retransmit any messages that have been lost. If reliable delivery is important +then you should manually implement acknowledgement and retransmission of +messages. 
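As a concrete illustration of the size limit described above, a caller can guard writes before handing data to the connection. This is only a sketch: writeChecked is not part of the API, and the fmt import is assumed:

    const maxMessageSize = 65535 // per the limitation described above

    func writeChecked(conn *yggdrasil.Conn, msg []byte) error {
        if len(msg) > maxMessageSize {
            return fmt.Errorf("message is %d bytes, above the %d-byte limit", len(msg), maxMessageSize)
        }
        _, err := conn.Write(msg)
        return err
    }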
+ +*/ +package yggdrasil diff --git a/src/yggdrasil/link.go b/src/yggdrasil/link.go index 4ce374b4..98c080c7 100644 --- a/src/yggdrasil/link.go +++ b/src/yggdrasil/link.go @@ -16,14 +16,16 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) type link struct { - core *Core - reconfigure chan chan error - mutex sync.RWMutex // protects interfaces below - interfaces map[linkInfo]*linkInterface - tcp tcp // TCP interface support + core *Core + mutex sync.RWMutex // protects interfaces below + interfaces map[linkInfo]*linkInterface + tcp tcp // TCP interface support + stopped chan struct{} // TODO timeout (to remove from switch), read from config.ReadTimeout } @@ -45,48 +47,48 @@ type linkInterfaceMsgIO interface { } type linkInterface struct { - name string - link *link - peer *peer - msgIO linkInterfaceMsgIO - info linkInfo - incoming bool - force bool - closed chan struct{} + name string + link *link + peer *peer + msgIO linkInterfaceMsgIO + info linkInfo + incoming bool + force bool + closed chan struct{} + reader linkReader // Reads packets, notifies this linkInterface, passes packets to switch + writer linkWriter // Writes packets, notifies this linkInterface + phony.Inbox // Protects the below + sendTimer *time.Timer // Fires to signal that sending is blocked + keepAliveTimer *time.Timer // Fires to send keep-alive traffic + stallTimer *time.Timer // Fires to signal that no incoming traffic (including keep-alive) has been seen + closeTimer *time.Timer // Fires when the link has been idle so long we need to close it + inSwitch bool // True if the switch is tracking this link + stalled bool // True if we haven't been receiving any response traffic } func (l *link) init(c *Core) error { l.core = c l.mutex.Lock() l.interfaces = make(map[linkInfo]*linkInterface) - l.reconfigure = make(chan chan error) l.mutex.Unlock() + l.stopped = make(chan struct{}) if err := l.tcp.init(l); err != nil { c.log.Errorln("Failed to start TCP interface") return err } - go func() { - for { - e := <-l.reconfigure - tcpresponse := make(chan error) - l.tcp.reconfigure <- tcpresponse - if err := <-tcpresponse; err != nil { - e <- err - continue - } - e <- nil - } - }() - return nil } +func (l *link) reconfigure() { + l.tcp.reconfigure() +} + func (l *link) call(uri string, sintf string) error { u, err := url.Parse(uri) if err != nil { - return err + return fmt.Errorf("peer %s is not correctly formatted (%s)", uri, err) } pathtokens := strings.Split(strings.Trim(u.Path, "/"), "/") switch u.Scheme { @@ -103,7 +105,7 @@ func (l *link) call(uri string, sintf string) error { func (l *link) listen(uri string) error { u, err := url.Parse(uri) if err != nil { - return err + return fmt.Errorf("listener %s is not correctly formatted (%s)", uri, err) } switch u.Scheme { case "tcp": @@ -128,9 +130,20 @@ func (l *link) create(msgIO linkInterfaceMsgIO, name, linkType, local, remote st incoming: incoming, force: force, } + intf.writer.intf = &intf + intf.reader.intf = &intf + intf.reader.err = make(chan error) return &intf, nil } +func (l *link) stop() error { + close(l.stopped) + if err := l.tcp.stop(); err != nil { + return err + } + return nil +} + func (intf *linkInterface) handler() error { // TODO split some of this into shorter functions, so it's easier to read, and for the FIXME duplicate peer issue mentioned later myLinkPub, myLinkPriv := crypto.NewBoxKeys() @@ 
-206,213 +219,219 @@ func (intf *linkInterface) handler() error { // More cleanup can go here intf.link.core.peers.removePeer(intf.peer.port) }() - // Finish setting up the peer struct - out := make(chan [][]byte, 1) - defer close(out) intf.peer.out = func(msgs [][]byte) { - defer func() { recover() }() - out <- msgs + intf.writer.sendFrom(intf.peer, msgs, false) + } + intf.peer.linkOut = func(bs []byte) { + intf.writer.sendFrom(intf.peer, [][]byte{bs}, true) } - intf.peer.linkOut = make(chan []byte, 1) themAddr := address.AddrForNodeID(crypto.GetNodeID(&intf.info.box)) themAddrString := net.IP(themAddr[:]).String() themString := fmt.Sprintf("%s@%s", themAddrString, intf.info.remote) intf.link.core.log.Infof("Connected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) - // Start the link loop - go intf.peer.linkLoop() - // Start the writer - signalReady := make(chan struct{}, 1) - signalSent := make(chan bool, 1) - sendAck := make(chan struct{}, 1) - sendBlocked := time.NewTimer(time.Second) - defer util.TimerStop(sendBlocked) - util.TimerStop(sendBlocked) + // Start things + go intf.peer.start() + intf.reader.Act(nil, intf.reader._read) + // Wait for the reader to finish + // TODO find a way to do this without keeping live goroutines around + done := make(chan struct{}) + defer close(done) go func() { - defer close(signalReady) - defer close(signalSent) - interval := 4 * time.Second - tcpTimer := time.NewTimer(interval) // used for backwards compat with old tcp - defer util.TimerStop(tcpTimer) - send := func(bss [][]byte) { - sendBlocked.Reset(time.Second) - size, _ := intf.msgIO.writeMsgs(bss) - util.TimerStop(sendBlocked) - select { - case signalSent <- size > 0: - default: - } - } - for { - // First try to send any link protocol traffic - select { - case msg := <-intf.peer.linkOut: - send([][]byte{msg}) - continue - default: - } - // No protocol traffic to send, so reset the timer - util.TimerStop(tcpTimer) - tcpTimer.Reset(interval) - // Now block until something is ready or the timer triggers keepalive traffic - select { - case <-tcpTimer.C: - intf.link.core.log.Tracef("Sending (legacy) keep-alive to %s: %s, source %s", - strings.ToUpper(intf.info.linkType), themString, intf.info.local) - send([][]byte{nil}) - case <-sendAck: - intf.link.core.log.Tracef("Sending ack to %s: %s, source %s", - strings.ToUpper(intf.info.linkType), themString, intf.info.local) - send([][]byte{nil}) - case msg := <-intf.peer.linkOut: - send([][]byte{msg}) - case msgs, ok := <-out: - if !ok { - return - } - send(msgs) - for _, msg := range msgs { - util.PutBytes(msg) - } - select { - case signalReady <- struct{}{}: - default: - } - //intf.link.core.log.Tracef("Sending packet to %s: %s, source %s", - // strings.ToUpper(intf.info.linkType), themString, intf.info.local) - } - } - }() - //intf.link.core.switchTable.idleIn <- intf.peer.port // notify switch that we're idle - // Used to enable/disable activity in the switch - signalAlive := make(chan bool, 1) // True = real packet, false = keep-alive - defer close(signalAlive) - ret := make(chan error, 1) // How we signal the return value when multiple goroutines are involved - go func() { - var isAlive bool - var isReady bool - var sendTimerRunning bool - var recvTimerRunning bool - recvTime := 6 * time.Second // TODO set to ReadTimeout from the config, reset if it gets changed - closeTime := 2 * switch_timeout // TODO or maybe this makes more sense for ReadTimeout?... 
- sendTime := time.Second - sendTimer := time.NewTimer(sendTime) - defer util.TimerStop(sendTimer) - recvTimer := time.NewTimer(recvTime) - defer util.TimerStop(recvTimer) - closeTimer := time.NewTimer(closeTime) - defer util.TimerStop(closeTimer) - for { - //intf.link.core.log.Debugf("State of %s: %s, source %s :: isAlive %t isReady %t sendTimerRunning %t recvTimerRunning %t", - // strings.ToUpper(intf.info.linkType), themString, intf.info.local, - // isAlive, isReady, sendTimerRunning, recvTimerRunning) - select { - case gotMsg, ok := <-signalAlive: - if !ok { - return - } - util.TimerStop(closeTimer) - closeTimer.Reset(closeTime) - util.TimerStop(recvTimer) - recvTimerRunning = false - isAlive = true - if !isReady { - // (Re-)enable in the switch - intf.link.core.switchTable.idleIn <- intf.peer.port - isReady = true - } - if gotMsg && !sendTimerRunning { - // We got a message - // Start a timer, if it expires then send a 0-sized ack to let them know we're alive - util.TimerStop(sendTimer) - sendTimer.Reset(sendTime) - sendTimerRunning = true - } - if !gotMsg { - intf.link.core.log.Tracef("Received ack from %s: %s, source %s", - strings.ToUpper(intf.info.linkType), themString, intf.info.local) - } - case sentMsg, ok := <-signalSent: - // Stop any running ack timer - if !ok { - return - } - util.TimerStop(sendTimer) - sendTimerRunning = false - if sentMsg && !recvTimerRunning { - // We sent a message - // Start a timer, if it expires and we haven't gotten any return traffic (including a 0-sized ack), then assume there's a problem - util.TimerStop(recvTimer) - recvTimer.Reset(recvTime) - recvTimerRunning = true - } - case _, ok := <-signalReady: - if !ok { - return - } - if !isAlive { - // Disable in the switch - isReady = false - } else { - // Keep enabled in the switch - intf.link.core.switchTable.idleIn <- intf.peer.port - isReady = true - } - case <-sendBlocked.C: - // We blocked while trying to send something - isReady = false - intf.link.core.switchTable.blockPeer(intf.peer.port) - case <-sendTimer.C: - // We haven't sent anything, so signal a send of a 0 packet to let them know we're alive - select { - case sendAck <- struct{}{}: - default: - } - case <-recvTimer.C: - // We haven't received anything, so assume there's a problem and don't return this node to the switch until they start responding - isAlive = false - intf.link.core.switchTable.blockPeer(intf.peer.port) - case <-closeTimer.C: - // We haven't received anything in a really long time, so things have died at the switch level and then some... - // Just close the connection at this point... 
- select { - case ret <- errors.New("timeout"): - default: - } - intf.msgIO.close() - } - } - }() - // Run reader loop - for { - msg, err := intf.msgIO.readMsg() - if len(msg) > 0 { - intf.peer.handlePacket(msg) - } - if err != nil { - if err != io.EOF { - select { - case ret <- err: - default: - } - } - break - } select { - case signalAlive <- len(msg) > 0: - default: + case <-intf.link.stopped: + intf.msgIO.close() + case <-done: } - } - //////////////////////////////////////////////////////////////////////////////// - // Remember to set `err` to something useful before returning - select { - case err = <-ret: + }() + err = <-intf.reader.err + // TODO don't report an error if it's just a 'use of closed network connection' + if err != nil { intf.link.core.log.Infof("Disconnected %s: %s, source %s; error: %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local, err) - default: - err = nil + } else { intf.link.core.log.Infof("Disconnected %s: %s, source %s", strings.ToUpper(intf.info.linkType), themString, intf.info.local) } return err } + +//////////////////////////////////////////////////////////////////////////////// + +const ( + sendTime = 1 * time.Second // How long to wait before deciding a send is blocked + keepAliveTime = 2 * time.Second // How long to wait before sending a keep-alive response if we have no real traffic to send + stallTime = 6 * time.Second // How long to wait for response traffic before deciding the connection has stalled + closeTime = 2 * switch_timeout // How long to wait before closing the link +) + +// notify the intf that we're currently sending +func (intf *linkInterface) notifySending(size int, isLinkTraffic bool) { + intf.Act(&intf.writer, func() { + if !isLinkTraffic { + intf.inSwitch = false + } + intf.sendTimer = time.AfterFunc(sendTime, intf.notifyBlockedSend) + intf._cancelStallTimer() + }) +} + +// called by an AfterFunc if we seem to be blocked in a send syscall for a long time +func (intf *linkInterface) _notifySyscall() { + intf.link.core.switchTable.Act(intf, func() { + intf.link.core.switchTable._sendingIn(intf.peer.port) + }) +} + +// we just sent something, so cancel any pending timer to send keep-alive traffic +func (intf *linkInterface) _cancelStallTimer() { + if intf.stallTimer != nil { + intf.stallTimer.Stop() + intf.stallTimer = nil + } +} + +// This gets called from a time.AfterFunc, and notifies the switch that we appear +// to have gotten blocked on a write, so the switch should start routing traffic +// through other links, if alternatives exist +func (intf *linkInterface) notifyBlockedSend() { + intf.Act(nil, func() { + if intf.sendTimer != nil { + //As far as we know, we're still trying to send, and the timer fired. 
+ intf.link.core.switchTable.blockPeer(intf.peer.port) + } + }) +} + +// notify the intf that we've finished sending, returning the peer to the switch +func (intf *linkInterface) notifySent(size int, isLinkTraffic bool) { + intf.Act(&intf.writer, func() { + intf.sendTimer.Stop() + intf.sendTimer = nil + if !isLinkTraffic { + intf._notifySwitch() + } + if size > 0 && intf.stallTimer == nil { + intf.stallTimer = time.AfterFunc(stallTime, intf.notifyStalled) + } + }) +} + +// Notify the switch that we're ready for more traffic, assuming we're not in a stalled state +func (intf *linkInterface) _notifySwitch() { + if !intf.inSwitch && !intf.stalled { + intf.inSwitch = true + intf.link.core.switchTable.Act(intf, func() { + intf.link.core.switchTable._idleIn(intf.peer.port) + }) + } +} + +// Set the peer as stalled, to prevent them from returning to the switch until a read succeeds +func (intf *linkInterface) notifyStalled() { + intf.Act(nil, func() { // Sent from a time.AfterFunc + if intf.stallTimer != nil { + intf.stallTimer.Stop() + intf.stallTimer = nil + intf.stalled = true + intf.link.core.switchTable.blockPeer(intf.peer.port) + } + }) +} + +// reset the close timer +func (intf *linkInterface) notifyReading() { + intf.Act(&intf.reader, func() { + if intf.closeTimer != nil { + intf.closeTimer.Stop() + } + intf.closeTimer = time.AfterFunc(closeTime, func() { intf.msgIO.close() }) + }) +} + +// wake up the link if it was stalled, and (if size > 0) prepare to send keep-alive traffic +func (intf *linkInterface) notifyRead(size int) { + intf.Act(&intf.reader, func() { + if intf.stallTimer != nil { + intf.stallTimer.Stop() + intf.stallTimer = nil + } + intf.stalled = false + intf._notifySwitch() + if size > 0 && intf.stallTimer == nil { + intf.stallTimer = time.AfterFunc(keepAliveTime, intf.notifyDoKeepAlive) + } + }) +} + +// We need to send keep-alive traffic now +func (intf *linkInterface) notifyDoKeepAlive() { + intf.Act(nil, func() { // Sent from a time.AfterFunc + if intf.stallTimer != nil { + intf.stallTimer.Stop() + intf.stallTimer = nil + intf.writer.sendFrom(nil, [][]byte{nil}, true) // Empty keep-alive traffic + } + }) +} + +//////////////////////////////////////////////////////////////////////////////// + +type linkWriter struct { + phony.Inbox + intf *linkInterface +} + +func (w *linkWriter) sendFrom(from phony.Actor, bss [][]byte, isLinkTraffic bool) { + w.Act(from, func() { + var size int + for _, bs := range bss { + size += len(bs) + } + w.intf.notifySending(size, isLinkTraffic) + // start a timer that will fire if we get stuck in writeMsgs for an oddly long time + var once sync.Once + timer := time.AfterFunc(time.Millisecond, func() { + // 1 ms is kind of arbitrary + // the rationale is that this should be very long compared to a syscall + // but it's still short compared to end-to-end latency or human perception + once.Do(func() { + w.intf.Act(nil, w.intf._notifySyscall) + }) + }) + w.intf.msgIO.writeMsgs(bss) + // Make sure we either stop the timer from doing anything or wait until it's done + once.Do(func() { timer.Stop() }) + w.intf.notifySent(size, isLinkTraffic) + // Cleanup + for _, bs := range bss { + util.PutBytes(bs) + } + }) +} + +//////////////////////////////////////////////////////////////////////////////// + +type linkReader struct { + phony.Inbox + intf *linkInterface + err chan error +} + +func (r *linkReader) _read() { + r.intf.notifyReading() + msg, err := r.intf.msgIO.readMsg() + r.intf.notifyRead(len(msg)) + if len(msg) > 0 { + r.intf.peer.handlePacketFrom(r, 
msg) + } + if err != nil { + if err != io.EOF { + r.err <- err + } + close(r.err) + return + } + // Now try to read again + r.Act(nil, r._read) +} diff --git a/src/yggdrasil/listener.go b/src/yggdrasil/listener.go index 62225412..fec543f4 100644 --- a/src/yggdrasil/listener.go +++ b/src/yggdrasil/listener.go @@ -31,8 +31,8 @@ func (l *Listener) Close() (err error) { recover() err = errors.New("already closed") }() - if l.core.sessions.listener == l { - l.core.sessions.listener = nil + if l.core.router.sessions.listener == l { + l.core.router.sessions.listener = nil } close(l.close) close(l.conn) diff --git a/src/yggdrasil/nodeinfo.go b/src/yggdrasil/nodeinfo.go index 73d4e115..8a5d7872 100644 --- a/src/yggdrasil/nodeinfo.go +++ b/src/yggdrasil/nodeinfo.go @@ -47,25 +47,25 @@ func (m *nodeinfo) init(core *Core) { m.callbacks = make(map[crypto.BoxPubKey]nodeinfoCallback) m.cache = make(map[crypto.BoxPubKey]nodeinfoCached) - go func() { - for { - m.callbacksMutex.Lock() - for boxPubKey, callback := range m.callbacks { - if time.Since(callback.created) > time.Minute { - delete(m.callbacks, boxPubKey) - } + var f func() + f = func() { + m.callbacksMutex.Lock() + for boxPubKey, callback := range m.callbacks { + if time.Since(callback.created) > time.Minute { + delete(m.callbacks, boxPubKey) } - m.callbacksMutex.Unlock() - m.cacheMutex.Lock() - for boxPubKey, cache := range m.cache { - if time.Since(cache.created) > time.Hour { - delete(m.cache, boxPubKey) - } - } - m.cacheMutex.Unlock() - time.Sleep(time.Second * 30) } - }() + m.callbacksMutex.Unlock() + m.cacheMutex.Lock() + for boxPubKey, cache := range m.cache { + if time.Since(cache.created) > time.Hour { + delete(m.cache, boxPubKey) + } + } + m.cacheMutex.Unlock() + time.AfterFunc(time.Second*30, f) + } + go f() } // Add a callback for a nodeinfo lookup @@ -172,7 +172,7 @@ func (m *nodeinfo) sendNodeInfo(key crypto.BoxPubKey, coords []byte, isResponse NodeInfo: m.getNodeInfo(), } bs := nodeinfo.encode() - shared := m.core.sessions.getSharedKey(&m.core.boxPriv, &key) + shared := m.core.router.sessions.getSharedKey(&m.core.boxPriv, &key) payload, nonce := crypto.BoxSeal(shared, bs, nil) p := wire_protoTrafficPacket{ Coords: coords, diff --git a/src/yggdrasil/peer.go b/src/yggdrasil/peer.go index 379ca85b..381e6917 100644 --- a/src/yggdrasil/peer.go +++ b/src/yggdrasil/peer.go @@ -12,6 +12,8 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) // The peers struct represents peers with an active connection. @@ -19,10 +21,9 @@ import ( // In most cases, this involves passing the packet to the handler for outgoing traffic to another peer. // In other cases, it's link protocol traffic used to build the spanning tree, in which case this checks signatures and passes the message along to the switch. type peers struct { - core *Core - reconfigure chan chan error - mutex sync.Mutex // Synchronize writes to atomic - ports atomic.Value //map[switchPort]*peer, use CoW semantics + core *Core + mutex sync.Mutex // Synchronize writes to atomic + ports atomic.Value //map[switchPort]*peer, use CoW semantics } // Initializes the peers struct. 
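The link, peer, router and session changes in this patch are all built on the Arceliar/phony actor library: embedding phony.Inbox gives a type an Act method that runs the supplied function on that actor's serialised inbox, while phony.Block schedules a function and waits for it to finish (as removePeer does against the router below). A minimal, self-contained sketch of the pattern, using a hypothetical counter type that is not part of this changeset:

package main

import (
	"fmt"

	"github.com/Arceliar/phony"
)

// counter is a hypothetical actor: the embedded Inbox serialises every
// function passed to Act, so the n field needs no mutex.
type counter struct {
	phony.Inbox
	n int
}

// increment enqueues work on the counter's inbox; from may be another actor
// (which lets phony apply backpressure to the sender) or nil when the caller
// is a plain goroutine, as the timer callbacks in this patch are.
func (c *counter) increment(from phony.Actor) {
	c.Act(from, func() { c.n++ })
}

func main() {
	c := &counter{}
	for i := 0; i < 100; i++ {
		c.increment(nil)
	}
	// Block runs the function on the actor and waits for it to return, which
	// is how removePeer and doMaintenance synchronise with the router actor.
	phony.Block(c, func() { fmt.Println("count:", c.n) })
}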
@@ -31,13 +32,10 @@ func (ps *peers) init(c *Core) { defer ps.mutex.Unlock() ps.putPorts(make(map[switchPort]*peer)) ps.core = c - ps.reconfigure = make(chan chan error, 1) - go func() { - for { - e := <-ps.reconfigure - e <- nil - } - }() +} + +func (ps *peers) reconfigure() { + // This is where reconfiguration would go, if we had anything to do } // Returns true if an incoming peer connection to a key is allowed, either @@ -94,9 +92,7 @@ func (ps *peers) putPorts(ports map[switchPort]*peer) { // Information known about a peer, including thier box/sig keys, precomputed shared keys (static and ephemeral) and a handler for their outgoing traffic type peer struct { - bytesSent uint64 // To track bandwidth usage for getPeers - bytesRecvd uint64 // To track bandwidth usage for getPeers - // BUG: sync/atomic, 32 bit platforms need the above to be the first element + phony.Inbox core *Core intf *linkInterface port switchPort @@ -106,11 +102,14 @@ type peer struct { linkShared crypto.BoxSharedKey endpoint string firstSeen time.Time // To track uptime for getPeers - linkOut (chan []byte) // used for protocol traffic (to bypass queues) - doSend (chan struct{}) // tell the linkLoop to send a switchMsg - dinfo (chan *dhtInfo) // used to keep the DHT working + linkOut func([]byte) // used for protocol traffic (bypasses the switch) + dinfo *dhtInfo // used to keep the DHT working out func([][]byte) // Set up by whatever created the peers struct, used to send packets to other nodes + done (chan struct{}) // closed to exit the linkLoop close func() // Called when a peer is removed, to close the underlying connection, or via admin api + // The below aren't actually useful internally, they're just gathered for getPeers statistics + bytesSent uint64 + bytesRecvd uint64 } // Creates a new peer with the specified box, sig, and linkShared keys, using the lowest unoccupied port number. @@ -121,8 +120,7 @@ func (ps *peers) newPeer(box *crypto.BoxPubKey, sig *crypto.SigPubKey, linkShare shared: *crypto.GetSharedKey(&ps.core.boxPriv, box), linkShared: *linkShared, firstSeen: now, - doSend: make(chan struct{}, 1), - dinfo: make(chan *dhtInfo, 1), + done: make(chan struct{}), close: closer, core: ps.core, intf: intf, @@ -150,7 +148,7 @@ func (ps *peers) removePeer(port switchPort) { if port == 0 { return } // Can't remove self peer - ps.core.router.doAdmin(func() { + phony.Block(&ps.core.router, func() { ps.core.switchTable.forgetPeer(port) }) ps.mutex.Lock() @@ -167,103 +165,106 @@ func (ps *peers) removePeer(port switchPort) { if p.close != nil { p.close() } - close(p.doSend) + close(p.done) } } // If called, sends a notification to each peer that they should send a new switch message. // Mainly called by the switch after an update. -func (ps *peers) sendSwitchMsgs() { +func (ps *peers) sendSwitchMsgs(from phony.Actor) { ports := ps.getPorts() for _, p := range ports { if p.port == 0 { continue } - p.doSendSwitchMsgs() - } -} - -// If called, sends a notification to the peer's linkLoop to trigger a switchMsg send. -// Mainly called by sendSwitchMsgs or during linkLoop startup. -func (p *peer) doSendSwitchMsgs() { - defer func() { recover() }() // In case there's a race with close(p.doSend) - select { - case p.doSend <- struct{}{}: - default: + p.Act(from, p._sendSwitchMsg) } } // This must be launched in a separate goroutine by whatever sets up the peer struct. // It handles link protocol traffic. 
-func (p *peer) linkLoop() { - tick := time.NewTicker(time.Second) - defer tick.Stop() - p.doSendSwitchMsgs() - var dinfo *dhtInfo - for { - select { - case _, ok := <-p.doSend: - if !ok { - return +func (p *peer) start() { + var updateDHT func() + updateDHT = func() { + phony.Block(p, func() { + select { + case <-p.done: + default: + p._updateDHT() + time.AfterFunc(time.Second, updateDHT) } - p.sendSwitchMsg() - case dinfo = <-p.dinfo: - case _ = <-tick.C: - if dinfo != nil { - p.core.dht.peers <- dinfo - } - } + }) } + updateDHT() + // Just for good measure, immediately send a switch message to this peer when we start + p.Act(nil, p._sendSwitchMsg) +} + +func (p *peer) _updateDHT() { + if p.dinfo != nil { + p.core.router.insertPeer(p, p.dinfo) + } +} + +func (p *peer) handlePacketFrom(from phony.Actor, packet []byte) { + p.Act(from, func() { + p._handlePacket(packet) + }) } // Called to handle incoming packets. // Passes the packet to a handler for that packet type. -func (p *peer) handlePacket(packet []byte) { +func (p *peer) _handlePacket(packet []byte) { // FIXME this is off by stream padding and msg length overhead, should be done in tcp.go - atomic.AddUint64(&p.bytesRecvd, uint64(len(packet))) + p.bytesRecvd += uint64(len(packet)) pType, pTypeLen := wire_decode_uint64(packet) if pTypeLen == 0 { return } switch pType { case wire_Traffic: - p.handleTraffic(packet, pTypeLen) + p._handleTraffic(packet) case wire_ProtocolTraffic: - p.handleTraffic(packet, pTypeLen) + p._handleTraffic(packet) case wire_LinkProtocolTraffic: - p.handleLinkTraffic(packet) + p._handleLinkTraffic(packet) default: util.PutBytes(packet) } - return } // Called to handle traffic or protocolTraffic packets. // In either case, this reads from the coords of the packet header, does a switch lookup, and forwards to the next node. -func (p *peer) handleTraffic(packet []byte, pTypeLen int) { +func (p *peer) _handleTraffic(packet []byte) { table := p.core.switchTable.getTable() if _, isIn := table.elems[p.port]; !isIn && p.port != 0 { // Drop traffic if the peer isn't in the switch return } - p.core.switchTable.packetIn <- packet + p.core.switchTable.packetInFrom(p, packet) +} + +func (p *peer) sendPacketsFrom(from phony.Actor, packets [][]byte) { + p.Act(from, func() { + p._sendPackets(packets) + }) } // This just calls p.out(packet) for now. -func (p *peer) sendPackets(packets [][]byte) { +func (p *peer) _sendPackets(packets [][]byte) { // Is there ever a case where something more complicated is needed? // What if p.out blocks? var size int for _, packet := range packets { size += len(packet) } - atomic.AddUint64(&p.bytesSent, uint64(size)) + p.bytesSent += uint64(size) p.out(packets) } // This wraps the packet in the inner (ephemeral) and outer (permanent) crypto layers. // It sends it to p.linkOut, which bypasses the usual packet queues. -func (p *peer) sendLinkPacket(packet []byte) { +func (p *peer) _sendLinkPacket(packet []byte) { innerPayload, innerNonce := crypto.BoxSeal(&p.linkShared, packet, nil) innerLinkPacket := wire_linkProtoTrafficPacket{ Nonce: *innerNonce, @@ -276,12 +277,12 @@ func (p *peer) sendLinkPacket(packet []byte) { Payload: bs, } packet = linkPacket.encode() - p.linkOut <- packet + p.linkOut(packet) } // Decrypts the outer (permanent) and inner (ephemeral) crypto layers on link traffic. // Identifies the link traffic type and calls the appropriate handler. 
-func (p *peer) handleLinkTraffic(bs []byte) { +func (p *peer) _handleLinkTraffic(bs []byte) { packet := wire_linkProtoTrafficPacket{} if !packet.decode(bs) { return @@ -304,14 +305,14 @@ func (p *peer) handleLinkTraffic(bs []byte) { } switch pType { case wire_SwitchMsg: - p.handleSwitchMsg(payload) + p._handleSwitchMsg(payload) default: util.PutBytes(bs) } } // Gets a switchMsg from the switch, adds signed next-hop info for this peer, and sends it to them. -func (p *peer) sendSwitchMsg() { +func (p *peer) _sendSwitchMsg() { msg := p.core.switchTable.getMsg() if msg == nil { return @@ -323,12 +324,12 @@ func (p *peer) sendSwitchMsg() { Sig: *crypto.Sign(&p.core.sigPriv, bs), }) packet := msg.encode() - p.sendLinkPacket(packet) + p._sendLinkPacket(packet) } // Handles a switchMsg from the peer, checking signatures and passing good messages to the switch. // Also creates a dhtInfo struct and arranges for it to be added to the dht (this is how dht bootstrapping begins). -func (p *peer) handleSwitchMsg(packet []byte) { +func (p *peer) _handleSwitchMsg(packet []byte) { var msg switchMsg if !msg.decode(packet) { return @@ -352,16 +353,16 @@ func (p *peer) handleSwitchMsg(packet []byte) { p.core.switchTable.handleMsg(&msg, p.port) if !p.core.switchTable.checkRoot(&msg) { // Bad switch message - p.dinfo <- nil + p.dinfo = nil return } // Pass a mesage to the dht informing it that this peer (still) exists loc.coords = loc.coords[:len(loc.coords)-1] - dinfo := dhtInfo{ + p.dinfo = &dhtInfo{ key: p.box, coords: loc.getCoords(), } - p.dinfo <- &dinfo + p._updateDHT() } // This generates the bytes that we sign or check the signature of for a switchMsg. diff --git a/src/yggdrasil/router.go b/src/yggdrasil/router.go index bdead848..64c81701 100644 --- a/src/yggdrasil/router.go +++ b/src/yggdrasil/router.go @@ -30,29 +30,29 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) // The router struct has channels to/from the adapter device and a self peer (0), which is how messages are passed between this node and the peers/switch layer. -// The router's mainLoop goroutine is responsible for managing all information related to the dht, searches, and crypto sessions. +// The router's phony.Inbox goroutine is responsible for managing all information related to the dht, searches, and crypto sessions. type router struct { - core *Core - reconfigure chan chan error - addr address.Address - subnet address.Subnet - in <-chan [][]byte // packets we received from the network, link to peer's "out" - out func([]byte) // packets we're sending to the network, link to peer's "in" - reset chan struct{} // signal that coords changed (re-init sessions/dht) - admin chan func() // pass a lambda for the admin socket to query stuff - nodeinfo nodeinfo + phony.Inbox + core *Core + addr address.Address + subnet address.Subnet + out func([]byte) // packets we're sending to the network, link to peer's "in" + dht dht + nodeinfo nodeinfo + searches searches + sessions sessions } // Initializes the router struct, which includes setting up channels to/from the adapter. func (r *router) init(core *Core) { r.core = core - r.reconfigure = make(chan chan error, 1) - r.addr = *address.AddrForNodeID(&r.core.dht.nodeID) - r.subnet = *address.SubnetForNodeID(&r.core.dht.nodeID) - in := make(chan [][]byte, 1) // TODO something better than this... 
+ r.addr = *address.AddrForNodeID(&r.dht.nodeID) + r.subnet = *address.SubnetForNodeID(&r.dht.nodeID) self := linkInterface{ name: "(self)", info: linkInfo{ @@ -62,120 +62,109 @@ func (r *router) init(core *Core) { }, } p := r.core.peers.newPeer(&r.core.boxPub, &r.core.sigPub, &crypto.BoxSharedKey{}, &self, nil) - p.out = func(packets [][]byte) { in <- packets } - r.in = in - out := make(chan []byte, 32) - go func() { - for packet := range out { - p.handlePacket(packet) - } - }() - out2 := make(chan []byte, 32) - go func() { - // This worker makes sure r.out never blocks - // It will buffer traffic long enough for the switch worker to take it - // If (somehow) you can send faster than the switch can receive, then this would use unbounded memory - // But crypto slows sends enough that the switch should always be able to take the packets... - var buf [][]byte - for { - buf = append(buf, <-out2) - for len(buf) > 0 { - select { - case bs := <-out2: - buf = append(buf, bs) - case out <- buf[0]: - buf = buf[1:] - } - } - } - }() - r.out = func(packet []byte) { out2 <- packet } - r.reset = make(chan struct{}, 1) - r.admin = make(chan func(), 32) + p.out = func(packets [][]byte) { r.handlePackets(p, packets) } + r.out = func(bs []byte) { p.handlePacketFrom(r, bs) } r.nodeinfo.init(r.core) r.core.config.Mutex.RLock() r.nodeinfo.setNodeInfo(r.core.config.Current.NodeInfo, r.core.config.Current.NodeInfoPrivacy) r.core.config.Mutex.RUnlock() + r.dht.init(r) + r.searches.init(r) + r.sessions.init(r) } -// Starts the mainLoop goroutine. +// Reconfigures the router and any child modules. This should only ever be run +// by the router actor. +func (r *router) reconfigure() { + // Reconfigure the router + current := r.core.config.GetCurrent() + if err := r.nodeinfo.setNodeInfo(current.NodeInfo, current.NodeInfoPrivacy); err != nil { + r.core.log.Errorln("Error reloading NodeInfo:", err) + } else { + r.core.log.Infoln("NodeInfo updated") + } + // Reconfigure children + r.dht.reconfigure() + r.searches.reconfigure() + r.sessions.reconfigure() +} + +// Starts the tickerLoop goroutine. func (r *router) start() error { r.core.log.Infoln("Starting router") - go r.mainLoop() + go r.doMaintenance() return nil } -// Takes traffic from the adapter and passes it to router.send, or from r.in and handles incoming traffic. -// Also adds new peer info to the DHT. -// Also resets the DHT and sesssions in the event of a coord change. -// Also does periodic maintenance stuff. -func (r *router) mainLoop() { - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - for { - select { - case ps := <-r.in: - for _, p := range ps { - r.handleIn(p) - } - case info := <-r.core.dht.peers: - r.core.dht.insertPeer(info) - case <-r.reset: - r.core.sessions.reset() - r.core.dht.reset() - case <-ticker.C: - { - // Any periodic maintenance stuff goes here - r.core.switchTable.doMaintenance() - r.core.dht.doMaintenance() - r.core.sessions.cleanup() - } - case f := <-r.admin: - f() - case e := <-r.reconfigure: - current := r.core.config.GetCurrent() - e <- r.nodeinfo.setNodeInfo(current.NodeInfo, current.NodeInfoPrivacy) +// In practice, the switch will call this with 1 packet +func (r *router) handlePackets(from phony.Actor, packets [][]byte) { + r.Act(from, func() { + for _, packet := range packets { + r._handlePacket(packet) } - } + }) +} + +// Insert a peer info into the dht, TODO? 
make the dht a separate actor +func (r *router) insertPeer(from phony.Actor, info *dhtInfo) { + r.Act(from, func() { + r.dht.insertPeer(info) + }) +} + +// Reset sessions and DHT after the switch sees our coords change +func (r *router) reset(from phony.Actor) { + r.Act(from, func() { + r.sessions.reset() + r.dht.reset() + }) +} + +// TODO remove reconfigure so this is just a ticker loop +// and then find something better than a ticker loop to schedule things... +func (r *router) doMaintenance() { + phony.Block(r, func() { + // Any periodic maintenance stuff goes here + r.core.switchTable.doMaintenance() + r.dht.doMaintenance() + r.sessions.cleanup() + }) + time.AfterFunc(time.Second, r.doMaintenance) } // Checks incoming traffic type and passes it to the appropriate handler. -func (r *router) handleIn(packet []byte) { +func (r *router) _handlePacket(packet []byte) { pType, pTypeLen := wire_decode_uint64(packet) if pTypeLen == 0 { return } switch pType { case wire_Traffic: - r.handleTraffic(packet) + r._handleTraffic(packet) case wire_ProtocolTraffic: - r.handleProto(packet) + r._handleProto(packet) default: } } // Handles incoming traffic, i.e. encapuslated ordinary IPv6 packets. // Passes them to the crypto session worker to be decrypted and sent to the adapter. -func (r *router) handleTraffic(packet []byte) { +func (r *router) _handleTraffic(packet []byte) { defer util.PutBytes(packet) p := wire_trafficPacket{} if !p.decode(packet) { return } - sinfo, isIn := r.core.sessions.getSessionForHandle(&p.Handle) + sinfo, isIn := r.sessions.getSessionForHandle(&p.Handle) if !isIn { util.PutBytes(p.Payload) return } - select { - case sinfo.fromRouter <- p: - case <-sinfo.cancel.Finished(): - util.PutBytes(p.Payload) - } + sinfo.recv(r, &p) } // Handles protocol traffic by decrypting it, checking its type, and passing it to the appropriate handler for that traffic type. -func (r *router) handleProto(packet []byte) { +func (r *router) _handleProto(packet []byte) { // First parse the packet p := wire_protoTrafficPacket{} if !p.decode(packet) { @@ -185,7 +174,7 @@ func (r *router) handleProto(packet []byte) { var sharedKey *crypto.BoxSharedKey if p.ToKey == r.core.boxPub { // Try to open using our permanent key - sharedKey = r.core.sessions.getSharedKey(&r.core.boxPriv, &p.FromKey) + sharedKey = r.sessions.getSharedKey(&r.core.boxPriv, &p.FromKey) } else { return } @@ -202,59 +191,59 @@ func (r *router) handleProto(packet []byte) { } switch bsType { case wire_SessionPing: - r.handlePing(bs, &p.FromKey) + r._handlePing(bs, &p.FromKey) case wire_SessionPong: - r.handlePong(bs, &p.FromKey) + r._handlePong(bs, &p.FromKey) case wire_NodeInfoRequest: fallthrough case wire_NodeInfoResponse: - r.handleNodeInfo(bs, &p.FromKey) + r._handleNodeInfo(bs, &p.FromKey) case wire_DHTLookupRequest: - r.handleDHTReq(bs, &p.FromKey) + r._handleDHTReq(bs, &p.FromKey) case wire_DHTLookupResponse: - r.handleDHTRes(bs, &p.FromKey) + r._handleDHTRes(bs, &p.FromKey) default: util.PutBytes(packet) } } // Decodes session pings from wire format and passes them to sessions.handlePing where they either create or update a session. -func (r *router) handlePing(bs []byte, fromKey *crypto.BoxPubKey) { +func (r *router) _handlePing(bs []byte, fromKey *crypto.BoxPubKey) { ping := sessionPing{} if !ping.decode(bs) { return } ping.SendPermPub = *fromKey - r.core.sessions.handlePing(&ping) + r.sessions.handlePing(&ping) } // Handles session pongs (which are really pings with an extra flag to prevent acknowledgement). 
-func (r *router) handlePong(bs []byte, fromKey *crypto.BoxPubKey) { - r.handlePing(bs, fromKey) +func (r *router) _handlePong(bs []byte, fromKey *crypto.BoxPubKey) { + r._handlePing(bs, fromKey) } // Decodes dht requests and passes them to dht.handleReq to trigger a lookup/response. -func (r *router) handleDHTReq(bs []byte, fromKey *crypto.BoxPubKey) { +func (r *router) _handleDHTReq(bs []byte, fromKey *crypto.BoxPubKey) { req := dhtReq{} if !req.decode(bs) { return } req.Key = *fromKey - r.core.dht.handleReq(&req) + r.dht.handleReq(&req) } // Decodes dht responses and passes them to dht.handleRes to update the DHT table and further pass them to the search code (if applicable). -func (r *router) handleDHTRes(bs []byte, fromKey *crypto.BoxPubKey) { +func (r *router) _handleDHTRes(bs []byte, fromKey *crypto.BoxPubKey) { res := dhtRes{} if !res.decode(bs) { return } res.Key = *fromKey - r.core.dht.handleRes(&res) + r.dht.handleRes(&res) } // Decodes nodeinfo request -func (r *router) handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { +func (r *router) _handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { req := nodeinfoReqRes{} if !req.decode(bs) { return @@ -262,18 +251,3 @@ func (r *router) handleNodeInfo(bs []byte, fromKey *crypto.BoxPubKey) { req.SendPermPub = *fromKey r.nodeinfo.handleNodeInfo(&req) } - -// Passed a function to call. -// This will send the function to r.admin and block until it finishes. -// It's used by the admin socket to ask the router mainLoop goroutine about information in the session or dht structs, which cannot be read safely from outside that goroutine. -func (r *router) doAdmin(f func()) { - // Pass this a function that needs to be run by the router's main goroutine - // It will pass the function to the router and wait for the router to finish - done := make(chan struct{}) - newF := func() { - f() - close(done) - } - r.admin <- newF - <-done -} diff --git a/src/yggdrasil/search.go b/src/yggdrasil/search.go index b970fe55..322131ed 100644 --- a/src/yggdrasil/search.go +++ b/src/yggdrasil/search.go @@ -33,7 +33,7 @@ const search_RETRY_TIME = time.Second // Information about an ongoing search. // Includes the target NodeID, the bitmask to match it to an IP, and the list of nodes to visit / already visited. type searchInfo struct { - core *Core + searches *searches dest crypto.NodeID mask crypto.NodeID time time.Time @@ -45,28 +45,24 @@ type searchInfo struct { // This stores a map of active searches. type searches struct { - core *Core - reconfigure chan chan error - searches map[crypto.NodeID]*searchInfo + router *router + searches map[crypto.NodeID]*searchInfo } // Initializes the searches struct. -func (s *searches) init(core *Core) { - s.core = core - s.reconfigure = make(chan chan error, 1) - go func() { - for { - e := <-s.reconfigure - e <- nil - } - }() +func (s *searches) init(r *router) { + s.router = r s.searches = make(map[crypto.NodeID]*searchInfo) } +func (s *searches) reconfigure() { + // This is where reconfiguration would go, if we had anything to do +} + // Creates a new search info, adds it to the searches struct, and returns a pointer to the info. 
func (s *searches) createSearch(dest *crypto.NodeID, mask *crypto.NodeID, callback func(*sessionInfo, error)) *searchInfo { info := searchInfo{ - core: s.core, + searches: s, dest: *dest, mask: *mask, time: time.Now(), @@ -100,7 +96,7 @@ func (sinfo *searchInfo) addToSearch(res *dhtRes) { from := dhtInfo{key: res.Key, coords: res.Coords} sinfo.visited[*from.getNodeID()] = true for _, info := range res.Infos { - if *info.getNodeID() == sinfo.core.dht.nodeID || sinfo.visited[*info.getNodeID()] { + if *info.getNodeID() == sinfo.searches.router.dht.nodeID || sinfo.visited[*info.getNodeID()] { continue } if dht_ordered(&sinfo.dest, info.getNodeID(), from.getNodeID()) { @@ -134,7 +130,7 @@ func (sinfo *searchInfo) doSearchStep() { if len(sinfo.toVisit) == 0 { if time.Since(sinfo.time) > search_RETRY_TIME { // Dead end and no response in too long, do cleanup - delete(sinfo.core.searches.searches, sinfo.dest) + delete(sinfo.searches.searches, sinfo.dest) sinfo.callback(nil, errors.New("search reached dead end")) } return @@ -143,8 +139,8 @@ func (sinfo *searchInfo) doSearchStep() { var next *dhtInfo next, sinfo.toVisit = sinfo.toVisit[0], sinfo.toVisit[1:] rq := dhtReqKey{next.key, sinfo.dest} - sinfo.core.dht.addCallback(&rq, sinfo.handleDHTRes) - sinfo.core.dht.ping(next, &sinfo.dest) + sinfo.searches.router.dht.addCallback(&rq, sinfo.handleDHTRes) + sinfo.searches.router.dht.ping(next, &sinfo.dest) sinfo.time = time.Now() } @@ -155,27 +151,25 @@ func (sinfo *searchInfo) continueSearch() { // In case the search dies, try to spawn another thread later // Note that this will spawn multiple parallel searches as time passes // Any that die aren't restarted, but a new one will start later - retryLater := func() { - // FIXME this keeps the search alive forever if not for the searches map, fix that - newSearchInfo := sinfo.core.searches.searches[sinfo.dest] - if newSearchInfo != sinfo { - return - } - sinfo.continueSearch() - } - go func() { - time.Sleep(search_RETRY_TIME) - sinfo.core.router.admin <- retryLater - }() + time.AfterFunc(search_RETRY_TIME, func() { + sinfo.searches.router.Act(nil, func() { + // FIXME this keeps the search alive forever if not for the searches map, fix that + newSearchInfo := sinfo.searches.searches[sinfo.dest] + if newSearchInfo != sinfo { + return + } + sinfo.continueSearch() + }) + }) } // Calls create search, and initializes the iterative search parts of the struct before returning it. 
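Both the search retry above and the router's doMaintenance loop replace long-lived ticker goroutines with a re-arming time.AfterFunc whose callback hands the work back to an actor (via Act or phony.Block). A short sketch of that idiom under the same assumptions, using a hypothetical janitor type rather than anything from this diff:

package main

import (
	"log"
	"time"

	"github.com/Arceliar/phony"
)

// janitor is a hypothetical actor that does periodic cleanup without a
// dedicated goroutine: each pass schedules the next one with time.AfterFunc.
type janitor struct {
	phony.Inbox
	stopped bool
}

func (j *janitor) doMaintenance() {
	j.Act(nil, func() { // AfterFunc fires on its own goroutine, so from is nil
		if j.stopped {
			return // don't re-arm once stopped
		}
		log.Println("cleanup pass")
		time.AfterFunc(time.Second, j.doMaintenance)
	})
}

func (j *janitor) stop(from phony.Actor) {
	j.Act(from, func() { j.stopped = true })
}

func main() {
	j := &janitor{}
	j.doMaintenance()
	time.Sleep(3 * time.Second) // let a few passes run
	j.stop(nil)
	time.Sleep(100 * time.Millisecond)
}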
func (s *searches) newIterSearch(dest *crypto.NodeID, mask *crypto.NodeID, callback func(*sessionInfo, error)) *searchInfo { sinfo := s.createSearch(dest, mask, callback) sinfo.visited = make(map[crypto.NodeID]bool) - loc := s.core.switchTable.getLocator() + loc := s.router.core.switchTable.getLocator() sinfo.toVisit = append(sinfo.toVisit, &dhtInfo{ - key: s.core.boxPub, + key: s.router.core.boxPub, coords: loc.getCoords(), }) // Start the search by asking ourself, useful if we're the destination return sinfo @@ -196,26 +190,31 @@ func (sinfo *searchInfo) checkDHTRes(res *dhtRes) bool { return false } // They match, so create a session and send a sessionRequest - sess, isIn := sinfo.core.sessions.getByTheirPerm(&res.Key) + sess, isIn := sinfo.searches.router.sessions.getByTheirPerm(&res.Key) if !isIn { - sess = sinfo.core.sessions.createSession(&res.Key) + sess = sinfo.searches.router.sessions.createSession(&res.Key) if sess == nil { // nil if the DHT search finished but the session wasn't allowed sinfo.callback(nil, errors.New("session not allowed")) // Cleanup - delete(sinfo.core.searches.searches, res.Dest) + delete(sinfo.searches.searches, res.Dest) return true } - _, isIn := sinfo.core.sessions.getByTheirPerm(&res.Key) + _, isIn := sinfo.searches.router.sessions.getByTheirPerm(&res.Key) if !isIn { panic("This should never happen") } + } else { + sinfo.callback(nil, errors.New("session already exists")) + // Cleanup + delete(sinfo.searches.searches, res.Dest) + return true } // FIXME (!) replay attacks could mess with coords? Give it a handle (tstamp)? sess.coords = res.Coords - sinfo.core.sessions.ping(sess) + sess.ping(sinfo.searches.router) sinfo.callback(sess, nil) // Cleanup - delete(sinfo.core.searches.searches, res.Dest) + delete(sinfo.searches.searches, res.Dest) return true } diff --git a/src/yggdrasil/session.go b/src/yggdrasil/session.go index 7cd92db9..2f5e0af3 100644 --- a/src/yggdrasil/session.go +++ b/src/yggdrasil/session.go @@ -6,77 +6,57 @@ package yggdrasil import ( "bytes" - "container/heap" - "errors" "sync" "time" "github.com/yggdrasil-network/yggdrasil-go/src/address" "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) // Duration that we keep track of old nonces per session, to allow some out-of-order packet delivery const nonceWindow = time.Second -// A heap of nonces, used with a map[nonce]time to allow out-of-order packets a little time to arrive without rejecting them -type nonceHeap []crypto.BoxNonce - -func (h nonceHeap) Len() int { return len(h) } -func (h nonceHeap) Less(i, j int) bool { return h[i].Minus(&h[j]) < 0 } -func (h nonceHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } -func (h *nonceHeap) Push(x interface{}) { *h = append(*h, x.(crypto.BoxNonce)) } -func (h *nonceHeap) Pop() interface{} { - l := len(*h) - var n crypto.BoxNonce - n, *h = (*h)[l-1], (*h)[:l-1] - return n -} -func (h nonceHeap) peek() *crypto.BoxNonce { return &h[len(h)-1] } - // All the information we know about an active session. // This includes coords, permanent and ephemeral keys, handles and nonces, various sorts of timing information for timeout and maintenance, and some metadata for the admin API. 
type sessionInfo struct { - mutex sync.Mutex // Protects all of the below, use it any time you read/chance the contents of a session - core *Core // - reconfigure chan chan error // - theirAddr address.Address // - theirSubnet address.Subnet // - theirPermPub crypto.BoxPubKey // - theirSesPub crypto.BoxPubKey // - mySesPub crypto.BoxPubKey // - mySesPriv crypto.BoxPrivKey // - sharedSesKey crypto.BoxSharedKey // derived from session keys - theirHandle crypto.Handle // - myHandle crypto.Handle // - theirNonce crypto.BoxNonce // - theirNonceHeap nonceHeap // priority queue to keep track of the lowest nonce we recently accepted - theirNonceMap map[crypto.BoxNonce]time.Time // time we added each nonce to the heap - myNonce crypto.BoxNonce // - theirMTU uint16 // - myMTU uint16 // - wasMTUFixed bool // Was the MTU fixed by a receive error? - timeOpened time.Time // Time the sessino was opened - time time.Time // Time we last received a packet - mtuTime time.Time // time myMTU was last changed - pingTime time.Time // time the first ping was sent since the last received packet - pingSend time.Time // time the last ping was sent - coords []byte // coords of destination - reset bool // reset if coords change - tstamp int64 // ATOMIC - tstamp from their last session ping, replay attack mitigation - bytesSent uint64 // Bytes of real traffic sent in this session - bytesRecvd uint64 // Bytes of real traffic received in this session - init chan struct{} // Closed when the first session pong arrives, used to signal that the session is ready for initial use - cancel util.Cancellation // Used to terminate workers - fromRouter chan wire_trafficPacket // Received packets go here, to be decrypted by the session - recv chan []byte // Decrypted packets go here, picked up by the associated Conn - send chan FlowKeyMessage // Packets with optional flow key go here, to be encrypted and sent + phony.Inbox // Protects all of the below, use it any time you read/change the contents of a session + sessions *sessions // + theirAddr address.Address // + theirSubnet address.Subnet // + theirPermPub crypto.BoxPubKey // + theirSesPub crypto.BoxPubKey // + mySesPub crypto.BoxPubKey // + mySesPriv crypto.BoxPrivKey // + sharedPermKey crypto.BoxSharedKey // used for session pings + sharedSesKey crypto.BoxSharedKey // derived from session keys + theirHandle crypto.Handle // + myHandle crypto.Handle // + theirNonce crypto.BoxNonce // + myNonce crypto.BoxNonce // + theirMTU uint16 // + myMTU uint16 // + wasMTUFixed bool // Was the MTU fixed by a receive error? 
+ timeOpened time.Time // Time the sessino was opened + time time.Time // Time we last received a packet + mtuTime time.Time // time myMTU was last changed + pingTime time.Time // time the first ping was sent since the last received packet + pingSend time.Time // time the last ping was sent + coords []byte // coords of destination + reset bool // reset if coords change + tstamp int64 // ATOMIC - tstamp from their last session ping, replay attack mitigation + bytesSent uint64 // Bytes of real traffic sent in this session + bytesRecvd uint64 // Bytes of real traffic received in this session + init chan struct{} // Closed when the first session pong arrives, used to signal that the session is ready for initial use + cancel util.Cancellation // Used to terminate workers + conn *Conn // The associated Conn object + callbacks []chan func() // Finished work from crypto workers } -func (sinfo *sessionInfo) doFunc(f func()) { - sinfo.mutex.Lock() - defer sinfo.mutex.Unlock() - f() +func (sinfo *sessionInfo) reconfigure() { + // This is where reconfiguration would go, if we had anything to do } // Represents a session ping/pong packet, andincludes information like public keys, a session handle, coords, a timestamp to prevent replays, and the tun/tap MTU. @@ -92,7 +72,7 @@ type sessionPing struct { // Updates session info in response to a ping, after checking that the ping is OK. // Returns true if the session was updated, or false otherwise. -func (s *sessionInfo) update(p *sessionPing) bool { +func (s *sessionInfo) _update(p *sessionPing) bool { if !(p.Tstamp > s.tstamp) { // To protect against replay attacks return false @@ -107,11 +87,12 @@ func (s *sessionInfo) update(p *sessionPing) bool { s.theirHandle = p.Handle s.sharedSesKey = *crypto.GetSharedKey(&s.mySesPriv, &s.theirSesPub) s.theirNonce = crypto.BoxNonce{} - s.theirNonceHeap = nil - s.theirNonceMap = make(map[crypto.BoxNonce]time.Time) } if p.MTU >= 1280 || p.MTU == 0 { s.theirMTU = p.MTU + if s.conn != nil { + s.conn.setMTU(s, s._getMTU()) + } } if !bytes.Equal(s.coords, p.Coords) { // allocate enough space for additional coords @@ -134,10 +115,9 @@ func (s *sessionInfo) update(p *sessionPing) bool { // Sessions are indexed by handle. // Additionally, stores maps of address/subnet onto keys, and keys onto handles. type sessions struct { - core *Core + router *router listener *Listener listenerMutex sync.Mutex - reconfigure chan chan error lastCleanup time.Time isAllowedHandler func(pubkey *crypto.BoxPubKey, initiator bool) bool // Returns true or false if session setup is allowed isAllowedMutex sync.RWMutex // Protects the above @@ -147,32 +127,20 @@ type sessions struct { } // Initializes the session struct. 
-func (ss *sessions) init(core *Core) { - ss.core = core - ss.reconfigure = make(chan chan error, 1) - go func() { - for { - e := <-ss.reconfigure - responses := make(map[crypto.Handle]chan error) - for index, session := range ss.sinfos { - responses[index] = make(chan error) - session.reconfigure <- responses[index] - } - for _, response := range responses { - if err := <-response; err != nil { - e <- err - continue - } - } - e <- nil - } - }() +func (ss *sessions) init(r *router) { + ss.router = r ss.permShared = make(map[crypto.BoxPubKey]*crypto.BoxSharedKey) ss.sinfos = make(map[crypto.Handle]*sessionInfo) ss.byTheirPerm = make(map[crypto.BoxPubKey]*crypto.Handle) ss.lastCleanup = time.Now() } +func (ss *sessions) reconfigure() { + for _, session := range ss.sinfos { + session.reconfigure() + } +} + // Determines whether the session with a given publickey is allowed based on // session firewall rules. func (ss *sessions) isSessionAllowed(pubkey *crypto.BoxPubKey, initiator bool) bool { @@ -211,17 +179,17 @@ func (ss *sessions) createSession(theirPermKey *crypto.BoxPubKey) *sessionInfo { return nil } sinfo := sessionInfo{} - sinfo.core = ss.core - sinfo.reconfigure = make(chan chan error, 1) + sinfo.sessions = ss sinfo.theirPermPub = *theirPermKey + sinfo.sharedPermKey = *ss.getSharedKey(&ss.router.core.boxPriv, &sinfo.theirPermPub) pub, priv := crypto.NewBoxKeys() sinfo.mySesPub = *pub sinfo.mySesPriv = *priv sinfo.myNonce = *crypto.NewBoxNonce() sinfo.theirMTU = 1280 - ss.core.config.Mutex.RLock() - sinfo.myMTU = uint16(ss.core.config.Current.IfMTU) - ss.core.config.Mutex.RUnlock() + ss.router.core.config.Mutex.RLock() + sinfo.myMTU = uint16(ss.router.core.config.Current.IfMTU) + ss.router.core.config.Mutex.RUnlock() now := time.Now() sinfo.timeOpened = now sinfo.time = now @@ -231,11 +199,11 @@ func (ss *sessions) createSession(theirPermKey *crypto.BoxPubKey) *sessionInfo { sinfo.init = make(chan struct{}) sinfo.cancel = util.NewCancellation() higher := false - for idx := range ss.core.boxPub { - if ss.core.boxPub[idx] > sinfo.theirPermPub[idx] { + for idx := range ss.router.core.boxPub { + if ss.router.core.boxPub[idx] > sinfo.theirPermPub[idx] { higher = true break - } else if ss.core.boxPub[idx] < sinfo.theirPermPub[idx] { + } else if ss.router.core.boxPub[idx] < sinfo.theirPermPub[idx] { break } } @@ -249,17 +217,8 @@ func (ss *sessions) createSession(theirPermKey *crypto.BoxPubKey) *sessionInfo { sinfo.myHandle = *crypto.NewHandle() sinfo.theirAddr = *address.AddrForNodeID(crypto.GetNodeID(&sinfo.theirPermPub)) sinfo.theirSubnet = *address.SubnetForNodeID(crypto.GetNodeID(&sinfo.theirPermPub)) - sinfo.fromRouter = make(chan wire_trafficPacket, 1) - sinfo.recv = make(chan []byte, 32) - sinfo.send = make(chan FlowKeyMessage, 32) ss.sinfos[sinfo.myHandle] = &sinfo ss.byTheirPerm[sinfo.theirPermPub] = &sinfo.myHandle - go func() { - // Run cleanup when the session is canceled - <-sinfo.cancel.Finished() - sinfo.core.router.doAdmin(sinfo.close) - }() - go sinfo.startWorkers() return &sinfo } @@ -291,20 +250,26 @@ func (ss *sessions) cleanup() { ss.lastCleanup = time.Now() } +func (sinfo *sessionInfo) doRemove() { + sinfo.sessions.router.Act(nil, func() { + sinfo.sessions.removeSession(sinfo) + }) +} + // Closes a session, removing it from sessions maps. 
-func (sinfo *sessionInfo) close() { - if s := sinfo.core.sessions.sinfos[sinfo.myHandle]; s == sinfo { - delete(sinfo.core.sessions.sinfos, sinfo.myHandle) - delete(sinfo.core.sessions.byTheirPerm, sinfo.theirPermPub) +func (ss *sessions) removeSession(sinfo *sessionInfo) { + if s := sinfo.sessions.sinfos[sinfo.myHandle]; s == sinfo { + delete(sinfo.sessions.sinfos, sinfo.myHandle) + delete(sinfo.sessions.byTheirPerm, sinfo.theirPermPub) } } // Returns a session ping appropriate for the given session info. -func (ss *sessions) getPing(sinfo *sessionInfo) sessionPing { - loc := ss.core.switchTable.getLocator() +func (sinfo *sessionInfo) _getPing() sessionPing { + loc := sinfo.sessions.router.core.switchTable.getLocator() coords := loc.getCoords() - ref := sessionPing{ - SendPermPub: ss.core.boxPub, + ping := sessionPing{ + SendPermPub: sinfo.sessions.router.core.boxPub, Handle: sinfo.myHandle, SendSesPub: sinfo.mySesPub, Tstamp: time.Now().Unix(), @@ -312,7 +277,7 @@ func (ss *sessions) getPing(sinfo *sessionInfo) sessionPing { MTU: sinfo.myMTU, } sinfo.myNonce.Increment() - return ref + return ping } // Gets the shared key for a pair of box keys. @@ -339,41 +304,50 @@ func (ss *sessions) getSharedKey(myPriv *crypto.BoxPrivKey, } // Sends a session ping by calling sendPingPong in ping mode. -func (ss *sessions) ping(sinfo *sessionInfo) { - ss.sendPingPong(sinfo, false) +func (sinfo *sessionInfo) ping(from phony.Actor) { + sinfo.Act(from, func() { + sinfo._sendPingPong(false) + }) } // Calls getPing, sets the appropriate ping/pong flag, encodes to wire format, and send it. // Updates the time the last ping was sent in the session info. -func (ss *sessions) sendPingPong(sinfo *sessionInfo, isPong bool) { - ping := ss.getPing(sinfo) +func (sinfo *sessionInfo) _sendPingPong(isPong bool) { + ping := sinfo._getPing() ping.IsPong = isPong bs := ping.encode() - shared := ss.getSharedKey(&ss.core.boxPriv, &sinfo.theirPermPub) - payload, nonce := crypto.BoxSeal(shared, bs, nil) + payload, nonce := crypto.BoxSeal(&sinfo.sharedPermKey, bs, nil) p := wire_protoTrafficPacket{ Coords: sinfo.coords, ToKey: sinfo.theirPermPub, - FromKey: ss.core.boxPub, + FromKey: sinfo.sessions.router.core.boxPub, Nonce: *nonce, Payload: payload, } packet := p.encode() - ss.core.router.out(packet) + // TODO rewrite the below if/when the peer struct becomes an actor, to not go through the router first + sinfo.sessions.router.Act(sinfo, func() { sinfo.sessions.router.out(packet) }) if sinfo.pingTime.Before(sinfo.time) { sinfo.pingTime = time.Now() } } +func (sinfo *sessionInfo) setConn(from phony.Actor, conn *Conn) { + sinfo.Act(from, func() { + sinfo.conn = conn + sinfo.conn.setMTU(sinfo, sinfo._getMTU()) + }) +} + // Handles a session ping, creating a session if needed and calling update, then possibly responding with a pong if the ping was in ping mode and the update was successful. // If the session has a packet cached (common when first setting up a session), it will be sent. func (ss *sessions) handlePing(ping *sessionPing) { // Get the corresponding session (or create a new session) sinfo, isIn := ss.getByTheirPerm(&ping.SendPermPub) switch { + case ping.IsPong: // This is a response, not an initial ping, so ignore it. case isIn: // Session already exists case !ss.isSessionAllowed(&ping.SendPermPub, false): // Session is not allowed - case ping.IsPong: // This is a response, not an initial ping, so ignore it. 
default: ss.listenerMutex.Lock() if ss.listener != nil { @@ -383,23 +357,24 @@ func (ss *sessions) handlePing(ping *sessionPing) { if s, _ := ss.getByTheirPerm(&ping.SendPermPub); s != sinfo { panic("This should not happen") } - conn := newConn(ss.core, crypto.GetNodeID(&sinfo.theirPermPub), &crypto.NodeID{}, sinfo) + conn := newConn(ss.router.core, crypto.GetNodeID(&sinfo.theirPermPub), &crypto.NodeID{}, sinfo) for i := range conn.nodeMask { conn.nodeMask[i] = 0xFF } + sinfo.setConn(ss.router, conn) c := ss.listener.conn go func() { c <- conn }() } ss.listenerMutex.Unlock() } if sinfo != nil { - sinfo.doFunc(func() { + sinfo.Act(ss.router, func() { // Update the session - if !sinfo.update(ping) { /*panic("Should not happen in testing")*/ + if !sinfo._update(ping) { /*panic("Should not happen in testing")*/ return } if !ping.IsPong { - ss.sendPingPong(sinfo, true) + sinfo._sendPingPong(true) } }) } @@ -408,7 +383,7 @@ func (ss *sessions) handlePing(ping *sessionPing) { // Get the MTU of the session. // Will be equal to the smaller of this node's MTU or the remote node's MTU. // If sending over links with a maximum message size (this was a thing with the old UDP code), it could be further lowered, to a minimum of 1280. -func (sinfo *sessionInfo) getMTU() uint16 { +func (sinfo *sessionInfo) _getMTU() uint16 { if sinfo.theirMTU == 0 || sinfo.myMTU == 0 { return 0 } @@ -419,49 +394,31 @@ func (sinfo *sessionInfo) getMTU() uint16 { } // Checks if a packet's nonce is recent enough to fall within the window of allowed packets, and not already received. -func (sinfo *sessionInfo) nonceIsOK(theirNonce *crypto.BoxNonce) bool { +func (sinfo *sessionInfo) _nonceIsOK(theirNonce *crypto.BoxNonce) bool { // The bitmask is to allow for some non-duplicate out-of-order packets if theirNonce.Minus(&sinfo.theirNonce) > 0 { // This is newer than the newest nonce we've seen return true } - if len(sinfo.theirNonceHeap) > 0 { - if theirNonce.Minus(sinfo.theirNonceHeap.peek()) > 0 { - if _, isIn := sinfo.theirNonceMap[*theirNonce]; !isIn { - // This nonce is recent enough that we keep track of older nonces, but it's not one we've seen yet - return true - } - } - } - return false + return time.Since(sinfo.time) < nonceWindow } // Updates the nonce mask by (possibly) shifting the bitmask and setting the bit corresponding to this nonce to 1, and then updating the most recent nonce -func (sinfo *sessionInfo) updateNonce(theirNonce *crypto.BoxNonce) { - // Start with some cleanup - for len(sinfo.theirNonceHeap) > 64 { - if time.Since(sinfo.theirNonceMap[*sinfo.theirNonceHeap.peek()]) < nonceWindow { - // This nonce is still fairly new, so keep it around - break - } - // TODO? reallocate the map in some cases, to free unused map space? - delete(sinfo.theirNonceMap, *sinfo.theirNonceHeap.peek()) - heap.Pop(&sinfo.theirNonceHeap) - } +func (sinfo *sessionInfo) _updateNonce(theirNonce *crypto.BoxNonce) { if theirNonce.Minus(&sinfo.theirNonce) > 0 { // This nonce is the newest we've seen, so make a note of that sinfo.theirNonce = *theirNonce + sinfo.time = time.Now() } - // Add it to the heap/map so we know not to allow it again - heap.Push(&sinfo.theirNonceHeap, *theirNonce) - sinfo.theirNonceMap[*theirNonce] = time.Now() } // Resets all sessions to an uninitialized state. // Called after coord changes, so attemtps to use a session will trigger a new ping and notify the remote end of the coord change. +// Only call this from the router actor. 
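The replay protection above now keeps only the newest nonce and the time it was accepted: anything strictly newer is accepted and becomes the new high-water mark, while older or duplicate nonces are accepted only while the one-second nonceWindow after the last in-order packet is still open. A rough standalone model of that rule, using uint64 counters in place of crypto.BoxNonce, so every type here is illustrative only:

package main

import (
	"fmt"
	"time"
)

const nonceWindow = time.Second

// replayFilter is an illustrative stand-in for the session's nonce tracking:
// it remembers only the highest sequence number seen and when it was accepted.
type replayFilter struct {
	highest  uint64
	lastSeen time.Time
}

// ok reports whether a packet should be accepted: strictly newer sequence
// numbers always pass, older (or duplicate) ones pass only while the window
// after the last in-order packet is still open.
func (f *replayFilter) ok(seq uint64, now time.Time) bool {
	if seq > f.highest {
		return true
	}
	return now.Sub(f.lastSeen) < nonceWindow
}

// update records an accepted packet, advancing the high-water mark if needed.
func (f *replayFilter) update(seq uint64, now time.Time) {
	if seq > f.highest {
		f.highest = seq
		f.lastSeen = now
	}
}

func main() {
	f := &replayFilter{}
	now := time.Now()
	for _, seq := range []uint64{1, 2, 5, 3} {
		accepted := f.ok(seq, now)
		fmt.Println("seq", seq, "accepted:", accepted) // 3 still passes: window is open
		if accepted {
			f.update(seq, now)
		}
	}
	// Two seconds later the window has closed, so old or duplicate values are dropped.
	fmt.Println("seq 3 accepted after window:", f.ok(3, now.Add(2*time.Second)))
}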
func (ss *sessions) reset() { - for _, sinfo := range ss.sinfos { - sinfo.doFunc(func() { + for _, _sinfo := range ss.sinfos { + sinfo := _sinfo // So we can safely put it in a closure + sinfo.Act(ss.router, func() { sinfo.reset = true }) } @@ -471,198 +428,119 @@ func (ss *sessions) reset() { //////////////////////////// Worker Functions Below //////////////////////////// //////////////////////////////////////////////////////////////////////////////// -func (sinfo *sessionInfo) startWorkers() { - go sinfo.recvWorker() - go sinfo.sendWorker() +type sessionCryptoManager struct { + phony.Inbox } +func (m *sessionCryptoManager) workerGo(from phony.Actor, f func()) { + m.Act(from, func() { + util.WorkerGo(f) + }) +} + +var manager = sessionCryptoManager{} + type FlowKeyMessage struct { FlowKey uint64 Message []byte } -func (sinfo *sessionInfo) recvWorker() { - // TODO move theirNonce etc into a struct that gets stored here, passed in over a channel - // Since there's no reason for anywhere else in the session code to need to *read* it... - // Only needs to be updated from the outside if a ping resets it... - // That would get rid of the need to take a mutex for the sessionFunc - var callbacks []chan func() - doRecv := func(p wire_trafficPacket) { - var bs []byte - var err error - var k crypto.BoxSharedKey - sessionFunc := func() { - if !sinfo.nonceIsOK(&p.Nonce) { - err = ConnError{errors.New("packet dropped due to invalid nonce"), false, true, false, 0} - return - } - k = sinfo.sharedSesKey - } - sinfo.doFunc(sessionFunc) - if err != nil { - util.PutBytes(p.Payload) - return - } - var isOK bool - ch := make(chan func(), 1) - poolFunc := func() { - bs, isOK = crypto.BoxOpen(&k, p.Payload, &p.Nonce) - callback := func() { - util.PutBytes(p.Payload) - if !isOK { - util.PutBytes(bs) - return - } - sessionFunc = func() { - if k != sinfo.sharedSesKey || !sinfo.nonceIsOK(&p.Nonce) { - // The session updated in the mean time, so return an error - err = ConnError{errors.New("session updated during crypto operation"), false, true, false, 0} - return - } - sinfo.updateNonce(&p.Nonce) - sinfo.time = time.Now() - sinfo.bytesRecvd += uint64(len(bs)) - } - sinfo.doFunc(sessionFunc) - if err != nil { - // Not sure what else to do with this packet, I guess just drop it - util.PutBytes(bs) - } else { - // Pass the packet to the buffer for Conn.Read - select { - case <-sinfo.cancel.Finished(): - util.PutBytes(bs) - case sinfo.recv <- bs: - } - } - } - ch <- callback - } - // Send to the worker and wait for it to finish - util.WorkerGo(poolFunc) - callbacks = append(callbacks, ch) - } - fromHelper := make(chan wire_trafficPacket, 1) - go func() { - var buf []wire_trafficPacket - for { - for len(buf) > 0 { - select { - case <-sinfo.cancel.Finished(): - return - case p := <-sinfo.fromRouter: - buf = append(buf, p) - for len(buf) > 64 { // Based on nonce window size - util.PutBytes(buf[0].Payload) - buf = buf[1:] - } - case fromHelper <- buf[0]: - buf = buf[1:] - } - } - select { - case <-sinfo.cancel.Finished(): - return - case p := <-sinfo.fromRouter: - buf = append(buf, p) - } - } - }() - select { - case <-sinfo.cancel.Finished(): - return - case <-sinfo.init: - // Wait until the session has finished initializing before processing any packets - } - for { - for len(callbacks) > 0 { - select { - case f := <-callbacks[0]: - callbacks = callbacks[1:] - f() - case <-sinfo.cancel.Finished(): - return - case p := <-fromHelper: - doRecv(p) - } - } - select { - case <-sinfo.cancel.Finished(): - return - case p := 
<-fromHelper: - doRecv(p) - } - } +func (sinfo *sessionInfo) recv(from phony.Actor, packet *wire_trafficPacket) { + sinfo.Act(from, func() { + sinfo._recvPacket(packet) + }) } -func (sinfo *sessionInfo) sendWorker() { - // TODO move info that this worker needs here, send updates via a channel - // Otherwise we need to take a mutex to avoid races with update() - var callbacks []chan func() - doSend := func(msg FlowKeyMessage) { - var p wire_trafficPacket - var k crypto.BoxSharedKey - sessionFunc := func() { - sinfo.bytesSent += uint64(len(msg.Message)) - p = wire_trafficPacket{ - Coords: append([]byte(nil), sinfo.coords...), - Handle: sinfo.theirHandle, - Nonce: sinfo.myNonce, - } - if msg.FlowKey != 0 { - // Helps ensure that traffic from this flow ends up in a separate queue from other flows - // The zero padding relies on the fact that the self-peer is always on port 0 - p.Coords = append(p.Coords, 0) - p.Coords = wire_put_uint64(msg.FlowKey, p.Coords) - } - sinfo.myNonce.Increment() - k = sinfo.sharedSesKey - } - // Get the mutex-protected info needed to encrypt the packet - sinfo.doFunc(sessionFunc) - ch := make(chan func(), 1) - poolFunc := func() { - // Encrypt the packet - p.Payload, _ = crypto.BoxSeal(&k, msg.Message, &p.Nonce) - // The callback will send the packet - callback := func() { - // Encoding may block on a util.GetBytes(), so kept out of the worker pool - packet := p.encode() - // Cleanup - util.PutBytes(msg.Message) - util.PutBytes(p.Payload) - // Send the packet - sinfo.core.router.out(packet) - } - ch <- callback - } - // Send to the worker and wait for it to finish - util.WorkerGo(poolFunc) - callbacks = append(callbacks, ch) - } +func (sinfo *sessionInfo) _recvPacket(p *wire_trafficPacket) { select { - case <-sinfo.cancel.Finished(): - return case <-sinfo.init: - // Wait until the session has finished initializing before processing any packets + default: + // TODO find a better way to drop things until initialized + util.PutBytes(p.Payload) + return } - for { - for len(callbacks) > 0 { - select { - case f := <-callbacks[0]: - callbacks = callbacks[1:] - f() - case <-sinfo.cancel.Finished(): + if !sinfo._nonceIsOK(&p.Nonce) { + util.PutBytes(p.Payload) + return + } + k := sinfo.sharedSesKey + var isOK bool + var bs []byte + ch := make(chan func(), 1) + poolFunc := func() { + bs, isOK = crypto.BoxOpen(&k, p.Payload, &p.Nonce) + callback := func() { + util.PutBytes(p.Payload) + if !isOK || k != sinfo.sharedSesKey || !sinfo._nonceIsOK(&p.Nonce) { + // Either we failed to decrypt, or the session was updated, or we + // received this packet in the mean time + util.PutBytes(bs) return - case msg := <-sinfo.send: - doSend(msg) + } + sinfo._updateNonce(&p.Nonce) + sinfo.bytesRecvd += uint64(len(bs)) + sinfo.conn.recvMsg(sinfo, bs) + } + ch <- callback + sinfo.checkCallbacks() + } + sinfo.callbacks = append(sinfo.callbacks, ch) + manager.workerGo(sinfo, poolFunc) +} + +func (sinfo *sessionInfo) _send(msg FlowKeyMessage) { + select { + case <-sinfo.init: + default: + // TODO find a better way to drop things until initialized + util.PutBytes(msg.Message) + return + } + sinfo.bytesSent += uint64(len(msg.Message)) + coords := append([]byte(nil), sinfo.coords...) + if msg.FlowKey != 0 { + coords = append(coords, 0) + coords = append(coords, wire_encode_uint64(msg.FlowKey)...) 
+ } + p := wire_trafficPacket{ + Coords: coords, + Handle: sinfo.theirHandle, + Nonce: sinfo.myNonce, + } + sinfo.myNonce.Increment() + k := sinfo.sharedSesKey + ch := make(chan func(), 1) + poolFunc := func() { + p.Payload, _ = crypto.BoxSeal(&k, msg.Message, &p.Nonce) + callback := func() { + // Encoding may block on a util.GetBytes(), so kept out of the worker pool + packet := p.encode() + // Cleanup + util.PutBytes(msg.Message) + util.PutBytes(p.Payload) + // Send the packet + // TODO replace this with a send to the peer struct if that becomes an actor + sinfo.sessions.router.Act(sinfo, func() { + sinfo.sessions.router.out(packet) + }) + } + ch <- callback + sinfo.checkCallbacks() + } + sinfo.callbacks = append(sinfo.callbacks, ch) + manager.workerGo(sinfo, poolFunc) +} + +func (sinfo *sessionInfo) checkCallbacks() { + sinfo.Act(nil, func() { + if len(sinfo.callbacks) > 0 { + select { + case callback := <-sinfo.callbacks[0]: + sinfo.callbacks = sinfo.callbacks[1:] + callback() + sinfo.checkCallbacks() + default: } } - select { - case <-sinfo.cancel.Finished(): - return - case bs := <-sinfo.send: - doSend(bs) - } - } + }) } diff --git a/src/yggdrasil/switch.go b/src/yggdrasil/switch.go index cc316d16..ba30758c 100644 --- a/src/yggdrasil/switch.go +++ b/src/yggdrasil/switch.go @@ -19,6 +19,8 @@ import ( "github.com/yggdrasil-network/yggdrasil-go/src/crypto" "github.com/yggdrasil-network/yggdrasil-go/src/util" + + "github.com/Arceliar/phony" ) const ( @@ -141,6 +143,7 @@ type switchPort uint64 type tableElem struct { port switchPort locator switchLocator + time time.Time } // This is the subset of the information about all peers needed to make routing decisions, and it stored separately in an atomically accessed table, which gets hammered in the "hot loop" of the routing logic (see: peer.handleTraffic in peers.go). @@ -162,22 +165,19 @@ type switchData struct { // All the information stored by the switch. 
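The session send and receive paths above hand the expensive BoxSeal and BoxOpen calls to the crypto worker pool, but still apply the results in submission order: each request appends a one-slot channel to the session's callbacks queue, the worker delivers its completion function into that channel, and checkCallbacks only ever drains the head of the queue. A stripped-down sketch of that ordering trick, with a placeholder computation standing in for the crypto and a plain goroutine standing in for util.WorkerGo:

package main

import (
	"fmt"
	"time"

	"github.com/Arceliar/phony"
)

// pipeline is a hypothetical actor that farms work out to goroutines but
// applies the results strictly in the order the work was submitted.
type pipeline struct {
	phony.Inbox
	callbacks []chan func()
}

func (p *pipeline) submit(from phony.Actor, id int, cost time.Duration) {
	p.Act(from, func() {
		ch := make(chan func(), 1) // one slot per request, queued in order
		p.callbacks = append(p.callbacks, ch)
		go func() { // stand-in for the bounded worker pool
			time.Sleep(cost) // pretend this is the expensive crypto
			ch <- func() { fmt.Println("applied result", id) }
			p.checkCallbacks()
		}()
	})
}

// checkCallbacks only ever looks at the head of the queue, so results are
// applied in submission order even when later work finishes first.
func (p *pipeline) checkCallbacks() {
	p.Act(nil, func() {
		if len(p.callbacks) > 0 {
			select {
			case callback := <-p.callbacks[0]:
				p.callbacks = p.callbacks[1:]
				callback()
				p.checkCallbacks()
			default:
			}
		}
	})
}

func main() {
	p := &pipeline{}
	p.submit(nil, 1, 50*time.Millisecond) // slow work, submitted first
	p.submit(nil, 2, 0)                   // fast work, still applied second
	time.Sleep(200 * time.Millisecond)
}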
type switchTable struct { - core *Core - reconfigure chan chan error - key crypto.SigPubKey // Our own key - time time.Time // Time when locator.tstamp was last updated - drop map[crypto.SigPubKey]int64 // Tstamp associated with a dropped root - mutex sync.RWMutex // Lock for reads/writes of switchData - parent switchPort // Port of whatever peer is our parent, or self if we're root - data switchData // - updater atomic.Value // *sync.Once - table atomic.Value // lookupTable - packetIn chan []byte // Incoming packets for the worker to handle - idleIn chan switchPort // Incoming idle notifications from peer links - admin chan func() // Pass a lambda for the admin socket to query stuff - queues switch_buffers // Queues - not atomic so ONLY use through admin chan - queueTotalMaxSize uint64 // Maximum combined size of queues - toRouter chan []byte // Packets to be sent to the router + core *Core + key crypto.SigPubKey // Our own key + time time.Time // Time when locator.tstamp was last updated + drop map[crypto.SigPubKey]int64 // Tstamp associated with a dropped root + mutex sync.RWMutex // Lock for reads/writes of switchData + parent switchPort // Port of whatever peer is our parent, or self if we're root + data switchData // + updater atomic.Value // *sync.Once + table atomic.Value // lookupTable + phony.Inbox // Owns the below + queues switch_buffers // Queues - not atomic so ONLY use through the actor + idle map[switchPort]struct{} // idle peers - not atomic so ONLY use through the actor + sending map[switchPort]struct{} // peers known to be blocked in a send (somehow) } // Minimum allowed total size of switch queues. @@ -187,7 +187,6 @@ const SwitchQueueTotalMinSize = 4 * 1024 * 1024 func (t *switchTable) init(core *Core) { now := time.Now() t.core = core - t.reconfigure = make(chan chan error, 1) t.key = t.core.sigPub locator := switchLocator{root: t.key, tstamp: now.Unix()} peers := make(map[switchPort]peerInfo) @@ -195,11 +194,24 @@ func (t *switchTable) init(core *Core) { t.updater.Store(&sync.Once{}) t.table.Store(lookupTable{}) t.drop = make(map[crypto.SigPubKey]int64) - t.packetIn = make(chan []byte, 1024) - t.idleIn = make(chan switchPort, 1024) - t.admin = make(chan func()) - t.queueTotalMaxSize = SwitchQueueTotalMinSize - t.toRouter = make(chan []byte, 1) + phony.Block(t, func() { + core.config.Mutex.RLock() + if core.config.Current.SwitchOptions.MaxTotalQueueSize > SwitchQueueTotalMinSize { + t.queues.totalMaxSize = core.config.Current.SwitchOptions.MaxTotalQueueSize + } else { + t.queues.totalMaxSize = SwitchQueueTotalMinSize + } + core.config.Mutex.RUnlock() + t.queues.bufs = make(map[string]switch_buffer) + t.idle = make(map[switchPort]struct{}) + t.sending = make(map[switchPort]struct{}) + }) +} + +func (t *switchTable) reconfigure() { + // This is where reconfiguration would go, if we had anything useful to do. + t.core.link.reconfigure() + t.core.peers.reconfigure() } // Safely gets a copy of this node's locator. @@ -245,13 +257,10 @@ func (t *switchTable) cleanRoot() { if t.data.locator.root != t.key { t.data.seq++ t.updater.Store(&sync.Once{}) - select { - case t.core.router.reset <- struct{}{}: - default: - } + t.core.router.reset(nil) } t.data.locator = switchLocator{root: t.key, tstamp: now.Unix()} - t.core.peers.sendSwitchMsgs() + t.core.peers.sendSwitchMsgs(t) } } @@ -279,7 +288,7 @@ func (t *switchTable) blockPeer(port switchPort) { } // Removes a peer. -// Must be called by the router mainLoop goroutine, e.g. call router.doAdmin with a lambda that calls this. 
 
@@ -279,7 +288,7 @@ func (t *switchTable) blockPeer(port switchPort) {
 }
 
 // Removes a peer.
-// Must be called by the router mainLoop goroutine, e.g. call router.doAdmin with a lambda that calls this.
+// Must be called by the router actor with a lambda that calls this.
 // If the removed peer was this node's parent, it immediately tries to find a new parent.
 func (t *switchTable) forgetPeer(port switchPort) {
 	t.mutex.Lock()
@@ -511,19 +520,16 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
 		if !equiv(&sender.locator, &t.data.locator) {
 			doUpdate = true
 			t.data.seq++
-			select {
-			case t.core.router.reset <- struct{}{}:
-			default:
-			}
+			t.core.router.reset(nil)
 		}
 		if t.data.locator.tstamp != sender.locator.tstamp {
 			t.time = now
 		}
 		t.data.locator = sender.locator
 		t.parent = sender.port
-		t.core.peers.sendSwitchMsgs()
+		t.core.peers.sendSwitchMsgs(t)
 	}
-	if doUpdate {
+	if true || doUpdate {
 		t.updater.Store(&sync.Once{})
 	}
 	return
@@ -559,6 +565,7 @@ func (t *switchTable) updateTable() {
 		newTable.elems[pinfo.port] = tableElem{
 			locator: loc,
 			port:    pinfo.port,
+			time:    pinfo.time,
 		}
 	}
 	t.table.Store(newTable)
@@ -573,12 +580,12 @@ func (t *switchTable) getTable() lookupTable {
 
 // Starts the switch worker
 func (t *switchTable) start() error {
 	t.core.log.Infoln("Starting switch")
-	go t.doWorker()
+	// There's actually nothing to do to start it...
 	return nil
 }
 
 type closerInfo struct {
-	port switchPort
+	elem tableElem
 	dist int
 }
@@ -595,7 +602,7 @@ func (t *switchTable) getCloser(dest []byte) []closerInfo {
 	for _, info := range table.elems {
 		dist := info.locator.dist(dest)
 		if dist < myDist {
-			t.queues.closer = append(t.queues.closer, closerInfo{info.port, dist})
+			t.queues.closer = append(t.queues.closer, closerInfo{info, dist})
 		}
 	}
 	return t.queues.closer
@@ -659,58 +666,61 @@ func (t *switchTable) bestPortForCoords(coords []byte) switchPort {
 // Handle an incoming packet
 // Either send it to ourself, or to the first idle peer that's free
 // Returns true if the packet has been handled somehow, false if it should be queued
-func (t *switchTable) handleIn(packet []byte, idle map[switchPort]time.Time) bool {
+func (t *switchTable) _handleIn(packet []byte, idle map[switchPort]struct{}, sending map[switchPort]struct{}) bool {
 	coords := switch_getPacketCoords(packet)
 	closer := t.getCloser(coords)
 	if len(closer) == 0 {
 		// TODO? call the router directly, and remove the whole concept of a self peer?
-		t.toRouter <- packet
+		self := t.core.peers.getPorts()[0]
+		self.sendPacketsFrom(t, [][]byte{packet})
 		return true
 	}
-	var best *peer
-	var bestDist int
-	var bestTime time.Time
+	var best *closerInfo
 	ports := t.core.peers.getPorts()
 	for _, cinfo := range closer {
-		to := ports[cinfo.port]
-		thisTime, isIdle := idle[cinfo.port]
+		to := ports[cinfo.elem.port]
+		//_, isIdle := idle[cinfo.elem.port]
+		_, isSending := sending[cinfo.elem.port]
 		var update bool
 		switch {
 		case to == nil:
 			// no port was found, ignore it
-		case !isIdle:
+		case isSending:
 			// the port is busy, ignore it
 		case best == nil:
 			// this is the first idle port we've found, so select it until we find a
 			// better candidate port to use instead
 			update = true
-		case cinfo.dist < bestDist:
+		case cinfo.dist < best.dist:
 			// the port takes a shorter path/is more direct than our current
 			// candidate, so select that instead
 			update = true
-		case cinfo.dist > bestDist:
+		case cinfo.dist > best.dist:
 			// the port takes a longer path/is less direct than our current candidate,
 			// ignore it
-		case thisTime.After(bestTime):
-			// all else equal, this port was used more recently than our current
-			// candidate, so choose that instead. this should mean that, in low
-			// traffic scenarios, we consistently pick the same link which helps with
-			// packet ordering
+		case cinfo.elem.locator.tstamp > best.elem.locator.tstamp:
+			// has a newer tstamp from the root, so presumably a better path
+			update = true
+		case cinfo.elem.locator.tstamp < best.elem.locator.tstamp:
+			// has an older tstamp, so presumably a worse path
+		case cinfo.elem.time.Before(best.elem.time):
+			// same tstamp, but got it earlier, so presumably a better path
+			//t.core.log.Println("DEBUG new best:", best.elem.time, cinfo.elem.time)
+			update = true
 		default:
 			// the search for a port has finished
 		}
 		if update {
-			best = to
-			bestDist = cinfo.dist
-			bestTime = thisTime
+			b := cinfo // because cinfo gets mutated by the iteration
+			best = &b
 		}
 	}
 	if best != nil {
-		// Send to the best idle next hop
-		delete(idle, best.port)
-		best.sendPackets([][]byte{packet})
-		return true
+		if _, isIdle := idle[best.elem.port]; isIdle {
+			delete(idle, best.elem.port)
+			ports[best.elem.port].sendPacketsFrom(t, [][]byte{packet})
+			return true
+		}
 	}
 	// Didn't find anyone idle to send it to
 	return false
@@ -729,15 +739,15 @@ type switch_buffer struct {
 }
 
 type switch_buffers struct {
-	switchTable *switchTable
-	bufs        map[string]switch_buffer // Buffers indexed by StreamID
-	size        uint64                   // Total size of all buffers, in bytes
-	maxbufs     int
-	maxsize     uint64
-	closer      []closerInfo // Scratch space
+	totalMaxSize uint64
+	bufs         map[string]switch_buffer // Buffers indexed by StreamID
+	size         uint64                   // Total size of all buffers, in bytes
+	maxbufs      int
+	maxsize      uint64
+	closer       []closerInfo // Scratch space
 }
 
-func (b *switch_buffers) cleanup(t *switchTable) {
+func (b *switch_buffers) _cleanup(t *switchTable) {
 	for streamID, buf := range b.bufs {
 		// Remove queues for which we have no next hop
 		packet := buf.packets[0]
@@ -751,7 +761,7 @@
 		}
 	}
 
-	for b.size > b.switchTable.queueTotalMaxSize {
+	for b.size > b.totalMaxSize {
 		// Drop a random queue
 		target := rand.Uint64() % b.size
 		var size uint64 // running total
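The switch-case above effectively defines a preference order over candidate next hops: shortest distance to the destination first, then the locator carrying the newest root timestamp, then whichever copy of that locator arrived earliest. A standalone sketch of the same ordering follows; the candidate type and the better function are invented names, simplified stand-ins for closerInfo and tableElem.

    package main

    import (
    	"fmt"
    	"time"
    )

    type candidate struct {
    	dist   int       // distance to the destination via this port
    	tstamp int64     // root timestamp carried by this port's locator
    	seen   time.Time // when that locator update was received
    }

    // better reports whether a should be preferred over b as the next hop.
    func better(a, b candidate) bool {
    	switch {
    	case a.dist != b.dist:
    		return a.dist < b.dist // shorter path wins
    	case a.tstamp != b.tstamp:
    		return a.tstamp > b.tstamp // fresher information from the root wins
    	default:
    		return a.seen.Before(b.seen) // heard the same info earlier, so presumably a faster link
    	}
    }

    func main() {
    	now := time.Now()
    	a := candidate{dist: 2, tstamp: 100, seen: now}
    	b := candidate{dist: 2, tstamp: 100, seen: now.Add(-time.Second)}
    	fmt.Println(better(a, b), better(b, a)) // false true
    }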
@@ -779,14 +789,15 @@
 // Handles incoming idle notifications
 // Loops over packets and sends the newest one that's OK for this peer to send
 // Returns true if the peer is no longer idle, false if it should be added to the idle list
-func (t *switchTable) handleIdle(port switchPort) bool {
+func (t *switchTable) _handleIdle(port switchPort) bool {
+	// TODO? only send packets for which this is the best next hop that isn't currently blocked sending
 	to := t.core.peers.getPorts()[port]
 	if to == nil {
 		return true
 	}
 	var packets [][]byte
 	var psize int
-	t.queues.cleanup(t)
+	t.queues._cleanup(t)
 	now := time.Now()
 	for psize < 65535 {
 		var best string
@@ -823,102 +834,56 @@
 		}
 	}
 	if len(packets) > 0 {
-		to.sendPackets(packets)
+		to.sendPacketsFrom(t, packets)
 		return true
 	}
 	return false
 }
 
-// The switch worker does routing lookups and sends packets to where they need to be
-func (t *switchTable) doWorker() {
-	sendingToRouter := make(chan []byte, 1)
-	go func() {
-		// Keep sending packets to the router
-		self := t.core.peers.getPorts()[0]
-		for bs := range sendingToRouter {
-			self.sendPackets([][]byte{bs})
+func (t *switchTable) packetInFrom(from phony.Actor, bytes []byte) {
+	t.Act(from, func() {
+		t._packetIn(bytes)
+	})
+}
+
+func (t *switchTable) _packetIn(bytes []byte) {
+	// Try to send it somewhere (or drop it if it's corrupt or at a dead end)
+	if !t._handleIn(bytes, t.idle, t.sending) {
+		// There's nobody free to take it right now, so queue it for later
+		packet := switch_packetInfo{bytes, time.Now()}
+		streamID := switch_getPacketStreamID(packet.bytes)
+		buf, bufExists := t.queues.bufs[streamID]
+		buf.packets = append(buf.packets, packet)
+		buf.size += uint64(len(packet.bytes))
+		t.queues.size += uint64(len(packet.bytes))
+		// Keep a track of the max total queue size
+		if t.queues.size > t.queues.maxsize {
+			t.queues.maxsize = t.queues.size
 		}
-	}()
-	go func() {
-		// Keep taking packets from the idle worker and sending them to the above whenever it's idle, keeping anything extra in a (fifo, head-drop) buffer
-		var buf [][]byte
-		var size int
-		for {
-			bs := <-t.toRouter
-			size += len(bs)
-			buf = append(buf, bs)
-			for len(buf) > 0 {
-				select {
-				case bs := <-t.toRouter:
-					size += len(bs)
-					buf = append(buf, bs)
-					for size > int(t.queueTotalMaxSize) {
-						size -= len(buf[0])
-						util.PutBytes(buf[0])
-						buf = buf[1:]
-					}
-				case sendingToRouter <- buf[0]:
-					size -= len(buf[0])
-					buf = buf[1:]
-				}
+		t.queues.bufs[streamID] = buf
+		if !bufExists {
+			// Keep a track of the max total queue count. Only recalculate this
+			// when the queue is new because otherwise repeating len(dict) might
+			// cause unnecessary processing overhead
+			if len(t.queues.bufs) > t.queues.maxbufs {
+				t.queues.maxbufs = len(t.queues.bufs)
 			}
 		}
-	}()
-	t.queues.switchTable = t
-	t.queues.bufs = make(map[string]switch_buffer) // Packets per PacketStreamID (string)
-	idle := make(map[switchPort]time.Time) // this is to deduplicate things
-	for {
-		//t.core.log.Debugf("Switch state: idle = %d, buffers = %d", len(idle), len(t.queues.bufs))
-		select {
-		case bytes := <-t.packetIn:
-			// Try to send it somewhere (or drop it if it's corrupt or at a dead end)
-			if !t.handleIn(bytes, idle) {
-				// There's nobody free to take it right now, so queue it for later
-				packet := switch_packetInfo{bytes, time.Now()}
-				streamID := switch_getPacketStreamID(packet.bytes)
-				buf, bufExists := t.queues.bufs[streamID]
-				buf.packets = append(buf.packets, packet)
-				buf.size += uint64(len(packet.bytes))
-				t.queues.size += uint64(len(packet.bytes))
-				// Keep a track of the max total queue size
-				if t.queues.size > t.queues.maxsize {
-					t.queues.maxsize = t.queues.size
-				}
-				t.queues.bufs[streamID] = buf
-				if !bufExists {
-					// Keep a track of the max total queue count. Only recalculate this
-					// when the queue is new because otherwise repeating len(dict) might
-					// cause unnecessary processing overhead
-					if len(t.queues.bufs) > t.queues.maxbufs {
-						t.queues.maxbufs = len(t.queues.bufs)
-					}
-				}
-				t.queues.cleanup(t)
-			}
-		case port := <-t.idleIn:
-			// Try to find something to send to this peer
-			if !t.handleIdle(port) {
-				// Didn't find anything ready to send yet, so stay idle
-				idle[port] = time.Now()
-			}
-		case f := <-t.admin:
-			f()
-		case e := <-t.reconfigure:
-			e <- nil
-		}
+		t.queues._cleanup(t)
 	}
 }
 
-// Passed a function to call.
-// This will send the function to t.admin and block until it finishes.
-func (t *switchTable) doAdmin(f func()) {
-	// Pass this a function that needs to be run by the router's main goroutine
-	// It will pass the function to the router and wait for the router to finish
-	done := make(chan struct{})
-	newF := func() {
-		f()
-		close(done)
+func (t *switchTable) _idleIn(port switchPort) {
+	// Try to find something to send to this peer
+	delete(t.sending, port)
+	if !t._handleIdle(port) {
+		// Didn't find anything ready to send yet, so stay idle
+		t.idle[port] = struct{}{}
+	}
+}
+
+func (t *switchTable) _sendingIn(port switchPort) {
+	if _, isIn := t.idle[port]; !isIn {
+		t.sending[port] = struct{}{}
 	}
-	t.admin <- newF
-	<-done
 }
diff --git a/src/yggdrasil/tcp.go b/src/yggdrasil/tcp.go
index dfb41510..66f708c2 100644
--- a/src/yggdrasil/tcp.go
+++ b/src/yggdrasil/tcp.go
@@ -33,12 +33,12 @@ const tcp_ping_interval = (default_timeout * 2 / 3)
 
 // The TCP listener and information about active TCP connections, to avoid duplication.
 type tcp struct {
-	link        *link
-	reconfigure chan chan error
-	mutex       sync.Mutex // Protecting the below
-	listeners   map[string]*TcpListener
-	calls       map[string]struct{}
-	conns       map[linkInfo](chan struct{})
+	link      *link
+	waitgroup sync.WaitGroup
+	mutex     sync.Mutex // Protecting the below
+	listeners map[string]*TcpListener
+	calls     map[string]struct{}
+	conns     map[linkInfo](chan struct{})
 }
 
 // TcpListener is a stoppable TCP listener interface. These are typically
@@ -47,7 +47,12 @@ type tcp struct {
 // multicast interfaces.
 type TcpListener struct {
 	Listener net.Listener
-	Stop     chan bool
+	stop     chan struct{}
+}
+
+func (l *TcpListener) Stop() {
+	defer func() { recover() }()
+	close(l.stop)
 }
 
 // Wrapper function to set additional options for specific connection types.
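TcpListener.Stop above relies on two properties of closed channels: closing wakes every goroutine waiting on the channel, and a second close panics, which the deferred recover() turns into a no-op so Stop is safe to call more than once. A minimal sketch of the same pattern, using an invented stopper type:

    package main

    import "fmt"

    type stopper struct {
    	stop chan struct{}
    }

    func (s *stopper) Stop() {
    	defer func() { recover() }() // a second close would panic; swallow it
    	close(s.stop)
    }

    func main() {
    	s := &stopper{stop: make(chan struct{})}
    	done := make(chan struct{})
    	go func() {
    		<-s.stop // unblocks for every reader once the channel is closed
    		fmt.Println("worker stopped")
    		close(done)
    	}()
    	s.Stop()
    	s.Stop() // safe: the double close is recovered
    	<-done
    }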
@@ -76,53 +81,17 @@ func (t *tcp) getAddr() *net.TCPAddr {
 // Initializes the struct.
 func (t *tcp) init(l *link) error {
 	t.link = l
-	t.reconfigure = make(chan chan error, 1)
 	t.mutex.Lock()
 	t.calls = make(map[string]struct{})
 	t.conns = make(map[linkInfo](chan struct{}))
 	t.listeners = make(map[string]*TcpListener)
 	t.mutex.Unlock()
-	go func() {
-		for {
-			e := <-t.reconfigure
-			t.link.core.config.Mutex.RLock()
-			added := util.Difference(t.link.core.config.Current.Listen, t.link.core.config.Previous.Listen)
-			deleted := util.Difference(t.link.core.config.Previous.Listen, t.link.core.config.Current.Listen)
-			t.link.core.config.Mutex.RUnlock()
-			if len(added) > 0 || len(deleted) > 0 {
-				for _, a := range added {
-					if a[:6] != "tcp://" {
-						continue
-					}
-					if _, err := t.listen(a[6:]); err != nil {
-						e <- err
-						continue
-					}
-				}
-				for _, d := range deleted {
-					if d[:6] != "tcp://" {
-						continue
-					}
-					t.mutex.Lock()
-					if listener, ok := t.listeners[d[6:]]; ok {
-						t.mutex.Unlock()
-						listener.Stop <- true
-					} else {
-						t.mutex.Unlock()
-					}
-				}
-				e <- nil
-			} else {
-				e <- nil
-			}
-		}
-	}()
-
 	t.link.core.config.Mutex.RLock()
 	defer t.link.core.config.Mutex.RUnlock()
 	for _, listenaddr := range t.link.core.config.Current.Listen {
 		if listenaddr[:6] != "tcp://" {
+			t.link.core.log.Errorln("Failed to add listener: listener", listenaddr, "is not correctly formatted, ignoring")
 			continue
 		}
 		if _, err := t.listen(listenaddr[6:]); err != nil {
@@ -133,6 +102,50 @@
 	return nil
 }
 
+func (t *tcp) stop() error {
+	t.mutex.Lock()
+	for _, listener := range t.listeners {
+		listener.Stop()
+	}
+	t.mutex.Unlock()
+	t.waitgroup.Wait()
+	return nil
+}
+
+func (t *tcp) reconfigure() {
+	t.link.core.config.Mutex.RLock()
+	added := util.Difference(t.link.core.config.Current.Listen, t.link.core.config.Previous.Listen)
+	deleted := util.Difference(t.link.core.config.Previous.Listen, t.link.core.config.Current.Listen)
+	t.link.core.config.Mutex.RUnlock()
+	if len(added) > 0 || len(deleted) > 0 {
+		for _, a := range added {
+			if a[:6] != "tcp://" {
+				t.link.core.log.Errorln("Failed to add listener: listener", a, "is not correctly formatted, ignoring")
+				continue
+			}
+			if _, err := t.listen(a[6:]); err != nil {
+				t.link.core.log.Errorln("Error adding TCP", a[6:], "listener:", err)
+			} else {
+				t.link.core.log.Infoln("Started TCP listener:", a[6:])
+			}
+		}
+		for _, d := range deleted {
+			if d[:6] != "tcp://" {
+				t.link.core.log.Errorln("Failed to delete listener: listener", d, "is not correctly formatted, ignoring")
+				continue
+			}
+			t.mutex.Lock()
+			if listener, ok := t.listeners[d[6:]]; ok {
+				t.mutex.Unlock()
+				listener.Stop()
+				t.link.core.log.Infoln("Stopped TCP listener:", d[6:])
+			} else {
+				t.mutex.Unlock()
+			}
+		}
+	}
+}
+
 func (t *tcp) listen(listenaddr string) (*TcpListener, error) {
 	var err error
@@ -144,8 +157,9 @@
 	if err == nil {
 		l := TcpListener{
 			Listener: listener,
-			Stop:     make(chan bool),
+			stop:     make(chan struct{}),
 		}
+		t.waitgroup.Add(1)
 		go t.listener(&l, listenaddr)
 		return &l, nil
 	}
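The waitgroup bookkeeping introduced above (Add before each listener or handler goroutine starts, Done inside it, Wait in tcp.stop) is what lets stop return only after every accept loop and in-flight connection handler has actually exited. A reduced sketch of that shutdown shape, with invented server/handle names standing in for the real tcp/listener/handler code:

    package main

    import (
    	"fmt"
    	"net"
    	"sync"
    )

    type server struct {
    	waitgroup sync.WaitGroup
    	listener  net.Listener
    }

    func (s *server) start(addr string) error {
    	var err error
    	s.listener, err = net.Listen("tcp", addr)
    	if err != nil {
    		return err
    	}
    	s.waitgroup.Add(1) // count the accept loop before it starts
    	go s.acceptLoop()
    	return nil
    }

    func (s *server) acceptLoop() {
    	defer s.waitgroup.Done()
    	for {
    		conn, err := s.listener.Accept()
    		if err != nil {
    			return // listener closed: the accept loop exits
    		}
    		s.waitgroup.Add(1) // count each handler too
    		go s.handle(conn)
    	}
    }

    func (s *server) handle(conn net.Conn) {
    	defer s.waitgroup.Done()
    	defer conn.Close()
    	fmt.Fprintln(conn, "hello")
    }

    // stop closes the listener and then blocks until the accept loop and
    // all in-flight handlers have finished.
    func (s *server) stop() {
    	s.listener.Close()
    	s.waitgroup.Wait()
    }

    func main() {
    	s := &server{}
    	if err := s.start("127.0.0.1:0"); err != nil {
    		panic(err)
    	}
    	s.stop()
    }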
@@ -155,6 +169,7 @@
 // Runs the listener, which spawns off goroutines for incoming connections.
 func (t *tcp) listener(l *TcpListener, listenaddr string) {
+	defer t.waitgroup.Done()
 	if l == nil {
 		return
 	}
@@ -169,7 +184,6 @@
 		t.mutex.Unlock()
 	}
 	// And here we go!
-	accepted := make(chan bool)
 	defer func() {
 		t.link.core.log.Infoln("Stopping TCP listener on:", l.Listener.Addr().String())
 		l.Listener.Close()
@@ -178,36 +192,29 @@
 		t.mutex.Unlock()
 	}()
 	t.link.core.log.Infoln("Listening for TCP on:", l.Listener.Addr().String())
+	go func() {
+		<-l.stop
+		l.Listener.Close()
+	}()
+	defer l.Stop()
 	for {
-		var sock net.Conn
-		var err error
-		// Listen in a separate goroutine, as that way it does not block us from
-		// receiving "stop" events
-		go func() {
-			sock, err = l.Listener.Accept()
-			accepted <- true
-		}()
-		// Wait for either an accepted connection, or a message telling us to stop
-		// the TCP listener
-		select {
-		case <-accepted:
-			if err != nil {
-				t.link.core.log.Errorln("Failed to accept connection:", err)
-				return
-			}
-			go t.handler(sock, true, nil)
-		case <-l.Stop:
+		sock, err := l.Listener.Accept()
+		if err != nil {
+			t.link.core.log.Errorln("Failed to accept connection:", err)
 			return
 		}
+		t.waitgroup.Add(1)
+		go t.handler(sock, true, nil)
 	}
 }
 
 // Checks if we already are calling this address
-func (t *tcp) isAlreadyCalling(saddr string) bool {
+func (t *tcp) startCalling(saddr string) bool {
 	t.mutex.Lock()
 	defer t.mutex.Unlock()
 	_, isIn := t.calls[saddr]
-	return isIn
+	t.calls[saddr] = struct{}{}
+	return !isIn
 }
 
 // Checks if a connection already exists.
@@ -221,16 +228,14 @@
 	if sintf != "" {
 		callname = fmt.Sprintf("%s/%s", saddr, sintf)
 	}
-	if t.isAlreadyCalling(callname) {
+	if !t.startCalling(callname) {
 		return
 	}
-	t.mutex.Lock()
-	t.calls[callname] = struct{}{}
-	t.mutex.Unlock()
 	defer func() {
 		// Block new calls for a little while, to mitigate livelock scenarios
-		time.Sleep(default_timeout)
-		time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
+		rand.Seed(time.Now().UnixNano())
+		delay := default_timeout + time.Duration(rand.Intn(10000))*time.Millisecond
+		time.Sleep(delay)
 		t.mutex.Lock()
 		delete(t.calls, callname)
 		t.mutex.Unlock()
@@ -255,7 +260,8 @@
 		if err != nil {
 			return
 		}
-		t.handler(conn, false, dialerdst.String())
+		t.waitgroup.Add(1)
+		t.handler(conn, false, saddr)
 	} else {
 		dst, err := net.ResolveTCPAddr("tcp", saddr)
 		if err != nil {
@@ -269,6 +275,7 @@
 		}
 		dialer := net.Dialer{
 			Control: t.tcpContext,
+			Timeout: time.Second * 5,
 		}
 		if sintf != "" {
 			ief, err := net.InterfaceByName(sintf)
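startCalling above folds the old check-then-mark sequence into a single critical section, so two concurrent dials to the same address can no longer both pass the check; the deferred randomized sleep then spaces out retries. A compact sketch of the check-and-set half, using an invented dialTracker type:

    package main

    import (
    	"fmt"
    	"sync"
    )

    // dialTracker remembers which addresses currently have a call in flight.
    type dialTracker struct {
    	mu    sync.Mutex
    	calls map[string]struct{}
    }

    // startCalling returns true only for the first caller of a given address;
    // the check and the insert happen under the same lock, so there is no
    // window in which two goroutines both see "not calling yet".
    func (d *dialTracker) startCalling(addr string) bool {
    	d.mu.Lock()
    	defer d.mu.Unlock()
    	if _, busy := d.calls[addr]; busy {
    		return false
    	}
    	d.calls[addr] = struct{}{}
    	return true
    }

    func (d *dialTracker) doneCalling(addr string) {
    	d.mu.Lock()
    	defer d.mu.Unlock()
    	delete(d.calls, addr)
    }

    func main() {
    	d := &dialTracker{calls: make(map[string]struct{})}
    	fmt.Println(d.startCalling("example:9001")) // true: we own the call
    	fmt.Println(d.startCalling("example:9001")) // false: already in flight
    	d.doneCalling("example:9001")
    	fmt.Println(d.startCalling("example:9001")) // true again
    }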
@@ -318,28 +325,31 @@
 			t.link.core.log.Debugln("Failed to dial TCP:", err)
 			return
 		}
+		t.waitgroup.Add(1)
 		t.handler(conn, false, nil)
 		}
 	}()
 }
 
 func (t *tcp) handler(sock net.Conn, incoming bool, options interface{}) {
+	defer t.waitgroup.Done() // Happens after sock.close
 	defer sock.Close()
 	t.setExtraOptions(sock)
 	stream := stream{}
 	stream.init(sock)
-	local, _, _ := net.SplitHostPort(sock.LocalAddr().String())
-	remote, _, _ := net.SplitHostPort(sock.RemoteAddr().String())
-	force := net.ParseIP(strings.Split(remote, "%")[0]).IsLinkLocalUnicast()
-	var name string
-	var proto string
+	var name, proto, local, remote string
 	if socksaddr, issocks := options.(string); issocks {
-		name = "socks://" + socksaddr + "/" + sock.RemoteAddr().String()
+		name = "socks://" + sock.RemoteAddr().String() + "/" + socksaddr
 		proto = "socks"
+		local, _, _ = net.SplitHostPort(sock.LocalAddr().String())
+		remote, _, _ = net.SplitHostPort(socksaddr)
 	} else {
 		name = "tcp://" + sock.RemoteAddr().String()
 		proto = "tcp"
+		local, _, _ = net.SplitHostPort(sock.LocalAddr().String())
+		remote, _, _ = net.SplitHostPort(sock.RemoteAddr().String())
 	}
+	force := net.ParseIP(strings.Split(remote, "%")[0]).IsLinkLocalUnicast()
 	link, err := t.link.core.link.create(&stream, name, proto, local, remote, incoming, force)
 	if err != nil {
 		t.link.core.log.Println(err)