Extract chunker

2025-08-23 14:57:37 +00:00 · 2015-07-07 18:55:58 -04:00
parent 50f9c20987
commit 36a62cf77d
12 changed files with 12 additions and 8 deletions
--- a/chunker/chunker.go
+++ b/chunker/chunker.go
@@ -1,341 +0,0 @@
-package chunker
-
-import (
-	"errors"
-	"hash"
-	"io"
-	"sync"
-)
-
-const (
-	// KiB and MiB are byte-count multipliers used by the size constants below.
-	KiB = 1024
-	MiB = 1024 * KiB
-
-	// windowSize is the size of the sliding window in bytes.
-	windowSize = 64
-
-	// averageBits is the number of low fingerprint bits that have to be zero
-	// at a cut point; 20 bits aim at chunks of about 1MiB on average.
-	averageBits = 20
-
-	// MinSize is the minimal size of a chunk.
-	MinSize = 512 * KiB
-	// MaxSize is the maximal size of a chunk.
-	MaxSize = 8 * MiB
-
-	// splitmask selects the lowest averageBits bits of the fingerprint.
-	splitmask = (1 << averageBits) - 1
-
-	// chunkerBufSize is the size of the read buffers handed out by bufPool.
-	chunkerBufSize = 512 * KiB
-)
-
-// bufPool hands out read buffers of chunkerBufSize bytes so that they can be
-// reused instead of being allocated anew.
-var bufPool = sync.Pool{
-	New: func() interface{} { return make([]byte, chunkerBufSize) },
-}
-
-// tables holds the two lookup tables that fillTables precomputes for one
-// polynomial.
-type tables struct {
-	// out is indexed by the byte that leaves the sliding window.
-	out [256]Pol
-	// mod is indexed by the digest shifted right by polShift.
-	mod [256]Pol
-}
-
-// cache keeps the precomputed tables per polynomial. The tables themselves
-// are read-only once built; the embedded mutex guards the map.
-var cache = struct {
-	sync.Mutex
-	entries map[Pol]*tables
-}{
-	entries: make(map[Pol]*tables),
-}
-
-// Chunk is one content-dependent chunk of bytes whose end was cut when the
-// Rabin Fingerprint had the value stored in Cut.
-type Chunk struct {
-	// Start is the position at which the chunk begins.
-	Start  uint
-	// Length is the number of bytes in the chunk.
-	Length uint
-	// Cut is the fingerprint value at the end of the chunk.
-	Cut    uint64
-	// Digest is the sum of the configured hash over the chunk's bytes; it is
-	// nil when the Chunker was created without a hash.
-	Digest []byte
-}
-
-// Reader returns an io.Reader that reads the Length bytes of the chunk,
-// beginning at offset Start, from r.
-func (c Chunk) Reader(r io.ReaderAt) io.Reader {
-	offset, length := int64(c.Start), int64(c.Length)
-	return io.NewSectionReader(r, offset, length)
-}
-
-// Chunker splits content with Rabin Fingerprints.
-type Chunker struct {
-	// polynomial, the shift derived from its degree (Deg() - 8) and the
-	// lookup tables computed for it; tables stays nil while pol is zero
-	pol      Pol
-	polShift uint
-	tables   *tables
-
-	// data source, and whether its end has been reached
-	rd     io.Reader
-	closed bool
-
-	// sliding window and the index of the next byte to replace in it
-	window [windowSize]byte
-	wpos   int
-
-	// read buffer; buf[bpos:bmax] holds the bytes not yet processed
-	buf  []byte
-	bpos uint
-	bmax uint
-
-	// start and length of the current chunk, and the number of bytes
-	// processed so far
-	start uint
-	count uint
-	pos   uint
-
-	pre uint // wait for this many bytes before starting to calculate a new chunk
-
-	// rolling fingerprint and the optional hash fed with every chunk's data
-	digest uint64
-	h      hash.Hash
-}
-
-// New returns a new Chunker based on polynomial pol that reads from rd and
-// passes all data to h along the way. The read buffer is taken from bufPool.
-// h may be nil, in which case the returned chunks carry no Digest.
-func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
-	c := new(Chunker)
-	c.buf = bufPool.Get().([]byte)
-	c.h = h
-	c.pol = pol
-	c.rd = rd
-
-	c.reset()
-
-	return c
-}
-
-// reset prepares the Chunker for the next chunk: it (re)loads the lookup
-// tables, clears the sliding window and the fingerprint, restarts the hash
-// and marks the current position as the start of the new chunk.
-func (c *Chunker) reset() {
-	c.polShift = uint(c.pol.Deg() - 8)
-	c.fillTables()
-
-	c.window = [windowSize]byte{}
-
-	c.closed = false
-	c.digest = 0
-	c.wpos = 0
-	c.count = 0
-
-	// fillTables leaves c.tables nil when no polynomial has been set. Sliding
-	// a byte in would dereference the nil tables, so skip it in that case and
-	// let Next report the missing polynomial.
-	if c.tables != nil {
-		c.slide(1)
-	}
-	c.start = c.pos
-
-	if c.h != nil {
-		c.h.Reset()
-	}
-
-	// do not start a new chunk unless at least MinSize bytes have been read
-	c.pre = MinSize - windowSize
-}
-
-// fillTables calculates out_table and mod_table for optimization. The tables
-// are stored in the global variable cache, keyed by polynomial, so that they
-// are only computed the first time a polynomial is seen; later calls for the
-// same polynomial just pick up the cached entry. When no polynomial is set,
-// c.tables is left untouched.
-func (c *Chunker) fillTables() {
-	// if polynomial hasn't been specified, do not compute anything for now
-	if c.pol == 0 {
-		return
-	}
-
-	// test if the tables are cached for this polynomial
-	cache.Lock()
-	defer cache.Unlock()
-	if t, ok := cache.entries[c.pol]; ok {
-		c.tables = t
-		return
-	}
-
-	// else create a new entry; the lock is held until the tables below are
-	// completely filled in
-	c.tables = &tables{}
-	cache.entries[c.pol] = c.tables
-
-	// calculate table for sliding out bytes. The byte to slide out is used as
-	// the index for the table, the value contains the following:
-	// out_table[b] = Hash(b || 0 ||        ...        || 0)
-	//                          \ windowsize-1 zero bytes /
-	// To slide out byte b_0 for window size w with known hash
-	// H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
-	//    H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
-	//  = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
-	//  = H(    0     || b_1 || ...     || b_w)
-	//
-	// Afterwards a new byte can be shifted in.
-	for b := 0; b < 256; b++ {
-		var h Pol
-
-		h = appendByte(h, byte(b), c.pol)
-		for i := 0; i < windowSize-1; i++ {
-			h = appendByte(h, 0, c.pol)
-		}
-		c.tables.out[b] = h
-	}
-
-	// calculate table for reduction mod Polynomial
-	k := c.pol.Deg()
-	for b := 0; b < 256; b++ {
-		// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and  B = b(x) * x^k
-		//
-		// The 8 bits above deg(Polynomial) determine what happens next and so
-		// these bits are used as a lookup to this table. The value is split in
-		// two parts: Part A contains the result of the modulus operation, part
-		// B is used to cancel out the 8 top bits so that one XOR operation is
-		// enough to reduce modulo Polynomial
-		c.tables.mod[b] = Pol(uint64(b)<<uint(k)).Mod(c.pol) | (Pol(b) << uint(k))
-	}
-}
-
-// Next returns the position and length of the next chunk of data. If an error
-// occurs while reading, the error is returned with a nil chunk. The state of
-// the current chunk is undefined. When the last chunk has been returned, all
-// subsequent calls yield a nil chunk and an io.EOF error.
-//
-// A chunk ends where the lowest averageBits bits of the fingerprint are zero
-// (but not before MinSize bytes), after MaxSize bytes, or at the end of the
-// input.
-func (c *Chunker) Next() (*Chunk, error) {
-	if c.tables == nil {
-		return nil, errors.New("polynomial is not set")
-	}
-
-	// Once the end of the input has been seen the read buffer has been handed
-	// back to bufPool and may already be in use by another Chunker, so it
-	// must not be read into again.
-	if c.closed {
-		return nil, io.EOF
-	}
-
-	for {
-		if c.bpos >= c.bmax {
-			n, err := io.ReadFull(c.rd, c.buf)
-
-			// a partially filled buffer is fine, the next read reports io.EOF
-			if err == io.ErrUnexpectedEOF {
-				err = nil
-			}
-
-			// io.ReadFull only returns io.EOF when no bytes could be read. If
-			// this is the case and we're in this branch, there are no more
-			// bytes to buffer, so this was the last chunk. If a different
-			// error has occurred, return that error and abandon the current
-			// chunk.
-			if err == io.EOF {
-				c.closed = true
-
-				// return the buffer to the pool and drop our reference to it
-				bufPool.Put(c.buf)
-				c.buf = nil
-
-				// return current chunk, if any bytes have been processed
-				if c.count > 0 {
-					return &Chunk{
-						Start:  c.start,
-						Length: c.count,
-						Cut:    c.digest,
-						Digest: c.hashDigest(),
-					}, nil
-				}
-			}
-
-			if err != nil {
-				return nil, err
-			}
-
-			c.bpos = 0
-			c.bmax = uint(n)
-		}
-
-		// check if bytes have to be dismissed before starting a new chunk
-		if c.pre > 0 {
-			n := c.bmax - c.bpos
-			if c.pre > n {
-				// the whole buffer lies before the point where the
-				// fingerprint has to be computed, only hash and count it
-				c.pre -= n
-				c.updateHash(c.buf[c.bpos:c.bmax])
-
-				c.count += n
-				c.pos += n
-				c.bpos = c.bmax
-
-				continue
-			}
-
-			c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
-
-			c.bpos += c.pre
-			c.count += c.pre
-			c.pos += c.pre
-			c.pre = 0
-		}
-
-		// add is the length the chunk would have if it ended after the
-		// current byte
-		add := c.count
-		for _, b := range c.buf[c.bpos:c.bmax] {
-			// inline c.slide(b) and append(b) to increase performance
-			out := c.window[c.wpos]
-			c.window[c.wpos] = b
-			c.digest ^= uint64(c.tables.out[out])
-			c.wpos = (c.wpos + 1) % windowSize
-
-			// c.append(b)
-			index := c.digest >> c.polShift
-			c.digest <<= 8
-			c.digest |= uint64(b)
-
-			c.digest ^= uint64(c.tables.mod[index])
-			// end inline
-
-			add++
-			if add < MinSize {
-				continue
-			}
-
-			if (c.digest&splitmask) == 0 || add >= MaxSize {
-				// consumed is the number of buffered bytes that belong to
-				// this chunk, including the current one
-				consumed := add - c.count
-				c.updateHash(c.buf[c.bpos : c.bpos+consumed])
-				c.count = add
-				c.pos += consumed
-				c.bpos += consumed
-
-				chunk := &Chunk{
-					Start:  c.start,
-					Length: c.count,
-					Cut:    c.digest,
-					Digest: c.hashDigest(),
-				}
-
-				c.reset()
-
-				return chunk, nil
-			}
-		}
-
-		// no cut point in the rest of the buffer, account for it and refill
-		steps := c.bmax - c.bpos
-		if steps > 0 {
-			c.updateHash(c.buf[c.bpos : c.bpos+steps])
-		}
-		c.count += steps
-		c.pos += steps
-		c.bpos = c.bmax
-	}
-}
-
-// updateHash feeds data to the hash, if one is configured, and panics when
-// the hash reports a write error.
-func (c *Chunker) updateHash(data []byte) {
-	if c.h == nil {
-		return
-	}
-
-	// the hashes from crypto/sha* do not return an error
-	if _, err := c.h.Write(data); err != nil {
-		panic(err)
-	}
-}
-
-// hashDigest returns the current sum of the hash, or nil when the Chunker
-// has no hash.
-func (c *Chunker) hashDigest() []byte {
-	if c.h != nil {
-		return c.h.Sum(nil)
-	}
-
-	return nil
-}
-
-// append shifts byte b into the digest and reduces the result with the mod
-// table, indexed by the digest's bits from polShift upwards.
-func (c *Chunker) append(b byte) {
-	top := c.digest >> c.polShift
-	shifted := (c.digest << 8) | uint64(b)
-
-	c.digest = shifted ^ uint64(c.tables.mod[top])
-}
-
-// slide replaces the oldest byte of the window with b: the old byte's
-// contribution is removed from the digest with the out table, then b is
-// appended.
-func (c *Chunker) slide(b byte) {
-	idx := c.wpos
-	leaving := c.window[idx]
-	c.window[idx] = b
-	c.digest ^= uint64(c.tables.out[leaving])
-	c.wpos = (idx + 1) % windowSize
-
-	c.append(b)
-}
-
-// appendByte returns (hash * x^8 + b) mod pol.
-func appendByte(hash Pol, b byte, pol Pol) Pol {
-	extended := (hash << 8) | Pol(b)
-	return extended.Mod(pol)
-}
--- a/chunker/chunker_test.go
+++ b/chunker/chunker_test.go
@@ -1,298 +0,0 @@
-package chunker_test
-
-import (
-	"bytes"
-	"crypto/md5"
-	"crypto/sha256"
-	"encoding/hex"
-	"hash"
-	"io"
-	"io/ioutil"
-	"math/rand"
-	"testing"
-	"time"
-
-	"github.com/restic/restic/chunker"
-	. "github.com/restic/restic/test"
-)
-
-// parseDigest decodes the hex string s and panics if it is not valid hex.
-func parseDigest(s string) []byte {
-	digest, err := hex.DecodeString(s)
-	if err == nil {
-		return digest
-	}
-
-	panic(err)
-}
-
-// chunk describes one expected chunk: its length, the fingerprint at the cut
-// point and the digest of its content.
-type chunk struct {
-	Length uint
-	CutFP  uint64
-	Digest []byte
-}
-
-// testPol is the polynomial used for all the tests below.
-const testPol = chunker.Pol(0x3DA3358B4DC173)
-
-// chunks1 lists the chunks expected for 32MB of random data taken from
-// math/rand's Uint32() seeded by the constant 23.
-//
-// chunking configuration:
-// window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23
-// polynomial 0x3DA3358B4DC173
-var chunks1 = []chunk{
-	chunk{2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")},
-	chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")},
-	chunk{1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")},
-	chunk{1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")},
-	chunk{2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")},
-	chunk{2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")},
-	chunk{609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")},
-	chunk{1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")},
-	chunk{959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")},
-	chunk{4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")},
-	chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")},
-	chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")},
-	chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")},
-	chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")},
-	chunk{1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")},
-	chunk{948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")},
-	chunk{645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")},
-	chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")},
-	chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")},
-	chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")},
-	chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")},
-	chunk{2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")},
-	chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")},
-}
-
-// chunks2 lists the chunks expected for a run of null bytes; it is used to
-// test that null bytes are split correctly even if the length of the input
-// is a multiple of MinSize. All four chunks are MinSize bytes long and
-// therefore share one digest.
-var chunks2 = []chunk{
-	chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
-	chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
-	chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
-	chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
-}
-
-// testWithData reads one chunk from chnker for every entry of testChunks and
-// fails the test when start, length, cut fingerprint or (if the chunk has
-// one) digest differ from the expectation. Afterwards the chunker must
-// report io.EOF with a nil chunk. The chunks read are returned.
-func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
-	chunks := make([]*chunker.Chunk, 0, len(testChunks))
-	last := len(testChunks) - 1
-
-	pos := uint(0)
-	for i, chunk := range testChunks {
-		c, err := chnker.Next()
-
-		if err != nil {
-			t.Fatalf("Error returned with chunk %d: %v", i, err)
-		}
-
-		if c == nil {
-			t.Fatalf("Nil chunk returned")
-		}
-
-		if c.Start != pos {
-			t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
-				i, pos, c.Start)
-		}
-
-		if c.Length != chunk.Length {
-			t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
-				i, chunk.Length, c.Length)
-		}
-
-		if c.Cut != chunk.CutFP {
-			t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
-				i, last, chunk.CutFP, c.Cut)
-		}
-
-		// chunkers without a hash return a nil digest, nothing to compare then
-		if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
-			t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
-				i, last, chunk.Digest, c.Digest)
-		}
-
-		pos += c.Length
-		chunks = append(chunks, c)
-	}
-
-	c, err := chnker.Next()
-
-	if c != nil {
-		t.Fatal("additional non-nil chunk returned")
-	}
-
-	if err != io.EOF {
-		t.Fatalf("wrong error returned after last chunk: %v", err)
-	}
-
-	return chunks
-}
-
-// getRandom returns count pseudo-random bytes taken from math/rand's
-// Uint32(), seeded with seed. Each 32 bit value contributes up to four
-// bytes, least significant byte first, so the result is reproducible for a
-// given seed and count.
-func getRandom(seed, count int) []byte {
-	buf := make([]byte, count)
-
-	rnd := rand.New(rand.NewSource(int64(seed)))
-	for i := 0; i < count; i += 4 {
-		r := rnd.Uint32()
-		// the last value is only partially used when count is not a
-		// multiple of four
-		for j := 0; j < 4 && i+j < count; j++ {
-			buf[i+j] = byte(r >> (8 * uint(j)))
-		}
-	}
-
-	return buf
-}
-
-// TestChunker chunks the random test data and a run of null bytes and
-// checks the chunks, and what their Readers return, against chunks1 and
-// chunks2.
-func TestChunker(t *testing.T) {
-	// setup data source
-	data := getRandom(23, 32*1024*1024)
-	ch := chunker.New(bytes.NewReader(data), testPol, sha256.New())
-
-	// test reader
-	for i, c := range testWithData(t, ch, chunks1) {
-		want := chunks1[i]
-
-		h := sha256.New()
-		n, err := io.Copy(h, c.Reader(bytes.NewReader(data)))
-		if err != nil {
-			t.Fatalf("io.Copy(): %v", err)
-		}
-
-		if uint(n) != want.Length {
-			t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
-				want.Length, n)
-		}
-
-		if d := h.Sum(nil); !bytes.Equal(d, want.Digest) {
-			t.Fatalf("wrong hash returned: expected %02x, got %02x",
-				want.Digest, d)
-		}
-	}
-
-	// setup nullbyte data source
-	zeroes := bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
-	ch = chunker.New(bytes.NewReader(zeroes), testPol, sha256.New())
-
-	testWithData(t, ch, chunks2)
-}
-
-// TestChunkerWithRandomPolynomial checks that chunking the test data with a
-// freshly generated polynomial yields a first chunk that differs from the
-// one obtained with testPol.
-func TestChunkerWithRandomPolynomial(t *testing.T) {
-	// setup data source
-	buf := getRandom(23, 32*1024*1024)
-
-	// generate a new random polynomial
-	start := time.Now()
-	p, err := chunker.RandomPolynomial()
-	OK(t, err)
-	t.Logf("generating random polynomial took %v", time.Since(start))
-
-	start = time.Now()
-	ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
-	t.Logf("creating chunker took %v", time.Since(start))
-
-	// make sure that first chunk is different
-	c, err := ch.Next()
-	OK(t, err)
-	Assert(t, c != nil, "Nil chunk returned")
-
-	Assert(t, c.Cut != chunks1[0].CutFP,
-		"Cut point is the same")
-	Assert(t, c.Length != chunks1[0].Length,
-		"Length is the same")
-	Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
-		"Digest is the same")
-}
-
-// TestChunkerWithoutHash chunks the random test data with a chunker that
-// has no hash and checks that the chunk boundaries still match chunks1 and
-// that the chunk Readers return exactly the bytes of the chunk.
-func TestChunkerWithoutHash(t *testing.T) {
-	// setup data source
-	buf := getRandom(23, 32*1024*1024)
-
-	ch := chunker.New(bytes.NewReader(buf), testPol, nil)
-	chunks := testWithData(t, ch, chunks1)
-
-	// test reader
-	for i, c := range chunks {
-		rd := c.Reader(bytes.NewReader(buf))
-
-		buf2, err := ioutil.ReadAll(rd)
-		if err != nil {
-			t.Fatalf("ioutil.ReadAll(): %v", err)
-		}
-
-		if uint(len(buf2)) != chunks1[i].Length {
-			t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
-				chunks1[i].Length, uint(len(buf2)))
-		}
-
-		// do not print the data itself, a chunk may be several MiB large
-		if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
-			t.Fatalf("invalid data for chunk %d returned (start %d, length %d)",
-				i, c.Start, c.Length)
-		}
-	}
-
-	// setup nullbyte data source
-	buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
-	ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
-
-	testWithData(t, ch, chunks2)
-}
-
-// benchmarkChunker measures chunking 10MiB of random data with testPol,
-// passing the data to hash (which may be nil) along the way.
-func benchmarkChunker(b *testing.B, hash hash.Hash) {
-	size := 10 * 1024 * 1024
-	rd := bytes.NewReader(getRandom(23, size))
-
-	b.ResetTimer()
-	b.SetBytes(int64(size))
-
-	var chunks int
-	for i := 0; i < b.N; i++ {
-		chunks = 0
-
-		// start again at the beginning of the data
-		if _, err := rd.Seek(0, 0); err != nil {
-			b.Fatalf("Seek() failed: %v", err)
-		}
-		ch := chunker.New(rd, testPol, hash)
-
-		for {
-			_, err := ch.Next()
-
-			if err == io.EOF {
-				break
-			}
-
-			if err != nil {
-				b.Fatalf("Unexpected error occurred: %v", err)
-			}
-
-			chunks++
-		}
-	}
-
-	b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks)
-}
-
-// BenchmarkChunkerWithSHA256 runs benchmarkChunker with a SHA-256 hash.
-func BenchmarkChunkerWithSHA256(b *testing.B) {
-	benchmarkChunker(b, sha256.New())
-}
-
-// BenchmarkChunkerWithMD5 runs benchmarkChunker with an MD5 hash.
-func BenchmarkChunkerWithMD5(b *testing.B) {
-	benchmarkChunker(b, md5.New())
-}
-
-// BenchmarkChunker runs benchmarkChunker without a hash.
-func BenchmarkChunker(b *testing.B) {
-	benchmarkChunker(b, nil)
-}
-
-// BenchmarkNewChunker measures creating a Chunker for a random polynomial;
-// generating the polynomial is not part of the measurement.
-func BenchmarkNewChunker(b *testing.B) {
-	pol, err := chunker.RandomPolynomial()
-	OK(b, err)
-
-	b.ResetTimer()
-
-	for n := b.N; n > 0; n-- {
-		chunker.New(bytes.NewBuffer(nil), pol, nil)
-	}
-}
--- a/chunker/doc.go
+++ b/chunker/doc.go
@@ -1,82 +0,0 @@
-// Copyright 2014 Alexander Neumann. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package chunker implements Content Defined Chunking (CDC) based on a rolling
-Rabin Checksum.
-
-Choosing a Random Irreducible Polynomial
-
-The function RandomPolynomial() returns a new random polynomial of degree 53
-for use with the chunker. The degree 53 is chosen because it is the largest
-prime below 64-8 = 56, so that the top 8 bits of an uint64 can be used for
-optimising calculations in the chunker.
-
-A random polynomial is chosen by selecting 64 random bits, masking away bits
-63..54 and setting bit 53 to one (otherwise the polynomial is not of the
-desired degree) and bit 0 to one (otherwise the polynomial is trivially
-reducible), so that 52 bits are chosen at random.
-
-This process is repeated until Irreducible() returns true, then this
-polynomial is returned. If this doesn't happen after 1 million tries, the
-function returns an error. The probability for selecting an irreducible
-polynomial at random is about 3.8% ( (2^53-2)/53 / 2^52), so the probability
-that no irreducible polynomial has been found after 100 tries is about
-2.1%.
-
-Verifying Irreducible Polynomials
-
-During development the results have been verified using the computational
-discrete algebra system GAP, which can be obtained from the website at
-http://www.gap-system.org/.
-
-For filtering a given list of polynomials in hexadecimal coefficient notation,
-the following script can be used:
-
-	# create x over F_2 = GF(2)
-	x := Indeterminate(GF(2), "x");
-
-	# test if polynomial is irreducible, i.e. the number of factors is one
-	IrredPoly := function (poly)
-		return (Length(Factors(poly)) = 1);
-	end;;
-
-	# create a polynomial in x from the hexadecimal representation of the
-	# coefficients
-	Hex2Poly := function (s)
-		return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
-	end;;
-
-	# list of candidates, in hex
-	candidates := [ "3DA3358B4DC173" ];
-
-	# create real polynomials
-	L := List(candidates, Hex2Poly);
-
-	# filter and display the list of irreducible polynomials contained in L
-	Display(Filtered(L, x -> (IrredPoly(x))));
-
-All irreducible polynomials from the list are written to the output.
-
-Background Literature
-
-An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
-
-Michael O. Rabin (1981): "Fingerprinting by Random Polynomials"
-http://www.xmailserver.org/rabin.pdf
-
-Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms"
-http://www.zlib.net/crc_v3.txt
-
-Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
-http://www.xmailserver.org/rabin_apps.pdf
-
-Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
-http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
-
-Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
-http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
-
-*/
-package chunker
--- a/chunker/polynomials.go
+++ b/chunker/polynomials.go
@@ -1,278 +0,0 @@
-package chunker
-
-import (
-	"crypto/rand"
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"strconv"
-)
-
-// Pol is a polynomial from F_2[X]. Bit i of the value is the coefficient of
-// x^i, so the highest set bit gives the degree.
-type Pol uint64
-
-// Add returns x+y, which is the bitwise XOR of the coefficients.
-func (x Pol) Add(y Pol) Pol {
-	return x ^ y
-}
-
-// mulOverflows returns true if the multiplication would overflow uint64. It
-// multiplies, divides the product by b again and compares the quotient with
-// a.
-// Code by Rob Pike, see
-// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ
-func mulOverflows(a, b Pol) bool {
-	// multiplying by 0 or 1 never overflows
-	if a > 1 && b > 1 {
-		return a.mul(b).Div(b) != a
-	}
-
-	return false
-}
-
-// mul returns x*y without checking for overflow: for every set bit i of y,
-// x shifted left by i is added to the result.
-func (x Pol) mul(y Pol) Pol {
-	var product Pol
-	if x == 0 || y == 0 {
-		return product
-	}
-
-	for shift, rest := uint(0), y; rest != 0; shift, rest = shift+1, rest>>1 {
-		if rest&1 != 0 {
-			product ^= x << shift
-		}
-	}
-
-	return product
-}
-
-// Mul returns x*y. It panics when the product does not fit into a uint64.
-func (x Pol) Mul(y Pol) Pol {
-	if !mulOverflows(x, y) {
-		return x.mul(y)
-	}
-
-	panic("multiplication would overflow uint64")
-}
-
-// Deg returns the degree of the polynomial x, i.e. the index of its highest
-// set bit. If x is zero, -1 is returned.
-func (x Pol) Deg() int {
-	// the degree of 0 is -1; every bit position shifted out adds one
-	deg := -1
-	for rest := x; rest != 0; rest >>= 1 {
-		deg++
-	}
-
-	return deg
-}
-
-// String returns the coefficients in hex, prefixed with "0x".
-func (x Pol) String() string {
-	// convert to uint64 first so that fmt does not call String again
-	return fmt.Sprintf("0x%x", uint64(x))
-}
-
-// Expand returns the string representation of the polynomial x as a sum of
-// powers of x, highest power first, e.g. "x^3+x+1". Zero is returned as "0".
-func (x Pol) Expand() string {
-	if x == 0 {
-		return "0"
-	}
-
-	terms := ""
-	for e := x.Deg(); e >= 0; e-- {
-		if x&(1<<uint(e)) == 0 {
-			continue
-		}
-
-		switch e {
-		case 0:
-			terms += "+1"
-		case 1:
-			terms += "+x"
-		default:
-			terms += fmt.Sprintf("+x^%d", e)
-		}
-	}
-
-	// drop the leading "+"
-	return terms[1:]
-}
-
-// DivMod returns the quotient q and the remainder r of the division x / d,
-// see https://en.wikipedia.org/wiki/Division_algorithm. A zero x yields
-// (0, 0) for every d; otherwise dividing by zero panics.
-func (x Pol) DivMod(d Pol) (Pol, Pol) {
-	switch {
-	case x == 0:
-		return 0, 0
-	case d == 0:
-		panic("division by zero")
-	}
-
-	var quotient Pol
-	remainder := x
-
-	// cancel the leading term of the remainder until its degree is below
-	// the degree of d
-	dDeg := d.Deg()
-	for shift := remainder.Deg() - dDeg; shift >= 0; shift = remainder.Deg() - dDeg {
-		quotient |= 1 << uint(shift)
-		remainder ^= d << uint(shift)
-	}
-
-	return quotient, remainder
-}
-
-// Div returns the quotient of the division x / d, the remainder is dropped.
-func (x Pol) Div(d Pol) Pol {
-	quotient, _ := x.DivMod(d)
-
-	return quotient
-}
-
-// Mod returns the remainder of the division x / d, the quotient is dropped.
-func (x Pol) Mod(d Pol) Pol {
-	_, remainder := x.DivMod(d)
-
-	return remainder
-}
-
-// randPolMaxTries is the number of candidates RandomPolynomial tests before
-// it gives up. A function that may not terminate is undesirable, so this
-// really large upper bound is specified for finding a new irreducible
-// polynomial, and an error is returned when none has been found within
-// randPolMaxTries.
-const randPolMaxTries = 1e6
-
-// RandomPolynomial returns a new random irreducible polynomial of degree 53
-// (largest prime number below 64-8). There are (2^53-2)/53 irreducible
-// polynomials of degree 53 in F_2[X], c.f. Michael O. Rabin (1981):
-// "Fingerprinting by Random Polynomials", page 4. If no polynomial could be
-// found in one million tries, an error is returned.
-func RandomPolynomial() (Pol, error) {
-	const (
-		// degreeBit is the coefficient of x^53
-		degreeBit = Pol(1) << 53
-		// lowMask keeps bits 0..53 and masks away everything above
-		lowMask = (degreeBit << 1) - 1
-	)
-
-	for tries := 0; tries < randPolMaxTries; tries++ {
-		var candidate Pol
-
-		// choose polynomial at random
-		if err := binary.Read(rand.Reader, binary.LittleEndian, &candidate); err != nil {
-			return 0, err
-		}
-
-		// set highest and lowest bit so that the degree is 53 and the
-		// polynomial is not trivially reducible
-		candidate = (candidate & lowMask) | degreeBit | 1
-
-		if candidate.Irreducible() {
-			return candidate, nil
-		}
-	}
-
-	// If this is reached, we haven't found an irreducible polynomial in
-	// randPolMaxTries. This error is very unlikely to occur.
-	return 0, errors.New("unable to find new random irreducible polynomial")
-}
-
-// GCD computes the Greatest Common Divisor of x and f with the Euclidean
-// algorithm. If one of the two is zero, the other one is returned.
-func (x Pol) GCD(f Pol) Pol {
-	for {
-		if f == 0 {
-			return x
-		}
-
-		if x == 0 {
-			return f
-		}
-
-		// make sure that x is the polynomial of higher degree
-		if x.Deg() < f.Deg() {
-			x, f = f, x
-		}
-
-		// continue with (f, x mod f)
-		x, f = f, x.Mod(f)
-	}
-}
-
-// Irreducible returns true iff x is irreducible over F_2. This function
-// uses Ben Or's reducibility test: x is reducible if it shares a factor with
-// X^(2^i)-X for some i up to half of its degree.
-//
-// The constant polynomials 0 and 1 are not irreducible.
-//
-// For details see "Tests and Constructions of Irreducible Polynomials over
-// Finite Fields".
-func (x Pol) Irreducible() bool {
-	deg := x.Deg()
-
-	// the loop below does not run for constants, so they have to be
-	// rejected explicitly
-	if deg < 1 {
-		return false
-	}
-
-	for i := 1; i <= deg/2; i++ {
-		if x.GCD(qp(uint(i), x)) != 1 {
-			return false
-		}
-	}
-
-	return true
-}
-
-// MulMod computes x*f mod g. The product is built bit by bit from reduced
-// intermediate values, so it never overflows, regardless of the degrees of
-// x and f. As with Mod, a zero g panics unless x or f is zero.
-func (x Pol) MulMod(f, g Pol) Pol {
-	if x == 0 || f == 0 {
-		return 0
-	}
-
-	var res Pol
-
-	// a holds x * X^i mod g for the bit i of f that is currently examined.
-	// Its degree is below the degree of g and therefore below 63, so the
-	// shift by one bit cannot overflow.
-	a := x.Mod(g)
-	for ; f != 0; f >>= 1 {
-		if f&1 != 0 {
-			// both summands are reduced, so the sum is reduced as well
-			res = res.Add(a)
-		}
-		a = (a << 1).Mod(g)
-	}
-
-	return res
-}
-
-// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
-// reducibility test.
-func qp(p uint, g Pol) Pol {
-	// start with x
-	res := Pol(2)
-
-	// Squaring p times yields x^(2^p). The squarings are counted directly
-	// instead of comparing against 1<<p, which does not fit into an int for
-	// large p.
-	for i := uint(0); i < p; i++ {
-		res = res.MulMod(res, g)
-	}
-
-	// add x
-	return res.Add(2).Mod(g)
-}
-
-// MarshalJSON encodes the polynomial as a JSON string that contains the
-// coefficients in hex, without a prefix.
-func (p Pol) MarshalJSON() ([]byte, error) {
-	hexDigits := strconv.FormatUint(uint64(p), 16)
-
-	return []byte(`"` + hexDigits + `"`), nil
-}
-
-// UnmarshalJSON decodes a polynomial from a JSON string that contains the
-// coefficients in hex, as written by MarshalJSON. Anything that is not a
-// quoted string of hex digits is rejected with an error.
-func (p *Pol) UnmarshalJSON(data []byte) error {
-	// the value must be enclosed in quotes, otherwise stripping the first and
-	// last byte below would silently drop digits, e.g. of a JSON number
-	if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' {
-		return errors.New("invalid string for polynomial")
-	}
-	n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64)
-	if err != nil {
-		return err
-	}
-	*p = Pol(n)
-
-	return nil
-}
--- a/chunker/polynomials_test.go
+++ b/chunker/polynomials_test.go
@@ -1,385 +0,0 @@
-package chunker_test
-
-import (
-	"strconv"
-	"testing"
-
-	"github.com/restic/restic/chunker"
-	. "github.com/restic/restic/test"
-)
-
-// polAddTests lists pairs of polynomials together with their expected sum,
-// which is the XOR of the two values.
-var polAddTests = []struct {
-	x, y chunker.Pol
-	sum  chunker.Pol
-}{
-	{23, 16, 23 ^ 16},
-	{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
-	{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
-}
-
-// TestPolAdd checks Add against polAddTests with the operands in both
-// orders.
-func TestPolAdd(t *testing.T) {
-	for _, tc := range polAddTests {
-		forward, backward := tc.x.Add(tc.y), tc.y.Add(tc.x)
-
-		Equals(t, tc.sum, forward)
-		Equals(t, tc.sum, backward)
-	}
-}
-
-// parseBin converts a string of binary digits into a polynomial and panics
-// if the string cannot be parsed.
-func parseBin(s string) chunker.Pol {
-	bits, err := strconv.ParseUint(s, 2, 64)
-	if err == nil {
-		return chunker.Pol(bits)
-	}
-
-	panic(err)
-}
-
-// polMulTests lists pairs of polynomials together with their expected
-// product; none of the products overflows 64 bits.
-var polMulTests = []struct {
-	x, y chunker.Pol
-	res  chunker.Pol
-}{
-	{1, 2, 2},
-	{
-		parseBin("1101"),
-		parseBin("10"),
-		parseBin("11010"),
-	},
-	{
-		parseBin("1101"),
-		parseBin("11"),
-		parseBin("10111"),
-	},
-	{
-		0x40000000,
-		0x40000000,
-		0x1000000000000000,
-	},
-	{
-		parseBin("1010"),
-		parseBin("100100"),
-		parseBin("101101000"),
-	},
-	{
-		parseBin("100"),
-		parseBin("11"),
-		parseBin("1100"),
-	},
-	{
-		parseBin("11"),
-		parseBin("110101"),
-		parseBin("1011111"),
-	},
-	{
-		parseBin("10011"),
-		parseBin("110101"),
-		parseBin("1100001111"),
-	},
-}
-
-// TestPolMul checks Mul against polMulTests with the operands in both
-// orders.
-func TestPolMul(t *testing.T) {
-	for i, test := range polMulTests {
-		m := test.x.Mul(test.y)
-		Assert(t, test.res == m,
-			"TestPolMul failed for test %d: %v * %v: want %v, got %v",
-			i, test.x, test.y, test.res, m)
-
-		// same product with swapped operands; report them in the order used
-		m = test.y.Mul(test.x)
-		Assert(t, test.res == m,
-			"TestPolMul failed for test %d: %v * %v: want %v, got %v",
-			i, test.y, test.x, test.res, m)
-	}
-}
-
-// TestPolMulOverflow checks that Mul panics with the overflow message when
-// the product does not fit into 64 bits.
-func TestPolMulOverflow(t *testing.T) {
-	defer func() {
-		// try to recover overflow error
-		err := recover()
-
-		// nil means that nothing panicked: Mul returned and the t.Fatal
-		// below is already failing the test, so there is nothing to re-raise
-		if err == nil {
-			return
-		}
-
-		if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
-			return
-		}
-
-		t.Logf("invalid error raised: %v", err)
-		// re-raise error if not overflow
-		panic(err)
-	}()
-
-	x := chunker.Pol(1 << 63)
-	x.Mul(2)
-	t.Fatal("overflow test did not panic")
-}
-
-// polDivTests lists dividend x and divisor y together with the expected
-// quotient of x / y.
-var polDivTests = []struct {
-	x, y chunker.Pol
-	res  chunker.Pol
-}{
-	{10, 50, 0},
-	{0, 1, 0},
-	{
-		parseBin("101101000"), // 0x168
-		parseBin("1010"),      // 0xa
-		parseBin("100100"),    // 0x24
-	},
-	{2, 2, 1},
-	{
-		0x8000000000000000,
-		0x8000000000000000,
-		1,
-	},
-	{
-		parseBin("1100"),
-		parseBin("100"),
-		parseBin("11"),
-	},
-	{
-		parseBin("1100001111"),
-		parseBin("10011"),
-		parseBin("110101"),
-	},
-}
-
-// TestPolDiv checks Div against polDivTests.
-func TestPolDiv(t *testing.T) {
-	for i, test := range polDivTests {
-		m := test.x.Div(test.y)
-		Assert(t, test.res == m,
-			"TestPolDiv failed for test %d: %v / %v: want %v, got %v",
-			i, test.x, test.y, test.res, m)
-	}
-}
-
-// polModTests lists dividend x and divisor y together with the expected
-// remainder of x / y.
-var polModTests = []struct {
-	x, y chunker.Pol
-	res  chunker.Pol
-}{
-	{10, 50, 10},
-	{0, 1, 0},
-	{
-		parseBin("101101001"),
-		parseBin("1010"),
-		parseBin("1"),
-	},
-	{2, 2, 0},
-	{
-		0x8000000000000000,
-		0x8000000000000000,
-		0,
-	},
-	{
-		parseBin("1100"),
-		parseBin("100"),
-		parseBin("0"),
-	},
-	{
-		parseBin("1100001111"),
-		parseBin("10011"),
-		parseBin("0"),
-	},
-}
-
-// TestPolModt checks Mod against polModTests.
-func TestPolModt(t *testing.T) {
-	for _, tc := range polModTests {
-		remainder := tc.x.Mod(tc.y)
-		Equals(t, tc.res, remainder)
-	}
-}
-
-// BenchmarkPolDivMod measures DivMod for a fixed pair of polynomials.
-func BenchmarkPolDivMod(t *testing.B) {
-	divisor := chunker.Pol(0x2482734cacca49)
-	dividend := chunker.Pol(0x3af4b284899)
-
-	for n := t.N; n > 0; n-- {
-		dividend.DivMod(divisor)
-	}
-}
-
-// BenchmarkPolDiv measures Div for a fixed pair of polynomials.
-func BenchmarkPolDiv(t *testing.B) {
-	divisor := chunker.Pol(0x2482734cacca49)
-	dividend := chunker.Pol(0x3af4b284899)
-
-	for n := t.N; n > 0; n-- {
-		dividend.Div(divisor)
-	}
-}
-
-// BenchmarkPolMod measures Mod for a fixed pair of polynomials.
-func BenchmarkPolMod(t *testing.B) {
-	divisor := chunker.Pol(0x2482734cacca49)
-	dividend := chunker.Pol(0x3af4b284899)
-
-	for n := t.N; n > 0; n-- {
-		dividend.Mod(divisor)
-	}
-}
-
-// BenchmarkPolDeg measures Deg for a polynomial of degree 41; the degree is
-// verified once before the measurement starts.
-func BenchmarkPolDeg(t *testing.B) {
-	f := chunker.Pol(0x3af4b284899)
-	d := f.Deg()
-	if d != 41 {
-		t.Fatalf("BenchmarkPolDeg: Wrong degree %d returned, expected %d",
-			d, 41)
-	}
-
-	for i := 0; i < t.N; i++ {
-		f.Deg()
-	}
-}
-
-// TestRandomPolynomial checks that RandomPolynomial does not return an
-// error.
-func TestRandomPolynomial(t *testing.T) {
-	var err error
-	_, err = chunker.RandomPolynomial()
-	OK(t, err)
-}
-
-// BenchmarkRandomPolynomial measures generating random irreducible
-// polynomials.
-func BenchmarkRandomPolynomial(t *testing.B) {
-	for n := t.N; n > 0; n-- {
-		_, err := chunker.RandomPolynomial()
-		OK(t, err)
-	}
-}
-
-// TestExpandPolynomial checks the expanded string form of the polynomial
-// 0x3DA3358B4DC173.
-func TestExpandPolynomial(t *testing.T) {
-	const want = "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1"
-
-	got := chunker.Pol(0x3DA3358B4DC173).Expand()
-	Equals(t, want, got)
-}
-
-// polIrredTests lists polynomials together with the expected result of
-// Irreducible().
-var polIrredTests = []struct {
-	f     chunker.Pol
-	irred bool
-}{
-	{0x38f1e565e288df, false},
-	{0x3DA3358B4DC173, true},
-	{0x30a8295b9d5c91, false},
-	{0x255f4350b962cb, false},
-	{0x267f776110a235, false},
-	{0x2f4dae10d41227, false},
-	{0x2482734cacca49, true},
-	{0x312daf4b284899, false},
-	{0x29dfb6553d01d1, false},
-	{0x3548245eb26257, false},
-	{0x3199e7ef4211b3, false},
-	{0x362f39017dae8b, false},
-	{0x200d57aa6fdacb, false},
-	{0x35e0a4efa1d275, false},
-	{0x2ced55b026577f, false},
-	{0x260b012010893d, false},
-	{0x2df29cbcd59e9d, false},
-	{0x3f2ac7488bd429, false},
-	{0x3e5cb1711669fb, false},
-	{0x226d8de57a9959, false},
-	{0x3c8de80aaf5835, false},
-	{0x2026a59efb219b, false},
-	{0x39dfa4d13fb231, false},
-	{0x3143d0464b3299, false},
-}
-
-// TestPolIrreducible checks Irreducible against polIrredTests.
-func TestPolIrreducible(t *testing.T) {
-	for _, tc := range polIrredTests {
-		got := tc.f.Irreducible()
-		Assert(t, got == tc.irred,
-			"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
-			tc.f, got, tc.irred)
-	}
-}
-
-// BenchmarkPolIrreducible measures Irreducible for the first irreducible
-// polynomial listed in polIrredTests.
-func BenchmarkPolIrreducible(b *testing.B) {
-	// find first irreducible polynomial
-	var pol chunker.Pol
-	for i := range polIrredTests {
-		if !polIrredTests[i].irred {
-			continue
-		}
-
-		pol = polIrredTests[i].f
-		break
-	}
-
-	for n := b.N; n > 0; n-- {
-		Assert(b, pol.Irreducible(),
-			"Irreducibility test for Polynomial %v failed", pol)
-	}
-}
-
-// polGCDTests lists pairs of polynomials together with their expected
-// greatest common divisor.
-var polGCDTests = []struct {
-	f1  chunker.Pol
-	f2  chunker.Pol
-	gcd chunker.Pol
-}{
-	{10, 50, 2},
-	{0, 1, 1},
-	{
-		parseBin("101101001"),
-		parseBin("1010"),
-		parseBin("1"),
-	},
-	{2, 2, 2},
-	{
-		parseBin("1010"),
-		parseBin("11"),
-		parseBin("11"),
-	},
-	{
-		0x8000000000000000,
-		0x8000000000000000,
-		0x8000000000000000,
-	},
-	{
-		parseBin("1100"),
-		parseBin("101"),
-		parseBin("11"),
-	},
-	{
-		parseBin("1100001111"),
-		parseBin("10011"),
-		parseBin("10011"),
-	},
-	{
-		0x3DA3358B4DC173,
-		0x3DA3358B4DC173,
-		0x3DA3358B4DC173,
-	},
-	{
-		0x3DA3358B4DC173,
-		0x230d2259defd,
-		1,
-	},
-	{
-		0x230d2259defd,
-		0x51b492b3eff2,
-		parseBin("10011"),
-	},
-}
-
-// TestPolGCD checks GCD against polGCDTests with the operands in both
-// orders.
-func TestPolGCD(t *testing.T) {
-	for i, test := range polGCDTests {
-		forward := test.f1.GCD(test.f2)
-		Assert(t, test.gcd == forward,
-			"GCD test %d (%+v) failed: got %v, wanted %v",
-			i, test, forward, test.gcd)
-
-		backward := test.f2.GCD(test.f1)
-		Assert(t, test.gcd == backward,
-			"GCD test %d (%+v) failed: got %v, wanted %v",
-			i, test, backward, test.gcd)
-	}
-}
-
-// polMulModTests lists factors f1 and f2 and modulus g together with the
-// expected value of f1*f2 mod g.
-var polMulModTests = []struct {
-	f1  chunker.Pol
-	f2  chunker.Pol
-	g   chunker.Pol
-	mod chunker.Pol
-}{
-	{
-		0x1230,
-		0x230,
-		0x55,
-		0x22,
-	},
-	{
-		0x0eae8c07dbbb3026,
-		0xd5d6db9de04771de,
-		0xdd2bda3b77c9,
-		0x425ae8595b7a,
-	},
-}
-
-// TestPolMulMod checks MulMod against polMulModTests.
-func TestPolMulMod(t *testing.T) {
-	for i := range polMulModTests {
-		test := polMulModTests[i]
-
-		mod := test.f1.MulMod(test.f2, test.g)
-		Assert(t, mod == test.mod,
-			"MulMod test %d (%+v) failed: got %v, wanted %v",
-			i, test, mod, test.mod)
-	}
-}