repository: split index into a separate package

2025-12-03 21:21:47 +00:00 · 2022-06-12 14:43:43 +02:00
parent 5760ba6989
commit 2e3f1c08c5
20 changed files with 101 additions and 80 deletions
--- a/internal/index/index.go
+++ b/internal/index/index.go
@@ -0,0 +1,589 @@
+package index
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"sync"
+	"time"
+
+	"github.com/restic/restic/internal/crypto"
+	"github.com/restic/restic/internal/errors"
+	"github.com/restic/restic/internal/restic"
+
+	"github.com/restic/restic/internal/debug"
+)
+
+// In large repositories, millions of blobs are stored in the repository
+// and restic needs to store an index entry for each blob in memory for
+// most operations.
+// Hence the index data structure defined here is one of the main contributions
+// to the total memory requirements of restic.
+//
+// We store the index entries in indexMaps. In these maps, entries take 56
+// bytes each, plus 8/4 = 2 bytes of unused pointers on average, not counting
+// malloc and header struct overhead and ignoring duplicates (those are only
+// present in edge cases and are also removed by prune runs).
+//
+// In the index entries, we need to reference the packID. As one pack may
+// contain many blobs the packIDs are saved in a separate array and only the index
+// within this array is saved in the indexEntry
+//
+// We assume on average a minimum of 8 blobs per pack; BP=8.
+// (Note that for large files there should be 3 blobs per pack as the average chunk
+// size is 1.5 MB and the minimum pack size is 4 MB)
+//
+// We have the following sizes:
+// indexEntry:  56 bytes  (on amd64)
+// each packID: 32 bytes
+//
+// To save N index entries, we therefore need:
+// N * (56 + 2) bytes + N * 32 bytes / BP = N * 62 bytes,
+// i.e., fewer than 64 bytes per blob in an index.
+
+// Index holds lookup tables for id -> pack.
+//
+// The methods in this file lock m before reading or modifying the other
+// fields; Supersedes is the one accessor that reads its field without it.
+type Index struct {
+	// m serializes access to the fields below.
+	m      sync.Mutex
+	// byType holds one map of index entries per blob type.
+	byType [restic.NumBlobTypes]indexMap
+	// packs lists the pack IDs; index entries refer to a pack by its
+	// position in this slice.
+	packs  restic.IDs
+
+	final      bool       // set to true for all indexes read from the backend ("finalized")
+	ids        restic.IDs // set to the IDs of the contained finalized indexes
+	// supersedes lists the IDs of the indexes replaced by this one.
+	supersedes restic.IDs
+	// created is set by NewIndex and used by IndexFull to compute the age.
+	created    time.Time
+}
+
+// NewIndex creates an empty, non-finalized index whose creation time is the
+// current time.
+func NewIndex() *Index {
+	idx := new(Index)
+	idx.created = time.Now()
+	return idx
+}
+
+// addToPacks appends the given pack ID to idx.packs and returns its position
+// in that slice. Index entries store this position instead of the 32-byte
+// pack ID itself.
+func (idx *Index) addToPacks(id restic.ID) int {
+	pos := len(idx.packs)
+	idx.packs = append(idx.packs, id)
+	return pos
+}
+
+// maxuint32 is the largest value that fits into a uint32.
+const maxuint32 = 1<<32 - 1
+
+// store adds blob to the map for its blob type, referring to its pack by the
+// position packIndex in idx.packs. It panics if the offset or one of the
+// lengths does not fit into a uint32, which is how the entries store them.
+func (idx *Index) store(packIndex int, blob restic.Blob) {
+	fits := blob.Offset <= maxuint32 &&
+		blob.Length <= maxuint32 &&
+		blob.UncompressedLength <= maxuint32
+	if !fits {
+		panic("offset or length does not fit in uint32. You have packs > 4GB!")
+	}
+
+	idx.byType[blob.Type].add(
+		blob.ID,
+		packIndex,
+		uint32(blob.Offset),
+		uint32(blob.Length),
+		uint32(blob.UncompressedLength),
+	)
+}
+
+// Final reports whether the index has been finalized, which is the case for
+// all indexes read from the backend and after a call to Finalize.
+func (idx *Index) Final() bool {
+	idx.m.Lock()
+	isFinal := idx.final
+	idx.m.Unlock()
+
+	return isFinal
+}
+
+// Thresholds used by IndexFull: an index counts as full once it holds
+// indexMaxBlobs blobs (indexMaxBlobsCompressed when compress is set) or once
+// it is at least indexMaxAge old.
+const (
+	indexMaxBlobs           = 50000
+	indexMaxBlobsCompressed = 3 * indexMaxBlobs
+	indexMaxAge             = 10 * time.Minute
+)
+
+// IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
+//
+// An index is full when it was created at least indexMaxAge ago or when the
+// number of blobs over all blob types has reached the limit selected by
+// compress. The age check comes first, so an old index is reported as full
+// regardless of its size.
+//
+// IndexFull is declared as a variable, not a function; presumably so that it
+// can be replaced in tests (confirm against callers).
+var IndexFull = func(idx *Index, compress bool) bool {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	debug.Log("checking whether index %p is full", idx)
+
+	// count the entries of all blob types
+	var blobs uint
+	for typ := range idx.byType {
+		blobs += idx.byType[typ].len()
+	}
+	age := time.Since(idx.created)
+
+	var maxBlobs uint = indexMaxBlobs
+	if compress {
+		maxBlobs = indexMaxBlobsCompressed
+	}
+
+	switch {
+	case age >= indexMaxAge:
+		// the format string needs a verb for age, otherwise the output is
+		// garbled with a "%!(EXTRA ...)" suffix
+		debug.Log("index %p is old enough (%v)", idx, age)
+		return true
+	case blobs >= maxBlobs:
+		debug.Log("index %p has %d blobs", idx, blobs)
+		return true
+	}
+
+	debug.Log("index %p only has %d blobs and is too young (%v)", idx, blobs, age)
+	return false
+}
+
+// StorePack registers the pack with the given id and adds an index entry for
+// each of its blobs. It panics when called on a finalized index.
+func (idx *Index) StorePack(id restic.ID, blobs []restic.Blob) {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	if idx.final {
+		panic("store new item in finalized index")
+	}
+
+	debug.Log("%v", blobs)
+
+	pos := idx.addToPacks(id)
+	for i := range blobs {
+		idx.store(pos, blobs[i])
+	}
+}
+
+func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBlob {
+	return restic.PackedBlob{
+		Blob: restic.Blob{
+			BlobHandle: restic.BlobHandle{
+				ID:   e.id,
+				Type: t},
+			Length:             uint(e.length),
+			Offset:             uint(e.offset),
+			UncompressedLength: uint(e.uncompressedLength),
+		},
+		PackID: idx.packs[e.packIndex],
+	}
+}
+
+// Lookup appends every entry stored for the blob handle bh, duplicates
+// included, to pbs and returns the extended slice.
+func (idx *Index) Lookup(bh restic.BlobHandle, pbs []restic.PackedBlob) []restic.PackedBlob {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	collect := func(e *indexEntry) {
+		pbs = append(pbs, idx.toPackedBlob(e, bh.Type))
+	}
+	idx.byType[bh.Type].foreachWithID(bh.ID, collect)
+
+	return pbs
+}
+
+// Has reports whether the index contains an entry for the blob handle bh.
+func (idx *Index) Has(bh restic.BlobHandle) bool {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	entry := idx.byType[bh.Type].get(bh.ID)
+	return entry != nil
+}
+
+// LookupSize returns the plaintext length of the blob with the given handle.
+// If the entry carries an uncompressed length, that value is returned;
+// otherwise the length is derived from the stored length via
+// crypto.PlaintextLength. found is false if the index has no such blob.
+func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found bool) {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	entry := idx.byType[bh.Type].get(bh.ID)
+	switch {
+	case entry == nil:
+		return 0, false
+	case entry.uncompressedLength != 0:
+		return uint(entry.uncompressedLength), true
+	default:
+		return uint(crypto.PlaintextLength(int(entry.length))), true
+	}
+}
+
+// Supersedes returns the list of indexes this index supersedes, if any.
+//
+// The field is read without taking idx.m and the internal slice is returned
+// as is, not a copy. NOTE(review): Dump calls this method while holding
+// idx.m, so adding locking here requires changing Dump first.
+func (idx *Index) Supersedes() restic.IDs {
+	return idx.supersedes
+}
+
+// AddToSupersedes appends ids to the list of indexes superseded by this
+// index. A finalized index is left unchanged and an error is returned.
+func (idx *Index) AddToSupersedes(ids ...restic.ID) error {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	if !idx.final {
+		idx.supersedes = append(idx.supersedes, ids...)
+		return nil
+	}
+
+	return errors.New("index already finalized")
+}
+
+// Each calls fn for every blob known to the index, one blob type after the
+// other. The index mutex is held for the whole iteration, so the index
+// cannot be modified meanwhile. Once ctx is done, fn is not called any more.
+func (idx *Index) Each(ctx context.Context, fn func(restic.PackedBlob)) {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	for i := range idx.byType {
+		blobType := restic.BlobType(i)
+		idx.byType[i].foreach(func(e *indexEntry) bool {
+			cancelled := ctx.Err() != nil
+			if !cancelled {
+				fn(idx.toPackedBlob(e, blobType))
+			}
+			return !cancelled
+		})
+	}
+}
+
+// EachByPackResult is the element type of the channel returned by
+// EachByPack: one pack together with the blobs the index lists for it.
+type EachByPackResult struct {
+	PackID restic.ID
+	Blobs  []restic.Blob
+}
+
+// EachByPack returns a channel that yields all blobs known to the index
+// grouped by packID but ignoring blobs with a packID in packBlacklist for
+// finalized indexes.
+// This filtering is used when rebuilding the index where we need to ignore packs
+// from the finalized index which have been re-read into a non-finalized index.
+// When the context is cancelled, the background goroutine
+// terminates. This blocks any modification of the index.
+//
+// The index mutex is taken before this method returns and is released only
+// when the background goroutine exits, i.e. after the last result has been
+// received from the channel or after ctx has been cancelled.
+func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult {
+	// locked here, unlocked by the goroutine below
+	idx.m.Lock()
+
+	ch := make(chan EachByPackResult)
+
+	go func() {
+		// deferred calls run in reverse order: the channel is closed first,
+		// then the mutex is released
+		defer idx.m.Unlock()
+		defer func() {
+			close(ch)
+		}()
+
+		// pack ID -> one slice of entries per blob type
+		byPack := make(map[restic.ID][][]*indexEntry)
+
+		for typ := range idx.byType {
+			m := &idx.byType[typ]
+			m.foreach(func(e *indexEntry) bool {
+				packID := idx.packs[e.packIndex]
+				// the blacklist only applies to finalized indexes
+				if !idx.final || !packBlacklist.Has(packID) {
+					if _, ok := byPack[packID]; !ok {
+						byPack[packID] = make([][]*indexEntry, restic.NumBlobTypes)
+					}
+					byPack[packID][typ] = append(byPack[packID][typ], e)
+				}
+				return true
+			})
+		}
+
+		// map iteration order is random, so packs are sent in no particular order
+		for packID, packByType := range byPack {
+			var result EachByPackResult
+			result.PackID = packID
+			for typ, pack := range packByType {
+				for _, e := range pack {
+					result.Blobs = append(result.Blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob)
+				}
+			}
+			// allow GC once entry is no longer necessary
+			byPack[packID] = nil
+			select {
+			case <-ctx.Done():
+				return
+			case ch <- result:
+			}
+		}
+	}()
+
+	return ch
+}
+
+// Packs returns the set of IDs of all packs registered in this index.
+func (idx *Index) Packs() restic.IDSet {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	set := restic.NewIDSet()
+	for i := range idx.packs {
+		set.Insert(idx.packs[i])
+	}
+
+	return set
+}
+
+// packJSON is the JSON representation of one pack and the blobs it contains.
+type packJSON struct {
+	ID    restic.ID  `json:"id"`
+	Blobs []blobJSON `json:"blobs"`
+}
+
+// blobJSON is the JSON representation of one blob within a pack.
+// UncompressedLength is left out of the output when it is zero.
+type blobJSON struct {
+	ID                 restic.ID       `json:"id"`
+	Type               restic.BlobType `json:"type"`
+	Offset             uint            `json:"offset"`
+	Length             uint            `json:"length"`
+	UncompressedLength uint            `json:"uncompressed_length,omitempty"`
+}
+
+// generatePackList groups all index entries by pack and returns one packJSON
+// per pack, in the order in which the packs are first encountered. The
+// returned list is never nil and the error is always nil. It panics if an
+// entry refers to a null pack ID. The caller must hold idx.m.
+func (idx *Index) generatePackList() ([]*packJSON, error) {
+	// keep this an empty, non-nil slice: it must not be encoded as JSON null
+	list := []*packJSON{}
+	byID := make(map[restic.ID]*packJSON)
+
+	for i := range idx.byType {
+		blobType := restic.BlobType(i)
+
+		idx.byType[i].foreach(func(e *indexEntry) bool {
+			packID := idx.packs[e.packIndex]
+			if packID.IsNull() {
+				panic("null pack id")
+			}
+
+			debug.Log("handle blob %v", e.id)
+
+			// look up the pack, creating and registering it on first use
+			pack := byID[packID]
+			if pack == nil {
+				pack = &packJSON{ID: packID}
+				byID[packID] = pack
+				list = append(list, pack)
+			}
+
+			pack.Blobs = append(pack.Blobs, blobJSON{
+				ID:                 e.id,
+				Type:               blobType,
+				Offset:             uint(e.offset),
+				Length:             uint(e.length),
+				UncompressedLength: uint(e.uncompressedLength),
+			})
+
+			return true
+		})
+	}
+
+	debug.Log("done")
+
+	return list, nil
+}
+
+// jsonIndex is the top-level JSON structure of an index file in the current
+// format: the optional list of superseded indexes and the list of packs.
+type jsonIndex struct {
+	Supersedes restic.IDs  `json:"supersedes,omitempty"`
+	Packs      []*packJSON `json:"packs"`
+}
+
+// Encode serializes the index, i.e. its superseded index IDs and its packs
+// with their blobs, as JSON and writes the result to w.
+func (idx *Index) Encode(w io.Writer) error {
+	debug.Log("encoding index")
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	packs, err := idx.generatePackList()
+	if err != nil {
+		return err
+	}
+
+	return json.NewEncoder(w).Encode(jsonIndex{
+		Supersedes: idx.supersedes,
+		Packs:      packs,
+	})
+}
+
+// Finalize marks the index as final. Afterwards StorePack panics and
+// AddToSupersedes returns an error.
+func (idx *Index) Finalize() {
+	debug.Log("finalizing index")
+
+	idx.m.Lock()
+	idx.final = true
+	idx.m.Unlock()
+}
+
+// IDs returns the IDs recorded for this index. An error is returned as long
+// as the index has not been finalized.
+func (idx *Index) IDs() (restic.IDs, error) {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	if idx.final {
+		return idx.ids, nil
+	}
+
+	return nil, errors.New("index not finalized")
+}
+
+// SetID records id as the ID under which the index has been written. The
+// index must already be finalized and must not have an ID yet; otherwise an
+// error is returned and the index is left unchanged.
+func (idx *Index) SetID(id restic.ID) error {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	switch {
+	case !idx.final:
+		return errors.New("index is not final")
+	case len(idx.ids) > 0:
+		return errors.New("ID already set")
+	}
+
+	debug.Log("ID set to %v", id)
+	idx.ids = append(idx.ids, id)
+
+	return nil
+}
+
+// Dump writes the pretty-printed JSON representation of the index, followed
+// by a newline, to w. It returns an error if the index cannot be marshalled
+// or if writing to w fails.
+func (idx *Index) Dump(w io.Writer) error {
+	debug.Log("dumping index")
+	idx.m.Lock()
+	defer idx.m.Unlock()
+
+	list, err := idx.generatePackList()
+	if err != nil {
+		return err
+	}
+
+	outer := jsonIndex{
+		// idx.m is held here, so read the field directly (as Encode does)
+		// instead of going through the exported accessor.
+		Supersedes: idx.supersedes,
+		Packs:      list,
+	}
+
+	buf, err := json.MarshalIndent(outer, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	_, err = w.Write(append(buf, '\n'))
+	if err != nil {
+		return errors.Wrap(err, "Write")
+	}
+
+	debug.Log("done")
+
+	return nil
+}
+
+// merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx.
+// During merging exact duplicates are removed;  idx2 is not changed by this method.
+//
+// idx2 must be finalized, otherwise an error is returned and idx is left
+// untouched. Besides the entries, the ids and the supersedes list of idx2
+// are appended to those of idx.
+//
+// NOTE(review): both mutexes are taken, idx first and idx2 second, so
+// idx.merge(idx) would block forever on the second Lock.
+func (idx *Index) merge(idx2 *Index) error {
+	idx.m.Lock()
+	defer idx.m.Unlock()
+	idx2.m.Lock()
+	defer idx2.m.Unlock()
+
+	if !idx2.final {
+		return errors.New("index to merge is not final")
+	}
+
+	// positions of idx2's packs within idx.packs are shifted by this amount
+	packlen := len(idx.packs)
+	// first append packs as they might be accessed when looking for duplicates below
+	idx.packs = append(idx.packs, idx2.packs...)
+
+	// copy all index entries of idx2 to idx
+	for typ := range idx2.byType {
+		m2 := &idx2.byType[typ]
+		m := &idx.byType[typ]
+
+		// helper func to test if identical entry is contained in idx
+		hasIdenticalEntry := func(e2 *indexEntry) (found bool) {
+			m.foreachWithID(e2.id, func(e *indexEntry) {
+				// compare the expanded blobs, so that the pack IDs are
+				// compared rather than the index-specific pack positions
+				b := idx.toPackedBlob(e, restic.BlobType(typ))
+				b2 := idx2.toPackedBlob(e2, restic.BlobType(typ))
+				if b == b2 {
+					found = true
+				}
+			})
+			return found
+		}
+
+		m2.foreach(func(e2 *indexEntry) bool {
+			if !hasIdenticalEntry(e2) {
+				// packIndex needs to be changed as idx2.pack was appended to idx.pack, see above
+				m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength)
+			}
+			return true
+		})
+	}
+
+	idx.ids = append(idx.ids, idx2.ids...)
+	idx.supersedes = append(idx.supersedes, idx2.supersedes...)
+
+	return nil
+}
+
+// isErrOldIndex reports whether err is a JSON type error caused by finding
+// an array where another type was expected, which is what decoding an index
+// in the old format (a bare list of packs) as the current format produces.
+func isErrOldIndex(err error) bool {
+	if typeErr, ok := err.(*json.UnmarshalTypeError); ok {
+		return typeErr.Value == "array"
+	}
+	return false
+}
+
+// DecodeIndex unserializes an index from buf.
+func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err error) {
+	debug.Log("Start decoding index")
+	idxJSON := &jsonIndex{}
+
+	err = json.Unmarshal(buf, idxJSON)
+	if err != nil {
+		debug.Log("Error %v", err)
+
+		if isErrOldIndex(err) {
+			debug.Log("index is probably old format, trying that")
+			idx, err = decodeOldIndex(buf)
+			return idx, err == nil, err
+		}
+
+		return nil, false, errors.Wrap(err, "DecodeIndex")
+	}
+
+	idx = NewIndex()
+	for _, pack := range idxJSON.Packs {
+		packID := idx.addToPacks(pack.ID)
+
+		for _, blob := range pack.Blobs {
+			idx.store(packID, restic.Blob{
+				BlobHandle: restic.BlobHandle{
+					Type: blob.Type,
+					ID:   blob.ID},
+				Offset:             blob.Offset,
+				Length:             blob.Length,
+				UncompressedLength: blob.UncompressedLength,
+			})
+		}
+	}
+	idx.supersedes = idxJSON.Supersedes
+	idx.ids = append(idx.ids, id)
+	idx.final = true
+
+	debug.Log("done")
+	return idx, false, nil
+}
+
+// decodeOldIndex decodes an index in the old format, a plain JSON list of
+// packs, from buf and returns it as a finalized index. The result has
+// neither an ID nor a supersedes list.
+func decodeOldIndex(buf []byte) (idx *Index, err error) {
+	debug.Log("Start decoding old index")
+
+	packs := []*packJSON{}
+	if err = json.Unmarshal(buf, &packs); err != nil {
+		debug.Log("Error %#v", err)
+		return nil, errors.Wrap(err, "Decode")
+	}
+
+	idx = NewIndex()
+	for _, pack := range packs {
+		packIndex := idx.addToPacks(pack.ID)
+
+		for _, b := range pack.Blobs {
+			// the old index format has no uncompressed length
+			idx.store(packIndex, restic.Blob{
+				BlobHandle: restic.BlobHandle{Type: b.Type, ID: b.ID},
+				Offset:     b.Offset,
+				Length:     b.Length,
+			})
+		}
+	}
+	idx.final = true
+
+	debug.Log("done")
+	return idx, nil
+}
--- a/internal/index/index_parallel.go
+++ b/internal/index/index_parallel.go
@@ -0,0 +1,83 @@
+package index
+
+import (
+	"context"
+	"runtime"
+	"sync"
+
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+// ForAllIndexes loads all index files in parallel and calls the given callback.
+// It is guaranteed that the function is not run concurrently. If the callback
+// returns an error, this function is cancelled and also returns that error.
+//
+// Errors from loading or decoding an index file do not abort the run by
+// themselves: they are passed to fn as err, together with a nil index, and
+// fn decides whether to continue.
+func ForAllIndexes(ctx context.Context, repo restic.Repository,
+	fn func(id restic.ID, index *Index, oldFormat bool, err error) error) error {
+
+	debug.Log("Start")
+
+	// FileInfo is what the lister hands to the workers: the ID of an index
+	// file and the size reported by the listing.
+	type FileInfo struct {
+		restic.ID
+		Size int64
+	}
+
+	// m serializes the calls to fn
+	var m sync.Mutex
+
+	// track spawned goroutines using wg, create a new context which is
+	// cancelled as soon as an error occurs.
+	wg, ctx := errgroup.WithContext(ctx)
+
+	ch := make(chan FileInfo)
+	// send list of index files through ch, which is closed afterwards
+	wg.Go(func() error {
+		defer close(ch)
+		return repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case ch <- FileInfo{id, size}:
+			}
+			return nil
+		})
+	})
+
+	// a worker receives an index ID from ch, loads and decodes the index,
+	// and passes the result to fn
+	worker := func() error {
+		// buf is reused for all index files handled by this worker
+		var buf []byte
+		for fi := range ch {
+			debug.Log("worker got file %v", fi.ID.Str())
+			var err error
+			var idx *Index
+			oldFormat := false
+
+			if cap(buf) < int(fi.Size) {
+				// overallocate a bit
+				buf = make([]byte, fi.Size+128*1024)
+			}
+			buf, err = repo.LoadUnpacked(ctx, restic.IndexFile, fi.ID, buf[:0])
+			if err == nil {
+				idx, oldFormat, err = DecodeIndex(buf, fi.ID)
+			}
+
+			// only one worker at a time may run the callback
+			m.Lock()
+			err = fn(fi.ID, idx, oldFormat, err)
+			m.Unlock()
+			if err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	// decoding an index can take quite some time such that this can be both CPU- or IO-bound
+	// as the whole index is kept in memory anyways, a few workers too much don't matter
+	workerCount := int(repo.Connections()) + runtime.GOMAXPROCS(0)
+	// run workers on ch
+	for i := 0; i < workerCount; i++ {
+		wg.Go(worker)
+	}
+
+	return wg.Wait()
+}
--- a/internal/index/index_parallel_test.go
+++ b/internal/index/index_parallel_test.go
@@ -0,0 +1,50 @@
+package index_test
+
+import (
+	"context"
+	"path/filepath"
+	"testing"
+
+	"github.com/restic/restic/internal/errors"
+	"github.com/restic/restic/internal/index"
+	"github.com/restic/restic/internal/repository"
+	"github.com/restic/restic/internal/restic"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// repoFixture is the path of the packed test repository, which lives in the
+// testdata directory of the repository package.
+var repoFixture = filepath.Join("..", "repository", "testdata", "test-repo.tar.gz")
+
+// TestRepositoryForAllIndexes checks that ForAllIndexes reports every index
+// file of the fixture repository without error and that an error returned by
+// the callback is handed back to the caller.
+func TestRepositoryForAllIndexes(t *testing.T) {
+	repodir, cleanup := rtest.Env(t, repoFixture)
+	defer cleanup()
+
+	repo := repository.TestOpenLocal(t, repodir)
+
+	want := restic.NewIDSet()
+	rtest.OK(t, repo.List(context.TODO(), restic.IndexFile, func(id restic.ID, _ int64) error {
+		want.Insert(id)
+		return nil
+	}))
+
+	// every index must be reported, and none of them with an error
+	got := restic.NewIDSet()
+	var loadErr error
+	rtest.OK(t, index.ForAllIndexes(context.TODO(), repo, func(id restic.ID, _ *index.Index, _ bool, err error) error {
+		if err != nil {
+			loadErr = err
+		}
+		got.Insert(id)
+		return nil
+	}))
+	rtest.OK(t, loadErr)
+	rtest.Equals(t, want, got)
+
+	// must fail with the error returned by the callback
+	wantErr := errors.New("error to pass upwards")
+
+	gotErr := index.ForAllIndexes(context.TODO(), repo, func(restic.ID, *index.Index, bool, error) error {
+		return wantErr
+	})
+
+	rtest.Equals(t, wantErr, gotErr)
+}
--- a/internal/index/index_test.go
+++ b/internal/index/index_test.go
@@ -0,0 +1,649 @@
+package index_test
+
+import (
+	"bytes"
+	"context"
+	"math/rand"
+	"sync"
+	"testing"
+
+	"github.com/restic/restic/internal/index"
+	"github.com/restic/restic/internal/restic"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// TestIndexSerialize stores blobs in an index, encodes and decodes it and
+// checks that both the original and the decoded index return every blob
+// unchanged. It then adds more packs, finalizes the index, sets its ID and
+// checks the added blobs after a second encode/decode round trip.
+func TestIndexSerialize(t *testing.T) {
+	tests := []restic.PackedBlob{}
+
+	idx := index.NewIndex()
+
+	// create 50 packs with 20 blobs each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+		var blobs []restic.Blob
+
+		pos := uint(0)
+		for j := 0; j < 20; j++ {
+			length := uint(i*100 + j)
+			uncompressedLength := uint(0)
+			if i >= 25 {
+				// test a mix of compressed and uncompressed packs
+				uncompressedLength = 2 * length
+			}
+			pb := restic.PackedBlob{
+				Blob: restic.Blob{
+					BlobHandle:         restic.NewRandomBlobHandle(),
+					Offset:             pos,
+					Length:             length,
+					UncompressedLength: uncompressedLength,
+				},
+				PackID: packID,
+			}
+			blobs = append(blobs, pb.Blob)
+			tests = append(tests, pb)
+			pos += length
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	wr := bytes.NewBuffer(nil)
+	err := idx.Encode(wr)
+	rtest.OK(t, err)
+
+	idx2ID := restic.NewRandomID()
+	idx2, oldFormat, err := index.DecodeIndex(wr.Bytes(), idx2ID)
+	rtest.OK(t, err)
+	rtest.Assert(t, idx2 != nil,
+		"nil returned for decoded index")
+	rtest.Assert(t, !oldFormat, "new index format recognized as old format")
+	indexID, err := idx2.IDs()
+	rtest.OK(t, err)
+	rtest.Equals(t, indexID, restic.IDs{idx2ID})
+
+	wr2 := bytes.NewBuffer(nil)
+	err = idx2.Encode(wr2)
+	rtest.OK(t, err)
+
+	for _, testBlob := range tests {
+		list := idx.Lookup(testBlob.BlobHandle, nil)
+		// Fatalf, not Errorf: list[0] below would panic on an empty result
+		if len(list) != 1 {
+			t.Fatalf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
+		}
+		result := list[0]
+
+		rtest.Equals(t, testBlob, result)
+
+		list2 := idx2.Lookup(testBlob.BlobHandle, nil)
+		if len(list2) != 1 {
+			t.Fatalf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list2), list2)
+		}
+		result2 := list2[0]
+
+		rtest.Equals(t, testBlob, result2)
+	}
+
+	// add more blobs to idx
+	newtests := []restic.PackedBlob{}
+	for i := 0; i < 10; i++ {
+		packID := restic.NewRandomID()
+		var blobs []restic.Blob
+
+		pos := uint(0)
+		for j := 0; j < 10; j++ {
+			length := uint(i*100 + j)
+			pb := restic.PackedBlob{
+				Blob: restic.Blob{
+					BlobHandle: restic.NewRandomBlobHandle(),
+					Offset:     pos,
+					Length:     length,
+				},
+				PackID: packID,
+			}
+			blobs = append(blobs, pb.Blob)
+			newtests = append(newtests, pb)
+			pos += length
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	// finalize; serialize idx, unserialize to idx3
+	idx.Finalize()
+	wr3 := bytes.NewBuffer(nil)
+	err = idx.Encode(wr3)
+	rtest.OK(t, err)
+
+	rtest.Assert(t, idx.Final(),
+		"index not final after encoding")
+
+	id := restic.NewRandomID()
+	rtest.OK(t, idx.SetID(id))
+	ids, err := idx.IDs()
+	rtest.OK(t, err)
+	rtest.Equals(t, restic.IDs{id}, ids)
+
+	idx3, oldFormat, err := index.DecodeIndex(wr3.Bytes(), id)
+	rtest.OK(t, err)
+	rtest.Assert(t, idx3 != nil,
+		"nil returned for decoded index")
+	rtest.Assert(t, idx3.Final(),
+		"decoded index is not final")
+	rtest.Assert(t, !oldFormat, "new index format recognized as old format")
+
+	// all new blobs must be in the index
+	for _, testBlob := range newtests {
+		list := idx3.Lookup(testBlob.BlobHandle, nil)
+		if len(list) != 1 {
+			t.Fatalf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
+		}
+
+		blob := list[0]
+
+		rtest.Equals(t, testBlob, blob)
+	}
+}
+
+// TestIndexSize fills an index with 200 packs of 100 blobs each and logs the
+// size of its JSON encoding. Only the encoding error is checked.
+func TestIndexSize(t *testing.T) {
+	const (
+		packs     = 200
+		blobCount = 100
+	)
+
+	idx := index.NewIndex()
+
+	for i := 0; i < packs; i++ {
+		packID := restic.NewRandomID()
+		var blobs []restic.Blob
+
+		offset := uint(0)
+		for j := 0; j < blobCount; j++ {
+			size := uint(i*100 + j)
+			blob := restic.Blob{
+				BlobHandle: restic.NewRandomBlobHandle(),
+				Offset:     offset,
+				Length:     size,
+			}
+			blobs = append(blobs, blob)
+			offset += size
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	var wr bytes.Buffer
+	rtest.OK(t, idx.Encode(&wr))
+
+	t.Logf("Index file size for %d blobs in %d packs is %d", blobCount*packs, packs, wr.Len())
+}
+
+// example index serialization from doc/Design.rst
+var docExampleV1 = []byte(`
+{
+  "supersedes": [
+	"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
+  ],
+  "packs": [
+	{
+	  "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
+	  "blobs": [
+		{
+		  "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
+		  "type": "data",
+		  "offset": 0,
+		  "length": 38
+		},{
+		  "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
+		  "type": "tree",
+		  "offset": 38,
+		  "length": 112
+		},
+		{
+		  "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
+		  "type": "data",
+		  "offset": 150,
+		  "length": 123
+		}
+	  ]
+	}
+  ]
+}
+`)
+
+var docExampleV2 = []byte(`
+{
+	"supersedes": [
+	  "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
+	],
+	"packs": [
+	  {
+		"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
+		"blobs": [
+		  {
+			"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
+			"type": "data",
+			"offset": 0,
+			"length": 38
+		  },
+		  {
+			"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
+			"type": "tree",
+			"offset": 38,
+			"length": 112,
+			"uncompressed_length": 511
+		  },
+		  {
+			"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
+			"type": "data",
+			"offset": 150,
+			"length": 123,
+			"uncompressed_length": 234
+		  }
+		]
+	  }
+	]
+  }
+`)
+
+var docOldExample = []byte(`
+[ {
+  "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
+  "blobs": [
+	{
+	  "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
+	  "type": "data",
+	  "offset": 0,
+	  "length": 38
+	},{
+	  "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
+	  "type": "tree",
+	  "offset": 38,
+	  "length": 112
+	},
+	{
+	  "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
+	  "type": "data",
+	  "offset": 150,
+	  "length": 123
+	}
+  ]
+} ]
+`)
+
+// exampleTests lists the blobs the tests expect to find after decoding one of
+// the example indexes: blob ID, pack ID, blob type, offset and length within
+// the pack, and the uncompressed length (only checked for docExampleV2).
+var exampleTests = []struct {
+	id, packID         restic.ID
+	tpe                restic.BlobType
+	offset, length     uint
+	uncompressedLength uint
+}{
+	{
+		restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
+		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
+		restic.DataBlob, 0, 38, 0,
+	}, {
+		restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
+		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
+		restic.TreeBlob, 38, 112, 511,
+	}, {
+		restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
+		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
+		restic.DataBlob, 150, 123, 234,
+	},
+}
+
+// exampleLookupTest names the single pack of the example indexes and maps the
+// ID of each blob expected in that pack to its blob type.
+var exampleLookupTest = struct {
+	packID restic.ID
+	blobs  map[restic.ID]restic.BlobType
+}{
+	restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
+	map[restic.ID]restic.BlobType{
+		restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): restic.DataBlob,
+		restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): restic.TreeBlob,
+		restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): restic.DataBlob,
+	},
+}
+
+// TestIndexUnserialize decodes the example indexes of both current format
+// versions and checks the blobs from exampleTests, the supersedes list and
+// the contents of the example pack. For version 1 every uncompressed length
+// must be zero.
+func TestIndexUnserialize(t *testing.T) {
+	for _, task := range []struct {
+		idxBytes []byte
+		version  int
+	}{
+		{docExampleV1, 1},
+		{docExampleV2, 2},
+	} {
+		oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
+
+		idx, oldFormat, err := index.DecodeIndex(task.idxBytes, restic.NewRandomID())
+		rtest.OK(t, err)
+		rtest.Assert(t, !oldFormat, "new index format recognized as old format")
+
+		for _, test := range exampleTests {
+			list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
+			// Fatalf, not Errorf: list[0] below would panic on an empty result
+			if len(list) != 1 {
+				t.Fatalf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+			}
+			blob := list[0]
+
+			t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
+
+			rtest.Equals(t, test.packID, blob.PackID)
+			rtest.Equals(t, test.tpe, blob.Type)
+			rtest.Equals(t, test.offset, blob.Offset)
+			rtest.Equals(t, test.length, blob.Length)
+			switch task.version {
+			case 1:
+				rtest.Equals(t, uint(0), blob.UncompressedLength)
+			case 2:
+				rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
+			default:
+				t.Fatal("Invalid index version")
+			}
+		}
+
+		rtest.Equals(t, oldIdx, idx.Supersedes())
+
+		blobs := listPack(idx, exampleLookupTest.packID)
+		if len(blobs) != len(exampleLookupTest.blobs) {
+			t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
+		}
+
+		for _, blob := range blobs {
+			b, ok := exampleLookupTest.blobs[blob.ID]
+			if !ok {
+				t.Errorf("unexpected blob %v found", blob.ID.Str())
+			}
+			if blob.Type != b {
+				t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
+			}
+		}
+	}
+}
+
+// listPack returns all blobs of the index that are stored in the pack with
+// the given id.
+func listPack(idx *index.Index, id restic.ID) (pbs []restic.PackedBlob) {
+	collect := func(pb restic.PackedBlob) {
+		if !pb.PackID.Equal(id) {
+			return
+		}
+		pbs = append(pbs, pb)
+	}
+	idx.Each(context.TODO(), collect)
+
+	return pbs
+}
+
+// benchmarkIndexJSON holds the encoded index shared by the decode
+// benchmarks; benchmarkIndexJSONOnce makes sure it is built only once.
+var (
+	benchmarkIndexJSON     []byte
+	benchmarkIndexJSONOnce sync.Once
+)
+
+// initBenchmarkIndexJSON builds a random index of 200000 packs from a fixed
+// seed and stores its JSON encoding in benchmarkIndexJSON. It panics if the
+// index cannot be encoded.
+func initBenchmarkIndexJSON() {
+	idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
+
+	encoded := new(bytes.Buffer)
+	if err := idx.Encode(encoded); err != nil {
+		panic(err)
+	}
+
+	benchmarkIndexJSON = encoded.Bytes()
+}
+
+// BenchmarkDecodeIndex measures decoding of the shared benchmark index on a
+// single goroutine.
+func BenchmarkDecodeIndex(b *testing.B) {
+	// building the JSON is expensive; do it once for all decode benchmarks
+	benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
+
+	id := restic.NewRandomID()
+	// keep the setup above out of the measurement
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		_, _, err := index.DecodeIndex(benchmarkIndexJSON, id)
+		rtest.OK(b, err)
+	}
+}
+
+// BenchmarkDecodeIndexParallel measures decoding of the shared benchmark
+// index from several goroutines at once.
+//
+// NOTE(review): rtest.OK is called from the goroutines started by
+// RunParallel; if it reports failures via b.Fatal/FailNow, that is outside
+// what the testing package permits for such goroutines. Confirm.
+func BenchmarkDecodeIndexParallel(b *testing.B) {
+	benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
+	id := restic.NewRandomID()
+
+	// keep the setup above out of the measurement
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			_, _, err := index.DecodeIndex(benchmarkIndexJSON, id)
+			rtest.OK(b, err)
+		}
+	})
+}
+
+// TestIndexUnserializeOld decodes the example index in the old format and
+// checks that it is recognized as such, contains the blobs from exampleTests
+// and has no supersedes list.
+func TestIndexUnserializeOld(t *testing.T) {
+	idx, oldFormat, err := index.DecodeIndex(docOldExample, restic.NewRandomID())
+	rtest.OK(t, err)
+	rtest.Assert(t, oldFormat, "old index format recognized as new format")
+
+	for _, test := range exampleTests {
+		list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
+		// Fatalf, not Errorf: list[0] below would panic on an empty result
+		if len(list) != 1 {
+			t.Fatalf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+		}
+		blob := list[0]
+
+		rtest.Equals(t, test.packID, blob.PackID)
+		rtest.Equals(t, test.tpe, blob.Type)
+		rtest.Equals(t, test.offset, blob.Offset)
+		rtest.Equals(t, test.length, blob.Length)
+	}
+
+	rtest.Equals(t, 0, len(idx.Supersedes()))
+}
+
+// TestIndexPacks stores 20 packs with one blob each and checks that Packs
+// returns exactly the IDs of these packs.
+func TestIndexPacks(t *testing.T) {
+	idx := index.NewIndex()
+	want := restic.NewIDSet()
+
+	for i := 0; i < 20; i++ {
+		packID := restic.NewRandomID()
+		blob := restic.Blob{
+			BlobHandle: restic.NewRandomBlobHandle(),
+			Offset:     0,
+			Length:     23,
+		}
+		idx.StorePack(packID, []restic.Blob{blob})
+
+		want.Insert(packID)
+	}
+
+	rtest.Assert(t, want.Equals(idx.Packs()), "packs in index do not match packs added to index")
+}
+
+// maxPackSize (16 MiB) is the offset at which createRandomIndex stops adding
+// blobs to a pack.
+const maxPackSize = 16 * 1024 * 1024
+
+// This function generates a (insecure) random ID, similar to NewRandomID
+func NewRandomTestID(rng *rand.Rand) restic.ID {
+	id := restic.ID{}
+	rng.Read(id[:])
+	return id
+}
+
+// createRandomIndex returns an index with the given number of packs. Each
+// pack is filled with data blobs of random size until the running offset
+// reaches maxPackSize; the uncompressed length is set to twice the length.
+// lookupBh is the handle of a randomly chosen blob of the first pack.
+//
+// All randomness comes from rng, so the result depends on the exact order of
+// the draws below.
+func createRandomIndex(rng *rand.Rand, packfiles int) (idx *index.Index, lookupBh restic.BlobHandle) {
+	idx = index.NewIndex()
+
+	// create index with given number of pack files
+	for i := 0; i < packfiles; i++ {
+		packID := NewRandomTestID(rng)
+		var blobs []restic.Blob
+		offset := 0
+		for offset < maxPackSize {
+			// blob sizes range from 2000 bytes up to just below 2000 bytes + 4 MiB
+			size := 2000 + rng.Intn(4*1024*1024)
+			id := NewRandomTestID(rng)
+			blobs = append(blobs, restic.Blob{
+				BlobHandle: restic.BlobHandle{
+					Type: restic.DataBlob,
+					ID:   id,
+				},
+				Length:             uint(size),
+				UncompressedLength: uint(2 * size),
+				Offset:             uint(offset),
+			})
+
+			offset += size
+		}
+		idx.StorePack(packID, blobs)
+
+		// pick the blob to look up from the first pack only
+		if i == 0 {
+			lookupBh = restic.BlobHandle{
+				Type: restic.DataBlob,
+				ID:   blobs[rng.Intn(len(blobs))].ID,
+			}
+		}
+	}
+
+	return idx, lookupBh
+}
+
+// BenchmarkIndexHasUnknown measures Index.Has for a freshly generated random
+// blob handle on an index of 200000 random packs.
+func BenchmarkIndexHasUnknown(b *testing.B) {
+	rng := rand.New(rand.NewSource(0))
+	idx, _ := createRandomIndex(rng, 200000)
+	unknown := restic.NewRandomBlobHandle()
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		idx.Has(unknown)
+	}
+}
+
+// BenchmarkIndexHasKnown measures Index.Has for a blob handle that was stored
+// in the first pack of an index of 200000 random packs.
+func BenchmarkIndexHasKnown(b *testing.B) {
+	rng := rand.New(rand.NewSource(0))
+	idx, known := createRandomIndex(rng, 200000)
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		idx.Has(known)
+	}
+}
+
+// BenchmarkIndexAlloc reports the allocations needed to build an index of
+// 200000 random packs. One generator is shared across all iterations.
+func BenchmarkIndexAlloc(b *testing.B) {
+	b.ReportAllocs()
+	rng := rand.New(rand.NewSource(0))
+
+	for n := b.N; n > 0; n-- {
+		createRandomIndex(rng, 200000)
+	}
+}
+
+// BenchmarkIndexAllocParallel is the parallel variant of BenchmarkIndexAlloc.
+// Each RunParallel goroutine gets its own generator, seeded identically.
+func BenchmarkIndexAllocParallel(b *testing.B) {
+	b.ReportAllocs()
+
+	body := func(pb *testing.PB) {
+		rng := rand.New(rand.NewSource(0))
+		for pb.Next() {
+			createRandomIndex(rng, 200000)
+		}
+	}
+	b.RunParallel(body)
+}
+
+// TestIndexHas fills an index with 50 packs of 20 blobs each and checks that
+// Has reports every stored blob, while rejecting both an unrelated random
+// handle and a stored ID that is queried as a tree blob.
+func TestIndexHas(t *testing.T) {
+	const (
+		numPacks     = 50
+		blobsPerPack = 20
+	)
+
+	idx := index.NewIndex()
+	stored := []restic.PackedBlob{}
+
+	for p := 0; p < numPacks; p++ {
+		packID := restic.NewRandomID()
+		// test a mix of compressed and uncompressed packs
+		compressed := p >= 25
+
+		var blobs []restic.Blob
+		offset := uint(0)
+		for j := 0; j < blobsPerPack; j++ {
+			length := uint(p*100 + j)
+
+			var uncompressedLength uint
+			if compressed {
+				uncompressedLength = 2 * length
+			}
+
+			blob := restic.Blob{
+				BlobHandle:         restic.NewRandomBlobHandle(),
+				Offset:             offset,
+				Length:             length,
+				UncompressedLength: uncompressedLength,
+			}
+			blobs = append(blobs, blob)
+			stored = append(stored, restic.PackedBlob{Blob: blob, PackID: packID})
+
+			offset += length
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	for i := range stored {
+		rtest.Assert(t, idx.Has(stored[i].BlobHandle), "Index reports not having data blob added to it")
+	}
+
+	unrelated := restic.NewRandomBlobHandle()
+	rtest.Assert(t, !idx.Has(unrelated), "Index reports having a data blob not added to it")
+
+	wrongType := restic.BlobHandle{ID: stored[0].ID, Type: restic.TreeBlob}
+	rtest.Assert(t, !idx.Has(wrongType), "Index reports having a tree blob added to it with the same id as a data blob")
+}
+
+// TestMixedEachByPack stores 50 packs that each hold one data blob and one
+// tree blob, then checks that EachByPack reports every pack exactly once
+// with both blobs attached.
+func TestMixedEachByPack(t *testing.T) {
+	idx := index.NewIndex()
+	expected := make(map[restic.ID]int)
+
+	// create 50 packs with 2 blobs each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+		expected[packID] = 1
+
+		dataBlob := restic.Blob{
+			BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
+			Offset:     0,
+			Length:     42,
+		}
+		treeBlob := restic.Blob{
+			BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()},
+			Offset:     42,
+			Length:     43,
+		}
+		idx.StorePack(packID, []restic.Blob{dataBlob, treeBlob})
+	}
+
+	reported := make(map[restic.ID]int)
+	for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) {
+		reported[bp.PackID]++
+
+		rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count
+
+		// The two blobs may be reported in either order; sort by offset.
+		b0, b1 := bp.Blobs[0], bp.Blobs[1]
+		if b0.Offset > b1.Offset {
+			b0, b1 = b1, b0
+		}
+		rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
+		rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1)
+	}
+	rtest.Equals(t, expected, reported)
+}
+
+// TestEachByPackIgnoes stores 50 single-blob packs, finalizes the index and
+// checks that EachByPack reports every pack exactly once, except for the
+// first three, which are passed in as the set of packs to ignore.
+func TestEachByPackIgnoes(t *testing.T) {
+	idx := index.NewIndex()
+
+	expected := make(map[restic.ID]int)
+	ignores := restic.NewIDSet()
+
+	// create 50 packs with one blob each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+		switch {
+		case i < 3:
+			ignores.Insert(packID)
+		default:
+			expected[packID] = 1
+		}
+
+		blob := restic.Blob{
+			BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
+			Offset:     0,
+			Length:     42,
+		}
+		idx.StorePack(packID, []restic.Blob{blob})
+	}
+	idx.Finalize()
+
+	reported := make(map[restic.ID]int)
+	for bp := range idx.EachByPack(context.TODO(), ignores) {
+		reported[bp.PackID]++
+		rtest.Equals(t, 1, len(bp.Blobs)) // correct blob count
+
+		only := bp.Blobs[0]
+		rtest.Assert(t, only.Type == restic.DataBlob && only.Offset == 0 && only.Length == 42, "wrong blob", only)
+	}
+	rtest.Equals(t, expected, reported)
+}
--- a/internal/index/indexmap.go
+++ b/internal/index/indexmap.go
@@ -0,0 +1,170 @@
+package index
+
+import (
+	"hash/maphash"
+
+	"github.com/restic/restic/internal/restic"
+)
+
+// An indexMap is a chained hash table that maps blob IDs to indexEntries.
+// It allows storing multiple entries with the same key.
+//
+// indexMap uses some optimizations that are not compatible with supporting
+// deletions.
+//
+// The buckets in this hash table contain only pointers, rather than inlined
+// key-value pairs like the standard Go map. This way, only a pointer array
+// needs to be resized when the table grows, preventing memory usage spikes.
+//
+// The zero value is an empty map that is ready to use.
+type indexMap struct {
+	// buckets holds the head of each chain. It is empty until the first add;
+	// from then on the number of buckets is always a power of two, which
+	// hash relies on when masking the hash value.
+	buckets    []*indexEntry
+	numentries uint // Number of entries stored, duplicates included.
+
+	mh maphash.Hash // Reused by hash for every key.
+
+	free *indexEntry // Free list.
+}
+
+// Tuning parameters of indexMap.
+const (
+	// growthFactor is the factor by which grow multiplies the bucket count.
+	growthFactor = 2 // Must be a power of 2.
+	// maxLoad is the average chain length at which add triggers grow.
+	maxLoad = 4 // Max. number of entries per bucket.
+)
+
+// add stores a new indexEntry built from the given arguments under the key
+// id. An existing entry with the same id is kept; the new entry is placed in
+// front of it in the bucket's chain.
+func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
+	if m.numentries == 0 {
+		// Lazy initialization.
+		m.init()
+	} else if m.numentries >= maxLoad*uint(len(m.buckets)) {
+		m.grow()
+	}
+
+	entry := m.newEntry()
+	entry.id = id
+	entry.packIndex = packIdx
+	entry.offset = offset
+	entry.length = length
+	entry.uncompressedLength = uncompressedLength
+
+	// Prepend to existing chain.
+	slot := m.hash(id)
+	entry.next = m.buckets[slot]
+	m.buckets[slot] = entry
+
+	m.numentries++
+}
+
+// foreach invokes fn once per entry in the map, bucket by bucket, and stops
+// as soon as fn returns false.
+func (m *indexMap) foreach(fn func(*indexEntry) bool) {
+	for _, head := range m.buckets {
+		for e := head; e != nil; e = e.next {
+			if !fn(e) {
+				return
+			}
+		}
+	}
+}
+
+// foreachWithID invokes fn for every entry stored under id, including
+// duplicates. It does nothing on a map without buckets.
+func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
+	if len(m.buckets) == 0 {
+		return
+	}
+
+	for e := m.buckets[m.hash(id)]; e != nil; e = e.next {
+		if e.id == id {
+			fn(e)
+		}
+	}
+}
+
+// get returns the first entry found in id's chain whose key equals id, or
+// nil if the map holds no such entry.
+func (m *indexMap) get(id restic.ID) *indexEntry {
+	if len(m.buckets) == 0 {
+		return nil
+	}
+
+	e := m.buckets[m.hash(id)]
+	for e != nil && e.id != id {
+		e = e.next
+	}
+	return e
+}
+
+// grow replaces the bucket array with one growthFactor times as large and
+// relinks every existing entry into its new bucket. The entries themselves
+// are reused; only the pointer array is reallocated.
+func (m *indexMap) grow() {
+	prev := m.buckets
+	m.buckets = make([]*indexEntry, growthFactor*len(prev))
+
+	for _, chain := range prev {
+		for chain != nil {
+			// Detach the head before relinking it, since relinking
+			// overwrites its next pointer.
+			e := chain
+			chain = e.next
+
+			slot := m.hash(e.id)
+			e.next = m.buckets[slot]
+			m.buckets[slot] = e
+		}
+	}
+}
+
+// hash returns the index of the bucket that id belongs to. It requires a
+// non-empty bucket array whose length is a power of two.
+func (m *indexMap) hash(id restic.ID) uint {
+	// We use maphash to prevent backups of specially crafted inputs
+	// from degrading performance.
+	// While SHA-256 should be collision-resistant, for hash table indices
+	// we use only a few bits of it and finding collisions for those is
+	// much easier than breaking the whole algorithm.
+	m.mh.Reset()
+	_, _ = m.mh.Write(id[:])
+
+	// The bucket count is a power of two, so masking selects the low bits.
+	mask := uint(len(m.buckets) - 1)
+	return uint(m.mh.Sum64()) & mask
+}
+
+// init allocates the initial bucket array. add calls it when the map holds
+// no entries yet.
+func (m *indexMap) init() {
+	const initialBuckets = 64
+	m.buckets = make([]*indexEntry, initialBuckets)
+}
+
+// len returns the number of entries in the map, counting duplicates.
+func (m *indexMap) len() uint { return m.numentries }
+
+// newEntry returns an indexEntry for the caller to fill in. It is taken from
+// the free list when possible; otherwise a new batch is allocated, its first
+// element is returned and the rest becomes the free list. The next field of
+// the returned entry is not cleared, so the caller must set it.
+func (m *indexMap) newEntry() *indexEntry {
+	// We keep a free list of objects to speed up allocation and GC.
+	// There's an obvious trade-off here: allocating in larger batches
+	// means we allocate faster and the GC has to keep fewer bits to track
+	// what we have in use, but it means we waste some space.
+	//
+	// Then again, allocating each indexEntry separately also wastes space
+	// on 32-bit platforms, because the Go malloc has no size class for
+	// exactly 52 bytes, so it puts the indexEntry in a 64-byte slot instead.
+	// See src/runtime/sizeclasses.go in the Go source repo.
+	//
+	// The batch size of 4 means we hit the size classes for 4×64=256 bytes
+	// (64-bit) and 4×52=208 bytes (32-bit), wasting nothing in malloc on
+	// 64-bit and relatively little on 32-bit.
+	const entryAllocBatch = 4
+
+	if e := m.free; e != nil {
+		m.free = e.next
+		return e
+	}
+
+	batch := new([entryAllocBatch]indexEntry)
+	// batch[0] is handed out right away; batch[1:] is chained together to
+	// form the new free list. The last element keeps its nil next pointer.
+	for i := len(batch) - 1; i > 1; i-- {
+		batch[i-1].next = &batch[i]
+	}
+	m.free = &batch[1]
+
+	return &batch[0]
+}
+
+// indexEntry is one element of an indexMap: the key (id), the link to the
+// next entry of the same bucket or free list, and the blob's location.
+type indexEntry struct {
+	id                 restic.ID   // Key of the entry.
+	next               *indexEntry // Next entry in the chain; nil at the end.
+	packIndex          int         // Position in containing Index's packs field.
+	offset             uint32
+	length             uint32
+	uncompressedLength uint32
+}
--- a/internal/index/indexmap_test.go
+++ b/internal/index/indexmap_test.go
@@ -0,0 +1,130 @@
+package index
+
+import (
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/restic/restic/internal/restic"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// TestIndexMapBasic adds 400 random IDs to an indexMap. For each it checks
+// that the ID is absent before the add, present afterwards, and that len
+// matches the number of adds so far.
+func TestIndexMapBasic(t *testing.T) {
+	t.Parallel()
+
+	var m indexMap
+	rng := rand.New(rand.NewSource(98765))
+
+	for want := uint(1); want <= 400; want++ {
+		var id restic.ID
+		rng.Read(id[:])
+		rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id)
+
+		m.add(id, 0, 0, 0, 0)
+		rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id)
+		rtest.Equals(t, want, m.len())
+	}
+}
+
+// TestIndexMapForeach checks that foreach tolerates an empty map, visits
+// every stored entry with the values it was added with, and stops after the
+// first call when the callback returns false.
+func TestIndexMapForeach(t *testing.T) {
+	t.Parallel()
+
+	const N = 10
+
+	var m indexMap
+
+	// Don't crash on empty map.
+	m.foreach(func(*indexEntry) bool { return true })
+
+	// Entry n is keyed by an ID whose first byte is n.
+	for n := 0; n < N; n++ {
+		var id restic.ID
+		id[0] = byte(n)
+		m.add(id, n, uint32(n), uint32(n), uint32(n/2))
+	}
+
+	seen := map[int]struct{}{}
+	m.foreach(func(e *indexEntry) bool {
+		n := int(e.id[0])
+		rtest.Assert(t, n < N, "unknown id %v in indexMap", e.id)
+		rtest.Equals(t, n, e.packIndex)
+		rtest.Equals(t, n, int(e.length))
+		rtest.Equals(t, n, int(e.offset))
+		rtest.Equals(t, n/2, int(e.uncompressedLength))
+
+		seen[n] = struct{}{}
+		return true
+	})
+
+	rtest.Equals(t, N, len(seen))
+
+	calls := 0
+	m.foreach(func(*indexEntry) bool {
+		calls++
+		return false
+	})
+	rtest.Equals(t, 1, calls)
+}
+
+// TestIndexMapForeachWithID checks that foreachWithID yields nothing on an
+// empty map and yields every duplicate of an ID once the map also contains
+// 100 unrelated entries.
+func TestIndexMapForeachWithID(t *testing.T) {
+	t.Parallel()
+
+	const ndups = 3
+
+	var m indexMap
+	rng := rand.New(rand.NewSource(1234321))
+
+	var id restic.ID
+	rng.Read(id[:])
+
+	// No result (and no crash) for empty map.
+	hits := 0
+	m.foreachWithID(id, func(*indexEntry) { hits++ })
+	rtest.Equals(t, 0, hits)
+
+	// Test insertion and retrieval of duplicates. Each duplicate carries
+	// its own pack index so it can be told apart below.
+	for dup := 0; dup < ndups; dup++ {
+		m.add(id, dup, 0, 0, 0)
+	}
+
+	for k := 0; k < 100; k++ {
+		var otherid restic.ID
+		rng.Read(otherid[:])
+		m.add(otherid, -1, 0, 0, 0)
+	}
+
+	var packs [ndups]bool
+	hits = 0
+	m.foreachWithID(id, func(e *indexEntry) {
+		packs[e.packIndex] = true
+		hits++
+	})
+	rtest.Equals(t, ndups, hits)
+
+	for i, found := range packs {
+		rtest.Assert(t, found, "duplicate from pack %d not retrieved", i)
+	}
+}
+
+// BenchmarkIndexMapHash measures the throughput of indexMap.hash over a
+// fixed set of 128 random IDs.
+func BenchmarkIndexMapHash(b *testing.B) {
+	var m indexMap
+	m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.
+
+	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
+	ids := make([]restic.ID, 128) // 4 KiB.
+	for i := range ids {
+		rng.Read(ids[i][:])
+	}
+
+	b.ReportAllocs()
+	b.SetBytes(int64(len(restic.ID{}) * len(ids)))
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		for _, id := range ids {
+			m.hash(id)
+		}
+	}
+}
--- a/internal/index/master_index.go
+++ b/internal/index/master_index.go
@@ -0,0 +1,445 @@
+package index
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"runtime"
+	"sync"
+
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/ui/progress"
+	"golang.org/x/sync/errgroup"
+)
+
+// MasterIndex is a collection of indexes and IDs of chunks that are in the process of being saved.
+type MasterIndex struct {
+	idx          []*Index       // idx[0] is always a final index that others are merged into.
+	pendingBlobs restic.BlobSet // Blobs announced via AddPending but not yet stored in a pack.
+	idxMutex     sync.RWMutex   // Taken by the methods that access idx and pendingBlobs.
+	compress     bool           // Set by MarkCompressed; passed on to IndexFull.
+}
+
+// NewMasterIndex returns a master index that has no pending blobs and holds
+// a single empty, finalized index.
+func NewMasterIndex() *MasterIndex {
+	// Always add an empty final index, such that MergeFinalIndexes can merge into this.
+	// Note that removing this index could lead to a race condition in the rare
+	// situation that only two indexes exist which are saved and merged concurrently.
+	first := NewIndex()
+	first.Finalize()
+
+	return &MasterIndex{
+		idx:          []*Index{first},
+		pendingBlobs: restic.NewBlobSet(),
+	}
+}
+
+// MarkCompressed sets the compress flag, which Save and finalizeFullIndexes
+// pass to IndexFull when deciding whether an index is full.
+// NOTE(review): the flag is written without taking idxMutex; presumably this
+// is called before the index is shared between goroutines. Confirm.
+func (mi *MasterIndex) MarkCompressed() {
+	mi.compress = true
+}
+
+// Lookup asks every known index for bh and returns the accumulated matches.
+// The result is nil if no index appends a match.
+func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	for i := range mi.idx {
+		pbs = mi.idx[i].Lookup(bh, pbs)
+	}
+	return pbs
+}
+
+// LookupSize asks the known indexes for bh, in order, and returns the size
+// reported by the first one that contains it. The boolean is false if no
+// index knows the blob.
+func (mi *MasterIndex) LookupSize(bh restic.BlobHandle) (uint, bool) {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	for _, idx := range mi.idx {
+		size, found := idx.LookupSize(bh)
+		if !found {
+			continue
+		}
+		return size, found
+	}
+
+	return 0, false
+}
+
+// AddPending registers bh as a pending blob unless it is already known,
+// i.e. already pending or contained in one of the indexes.
+// It returns true if bh was added and false if it was already known.
+func (mi *MasterIndex) AddPending(bh restic.BlobHandle) bool {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	// Check if blob is pending or in index
+	known := mi.pendingBlobs.Has(bh)
+	for i := 0; !known && i < len(mi.idx); i++ {
+		known = mi.idx[i].Has(bh)
+	}
+	if known {
+		return false
+	}
+
+	// really not known -> insert
+	mi.pendingBlobs.Insert(bh)
+	return true
+}
+
+// Has reports whether bh is known to the master index: either it is pending
+// or at least one of the indexes contains it.
+func (mi *MasterIndex) Has(bh restic.BlobHandle) bool {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	// also return true if blob is pending
+	if mi.pendingBlobs.Has(bh) {
+		return true
+	}
+
+	for i := range mi.idx {
+		if mi.idx[i].Has(bh) {
+			return true
+		}
+	}
+	return false
+}
+
+// IDs returns the set of IDs of all final indexes contained in the master
+// index. Indexes that are not final, or whose IDs cannot be determined, are
+// left out.
+func (mi *MasterIndex) IDs() restic.IDSet {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	ids := restic.NewIDSet()
+	for _, idx := range mi.idx {
+		if !idx.Final() {
+			continue
+		}
+
+		indexIDs, err := idx.IDs()
+		if err != nil {
+			debug.Log("not using index, ID() returned error %v", err)
+			continue
+		}
+		ids.Merge(restic.NewIDSet(indexIDs...))
+	}
+	return ids
+}
+
+// Packs returns all packs that are covered by the index.
+// If packBlacklist is given, those packs are only contained in the
+// resulting IDSet if they are contained in a non-final (newly written) index.
+func (mi *MasterIndex) Packs(packBlacklist restic.IDSet) restic.IDSet {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	packs := restic.NewIDSet()
+	for _, idx := range mi.idx {
+		idxPacks := idx.Packs()
+		// Query the state through Final(), like every other method of
+		// MasterIndex does, instead of reading the field directly.
+		if idx.Final() && len(packBlacklist) > 0 {
+			idxPacks = idxPacks.Sub(packBlacklist)
+		}
+		packs.Merge(idxPacks)
+	}
+
+	return packs
+}
+
+// Insert adds a new index to the MasterIndex.
+// The index is appended after all existing ones; it is not checked for
+// duplicates or for being final.
+func (mi *MasterIndex) Insert(idx *Index) {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	mi.idx = append(mi.idx, idx)
+}
+
+// StorePack remembers the id and pack in the index.
+func (mi *MasterIndex) StorePack(id restic.ID, blobs []restic.Blob) {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	// delete blobs from pending
+	for _, blob := range blobs {
+		mi.pendingBlobs.Delete(restic.BlobHandle{Type: blob.Type, ID: blob.ID})
+	}
+
+	for _, idx := range mi.idx {
+		if !idx.Final() {
+			idx.StorePack(id, blobs)
+			return
+		}
+	}
+
+	newIdx := NewIndex()
+	newIdx.StorePack(id, blobs)
+	mi.idx = append(mi.idx, newIdx)
+}
+
+// finalizeNotFinalIndexes finalizes every index that is not final yet and
+// returns the indexes it finalized.
+func (mi *MasterIndex) finalizeNotFinalIndexes() []*Index {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	var list []*Index
+
+	for _, idx := range mi.idx {
+		if idx.Final() {
+			continue
+		}
+		idx.Finalize()
+		list = append(list, idx)
+	}
+
+	debug.Log("return %d indexes", len(list))
+	return list
+}
+
+// finalizeFullIndexes finalizes every non-final index that IndexFull
+// considers full and returns the indexes it finalized.
+func (mi *MasterIndex) finalizeFullIndexes() []*Index {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	var list []*Index
+
+	debug.Log("checking %d indexes", len(mi.idx))
+	for _, idx := range mi.idx {
+		switch {
+		case idx.Final():
+			// Already finalized, nothing to do.
+		case IndexFull(idx, mi.compress):
+			debug.Log("index %p is full", idx)
+			idx.Finalize()
+			list = append(list, idx)
+		default:
+			debug.Log("index %p not full", idx)
+		}
+	}
+
+	debug.Log("return %d indexes", len(list))
+	return list
+}
+
+// Each runs fn on all blobs known to the index, one index after the other.
+// The context is passed on to each index's own iteration. The read lock is
+// held for the whole call, which blocks any modification of the index.
+func (mi *MasterIndex) Each(ctx context.Context, fn func(restic.PackedBlob)) {
+	mi.idxMutex.RLock()
+	defer mi.idxMutex.RUnlock()
+
+	for _, idx := range mi.idx {
+		idx.Each(ctx, fn)
+	}
+}
+
+// MergeFinalIndexes merges all final indexes together.
+// After calling, there will be only one big final index in MasterIndex
+// containing all final index contents.
+// Indexes that are not final are left untouched.
+// This merging can only be called after all index files are loaded - as
+// removing of superseded index contents is only possible for unmerged indexes.
+//
+// It returns a wrapped error if merging one of the indexes fails.
+func (mi *MasterIndex) MergeFinalIndexes() error {
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	// The first index is always final and the one to merge into
+	// newIdx shares its backing array with mi.idx. It never holds more than
+	// i elements while slot i is processed, so the appends below only
+	// overwrite slots that have already been read.
+	newIdx := mi.idx[:1]
+	for i := 1; i < len(mi.idx); i++ {
+		idx := mi.idx[i]
+		// clear reference in masterindex as it may become stale
+		mi.idx[i] = nil
+		// do not merge indexes that have no id set
+		// The error from IDs is dropped on purpose: only the number of IDs
+		// returned decides whether the index is merged or kept.
+		ids, _ := idx.IDs()
+		if !idx.Final() || len(ids) == 0 {
+			newIdx = append(newIdx, idx)
+		} else {
+			err := mi.idx[0].merge(idx)
+			if err != nil {
+				// NOTE(review): on this early return mi.idx is not replaced
+				// and the failed slot (and any merged slot not yet
+				// overwritten) stays nil - confirm callers treat this
+				// error as fatal.
+				return fmt.Errorf("MergeFinalIndexes: %w", err)
+			}
+		}
+	}
+	mi.idx = newIdx
+
+	return nil
+}
+
+// Save saves all known indexes to index files, leaving out any
+// packs whose ID is contained in packBlacklist from finalized indexes.
+// The new index contains the IDs of all known indexes in the "supersedes"
+// field. The IDs are also returned in the IDSet obsolete.
+// After calling this function, you should remove the obsolete index files.
+//
+// The IDs in extraObsolete are added to the supersedes field of the last
+// index written and to the returned set. p is advanced by one for every pack
+// stored in a new index. The first error hit while collecting or saving
+// indexes is returned.
+func (mi *MasterIndex) Save(ctx context.Context, repo restic.SaverUnpacked, packBlacklist restic.IDSet, extraObsolete restic.IDs, p *progress.Counter) (obsolete restic.IDSet, err error) {
+	p.SetMax(uint64(len(mi.Packs(packBlacklist))))
+
+	mi.idxMutex.Lock()
+	defer mi.idxMutex.Unlock()
+
+	debug.Log("start rebuilding index of %d indexes, pack blacklist: %v", len(mi.idx), packBlacklist)
+
+	newIndex := NewIndex()
+	obsolete = restic.NewIDSet()
+
+	// track spawned goroutines using wg, create a new context which is
+	// cancelled as soon as an error occurs.
+	wg, ctx := errgroup.WithContext(ctx)
+
+	ch := make(chan *Index)
+
+	// The producer repacks the contents of all indexes into new indexes and
+	// hands every full one (and the final, partially filled one) to the
+	// workers. It only uses goroutine-local error variables, so the named
+	// result err is written exactly once, by wg.Wait below.
+	wg.Go(func() error {
+		defer close(ch)
+		for i, idx := range mi.idx {
+			if idx.Final() {
+				ids, err := idx.IDs()
+				if err != nil {
+					debug.Log("index %d does not have an ID: %v", i, err)
+					return err
+				}
+
+				debug.Log("adding index ids %v to supersedes field", ids)
+
+				err = newIndex.AddToSupersedes(ids...)
+				if err != nil {
+					return err
+				}
+				obsolete.Merge(restic.NewIDSet(ids...))
+			} else {
+				debug.Log("index %d isn't final, don't add to supersedes field", i)
+			}
+
+			debug.Log("adding index %d", i)
+
+			for pbs := range idx.EachByPack(ctx, packBlacklist) {
+				newIndex.StorePack(pbs.PackID, pbs.Blobs)
+				p.Add(1)
+				if IndexFull(newIndex, mi.compress) {
+					select {
+					case ch <- newIndex:
+					case <-ctx.Done():
+						return ctx.Err()
+					}
+					newIndex = NewIndex()
+				}
+			}
+		}
+
+		if err := newIndex.AddToSupersedes(extraObsolete...); err != nil {
+			return err
+		}
+		obsolete.Merge(restic.NewIDSet(extraObsolete...))
+
+		select {
+		case ch <- newIndex:
+		case <-ctx.Done():
+		}
+		return nil
+	})
+
+	// a worker receives an index from ch, and saves the index
+	worker := func() error {
+		for idx := range ch {
+			idx.Finalize()
+			if _, err := SaveIndex(ctx, repo, idx); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	// encoding an index can take quite some time such that this can be both CPU- or IO-bound
+	workerCount := int(repo.Connections()) + runtime.GOMAXPROCS(0)
+	// run workers on ch
+	for i := 0; i < workerCount; i++ {
+		wg.Go(worker)
+	}
+	err = wg.Wait()
+
+	return obsolete, err
+}
+
+// SaveIndex encodes index, saves it in the repository as an index file and
+// records the resulting ID in index. It returns that ID.
+//
+// If encoding or saving fails, the error is returned together with the zero
+// ID and no ID is set on index. SaveIndex panics if the ID cannot be set
+// after a successful save, as that indicates a logic bug.
+func SaveIndex(ctx context.Context, repo restic.SaverUnpacked, index *Index) (restic.ID, error) {
+	buf := bytes.NewBuffer(nil)
+
+	err := index.Encode(buf)
+	if err != nil {
+		return restic.ID{}, err
+	}
+
+	id, err := repo.SaveUnpacked(ctx, restic.IndexFile, buf.Bytes())
+	if err != nil {
+		// Nothing was stored: do not attach a meaningless ID to the index.
+		return restic.ID{}, err
+	}
+
+	if ierr := index.SetID(id); ierr != nil {
+		// logic bug
+		panic(ierr)
+	}
+	return id, nil
+}
+
+// saveIndex saves the given indexes in the backend, one after the other, and
+// then merges all final indexes. It stops at the first index that cannot be
+// saved and returns that error without merging.
+func (mi *MasterIndex) saveIndex(ctx context.Context, r restic.SaverUnpacked, indexes ...*Index) error {
+	for i := range indexes {
+		debug.Log("Saving index %d", i)
+
+		sid, err := SaveIndex(ctx, r, indexes[i])
+		if err != nil {
+			return err
+		}
+
+		debug.Log("Saved index %d as %v", i, sid)
+	}
+
+	return mi.MergeFinalIndexes()
+}
+
+// SaveIndex saves all new indexes in the backend.
+// Every index that is not final yet is finalized and saved; afterwards the
+// final indexes are merged.
+func (mi *MasterIndex) SaveIndex(ctx context.Context, r restic.SaverUnpacked) error {
+	return mi.saveIndex(ctx, r, mi.finalizeNotFinalIndexes()...)
+}
+
+// SaveFullIndex saves all full indexes in the backend.
+// Only non-final indexes that are full are finalized and saved; afterwards
+// the final indexes are merged.
+func (mi *MasterIndex) SaveFullIndex(ctx context.Context, r restic.SaverUnpacked) error {
+	return mi.saveIndex(ctx, r, mi.finalizeFullIndexes()...)
+}
+
+// ListPacks returns a channel that yields, for each pack in packs, the blobs
+// the index knows for it. A pack without any blob in the index is yielded
+// with a nil blob list. The channel is closed once all packs were sent or
+// ctx is cancelled.
+func (mi *MasterIndex) ListPacks(ctx context.Context, packs restic.IDSet) <-chan restic.PackBlobs {
+	out := make(chan restic.PackBlobs)
+
+	go func() {
+		defer close(out)
+
+		// only resort a part of the index to keep the memory overhead bounded
+		// Packs are handled in 16 rounds, selected by the low four bits of
+		// the first byte of their ID.
+		for nibble := byte(0); nibble < 16; nibble++ {
+			if ctx.Err() != nil {
+				return
+			}
+
+			selected := make(map[restic.ID][]restic.Blob)
+			for id := range packs {
+				if id[0]&0xf == nibble {
+					selected[id] = nil
+				}
+			}
+			if len(selected) == 0 {
+				continue
+			}
+
+			// selected holds exactly the requested packs of this round, so
+			// membership in it is the filter.
+			mi.Each(ctx, func(pb restic.PackedBlob) {
+				if blobs, ok := selected[pb.PackID]; ok {
+					selected[pb.PackID] = append(blobs, pb.Blob)
+				}
+			})
+
+			// pass on packs
+			for packID, blobs := range selected {
+				// allow GC
+				selected[packID] = nil
+				select {
+				case out <- restic.PackBlobs{PackID: packID, Blobs: blobs}:
+				case <-ctx.Done():
+					return
+				}
+			}
+		}
+	}()
+
+	return out
+}
--- a/internal/index/master_index_test.go
+++ b/internal/index/master_index_test.go
@@ -0,0 +1,398 @@
+package index_test
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/restic/restic/internal/checker"
+	"github.com/restic/restic/internal/crypto"
+	"github.com/restic/restic/internal/index"
+	"github.com/restic/restic/internal/repository"
+	"github.com/restic/restic/internal/restic"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// TestMasterIndex inserts two indexes into a master index and checks Has,
+// Lookup and LookupSize for a blob that exists only in the first index, one
+// that exists only in the second, one that exists in both, and for unknown
+// blobs.
+func TestMasterIndex(t *testing.T) {
+	bhInIdx1 := restic.NewRandomBlobHandle()
+	bhInIdx2 := restic.NewRandomBlobHandle()
+	bhInIdx12 := restic.BlobHandle{ID: restic.NewRandomID(), Type: restic.TreeBlob}
+
+	blob1 := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle: bhInIdx1,
+			Length:     uint(crypto.CiphertextLength(10)),
+			Offset:     0,
+		},
+	}
+
+	blob2 := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle:         bhInIdx2,
+			Length:             uint(crypto.CiphertextLength(100)),
+			Offset:             10,
+			UncompressedLength: 200,
+		},
+	}
+
+	blob12a := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle: bhInIdx12,
+			Length:     uint(crypto.CiphertextLength(123)),
+			Offset:     110,
+		},
+	}
+
+	blob12b := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle:         bhInIdx12,
+			Length:             uint(crypto.CiphertextLength(123)),
+			Offset:             50,
+			UncompressedLength: 80,
+		},
+	}
+
+	idx1 := index.NewIndex()
+	idx1.StorePack(blob1.PackID, []restic.Blob{blob1.Blob})
+	idx1.StorePack(blob12a.PackID, []restic.Blob{blob12a.Blob})
+
+	idx2 := index.NewIndex()
+	idx2.StorePack(blob2.PackID, []restic.Blob{blob2.Blob})
+	idx2.StorePack(blob12b.PackID, []restic.Blob{blob12b.Blob})
+
+	mIdx := index.NewMasterIndex()
+	mIdx.Insert(idx1)
+	mIdx.Insert(idx2)
+
+	// test idInIdx1
+	found := mIdx.Has(bhInIdx1)
+	rtest.Equals(t, true, found)
+
+	blobs := mIdx.Lookup(bhInIdx1)
+	rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
+
+	size, found := mIdx.LookupSize(bhInIdx1)
+	rtest.Equals(t, true, found)
+	rtest.Equals(t, uint(10), size)
+
+	// test idInIdx2
+	found = mIdx.Has(bhInIdx2)
+	rtest.Equals(t, true, found)
+
+	blobs = mIdx.Lookup(bhInIdx2)
+	rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
+
+	size, found = mIdx.LookupSize(bhInIdx2)
+	rtest.Equals(t, true, found)
+	rtest.Equals(t, uint(200), size)
+
+	// test idInIdx12
+	found = mIdx.Has(bhInIdx12)
+	rtest.Equals(t, true, found)
+
+	blobs = mIdx.Lookup(bhInIdx12)
+	rtest.Equals(t, 2, len(blobs))
+
+	// test Lookup result for blob12a; the order of the results is not fixed
+	found = blobs[0] == blob12a || blobs[1] == blob12a
+	rtest.Assert(t, found, "blob12a not found in result")
+
+	// test Lookup result for blob12b
+	found = blobs[0] == blob12b || blobs[1] == blob12b
+	rtest.Assert(t, found, "blob12b not found in result")
+
+	size, found = mIdx.LookupSize(bhInIdx12)
+	rtest.Equals(t, true, found)
+	rtest.Equals(t, uint(123), size)
+
+	// test not in index
+	found = mIdx.Has(restic.BlobHandle{ID: restic.NewRandomID(), Type: restic.TreeBlob})
+	rtest.Assert(t, !found, "Expected no blobs when fetching with a random id")
+	blobs = mIdx.Lookup(restic.NewRandomBlobHandle())
+	rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
+	_, found = mIdx.LookupSize(restic.NewRandomBlobHandle())
+	rtest.Assert(t, !found, "Expected no blobs when fetching with a random id")
+}
+
+// TestMasterMergeFinalIndexes checks that merging leaves a single index in
+// the master index that still contains all blobs, and that merging a further
+// index with identical blobs does not add duplicate entries.
+func TestMasterMergeFinalIndexes(t *testing.T) {
+	bhInIdx1 := restic.NewRandomBlobHandle()
+	bhInIdx2 := restic.NewRandomBlobHandle()
+
+	blob1 := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle: bhInIdx1,
+			Length:     10,
+			Offset:     0,
+		},
+	}
+
+	blob2 := restic.PackedBlob{
+		PackID: restic.NewRandomID(),
+		Blob: restic.Blob{
+			BlobHandle:         bhInIdx2,
+			Length:             100,
+			Offset:             10,
+			UncompressedLength: 200,
+		},
+	}
+
+	idx1 := index.NewIndex()
+	idx1.StorePack(blob1.PackID, []restic.Blob{blob1.Blob})
+
+	idx2 := index.NewIndex()
+	idx2.StorePack(blob2.PackID, []restic.Blob{blob2.Blob})
+
+	mIdx := index.NewMasterIndex()
+	mIdx.Insert(idx1)
+	mIdx.Insert(idx2)
+
+	// Both inserted indexes get finalized; after the merge only one index
+	// remains in the master index.
+	finalIndexes, idxCount := index.TestMergeIndex(t, mIdx)
+	rtest.Equals(t, []*index.Index{idx1, idx2}, finalIndexes)
+	rtest.Equals(t, 1, idxCount)
+
+	blobCount := 0
+	mIdx.Each(context.TODO(), func(pb restic.PackedBlob) {
+		blobCount++
+	})
+	rtest.Equals(t, 2, blobCount)
+
+	blobs := mIdx.Lookup(bhInIdx1)
+	rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
+
+	blobs = mIdx.Lookup(bhInIdx2)
+	rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
+
+	blobs = mIdx.Lookup(restic.NewRandomBlobHandle())
+	rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
+
+	// merge another index containing identical blobs
+	idx3 := index.NewIndex()
+	idx3.StorePack(blob1.PackID, []restic.Blob{blob1.Blob})
+	idx3.StorePack(blob2.PackID, []restic.Blob{blob2.Blob})
+
+	mIdx.Insert(idx3)
+	// Only idx3 is newly finalized this time.
+	finalIndexes, idxCount = index.TestMergeIndex(t, mIdx)
+	rtest.Equals(t, []*index.Index{idx3}, finalIndexes)
+	rtest.Equals(t, 1, idxCount)
+
+	// Index should have same entries as before!
+	blobs = mIdx.Lookup(bhInIdx1)
+	rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
+
+	blobs = mIdx.Lookup(bhInIdx2)
+	rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
+
+	blobCount = 0
+	mIdx.Each(context.TODO(), func(pb restic.PackedBlob) {
+		blobCount++
+	})
+	rtest.Equals(t, 2, blobCount)
+}
+
+func createRandomMasterIndex(t testing.TB, rng *rand.Rand, num, size int) (*index.MasterIndex, restic.BlobHandle) {
+	mIdx := index.NewMasterIndex()
+	for i := 0; i < num-1; i++ {
+		idx, _ := createRandomIndex(rng, size)
+		mIdx.Insert(idx)
+	}
+	idx1, lookupBh := createRandomIndex(rng, size)
+	mIdx.Insert(idx1)
+
+	index.TestMergeIndex(t, mIdx)
+
+	return mIdx, lookupBh
+}
+
+// BenchmarkMasterIndexAlloc reports the allocations needed to build and
+// merge a master index made of 10000 indexes with 5 packs each.
+func BenchmarkMasterIndexAlloc(b *testing.B) {
+	b.ReportAllocs()
+	rng := rand.New(rand.NewSource(0))
+
+	for n := b.N; n > 0; n-- {
+		createRandomMasterIndex(b, rng, 10000, 5)
+	}
+}
+
+// BenchmarkMasterIndexLookupSingleIndex measures Lookup of a stored blob in
+// a master index built from one index of 200000 packs.
+func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
+	rng := rand.New(rand.NewSource(0))
+	mIdx, known := createRandomMasterIndex(b, rng, 1, 200000)
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		mIdx.Lookup(known)
+	}
+}
+
+// BenchmarkMasterIndexLookupMultipleIndex measures Lookup of a stored blob
+// in a master index built from 100 indexes of 10000 packs each.
+func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) {
+	rng := rand.New(rand.NewSource(0))
+	mIdx, known := createRandomMasterIndex(b, rng, 100, 10000)
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		mIdx.Lookup(known)
+	}
+}
+
+// BenchmarkMasterIndexLookupSingleIndexUnknown measures Lookup of a freshly
+// generated random blob handle in a master index built from one index of
+// 200000 packs.
+func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) {
+	unknown := restic.NewRandomBlobHandle()
+	rng := rand.New(rand.NewSource(0))
+	mIdx, _ := createRandomMasterIndex(b, rng, 1, 200000)
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		mIdx.Lookup(unknown)
+	}
+}
+
+// BenchmarkMasterIndexLookupMultipleIndexUnknown measures Lookup of a
+// freshly generated random blob handle in a master index built from 100
+// indexes of 10000 packs each.
+func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) {
+	unknown := restic.NewRandomBlobHandle()
+	rng := rand.New(rand.NewSource(0))
+	mIdx, _ := createRandomMasterIndex(b, rng, 100, 10000)
+
+	b.ResetTimer()
+
+	for n := b.N; n > 0; n-- {
+		mIdx.Lookup(unknown)
+	}
+}
+
+func BenchmarkMasterIndexLookupParallel(b *testing.B) {
+	for _, numindices := range []int{25, 50, 100} {
+		var lookupBh restic.BlobHandle
+
+		b.StopTimer()
+		rng := rand.New(rand.NewSource(0))
+		mIdx, lookupBh := createRandomMasterIndex(b, rng, numindices, 10000)
+		b.StartTimer()
+
+		name := fmt.Sprintf("known,indices=%d", numindices)
+		b.Run(name, func(b *testing.B) {
+			b.RunParallel(func(pb *testing.PB) {
+				for pb.Next() {
+					mIdx.Lookup(lookupBh)
+				}
+			})
+		})
+
+		lookupBh = restic.NewRandomBlobHandle()
+		name = fmt.Sprintf("unknown,indices=%d", numindices)
+		b.Run(name, func(b *testing.B) {
+			b.RunParallel(func(pb *testing.PB) {
+				for pb.Next() {
+					mIdx.Lookup(lookupBh)
+				}
+			})
+		})
+	}
+}
+
+// BenchmarkMasterIndexLookupBlobSize measures LookupSize of a stored blob in
+// a master index built from 5 indexes of 200000 packs each.
+func BenchmarkMasterIndexLookupBlobSize(b *testing.B) {
+	// Pass the generator directly, as the sibling benchmarks do; wrapping
+	// it in another rand.New adds nothing.
+	rng := rand.New(rand.NewSource(0))
+	mIdx, lookupBh := createRandomMasterIndex(b, rng, 5, 200000)
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		mIdx.LookupSize(lookupBh)
+	}
+}
+
+// BenchmarkMasterIndexEach measures a full iteration over all blobs of a
+// master index built from 5 indexes of 200000 packs each.
+func BenchmarkMasterIndexEach(b *testing.B) {
+	// Pass the generator directly, as the sibling benchmarks do; wrapping
+	// it in another rand.New adds nothing.
+	rng := rand.New(rand.NewSource(0))
+	mIdx, _ := createRandomMasterIndex(b, rng, 5, 200000)
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// The counter gives the callback some work to do per blob.
+		entries := 0
+		mIdx.Each(context.TODO(), func(pb restic.PackedBlob) {
+			entries++
+		})
+	}
+}
+
+// Parameters for the snapshots created by createFilledRepo.
+var (
+	// snapshotTime is the timestamp of the first snapshot; each following
+	// snapshot is one second later.
+	snapshotTime = time.Unix(1470492820, 207401672)
+	// depth is passed to restic.TestCreateSnapshot for every snapshot.
+	depth = 3
+)
+
+// createFilledRepo creates a test repository of the given version and fills
+// it with the requested number of test snapshots, taken one second apart
+// starting at snapshotTime. dup is passed on to restic.TestCreateSnapshot.
+// The returned function cleans up the repository.
+func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
+
+	// Honor the snapshots argument instead of a hard-coded count.
+	for i := 0; i < snapshots; i++ {
+		restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup)
+	}
+
+	return repo, cleanup
+}
+
+// TestIndexSave runs testIndexSave through repository.TestAllVersions.
+func TestIndexSave(t *testing.T) {
+	repository.TestAllVersions(t, testIndexSave)
+}
+
+// testIndexSave rewrites the index of a filled repository with Save, removes
+// the index files reported as obsolete and then runs the checker to verify
+// that the repository can still be loaded and is structurally intact.
+func testIndexSave(t *testing.T, version uint) {
+	repo, cleanup := createFilledRepo(t, 3, 0, version)
+	defer cleanup()
+
+	if err := repo.LoadIndex(context.TODO()); err != nil {
+		t.Fatal(err)
+	}
+
+	obsoletes, err := repo.Index().Save(context.TODO(), repo, nil, nil, nil)
+	if err != nil {
+		t.Fatalf("unable to save new index: %v", err)
+	}
+
+	for id := range obsoletes {
+		t.Logf("remove index %v", id.Str())
+		h := restic.Handle{Type: restic.IndexFile, Name: id.String()}
+		if err := repo.Backend().Remove(context.TODO(), h); err != nil {
+			t.Errorf("error removing index %v: %v", id, err)
+		}
+	}
+
+	chkr := checker.New(repo, false)
+	if err := chkr.LoadSnapshots(context.TODO()); err != nil {
+		t.Error(err)
+	}
+
+	hints, errs := chkr.LoadIndex(context.TODO())
+	for _, hint := range hints {
+		t.Logf("hint: %v\n", hint)
+	}
+	for _, err := range errs {
+		t.Errorf("checker found error: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.TODO())
+	defer cancel()
+
+	errCh := make(chan error)
+	go chkr.Structure(ctx, nil, errCh)
+
+	// Report at most ten structure errors, then cancel the check.
+	reported := 0
+	for err := range errCh {
+		t.Errorf("checker returned error: %v", err)
+		reported++
+		if reported != 10 {
+			continue
+		}
+		t.Errorf("more than 10 errors returned, skipping the rest")
+		cancel()
+		break
+	}
+}
--- a/internal/index/testing.go
+++ b/internal/index/testing.go
@@ -0,0 +1,18 @@
+package index
+
+import (
+	"testing"
+
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/test"
+)
+
+// TestMergeIndex is a test helper that finalizes all non-final indexes of
+// mi, assigns each of them a random ID and merges the final indexes. It
+// returns the indexes it finalized and the number of indexes left in mi
+// after the merge.
+func TestMergeIndex(t testing.TB, mi *MasterIndex) ([]*Index, int) {
+	finalized := mi.finalizeNotFinalIndexes()
+	for i := range finalized {
+		err := finalized[i].SetID(restic.NewRandomID())
+		test.OK(t, err)
+	}
+
+	err := mi.MergeFinalIndexes()
+	test.OK(t, err)
+
+	return finalized, len(mi.idx)
+}