Moves files

This commit is contained in:
Alexander Neumann
2017-07-23 14:19:13 +02:00
parent d1bd160b0a
commit 83d1a46526
284 changed files with 0 additions and 0 deletions

292
internal/index/index.go Normal file
View File

@@ -0,0 +1,292 @@
// Package index contains various data structures for indexing content in a repository or backend.
package index
import (
"context"
"fmt"
"os"
"restic"
"restic/debug"
"restic/list"
"restic/pack"
"restic/worker"
"restic/errors"
)
// Pack contains information about the contents of a pack.
type Pack struct {
// ID is the identifier of the pack.
ID restic.ID
// Size is the size value handed to AddPack for this pack. Load passes 0
// here, New passes the size reported by the pack lister.
Size int64
// Entries lists the blobs stored in the pack.
Entries []restic.Blob
}
// Index contains information about blobs and packs stored in a repo.
type Index struct {
// Packs maps a pack ID to the recorded contents of that pack.
Packs map[restic.ID]Pack
// IndexIDs holds the IDs of the index files this Index was built from. It
// is filled by Load; New leaves it empty.
IndexIDs restic.IDSet
}
// newIndex returns an empty Index whose pack map and index ID set are
// allocated and ready for use.
func newIndex() *Index {
	idx := new(Index)
	idx.Packs = make(map[restic.ID]Pack)
	idx.IndexIDs = restic.NewIDSet()
	return idx
}
// New creates a new index for repo from scratch by processing the jobs that
// list.AllPacks delivers for the repository (ignorePacks is handed through to
// it unchanged). Progress is reported to p, one blob stat per received job.
//
// invalidFiles contains the IDs of all packs whose listing failed with a
// pack.InvalidFileError. Packs that fail for any other reason are reported on
// stderr and skipped. If a pack cannot be added to the index, New returns a
// nil index, nil invalidFiles and the error.
func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, p *restic.Progress) (idx *Index, invalidFiles restic.IDs, err error) {
	p.Start()
	defer p.Done()

	// Cancel the lister when returning early; otherwise it may stay blocked
	// forever sending on the unbuffered channel that nobody reads anymore.
	// NOTE(review): this relies on list.AllPacks honouring ctx — confirm.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	ch := make(chan worker.Job)
	go list.AllPacks(ctx, repo, ignorePacks, ch)

	idx = newIndex()

	for job := range ch {
		p.Report(restic.Stat{Blobs: 1})

		packID := job.Data.(restic.ID)
		if job.Error != nil {
			cause := errors.Cause(job.Error)
			if _, ok := cause.(pack.InvalidFileError); ok {
				invalidFiles = append(invalidFiles, packID)
				continue
			}

			fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error)
			continue
		}

		j := job.Result.(list.Result)

		debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries()))

		// Scoped error variable: avoids shadowing the named result.
		if addErr := idx.AddPack(packID, j.Size(), j.Entries()); addErr != nil {
			return nil, nil, addErr
		}
	}

	return idx, invalidFiles, nil
}
// packJSON is the JSON representation of one pack inside an index file: the
// pack's ID and the blobs it contains.
type packJSON struct {
ID restic.ID `json:"id"`
Blobs []blobJSON `json:"blobs"`
}
// blobJSON is the JSON representation of one blob entry of a pack. Its fields
// are copied one-to-one from/to the ID, Type, Offset and Length fields of
// restic.Blob by Load and Save.
type blobJSON struct {
ID restic.ID `json:"id"`
Type restic.BlobType `json:"type"`
Offset uint `json:"offset"`
Length uint `json:"length"`
}
// indexJSON is the JSON representation of a complete index file.
type indexJSON struct {
// Supersedes lists the IDs of the index files replaced by this one; the
// key is omitted from the JSON output when the list is empty.
Supersedes restic.IDs `json:"supersedes,omitempty"`
// Packs lists the packs described by this index file.
Packs []*packJSON `json:"packs"`
}
// loadIndexJSON reads the index file with the given id from repo and decodes
// it into an indexJSON. If loading fails, the repository's error is returned
// unchanged together with a nil result.
func loadIndexJSON(ctx context.Context, repo restic.Repository, id restic.ID) (*indexJSON, error) {
	debug.Log("process index %v\n", id.Str())

	decoded := new(indexJSON)
	if err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, decoded); err != nil {
		return nil, err
	}

	return decoded, nil
}
// Load creates an index by reading every index file listed in repo and
// merging the packs they describe into a single Index. Progress is reported
// to p, one blob stat per index file.
//
// Loading stops with an error when an index file cannot be loaded or when a
// pack is listed more than once (AddPack rejects duplicates). Index files
// that are superseded by another loaded index file are only reported (debug
// log and stderr); their packs have already been merged at that point.
func Load(ctx context.Context, repo restic.Repository, p *restic.Progress) (*Index, error) {
	debug.Log("loading indexes")

	p.Start()
	defer p.Done()

	// supersededBy maps an index ID to the set of index IDs it declares as
	// superseded; loaded has one (empty) entry per index file read so far.
	supersededBy := make(map[restic.ID]restic.IDSet)
	loaded := make(map[restic.ID]map[restic.ID]Pack)

	combined := newIndex()

	for indexID := range repo.List(ctx, restic.IndexFile) {
		p.Report(restic.Stat{Blobs: 1})

		debug.Log("Load index %v", indexID.Str())
		parsed, err := loadIndexJSON(ctx, repo, indexID)
		if err != nil {
			return nil, err
		}

		placeholder := make(map[restic.ID]Pack)

		replaced := restic.NewIDSet()
		supersededBy[indexID] = replaced
		for _, oldID := range parsed.Supersedes {
			debug.Log(" index %v supersedes %v", indexID.Str(), oldID)
			replaced.Insert(oldID)
		}

		for _, jp := range parsed.Packs {
			blobs := make([]restic.Blob, len(jp.Blobs))
			for i, jb := range jp.Blobs {
				blobs[i] = restic.Blob{
					ID:     jb.ID,
					Type:   jb.Type,
					Offset: jb.Offset,
					Length: jb.Length,
				}
			}

			// The index file does not carry pack sizes, so 0 is recorded.
			if err = combined.AddPack(jp.ID, 0, blobs); err != nil {
				return nil, err
			}
		}

		loaded[indexID] = placeholder
		combined.IndexIDs.Insert(indexID)
	}

	for newerID, replacedIDs := range supersededBy {
		for olderID := range replacedIDs {
			if _, present := loaded[olderID]; present {
				debug.Log(" removing index %v, superseded by %v", olderID.Str(), newerID.Str())
				fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", olderID.Str(), newerID.Str())
				delete(loaded, olderID)
			}
		}
	}

	return combined, nil
}
// AddPack registers the pack id with the given size and blob entries in the
// index. If a pack with the same ID is already present, the index is left
// untouched and an error is returned.
func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
	_, known := idx.Packs[id]
	if known {
		return errors.Errorf("pack %v already present in the index", id.Str())
	}

	added := Pack{
		ID:      id,
		Size:    size,
		Entries: entries,
	}
	idx.Packs[id] = added

	return nil
}
// RemovePack drops the pack id from the index. An error is returned when the
// index holds no pack with that ID.
func (idx *Index) RemovePack(id restic.ID) error {
	if _, known := idx.Packs[id]; known {
		delete(idx.Packs, id)
		return nil
	}

	return errors.Errorf("pack %v not found in the index", id.Str())
}
// DuplicateBlobs returns the set of blobs (identified by ID and type) that
// occur more than once among the entries of all packs in the index.
func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
	dups = restic.NewBlobSet()
	visited := restic.NewBlobSet()

	for _, pk := range idx.Packs {
		for i := range pk.Entries {
			handle := restic.BlobHandle{
				ID:   pk.Entries[i].ID,
				Type: pk.Entries[i].Type,
			}

			// A handle that was visited before is a duplicate.
			if visited.Has(handle) {
				dups.Insert(handle)
			}
			visited.Insert(handle)
		}
	}

	return dups
}
// PacksForBlobs returns the set of packs in which the blobs are contained: a
// pack is included as soon as at least one of its entries (matched by ID and
// type) is a member of blobs.
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
	packs = restic.NewIDSet()

	for id, p := range idx.Packs {
		for _, entry := range p.Entries {
			if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
				packs.Insert(id)
				// One hit is enough to select the pack; scanning the
				// remaining entries could only insert the same ID again.
				break
			}
		}
	}

	return packs
}
// Location describes the location of a blob in a pack.
type Location struct {
// PackID is the ID of the pack the blob was found in.
PackID restic.ID
// Blob is embedded, so the entry's fields (ID, Type, Offset, Length) can
// be read directly from a Location.
restic.Blob
}
// ErrBlobNotFound is returned by FindBlob when the blob could not be found in
// the index.
var ErrBlobNotFound = errors.New("blob not found in index")
// FindBlob looks up every occurrence of the blob h (matched by ID and type)
// in the index and returns one Location, i.e. pack ID plus blob entry, per
// occurrence. When no pack contains the blob, the result is nil and the
// error is ErrBlobNotFound.
func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) {
	for packID, pk := range idx.Packs {
		for _, blob := range pk.Entries {
			if !blob.ID.Equal(h.ID) || blob.Type != h.Type {
				continue
			}

			result = append(result, Location{PackID: packID, Blob: blob})
		}
	}

	if len(result) > 0 {
		return result, nil
	}

	return nil, ErrBlobNotFound
}
// Save writes the complete index to the repo. It collects the entries of
// every pack in the index and hands them, together with supersedes, to the
// package-level Save, whose result is returned.
func (idx *Index) Save(ctx context.Context, repo restic.Repository, supersedes restic.IDs) (restic.ID, error) {
	contents := make(map[restic.ID][]restic.Blob, len(idx.Packs))
	for packID := range idx.Packs {
		contents[packID] = idx.Packs[packID].Entries
	}

	return Save(ctx, repo, contents, supersedes)
}
// Save writes a new index containing the given packs to repo. The index
// records supersedes as the list of index files it replaces. The ID and
// error returned by repo.SaveJSONUnpacked are passed back to the caller.
func Save(ctx context.Context, repo restic.Repository, packs map[restic.ID][]restic.Blob, supersedes restic.IDs) (restic.ID, error) {
	jsonPacks := make([]*packJSON, 0, len(packs))

	for packID, blobs := range packs {
		jsonBlobs := make([]blobJSON, len(blobs))
		for i, blob := range blobs {
			jsonBlobs[i] = blobJSON{
				ID:     blob.ID,
				Type:   blob.Type,
				Offset: blob.Offset,
				Length: blob.Length,
			}
		}

		jsonPacks = append(jsonPacks, &packJSON{ID: packID, Blobs: jsonBlobs})
	}

	out := &indexJSON{
		Supersedes: supersedes,
		Packs:      jsonPacks,
	}

	return repo.SaveJSONUnpacked(ctx, restic.IndexFile, out)
}

View File

@@ -0,0 +1,403 @@
package index
import (
"context"
"math/rand"
"restic"
"restic/checker"
"restic/repository"
"restic/test"
"testing"
"time"
)
// Fixed parameters for the snapshots created by createFilledRepo.
var (
// snapshotTime is the timestamp of the first test snapshot; each further
// snapshot is created one second later than the previous one.
snapshotTime = time.Unix(1470492820, 207401672)
// depth is handed to restic.TestCreateSnapshot as its depth argument.
depth = 3
)
// createFilledRepo returns a test repository filled with the requested number
// of snapshots, together with the cleanup function of the repository, which
// the caller is expected to run when done.
//
// Snapshot i is created at snapshotTime plus i seconds; depth and dup are
// handed to restic.TestCreateSnapshot unchanged.
func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) {
	repo, cleanup := repository.TestRepository(t)

	// Honour the snapshots argument instead of a hard-coded count of 3.
	for i := 0; i < snapshots; i++ {
		restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup)
	}

	return repo, cleanup
}
// validateIndex reports a test error for every data file listed by repo that
// is missing from idx.Packs, or whose index entry carries a different ID than
// the key it is stored under.
func validateIndex(t testing.TB, repo restic.Repository, idx *Index) {
	for id := range repo.List(context.TODO(), restic.DataFile) {
		p, ok := idx.Packs[id]
		if !ok {
			t.Errorf("pack %v missing from index", id.Str())
			// p is the zero value here; comparing its ID would only add a
			// second, misleading error for the same pack.
			continue
		}

		if !p.ID.Equal(id) {
			t.Errorf("pack %v has invalid ID: want %v, got %v", id.Str(), id, p.ID)
		}
	}
}
// TestIndexNew checks that New returns a non-nil index without error for a
// filled repository and that this index passes validateIndex.
func TestIndexNew(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0)
	defer cleanup()

	built, _, err := New(context.TODO(), repo, restic.NewIDSet(), nil)
	switch {
	case err != nil:
		t.Fatalf("New() returned error %v", err)
	case built == nil:
		t.Fatalf("New() returned nil index")
	}

	validateIndex(t, repo, built)
}
// TestIndexLoad compares the index obtained via Load with the one built via
// New for the same repository: both must pass validateIndex, list the same
// number of packs, and every blob entry of a pack in the New index must have
// a matching entry (ID, type, offset, length) in the loaded index.
func TestIndexLoad(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0)
	defer cleanup()

	loadIdx, err := Load(context.TODO(), repo, nil)
	switch {
	case err != nil:
		t.Fatalf("Load() returned error %v", err)
	case loadIdx == nil:
		t.Fatalf("Load() returned nil index")
	}

	validateIndex(t, repo, loadIdx)

	newIdx, _, err := New(context.TODO(), repo, restic.NewIDSet(), nil)
	if err != nil {
		t.Fatalf("New() returned error %v", err)
	}

	if nLoad, nNew := len(loadIdx.Packs), len(newIdx.Packs); nLoad != nNew {
		t.Errorf("number of packs does not match: want %v, got %v",
			nLoad, nNew)
	}

	validateIndex(t, repo, newIdx)

	// hasEntry reports whether haystack holds an entry equal to want in ID,
	// type, offset and length.
	hasEntry := func(haystack []restic.Blob, want restic.Blob) bool {
		for _, have := range haystack {
			if have.ID.Equal(want.ID) &&
				have.Type == want.Type &&
				have.Offset == want.Offset &&
				have.Length == want.Length {
				return true
			}
		}
		return false
	}

	for packID, fromNew := range newIdx.Packs {
		fromLoad, listed := loadIdx.Packs[packID]
		if !listed {
			t.Errorf("loaded index does not list pack %v", packID.Str())
			continue
		}

		if len(fromNew.Entries) != len(fromLoad.Entries) {
			t.Errorf(" number of entries in pack %v does not match: %d != %d\n %v\n %v",
				packID.Str(), len(fromNew.Entries), len(fromLoad.Entries),
				fromNew.Entries, fromLoad.Entries)
			continue
		}

		for _, entryNew := range fromNew.Entries {
			if !hasEntry(fromLoad.Entries, entryNew) {
				t.Errorf("blob not found in loaded index: %v", entryNew)
			}
		}
	}
}
// BenchmarkIndexNew measures building an index with New for a filled test
// repository; repository setup is excluded from the timing.
func BenchmarkIndexNew(b *testing.B) {
	repo, cleanup := createFilledRepo(b, 3, 0)
	defer cleanup()

	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		built, _, err := New(context.TODO(), repo, restic.NewIDSet(), nil)
		switch {
		case err != nil:
			b.Fatalf("New() returned error %v", err)
		case built == nil:
			b.Fatalf("New() returned nil index")
		}

		b.Logf("idx %v packs", len(built.Packs))
	}
}
// BenchmarkIndexSave measures Index.Save for an index of 8000 synthetic packs
// with 200 random blob entries each. Building the index is excluded from the
// timing.
func BenchmarkIndexSave(b *testing.B) {
	repo, cleanup := repository.TestRepository(b)
	defer cleanup()

	idx, _, err := New(context.TODO(), repo, restic.NewIDSet(), nil)
	test.OK(b, err)

	for i := 0; i < 8000; i++ {
		entries := make([]restic.Blob, 0, 200)
		for j := 0; j < cap(entries); j++ {
			entries = append(entries, restic.Blob{
				ID:     restic.NewRandomID(),
				Length: 1000,
				Offset: 5,
				Type:   restic.DataBlob,
			})
		}

		// A rejected pack would silently shrink the benchmarked index, so
		// fail loudly instead of dropping the error.
		test.OK(b, idx.AddPack(restic.NewRandomID(), 10000, entries))
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		id, err := idx.Save(context.TODO(), repo, nil)
		if err != nil {
			b.Fatalf("Save() returned error %v", err)
		}

		b.Logf("saved as %v", id.Str())
	}
}
// TestIndexDuplicateBlobs builds an index for a repository created with a
// non-zero dup argument and expects DuplicateBlobs to find at least one
// duplicate and PacksForBlobs to find at least one pack holding them.
func TestIndexDuplicateBlobs(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0.01)
	defer cleanup()

	built, _, err := New(context.TODO(), repo, restic.NewIDSet(), nil)
	if err != nil {
		t.Fatal(err)
	}

	duplicates := built.DuplicateBlobs()
	if nDups := len(duplicates); nDups == 0 {
		t.Errorf("no duplicate blobs found")
	}
	t.Logf("%d packs, %d duplicate blobs", len(built.Packs), len(duplicates))

	affected := built.PacksForBlobs(duplicates)
	if nPacks := len(affected); nPacks == 0 {
		t.Errorf("no packs with duplicate blobs found")
	}
	t.Logf("%d packs with duplicate blobs", len(affected))
}
// loadIndex loads the index of repo via Load and aborts the test with a fatal
// error when loading fails.
func loadIndex(t testing.TB, repo restic.Repository) *Index {
	loaded, err := Load(context.TODO(), repo, nil)
	if err == nil {
		return loaded
	}

	t.Fatalf("Load() returned error %v", err)
	return loaded
}
// TestSave saves a random subset of the loaded packs as a new index that
// supersedes all previously loaded index files, removes those old index files
// from the backend, and verifies that reloading yields exactly the saved
// packs.
func TestSave(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0)
	defer cleanup()

	idx := loadIndex(t, repo)

	// Keep roughly half of the packs, chosen at random.
	packs := make(map[restic.ID][]restic.Blob)
	for id := range idx.Packs {
		if rand.Float32() < 0.5 {
			packs[id] = idx.Packs[id].Entries
		}
	}

	t.Logf("save %d/%d packs in a new index\n", len(packs), len(idx.Packs))

	id, err := Save(context.TODO(), repo, packs, idx.IndexIDs.List())
	if err != nil {
		t.Fatalf("unable to save new index: %v", err)
	}

	t.Logf("new index saved as %v", id.Str())

	for id := range idx.IndexIDs {
		t.Logf("remove index %v", id.Str())
		h := restic.Handle{Type: restic.IndexFile, Name: id.String()}
		err = repo.Backend().Remove(context.TODO(), h)
		if err != nil {
			t.Errorf("error removing index %v: %v", id, err)
		}
	}

	idx2 := loadIndex(t, repo)
	t.Logf("load new index with %d packs", len(idx2.Packs))

	if len(idx2.Packs) != len(packs) {
		t.Errorf("wrong number of packs in new index, want %d, got %d", len(packs), len(idx2.Packs))
	}

	// Every saved pack must show up in the reloaded index ...
	for id := range packs {
		if _, ok := idx2.Packs[id]; !ok {
			t.Errorf("pack %v is not contained in new index", id.Str())
		}
	}

	// ... and the reloaded index must not list anything that was not saved.
	for id := range idx2.Packs {
		if _, ok := packs[id]; !ok {
			t.Errorf("pack %v in new index was not among the saved packs", id.Str())
		}
	}
}
// TestIndexSave saves the complete loaded index as a new index file that
// supersedes the existing ones, removes the old index files from the backend,
// reloads the index and lets the checker load it, failing on every error the
// checker reports.
func TestIndexSave(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0)
	defer cleanup()

	idx := loadIndex(t, repo)

	newID, err := idx.Save(context.TODO(), repo, idx.IndexIDs.List())
	if err != nil {
		t.Fatalf("unable to save new index: %v", err)
	}

	t.Logf("new index saved as %v", newID.Str())

	for oldID := range idx.IndexIDs {
		t.Logf("remove index %v", oldID.Str())

		handle := restic.Handle{Type: restic.IndexFile, Name: oldID.String()}
		if err = repo.Backend().Remove(context.TODO(), handle); err != nil {
			t.Errorf("error removing index %v: %v", oldID, err)
		}
	}

	reloaded := loadIndex(t, repo)
	t.Logf("load new index with %d packs", len(reloaded.Packs))

	// Named chkr so the imported checker package is not shadowed.
	chkr := checker.New(repo)
	hints, errs := chkr.LoadIndex(context.TODO())
	for _, hint := range hints {
		t.Logf("hint: %v\n", hint)
	}

	for _, checkErr := range errs {
		t.Errorf("checker found error: %v", checkErr)
	}
}
// TestIndexAddRemovePack removes one pack from a loaded index and verifies
// that neither the pack nor any of its blobs can be found in the index
// afterwards.
func TestIndexAddRemovePack(t *testing.T) {
	repo, cleanup := createFilledRepo(t, 3, 0)
	defer cleanup()

	idx, err := Load(context.TODO(), repo, nil)
	if err != nil {
		t.Fatalf("Load() returned error %v", err)
	}

	// Only the first listed pack is needed; cancel the listing afterwards so
	// its producer is not left blocked on the channel.
	// NOTE(review): assumes repo.List stops when its context is cancelled — confirm.
	listCtx, cancel := context.WithCancel(context.TODO())
	packID := <-repo.List(listCtx, restic.DataFile)
	cancel()

	t.Logf("selected pack %v", packID.Str())

	blobs := idx.Packs[packID].Entries

	// A failed removal would make the checks below meaningless.
	if err := idx.RemovePack(packID); err != nil {
		t.Fatalf("RemovePack() returned error %v", err)
	}

	if _, ok := idx.Packs[packID]; ok {
		t.Errorf("removed pack %v found in index.Packs", packID.Str())
	}

	for _, blob := range blobs {
		h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
		_, err := idx.FindBlob(h)
		if err == nil {
			t.Errorf("removed blob %v found in index", h)
		}
	}
}
// docExample is the example index serialization from doc/Design.rst: an index
// that supersedes one other index and describes a single pack holding three
// blobs (data, tree, data). TestIndexLoadDocReference stores it as an index
// file and looks up the third blob.
var docExample = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 25
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 100
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
}
]
}
]
}
`)
// TestIndexLoadDocReference stores docExample as an index file in an empty
// test repository, loads the index and checks that the data blob d3dc577b…
// is found exactly once with the type, offset and length given in the
// example.
func TestIndexLoadDocReference(t *testing.T) {
	repo, cleanup := repository.TestRepository(t)
	defer cleanup()

	id, err := repo.SaveUnpacked(context.TODO(), restic.IndexFile, docExample)
	if err != nil {
		t.Fatalf("SaveUnpacked() returned error %v", err)
	}

	t.Logf("index saved as %v", id.Str())

	idx := loadIndex(t, repo)

	blobID := restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66")
	locs, err := idx.FindBlob(restic.BlobHandle{ID: blobID, Type: restic.DataBlob})
	// Both checks are fatal: locs[0] below would panic on an empty result.
	if err != nil {
		t.Fatalf("FindBlob() returned error %v", err)
	}

	if len(locs) != 1 {
		t.Fatalf("blob found %d times, expected just one", len(locs))
	}

	l := locs[0]
	if !l.ID.Equal(blobID) {
		t.Errorf("blob IDs are not equal: %v != %v", l.ID, blobID)
	}

	if l.Type != restic.DataBlob {
		t.Errorf("want type %v, got %v", restic.DataBlob, l.Type)
	}

	if l.Offset != 150 {
		t.Errorf("wrong offset, want %d, got %v", 150, l.Offset)
	}

	if l.Length != 123 {
		t.Errorf("wrong length, want %d, got %v", 123, l.Length)
	}
}