Move testing for known blobs to Archiver

This removes the list of in-flight blobs from the master index and
instead keeps a list of "known" blobs in the Archiver. "known" here
means: either already processed, or included in an index. This property
is tested atomically, when the blob is not in the list of "known" blobs,
it is added to the list and the caller is responsible to make this
happen (i.e. save the blob).
This commit is contained in:
Alexander Neumann
2016-02-01 23:50:56 +01:00
parent 382c766983
commit 4f1f03cdb9
6 changed files with 45 additions and 100 deletions

View File

@@ -13,23 +13,11 @@ import (
type MasterIndex struct {
idx []*Index
idxMutex sync.RWMutex
inFlight struct {
backend.IDSet
sync.RWMutex
}
}
// NewMasterIndex creates a new master index.
func NewMasterIndex() *MasterIndex {
return &MasterIndex{
inFlight: struct {
backend.IDSet
sync.RWMutex
}{
IDSet: backend.NewIDSet(),
},
}
return &MasterIndex{}
}
// Lookup queries all known Indexes for the ID and returns the first match.
@@ -154,68 +142,6 @@ func (mi *MasterIndex) Current() *Index {
return newIdx
}
// AddInFlight add the given ID to the list of in-flight IDs. An error is
// returned when the ID is already in the list. Setting ignoreDuplicates to
// true only checks the in flight list, otherwise the index itself is also
// tested.
func (mi *MasterIndex) AddInFlight(id backend.ID, ignoreDuplicates bool) error {
// The index + inFlight store must be searched for a matching id in one
// atomic operation. This requires locking the inFlight store and the
// index together!
mi.inFlight.Lock()
defer mi.inFlight.Unlock()
if !ignoreDuplicates {
// Note: mi.Has read locks the index again.
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
}
debug.Log("MasterIndex.AddInFlight", "adding %v", id.Str())
if mi.inFlight.Has(id) {
return fmt.Errorf("%v is already in flight", id.Str())
}
if !ignoreDuplicates {
if mi.Has(id) {
return fmt.Errorf("%v is already indexed (fully processed)", id)
}
}
mi.inFlight.Insert(id)
return nil
}
// IsInFlight returns true iff the id is contained in the list of in-flight IDs.
func (mi *MasterIndex) IsInFlight(id backend.ID) bool {
// The index + inFlight store must be searched for a matching id in one
// atomic operation. This requires locking the inFlight store and the
// index together!
mi.inFlight.RLock()
defer mi.inFlight.RUnlock()
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
inFlight := mi.inFlight.Has(id)
debug.Log("MasterIndex.IsInFlight", "testing whether %v is in flight: %v", id.Str(), inFlight)
indexed := mi.Has(id)
debug.Log("MasterIndex.IsInFlight", "testing whether %v is indexed (fully processed): %v", id.Str(), indexed)
return inFlight
}
// RemoveFromInFlight deletes the given ID from the liste of in-flight IDs.
func (mi *MasterIndex) RemoveFromInFlight(id backend.ID) {
mi.inFlight.Lock()
defer mi.inFlight.Unlock()
debug.Log("MasterIndex.RemoveFromInFlight", "removing %v from list of in flight blobs", id.Str())
mi.inFlight.Delete(id)
}
// NotFinalIndexes returns all indexes that have not yet been saved.
func (mi *MasterIndex) NotFinalIndexes() []*Index {
mi.idxMutex.Lock()

View File

@@ -84,7 +84,6 @@ func (r *Repository) savePacker(p *pack.Packer) error {
Offset: b.Offset,
Length: uint(b.Length),
})
r.idx.RemoveFromInFlight(b.ID)
}
return nil

View File

@@ -167,10 +167,9 @@ func (r *Repository) LookupBlobSize(id backend.ID) (uint, error) {
return r.idx.LookupSize(id)
}
// SaveAndEncrypt encrypts data and stores it to the backend as type t. If data is small
// enough, it will be packed together with other small blobs. When
// ignoreDuplicates is true, blobs already in the index will be saved again.
func (r *Repository) SaveAndEncrypt(t pack.BlobType, data []byte, id *backend.ID, ignoreDuplicates bool) (backend.ID, error) {
// SaveAndEncrypt encrypts data and stores it to the backend as type t. If data
// is small enough, it will be packed together with other small blobs.
func (r *Repository) SaveAndEncrypt(t pack.BlobType, data []byte, id *backend.ID) (backend.ID, error) {
if id == nil {
// compute plaintext hash
hashedID := backend.Hash(data)
@@ -189,18 +188,9 @@ func (r *Repository) SaveAndEncrypt(t pack.BlobType, data []byte, id *backend.ID
return backend.ID{}, err
}
// add this id to the list of in-flight chunk ids.
debug.Log("Repo.Save", "add %v to list of in-flight IDs", id.Str())
err = r.idx.AddInFlight(*id, ignoreDuplicates)
if err != nil {
debug.Log("Repo.Save", "another goroutine is already working on %v (%v) does already exist", id.Str, t)
return *id, nil
}
// find suitable packer and add blob
packer, err := r.findPacker(uint(len(ciphertext)))
if err != nil {
r.idx.RemoveFromInFlight(*id)
return backend.ID{}, err
}
@@ -234,7 +224,7 @@ func (r *Repository) SaveFrom(t pack.BlobType, id *backend.ID, length uint, rd i
return err
}
_, err = r.SaveAndEncrypt(t, buf, id, false)
_, err = r.SaveAndEncrypt(t, buf, id)
if err != nil {
return err
}
@@ -258,7 +248,7 @@ func (r *Repository) SaveJSON(t pack.BlobType, item interface{}) (backend.ID, er
}
buf = wr.Bytes()
return r.SaveAndEncrypt(t, buf, nil, false)
return r.SaveAndEncrypt(t, buf, nil)
}
// SaveJSONUnpacked serialises item as JSON and encrypts and saves it in the

View File

@@ -83,7 +83,7 @@ func TestSave(t *testing.T) {
id := backend.Hash(data)
// save
sid, err := repo.SaveAndEncrypt(pack.Data, data, nil, false)
sid, err := repo.SaveAndEncrypt(pack.Data, data, nil)
OK(t, err)
Equals(t, id, sid)
@@ -253,7 +253,7 @@ func saveRandomDataBlobs(t testing.TB, repo *repository.Repository, num int, siz
_, err := io.ReadFull(rand.Reader, buf)
OK(t, err)
_, err = repo.SaveAndEncrypt(pack.Data, buf, nil, false)
_, err = repo.SaveAndEncrypt(pack.Data, buf, nil)
OK(t, err)
}
}