pw_kvs: Remove deleted keys as part of heavy maintenance
Remove/garbage collect all the deleted key entries as part of
doing heavy maintenance.
Change-Id: Ie55ea51211eef1cb81e6959a819e3756091eeb5d
Co-authored-by: David Rempel <drempel@google.com>
Co-authored-by: David Rogers <davidrogers@google.com>
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/95021
Reviewed-by: Keir Mierle <keir@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
diff --git a/pw_kvs/docs.rst b/pw_kvs/docs.rst
index fd21996..fdf443a 100644
--- a/pw_kvs/docs.rst
+++ b/pw_kvs/docs.rst
@@ -157,9 +157,10 @@
Full Maintenance does garbage collection of all sectors except those that have
current valid KV entries.
-Heavy Maintenance does garbage collection of all sectors. Use strong caution
-when doing Heavy Maintenance as it can, compared to Full Maintenance, result
-in a significant amount of moving valid entries,
+Heavy Maintenance does garbage collection of all sectors, including removing
+entries for deleted keys. Use strong caution when doing Heavy Maintenance as it
+can, compared to Full Maintenance, result in a significant amount of moving
+valid entries,
Garbage collection can be performed by request of higher level software or
automatically as needed to make space available to write new entries.
diff --git a/pw_kvs/entry_cache.cc b/pw_kvs/entry_cache.cc
index ece008b..bac1607 100644
--- a/pw_kvs/entry_cache.cc
+++ b/pw_kvs/entry_cache.cc
@@ -19,6 +19,7 @@
#include <cinttypes>
+#include "pw_assert/check.h"
#include "pw_kvs/flash_memory.h"
#include "pw_kvs/internal/entry.h"
#include "pw_kvs/internal/hash.h"
@@ -125,6 +126,32 @@
return EntryMetadata(descriptors_.back(), std::span(first_address, 1));
}
+// Removes an existing entry from the cache
+EntryCache::iterator EntryCache::RemoveEntry(iterator& entry_it) {
+ PW_DCHECK_PTR_EQ(entry_it.entry_cache_, this);
+
+ const unsigned int index_to_remove =
+ entry_it.metadata_.descriptor_ - &descriptors_.front();
+ const KeyDescriptor last_desc = descriptors_[descriptors_.size() - 1];
+
+ // Since order is not important, this copies the last descriptor into the
+ // deleted descriptor's space and then pops the last entry.
+ Address* addresses_at_end = first_address(descriptors_.size() - 1);
+
+ if (index_to_remove < descriptors_.size() - 1) {
+ Address* addresses_to_remove = first_address(index_to_remove);
+ for (unsigned int i = 0; i < redundancy_; i++) {
+ addresses_to_remove[i] = addresses_at_end[i];
+ }
+ descriptors_[index_to_remove] = last_desc;
+ }
+
+ // Erase the last entry since it was copied over the entry being deleted.
+ descriptors_.pop_back();
+
+ return {this, descriptors_.data() + index_to_remove};
+}
+
// TODO: This method is the trigger of the O(valid_entries * all_entries) time
// complexity for reading. At some cost to memory, this could be optimized by
// using a hash table instead of scanning, but in practice this should be fine
diff --git a/pw_kvs/key_value_store.cc b/pw_kvs/key_value_store.cc
index 2b0bbe8..a3e2262 100644
--- a/pw_kvs/key_value_store.cc
+++ b/pw_kvs/key_value_store.cc
@@ -400,6 +400,36 @@
return Status::NotFound();
}
+Status KeyValueStore::RemoveDeletedKeyEntries() {
+ for (internal::EntryCache::iterator it = entry_cache_.begin();
+ it != entry_cache_.end();
+ ++it) {
+ EntryMetadata& entry_metadata = *it;
+
+ // The iterator we are given back from RemoveEntry could also be deleted,
+ // so loop until we find one that isn't deleted.
+ while (entry_metadata.state() == EntryState::kDeleted) {
+ // Read the original entry to get the size for sector accounting purposes.
+ Entry entry;
+ PW_TRY(ReadEntry(entry_metadata, entry));
+
+ for (Address address : entry_metadata.addresses()) {
+ sectors_.FromAddress(address).RemoveValidBytes(entry.size());
+ }
+
+ it = entry_cache_.RemoveEntry(it);
+
+ if (it == entry_cache_.end()) {
+ return OkStatus(); // new iterator is the end, bail
+ }
+
+ entry_metadata = *it; // set entry_metadata to check for deletion again
+ }
+ }
+
+ return OkStatus();
+}
+
StatusWithSize KeyValueStore::Get(Key key,
std::span<byte> value_buffer,
size_t offset_bytes) const {
@@ -634,6 +664,17 @@
Status KeyValueStore::WriteEntryForNewKey(Key key,
std::span<const byte> value) {
+ // If we are trying to ensure that all possible writes are successful, and the
+ // cache is full, attempt heavy maintenance now.
+ if (options_.gc_on_write == GargbageCollectOnWrite::kAsManySectorsNeeded &&
+ entry_cache_.full()) {
+ Status maintenance_status = HeavyMaintenance();
+ if (!maintenance_status.ok()) {
+ WRN("KVS Maintenance failed for write: %s", maintenance_status.str());
+ return maintenance_status;
+ }
+ }
+
if (entry_cache_.full()) {
WRN("KVS full: trying to store a new entry, but can't. Have %u entries",
unsigned(entry_cache_.total_entries()));
@@ -878,13 +919,15 @@
INF("Beginning full maintenance");
CheckForErrors();
+ // Step 1: Repair errors
if (error_detected_) {
PW_TRY(Repair());
}
+
+ // Step 2: Make sure all the entries are on the primary format.
StatusWithSize update_status = UpdateEntriesToPrimaryFormat();
Status overall_status = update_status.status();
- // Make sure all the entries are on the primary format.
if (!overall_status.ok()) {
ERR("Failed to update all entries to the primary format");
}
@@ -901,26 +944,50 @@
bool heavy = (maintenance_type == MaintenanceType::kHeavy);
bool force_gc = heavy || over_usage_threshold || (update_status.size() > 0);
- // TODO: look in to making an iterator method for cycling through sectors
- // starting from last_new_sector_.
- Status gc_status;
- for (size_t j = 0; j < sectors_.size(); j++) {
- sector += 1;
- if (sector == sectors_.end()) {
- sector = sectors_.begin();
- }
+ auto do_garbage_collect_pass = [&]() {
+ // TODO: look in to making an iterator method for cycling through
+ // sectors starting from last_new_sector_.
+ Status gc_status;
+ for (size_t j = 0; j < sectors_.size(); j++) {
+ sector += 1;
+ if (sector == sectors_.end()) {
+ sector = sectors_.begin();
+ }
- if (sector->RecoverableBytes(partition_.sector_size_bytes()) > 0 &&
- (force_gc || sector->valid_bytes() == 0)) {
- gc_status = GarbageCollectSector(*sector, {});
- if (!gc_status.ok()) {
- ERR("Failed to garbage collect all sectors");
- break;
+ if (sector->RecoverableBytes(partition_.sector_size_bytes()) > 0 &&
+ (force_gc || sector->valid_bytes() == 0)) {
+ gc_status = GarbageCollectSector(*sector, {});
+ if (!gc_status.ok()) {
+ ERR("Failed to garbage collect all sectors");
+ break;
+ }
}
}
- }
- if (overall_status.ok()) {
- overall_status = gc_status;
+ if (overall_status.ok()) {
+ overall_status = gc_status;
+ }
+ };
+
+ // Step 3: Do full garbage collect pass for all sectors. This will erase all
+ // old/state entries from flash and leave only current/valid entries.
+ do_garbage_collect_pass();
+
+ // Step 4: (if heavy maintenance) garbage collect all the deleted keys.
+ if (heavy) {
+ // Remove deleted keys from the entry cache, including freeing sector bytes
+ // used by those keys. This must only be done directly after a full garbage
+ // collection, otherwise the current deleted entry could be garbage
+ // collected before the older stale entry producing a window for an
+ // invalid/corrupted KVS state if there was a power-fault, crash or other
+ // interruption.
+ Status rdk_status = RemoveDeletedKeyEntries();
+ overall_status.Update(rdk_status);
+
+ // Do another garbage collect pass that will fully remove the deleted keys
+ // from flash. Garbage collect will only touch sectors that have something
+ // to garbage collect, which in this case is only sectors containing deleted
+ // keys.
+ do_garbage_collect_pass();
}
if (overall_status.ok()) {
diff --git a/pw_kvs/key_value_store_fuzz_test.cc b/pw_kvs/key_value_store_fuzz_test.cc
index d35a128..4b14ed1 100644
--- a/pw_kvs/key_value_store_fuzz_test.cc
+++ b/pw_kvs/key_value_store_fuzz_test.cc
@@ -80,7 +80,8 @@
// Rewrite a single key many times, can fill up a sector
ASSERT_EQ(OkStatus(), kvs_.Put("some_data", j));
}
- // Delete and re-add everything
+
+ // Delete and re-add everything except "some_data"
ASSERT_EQ(OkStatus(), kvs_.Delete(key1));
ASSERT_EQ(OkStatus(), kvs_.Put(key1, std::span(buf1, size1)));
ASSERT_EQ(OkStatus(), kvs_.Delete(key2));
@@ -105,4 +106,98 @@
}
}
+TEST(KvsFuzz, FuzzTestWithGC) {
+ FlashPartition& test_partition = FlashTestPartition();
+ ASSERT_EQ(OkStatus(), test_partition.Erase());
+
+ KeyValueStoreBuffer<kMaxEntries, kMaxUsableSectors> kvs_(&test_partition,
+ default_format);
+
+ ASSERT_EQ(OkStatus(), kvs_.Init());
+
+ if (test_partition.sector_size_bytes() < 4 * 1024 ||
+ test_partition.sector_count() < 4) {
+ PW_LOG_INFO("Sectors too small, skipping test.");
+ return; // TODO: Test could be generalized
+ }
+ const char* key1 = "Buf1";
+ const char* key2 = "Buf2";
+ const size_t kLargestBufSize = 3 * 1024;
+ static byte buf1[kLargestBufSize];
+ static byte buf2[kLargestBufSize];
+ std::memset(buf1, 1, sizeof(buf1));
+ std::memset(buf2, 2, sizeof(buf2));
+
+ // Start with things in KVS
+ ASSERT_EQ(OkStatus(), kvs_.Put(key1, buf1));
+ ASSERT_EQ(OkStatus(), kvs_.Put(key2, buf2));
+ for (size_t j = 0; j < keys.size(); j++) {
+ ASSERT_EQ(OkStatus(), kvs_.Put(keys[j], j));
+ }
+
+ for (size_t i = 0; i < 100; i++) {
+ // Vary two sizes
+ size_t size1 = (kLargestBufSize) / (i + 1);
+ size_t size2 = (kLargestBufSize) / (100 - i);
+ for (size_t j = 0; j < 50; j++) {
+ // Rewrite a single key many times, can fill up a sector
+ ASSERT_EQ(OkStatus(), kvs_.Put("some_data", j));
+ }
+
+ // Delete and re-add everything except "some_data".
+ ASSERT_EQ(OkStatus(), kvs_.Delete(key1));
+ ASSERT_EQ(OkStatus(), kvs_.Put(key1, std::span(buf1, size1)));
+ ASSERT_EQ(OkStatus(), kvs_.Delete(key2));
+
+ // Throw some heavy maintenance in the middle to trigger some GC before
+ // moving forward.
+ EXPECT_EQ(OkStatus(), kvs_.HeavyMaintenance());
+
+ // check for expected stats
+ KeyValueStore::StorageStats stats = kvs_.GetStorageStats();
+ EXPECT_GT(stats.sector_erase_count, 1u);
+ EXPECT_EQ(stats.reclaimable_bytes, 0u);
+
+ // Write out rotating keyvalue, read it, and delete kMaxEntries * 4.
+ // This tests whether garbage collection is working on write.
+ for (size_t j = 0; j < kMaxEntries * 4; j++) {
+ size_t readj;
+ StringBuffer<6> keyVal;
+ keyVal << j;
+ ASSERT_EQ(OkStatus(), kvs_.Put(keyVal.c_str(), j));
+ ASSERT_EQ(OkStatus(), kvs_.Get(keyVal.c_str(), &readj));
+ ASSERT_EQ(j, readj);
+ ASSERT_EQ(OkStatus(), kvs_.Delete(keyVal.c_str()));
+ ASSERT_EQ(Status::NotFound(), kvs_.Get(keyVal.c_str(), &readj));
+ }
+
+ // The KVS should contain key1, "some_data", and all of keys[].
+ ASSERT_EQ(kvs_.size(), 2u + keys.size());
+
+ ASSERT_EQ(OkStatus(), kvs_.Put(key2, std::span(buf2, size2)));
+ for (size_t j = 0; j < keys.size(); j++) {
+ ASSERT_EQ(OkStatus(), kvs_.Delete(keys[j]));
+ ASSERT_EQ(OkStatus(), kvs_.Put(keys[j], j));
+ }
+
+ // Do some more heavy maintenance, ensure we have the right number
+ // of keys.
+ EXPECT_EQ(OkStatus(), kvs_.HeavyMaintenance());
+ // The KVS should contain key1, key2, "some_data", and all of keys[].
+ ASSERT_EQ(kvs_.size(), 3u + keys.size());
+
+ // Re-enable and verify (final check on store).
+ ASSERT_EQ(OkStatus(), kvs_.Init());
+ static byte buf[4 * 1024];
+ ASSERT_EQ(OkStatus(), kvs_.Get(key1, std::span(buf, size1)).status());
+ ASSERT_EQ(std::memcmp(buf, buf1, size1), 0);
+ ASSERT_EQ(OkStatus(), kvs_.Get(key2, std::span(buf, size2)).status());
+ ASSERT_EQ(std::memcmp(buf2, buf2, size2), 0);
+ for (size_t j = 0; j < keys.size(); j++) {
+ size_t ret = 1000;
+ ASSERT_EQ(OkStatus(), kvs_.Get(keys[j], &ret));
+ ASSERT_EQ(ret, j);
+ }
+ }
+}
} // namespace pw::kvs
diff --git a/pw_kvs/key_value_store_test.cc b/pw_kvs/key_value_store_test.cc
index 004684e..0aaea1f 100644
--- a/pw_kvs/key_value_store_test.cc
+++ b/pw_kvs/key_value_store_test.cc
@@ -413,6 +413,57 @@
EXPECT_EQ(stats.reclaimable_bytes, 0u);
}
+TEST_F(LargeEmptyInitializedKvs, KeyDeletionMaintenance) {
+ const uint8_t kValue1 = 0xDA;
+ const uint8_t kValue2 = 0x12;
+ uint8_t val = 0;
+
+ // Write and delete a key. The key should be gone, but the size should be 1.
+ ASSERT_EQ(OkStatus(), kvs_.Put(keys[0], kValue1));
+ ASSERT_EQ(kvs_.size(), 1u);
+ ASSERT_EQ(OkStatus(), kvs_.Delete(keys[0]));
+
+ // Ensure the key is indeed gone and the size is 1 before continuing.
+ ASSERT_EQ(kvs_.Get(keys[0], &val), Status::NotFound());
+ ASSERT_EQ(kvs_.size(), 0u);
+ ASSERT_EQ(kvs_.total_entries_with_deleted(), 1u);
+
+ KeyValueStore::StorageStats stats = kvs_.GetStorageStats();
+ EXPECT_EQ(stats.sector_erase_count, 0u);
+ EXPECT_GT(stats.reclaimable_bytes, 0u);
+
+ // Do aggressive FullMaintenance, which should GC the sector with valid data,
+ // resulting in no reclaimable bytes and an erased sector.
+ EXPECT_EQ(OkStatus(), kvs_.HeavyMaintenance());
+ stats = kvs_.GetStorageStats();
+ EXPECT_GT(stats.sector_erase_count, 1u);
+ EXPECT_EQ(stats.reclaimable_bytes, 0u);
+ ASSERT_EQ(kvs_.size(), 0u);
+ ASSERT_EQ(kvs_.total_entries_with_deleted(), 0u);
+
+ // Do it again but with 2 keys and keep one.
+ ASSERT_EQ(OkStatus(), kvs_.Put(keys[0], kValue1));
+ ASSERT_EQ(OkStatus(), kvs_.Put(keys[1], kValue2));
+ ASSERT_EQ(kvs_.size(), 2u);
+ ASSERT_EQ(OkStatus(), kvs_.Delete(keys[0]));
+
+ // Ensure the key is indeed gone and the size is 1 before continuing.
+ ASSERT_EQ(kvs_.Get(keys[0], &val), Status::NotFound());
+ ASSERT_EQ(kvs_.size(), 1u);
+ ASSERT_EQ(kvs_.total_entries_with_deleted(), 2u);
+
+ // Do aggressive FullMaintenance, which should GC the sector with valid data,
+ // resulting in no reclaimable bytes and an erased sector.
+ EXPECT_EQ(OkStatus(), kvs_.HeavyMaintenance());
+ stats = kvs_.GetStorageStats();
+ ASSERT_EQ(kvs_.size(), 1u);
+ ASSERT_EQ(kvs_.total_entries_with_deleted(), 1u);
+
+ // Read back the second key to make sure it is still valid.
+ ASSERT_EQ(kvs_.Get(keys[1], &val), OkStatus());
+ ASSERT_EQ(val, kValue2);
+}
+
TEST(InMemoryKvs, Put_MaxValueSize) {
// Create and erase the fake flash.
Flash flash;
diff --git a/pw_kvs/public/pw_kvs/internal/entry_cache.h b/pw_kvs/public/pw_kvs/internal/entry_cache.h
index ae2acb7..a4a5414 100644
--- a/pw_kvs/public/pw_kvs/internal/entry_cache.h
+++ b/pw_kvs/public/pw_kvs/internal/entry_cache.h
@@ -188,6 +188,10 @@
Address address,
size_t sector_size_bytes) const;
+ // Removes an existing entry from the cache. Returns an iterator to the
+ // next entry so that iteration can continue.
+ iterator RemoveEntry(iterator& entry_it);
+
// Returns a pointer to an array of redundancy() addresses for temporary use.
// This is used by the KeyValueStore to track reserved addresses when finding
// space for a new entry.
diff --git a/pw_kvs/public/pw_kvs/key_value_store.h b/pw_kvs/public/pw_kvs/key_value_store.h
index 086dcb8..d59aae4 100644
--- a/pw_kvs/public/pw_kvs/key_value_store.h
+++ b/pw_kvs/public/pw_kvs/key_value_store.h
@@ -305,6 +305,11 @@
// Returns the number of valid entries in the KeyValueStore.
size_t size() const { return entry_cache_.present_entries(); }
+ // Returns the number of valid entries and deleted entries yet to be collected
+ size_t total_entries_with_deleted() const {
+ return entry_cache_.total_entries();
+ }
+
size_t max_size() const { return entry_cache_.max_entries(); }
size_t empty() const { return size() == 0u; }
@@ -383,6 +388,14 @@
Address start_address,
Address* next_entry_address);
+ // Remove deleted keys from the entry cache, including freeing sector bytes
+ // used by those keys. This must only be done directly after a full garbage
+ // collection, otherwise the current deleted entry could be garbage
+ // collected before the older stale entry producing a window for an
+ // invalid/corrupted KVS state if there was a power-fault, crash or other
+ // interruption.
+ Status RemoveDeletedKeyEntries();
+
Status PutBytes(Key key, std::span<const std::byte> value);
StatusWithSize ValueSize(const EntryMetadata& metadata) const;