pw_kvs: Scan for entries after data loss
This change updates the KVS initialization procedure to scan for
remaining entries within a sector after it encounters data corruption.
Change-Id: I2a6efbb5054b813a44c800de836a6fe963ac5ca8
diff --git a/pw_kvs/key_value_store.cc b/pw_kvs/key_value_store.cc
index 2409c0e..09f3c49 100644
--- a/pw_kvs/key_value_store.cc
+++ b/pw_kvs/key_value_store.cc
@@ -77,11 +77,14 @@
sectors_.assign(partition_.sector_count(),
SectorDescriptor(sector_size_bytes));
+ size_t total_corrupt_bytes = 0;
+ int corrupt_entries = 0;
+
for (SectorDescriptor& sector : sectors_) {
- // Reset the sector's valid and writable byte counts. These will be updated
- // after reading each entry.
Address entry_address = sector_address;
+ size_t sector_corrupt_bytes = 0;
+
for (int num_entries_in_sector = 0; true; num_entries_in_sector++) {
DBG("Load entry: sector=%" PRIx32 ", entry#=%d, address=%" PRIx32,
sector_address,
@@ -100,22 +103,36 @@
break;
}
if (status == Status::DATA_LOSS) {
- // It's not clear KVS can make a unilateral decision about what to do
- // in corruption cases. It's an application decision, for which we
- // should offer some configurability. For now, entirely bail out of
- // loading and give up.
- //
- // Later, scan for remaining valid keys; since it's entirely possible
- // that there is a duplicate of the key elsewhere and everything is
- // fine. Later, we can wipe and maybe recover the sector.
- //
- // TODO: Implement rest-of-sector scanning for valid entries.
- ERR("KVS init failed: data loss detected in sector %u at address %zu",
+ // The entry could not be read, indicating data corruption within the
+ // sector. Try to scan the remainder of the sector for other entries.
+ ERR("KVS init: data loss detected in sector %u at address %zu",
SectorIndex(§or),
size_t(entry_address));
- return Status::DATA_LOSS;
+
+ corrupt_entries++;
+
+ status = ScanForEntry(sector,
+ entry_address + Entry::kMinAlignmentBytes,
+ &next_entry_address);
+ if (status == Status::NOT_FOUND) {
+ // No further entries in this sector. Mark the remaining bytes in the
+ // sector as corrupt (since we can't reliably know the size of the
+ // corrupt entry).
+ sector_corrupt_bytes +=
+ sector_size_bytes - (entry_address - sector_address);
+ break;
+ }
+
+ if (!status.ok()) {
+ ERR("Unexpected error in KVS initialization: %s", status.str());
+ return Status::UNKNOWN;
+ }
+
+ sector_corrupt_bytes += next_entry_address - entry_address;
+ } else if (!status.ok()) {
+ ERR("Unexpected error in KVS initialization: %s", status.str());
+ return Status::UNKNOWN;
}
- TRY(status);
// Entry loaded successfully; so get ready to load the next one.
entry_address = next_entry_address;
@@ -125,7 +142,20 @@
(entry_address - sector_address));
}
+ if (sector_corrupt_bytes > 0) {
+ // If the sector contains corrupt data, prevent any further entries from
+ // being written to it by indicating that it has no space. This should
+ // also make it a decent GC candidate. Valid keys in the sector are still
+ // readable as normal.
+ sector.set_writable_bytes(0);
+
+ WRN("Sector %u contains %zuB of corrupt data",
+ SectorIndex(§or),
+ sector_corrupt_bytes);
+ }
+
sector_address += sector_size_bytes;
+ total_corrupt_bytes += sector_corrupt_bytes;
}
DBG("Second pass: Count valid bytes in each sector");
@@ -158,6 +188,14 @@
sectors_.size(),
partition_.sector_size_bytes());
+ if (total_corrupt_bytes > 0) {
+ WRN("Found %zu corrupt bytes and %d corrupt entries during init process; "
+ "some keys may be missing",
+ total_corrupt_bytes,
+ corrupt_entries);
+ return Status::DATA_LOSS;
+ }
+
return Status::OK;
}
@@ -188,6 +226,35 @@
return Status::OK;
}
+// Scans flash memory within a sector to find a KVS entry magic.
+// TODO(frolv): This needs to be unit tested!
+Status KeyValueStore::ScanForEntry(const SectorDescriptor& sector,
+ Address start_address,
+ Address* next_entry_address) {
+ DBG("Scanning sector %u for entries starting from address %zx",
+ SectorIndex(§or),
+ size_t(start_address));
+
+ // Entries must start at addresses which are aligned on a multiple of
+ // Entry::kMinAlignmentBytes. However, that multiple can vary between entries.
+ // When scanning, we don't have an entry to tell us what the current alignment
+ // is, so the minimum alignment is used to be exhaustive.
+ for (Address address = AlignUp(start_address, Entry::kMinAlignmentBytes);
+ AddressInSector(sector, address);
+ address += Entry::kMinAlignmentBytes) {
+ // TODO: Handle multiple magics for formats that have changed.
+ uint32_t magic;
+ TRY(partition_.Read(address, as_writable_bytes(span(&magic, 1))));
+ if (magic == entry_header_format_.magic) {
+ DBG("Found entry magic at address %zx", size_t(address));
+ *next_entry_address = address;
+ return Status::OK;
+ }
+ }
+
+ return Status::NOT_FOUND;
+}
+
// TODO: This method is the trigger of the O(valid_entries * all_entries) time
// complexity for reading. At some cost to memory, this could be optimized by
// using a hash table instead of scanning, but in practice this should be fine
diff --git a/pw_kvs/public/pw_kvs/key_value_store.h b/pw_kvs/public/pw_kvs/key_value_store.h
index ed797b5..aa9d431 100644
--- a/pw_kvs/public/pw_kvs/key_value_store.h
+++ b/pw_kvs/public/pw_kvs/key_value_store.h
@@ -51,6 +51,13 @@
// Initializes the key-value store. Must be called before calling other
// functions.
+ //
+ // Return values:
+ //
+ // OK: KVS successfully initialized.
+ // DATA_LOSS: KVS initialized and is usable, but contains corrupt data.
+ // UNKNOWN: Unknown error. KVS is not initialized.
+ //
Status Init();
bool initialized() const { return initialized_; }
@@ -245,6 +252,9 @@
}
Status LoadEntry(Address entry_address, Address* next_entry_address);
+ Status ScanForEntry(const SectorDescriptor& sector,
+ Address start_address,
+ Address* next_entry_address);
Status AppendNewOrOverwriteStaleExistingDescriptor(
const KeyDescriptor& key_descriptor);
Status AppendEmptyDescriptor(KeyDescriptor** new_descriptor);