pw_allocator: Add extra bytes to Block

This CL adds a new template parameter, `kNumExtraBytes`, to `Block`, and
refactors the existing parameters. The new parameter can be used to
reserve space in the block's header for storing and fetching allocation
metadata.

Additionally, the `kMaxSize` parameter has been dropped and replaced by
a `kNumFlags` parameter. This allows better elision of flag-related
code when it is not needed, and encourages downstream projects to define
fewer block types rather than one for each allocation pool size.

Change-Id: I3530d94b1302e0dada9ad1f2f749ee94498ebb41
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/185954
Reviewed-by: Taylor Cramer <cramertj@google.com>
Commit-Queue: Aaron Green <aarongreen@google.com>
diff --git a/pw_allocator/block_test.cc b/pw_allocator/block_test.cc
index d05d8ca..304eb74 100644
--- a/pw_allocator/block_test.cc
+++ b/pw_allocator/block_test.cc
@@ -14,6 +14,7 @@
 
 #include "pw_allocator/block.h"
 
+#include <array>
 #include <cstdint>
 #include <cstring>
 
@@ -24,14 +25,12 @@
 
 namespace pw::allocator {
 
-const size_t kCapacity = 0x20000;
-
 template <typename BlockType>
-void CanCreateSingleBlock() {
-  constexpr size_t kN = 200;
-  alignas(BlockType*) byte bytes[kN];
+void CanCreateSingleAlignedBlock() {
+  constexpr size_t kN = 1024;
+  alignas(BlockType*) std::array<std::byte, kN> bytes;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -43,37 +42,34 @@
   EXPECT_TRUE(block->Last());
 }
 TEST(GenericBlockTest, CanCreateSingleBlock) {
-  CanCreateSingleBlock<Block<>>();
+  CanCreateSingleAlignedBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanCreateSingleBlock) {
-  CanCreateSingleBlock<Block<uint32_t, kCapacity>>();
+  CanCreateSingleAlignedBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
-void CannotCreateUnalignedSingleBlock() {
+void CanCreateUnalignedSingleBlock() {
   constexpr size_t kN = 1024;
 
   // Force alignment, so we can un-force it below
-  alignas(BlockType*) byte bytes[kN];
-  byte* byte_ptr = bytes;
+  alignas(BlockType*) std::array<std::byte, kN> bytes;
+  ByteSpan aligned(bytes);
 
-  Result<BlockType*> result = BlockType::Init(span(byte_ptr + 1, kN - 1));
-  EXPECT_FALSE(result.ok());
-  EXPECT_EQ(result.status(), Status::InvalidArgument());
+  Result<BlockType*> result = BlockType::Init(aligned.subspan(1));
+  EXPECT_EQ(result.status(), OkStatus());
 }
 TEST(GenericBlockTest, CannotCreateUnalignedSingleBlock) {
-  CannotCreateUnalignedSingleBlock<Block<>>();
+  CanCreateUnalignedSingleBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotCreateUnalignedSingleBlock) {
-  CannotCreateUnalignedSingleBlock<Block<uint32_t, kCapacity>>();
+  CanCreateUnalignedSingleBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotCreateTooSmallBlock() {
-  constexpr size_t kN = 2;
-  alignas(BlockType*) byte bytes[kN];
-
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, 2> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   EXPECT_FALSE(result.ok());
   EXPECT_EQ(result.status(), Status::ResourceExhausted());
 }
@@ -81,15 +77,15 @@
   CannotCreateTooSmallBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotCreateTooSmallBlock) {
-  CannotCreateTooSmallBlock<Block<uint32_t, kCapacity>>();
+  CannotCreateTooSmallBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 TEST(CustomBlockTest, CannotCreateTooLargeBlock) {
-  using BlockType = Block<uint16_t, 512>;
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
+  using BlockType = Block<uint8_t>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   EXPECT_FALSE(result.ok());
   EXPECT_EQ(result.status(), Status::OutOfRange());
 }
@@ -98,9 +94,9 @@
 void CanSplitBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -121,26 +117,25 @@
 }
 TEST(GenericBlockTest, CanSplitBlock) { CanSplitBlock<Block<>>(); }
 TEST(CustomBlockTest, CanSplitBlock) {
-  CanSplitBlock<Block<uint32_t, kCapacity>>();
+  CanSplitBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanSplitBlockUnaligned() {
   constexpr size_t kN = 1024;
-  constexpr size_t kSplitN = 513;
 
-  alignas(BlockType*) byte bytes[kN];
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block1 = *result;
 
   // We should split at sizeof(BlockType) + kSplitN bytes. Then
   // we need to round that up to an alignof(BlockType*) boundary.
-  uintptr_t split_addr = ((uintptr_t)&bytes) + kSplitN;
+  constexpr size_t kSplitN = 513;
+  uintptr_t split_addr = reinterpret_cast<uintptr_t>(block1) + kSplitN;
   split_addr += alignof(BlockType*) - (split_addr % alignof(BlockType*));
   uintptr_t split_len = split_addr - (uintptr_t)&bytes;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
-  ASSERT_EQ(result.status(), OkStatus());
-  BlockType* block1 = *result;
-
   result = BlockType::Split(block1, kSplitN);
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block2 = *result;
@@ -156,7 +151,7 @@
 }
 TEST(GenericBlockTest, CanSplitBlockUnaligned) { CanSplitBlock<Block<>>(); }
 TEST(CustomBlockTest, CanSplitBlockUnaligned) {
-  CanSplitBlock<Block<uint32_t, kCapacity>>();
+  CanSplitBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -173,9 +168,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -194,16 +189,16 @@
 }
 TEST(GenericBlockTest, CanSplitMidBlock) { CanSplitMidBlock<Block<>>(); }
 TEST(CustomBlockTest, CanSplitMidBlock) {
-  CanSplitMidBlock<Block<uint32_t, kCapacity>>();
+  CanSplitMidBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotSplitTooSmallBlock() {
   constexpr size_t kN = 64;
   constexpr size_t kSplitN = kN + 1;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -215,9 +210,9 @@
 void CannotSplitBlockWithoutHeaderSpace() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = kN - BlockType::kBlockOverhead - 1;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -228,16 +223,16 @@
   CannotSplitBlockWithoutHeaderSpace<Block<>>();
 }
 TEST(CustomBlockTest, CannotSplitBlockWithoutHeaderSpace) {
-  CannotSplitBlockWithoutHeaderSpace<Block<uint32_t, kCapacity>>();
+  CannotSplitBlockWithoutHeaderSpace<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotMakeBlockLargerInSplit() {
   // Ensure that we can't ask for more space than the block actually has...
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -248,16 +243,16 @@
   CannotMakeBlockLargerInSplit<Block<>>();
 }
 TEST(CustomBlockTest, CannotMakeBlockLargerInSplit) {
-  CannotMakeBlockLargerInSplit<Block<uint32_t, kCapacity>>();
+  CannotMakeBlockLargerInSplit<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotMakeSecondBlockLargerInSplit() {
   // Ensure that the second block in split is at least of the size of header.
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -269,16 +264,16 @@
   CannotMakeSecondBlockLargerInSplit<Block<>>();
 }
 TEST(CustomBlockTest, CannotMakeSecondBlockLargerInSplit) {
-  CannotMakeSecondBlockLargerInSplit<Block<uint32_t, kCapacity>>();
+  CannotMakeSecondBlockLargerInSplit<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanMakeZeroSizeFirstBlock() {
   // This block does support splitting with zero payload size.
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -290,16 +285,16 @@
   CanMakeZeroSizeFirstBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanMakeZeroSizeFirstBlock) {
-  CanMakeZeroSizeFirstBlock<Block<uint32_t, kCapacity>>();
+  CanMakeZeroSizeFirstBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanMakeZeroSizeSecondBlock() {
   // Likewise, the split block can be zero-width.
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -314,15 +309,15 @@
   CanMakeZeroSizeSecondBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanMakeZeroSizeSecondBlock) {
-  CanMakeZeroSizeSecondBlock<Block<uint32_t, kCapacity>>();
+  CanMakeZeroSizeSecondBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanMarkBlockUsed() {
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -337,16 +332,16 @@
 }
 TEST(GenericBlockTest, CanMarkBlockUsed) { CanMarkBlockUsed<Block<>>(); }
 TEST(CustomBlockTest, CanMarkBlockUsed) {
-  CanMarkBlockUsed<Block<uint32_t, kCapacity>>();
+  CanMarkBlockUsed<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotSplitUsedBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -358,7 +353,7 @@
   CannotSplitUsedBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotSplitUsedBlock) {
-  CannotSplitUsedBlock<Block<uint32_t, kCapacity>>();
+  CannotSplitUsedBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -366,9 +361,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSize = 256;
   constexpr size_t kAlign = 32;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -403,7 +398,7 @@
   CanAllocFirstFromAlignedBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanAllocFirstFromAlignedBlock) {
-  CanAllocFirstFromAlignedBlock<Block<uint32_t, kCapacity>>();
+  CanAllocFirstFromAlignedBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -411,9 +406,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSize = 256;
   constexpr size_t kAlign = 32;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -446,16 +441,16 @@
   CanAllocFirstFromUnalignedBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanAllocFirstFromUnalignedBlock) {
-  CanAllocFirstFromUnalignedBlock<Block<uint32_t, kCapacity>>();
+  CanAllocFirstFromUnalignedBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotAllocFirstTooSmallBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kAlign = 32;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -478,7 +473,7 @@
   CannotAllocFirstTooSmallBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotAllocFirstTooSmallBlock) {
-  CannotAllocFirstTooSmallBlock<Block<uint32_t, kCapacity>>();
+  CannotAllocFirstTooSmallBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -486,9 +481,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSize = 256;
   constexpr size_t kAlign = 32;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -505,16 +500,16 @@
 }
 TEST(GenericBlockTest, CanAllocLast) { CanAllocLast<Block<>>(); }
 TEST(CustomBlockTest, CanAllocLast) {
-  CanAllocLast<Block<uint32_t, kCapacity>>();
+  CanAllocLast<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotAllocLastFromTooSmallBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kAlign = 32;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -538,7 +533,7 @@
   CannotAllocLastFromTooSmallBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotAllocLastFromTooSmallBlock) {
-  CannotAllocLastFromTooSmallBlock<Block<uint32_t, kCapacity>>();
+  CannotAllocLastFromTooSmallBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -548,9 +543,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -572,21 +567,20 @@
   CanMergeWithNextBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanMergeWithNextBlock) {
-  CanMergeWithNextBlock<Block<uint32_t, kCapacity>>();
+  CanMergeWithNextBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotMergeWithFirstOrLastBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  // Do a split, just to check that the checks on Next/Prev are
-  // different...
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
+  // Do a split, just to check that the checks on Next/Prev are different...
   result = BlockType::Split(block1, kSplitN);
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block2 = *result;
@@ -600,21 +594,20 @@
   CannotMergeWithFirstOrLastBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotMergeWithFirstOrLastBlock) {
-  CannotMergeWithFirstOrLastBlock<Block<uint32_t, kCapacity>>();
+  CannotMergeWithFirstOrLastBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotMergeUsedBlock() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  // Do a split, just to check that the checks on Next/Prev are
-  // different...
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
+  // Do a split, just to check that the checks on Next/Prev are different...
   result = BlockType::Split(block, kSplitN);
   ASSERT_EQ(result.status(), OkStatus());
 
@@ -625,7 +618,7 @@
   CannotMergeUsedBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotMergeUsedBlock) {
-  CannotMergeUsedBlock<Block<uint32_t, kCapacity>>();
+  CannotMergeUsedBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -644,7 +637,7 @@
 }
 TEST(GenericBlockTest, CanFreeSingleBlock) { CanFreeSingleBlock<Block<>>(); }
 TEST(CustomBlockTest, CanFreeSingleBlock) {
-  CanFreeSingleBlock<Block<uint32_t, kCapacity>>();
+  CanFreeSingleBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -652,9 +645,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -679,7 +672,7 @@
   CanFreeBlockWithoutMerging<Block<>>();
 }
 TEST(CustomBlockTest, CanFreeBlockWithoutMerging) {
-  CanFreeBlockWithoutMerging<Block<uint32_t, kCapacity>>();
+  CanFreeBlockWithoutMerging<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -687,9 +680,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -713,7 +706,7 @@
   CanFreeBlockAndMergeWithPrev<Block<>>();
 }
 TEST(CustomBlockTest, CanFreeBlockAndMergeWithPrev) {
-  CanFreeBlockAndMergeWithPrev<Block<uint32_t, kCapacity>>();
+  CanFreeBlockAndMergeWithPrev<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -721,9 +714,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -746,7 +739,7 @@
   CanFreeBlockAndMergeWithNext<Block<>>();
 }
 TEST(CustomBlockTest, CanFreeBlockAndMergeWithNext) {
-  CanFreeBlockAndMergeWithNext<Block<uint32_t, kCapacity>>();
+  CanFreeBlockAndMergeWithNext<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -754,9 +747,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -778,15 +771,15 @@
   CanFreeUsedBlockAndMergeWithBoth<Block<>>();
 }
 TEST(CustomBlockTest, CanFreeUsedBlockAndMergeWithBoth) {
-  CanFreeUsedBlockAndMergeWithBoth<Block<uint32_t, kCapacity>>();
+  CanFreeUsedBlockAndMergeWithBoth<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanResizeBlockSameSize() {
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -797,15 +790,15 @@
   CanResizeBlockSameSize<Block<>>();
 }
 TEST(CustomBlockTest, CanResizeBlockSameSize) {
-  CanResizeBlockSameSize<Block<uint32_t, kCapacity>>();
+  CanResizeBlockSameSize<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotResizeFreeBlock() {
   constexpr size_t kN = 1024;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -816,16 +809,16 @@
   CannotResizeFreeBlock<Block<>>();
 }
 TEST(CustomBlockTest, CannotResizeFreeBlock) {
-  CannotResizeFreeBlock<Block<uint32_t, kCapacity>>();
+  CannotResizeFreeBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanResizeBlockSmallerWithNextFree() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -838,7 +831,7 @@
 
   // Shrink by less than the minimum needed for a block. The extra should be
   // added to the subsequent block.
-  size_t delta = BlockType::kBlockOverhead / 2;
+  size_t delta = BlockType::kBlockOverhead - BlockType::kAlignment;
   size_t new_inner_size = block1->InnerSize() - delta;
   EXPECT_EQ(BlockType::Resize(block1, new_inner_size), OkStatus());
   EXPECT_EQ(block1->InnerSize(), new_inner_size);
@@ -850,16 +843,16 @@
   CanResizeBlockSmallerWithNextFree<Block<>>();
 }
 TEST(CustomBlockTest, CanResizeBlockSmallerWithNextFree) {
-  CanResizeBlockSmallerWithNextFree<Block<uint32_t, kCapacity>>();
+  CanResizeBlockSmallerWithNextFree<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanResizeBlockLargerWithNextFree() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -872,7 +865,7 @@
 
   // Grow by less than the minimum needed for a block. The extra should be
   // added to the subsequent block.
-  size_t delta = BlockType::kBlockOverhead / 2;
+  size_t delta = BlockType::kBlockOverhead - BlockType::kAlignment;
   size_t new_inner_size = block1->InnerSize() + delta;
   EXPECT_EQ(BlockType::Resize(block1, new_inner_size), OkStatus());
   EXPECT_EQ(block1->InnerSize(), new_inner_size);
@@ -884,7 +877,7 @@
   CanResizeBlockLargerWithNextFree<Block<>>();
 }
 TEST(CustomBlockTest, CanResizeBlockLargerWithNextFree) {
-  CanResizeBlockLargerWithNextFree<Block<uint32_t, kCapacity>>();
+  CanResizeBlockLargerWithNextFree<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -892,9 +885,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -916,16 +909,16 @@
   CannotResizeBlockMuchLargerWithNextFree<Block<>>();
 }
 TEST(CustomBlockTest, CannotResizeBlockMuchLargerWithNextFree) {
-  CannotResizeBlockMuchLargerWithNextFree<Block<uint32_t, kCapacity>>();
+  CannotResizeBlockMuchLargerWithNextFree<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CanResizeBlockSmallerWithNextUsed() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -949,16 +942,16 @@
   CanResizeBlockSmallerWithNextUsed<Block<>>();
 }
 TEST(CustomBlockTest, CanResizeBlockSmallerWithNextUsed) {
-  CanResizeBlockSmallerWithNextUsed<Block<uint32_t, kCapacity>>();
+  CanResizeBlockSmallerWithNextUsed<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
 void CannotResizeBlockLargerWithNextUsed() {
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -977,7 +970,7 @@
   CannotResizeBlockLargerWithNextUsed<Block<>>();
 }
 TEST(CustomBlockTest, CannotResizeBlockLargerWithNextUsed) {
-  CannotResizeBlockLargerWithNextUsed<Block<uint32_t, kCapacity>>();
+  CannotResizeBlockLargerWithNextUsed<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -985,9 +978,9 @@
   constexpr size_t kN = 1024;
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 256;
-  alignas(BlockType*) byte bytes[kN];
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -1010,7 +1003,7 @@
 }
 TEST(GenericBlockTest, CanCheckValidBlock) { CanCheckValidBlock<Block<>>(); }
 TEST(CustomBlockTest, CanCheckValidBlock) {
-  CanCheckValidBlock<Block<uint32_t, kCapacity>>();
+  CanCheckValidBlock<Block<uint32_t, sizeof(uint16_t)>>();
 }
 
 template <typename BlockType>
@@ -1019,10 +1012,9 @@
   constexpr size_t kSplit1 = 512;
   constexpr size_t kSplit2 = 128;
   constexpr size_t kSplit3 = 256;
-  alignas(BlockType*) byte bytes[kN];
-  memset(bytes, 0, sizeof(bytes));
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes{};
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
 
@@ -1064,15 +1056,29 @@
   CanCheckInvalidBlock<Block<>>();
 }
 TEST(CustomBlockTest, CanCheckInvalidBlock) {
-  CanCheckInvalidBlock<Block<uint32_t, kCapacity>>();
+  CanCheckInvalidBlock<Block<uint32_t, sizeof(uint16_t)>>();
+}
+
+TEST(CustomBlockTest, NoFlagsbyDefault) {
+  constexpr size_t kN = 1024;
+  using BlockType = Block<>;
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetFlags(std::numeric_limits<BlockType::offset_type>::max());
+  EXPECT_EQ(block->GetFlags(), 0U);
 }
 
 TEST(CustomBlockTest, CustomFlagsInitiallyZero) {
   constexpr size_t kN = 1024;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -1081,10 +1087,11 @@
 
 TEST(CustomBlockTest, SetCustomFlags) {
   constexpr size_t kN = 1024;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -1094,10 +1101,11 @@
 
 TEST(CustomBlockTest, SetAllCustomFlags) {
   constexpr size_t kN = 1024;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -1110,10 +1118,11 @@
 
 TEST(CustomBlockTest, ClearCustomFlags) {
   constexpr size_t kN = 1024;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -1125,10 +1134,11 @@
 TEST(CustomBlockTest, FlagsNotCopiedOnSplit) {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block1 = *result;
   block1->SetFlags(0x137);
@@ -1144,10 +1154,11 @@
 TEST(CustomBlockTest, FlagsPreservedByMergeNext) {
   constexpr size_t kN = 1024;
   constexpr size_t kSplitN = 512;
-  using BlockType = Block<uint16_t, kN>;
-  alignas(BlockType*) byte bytes[kN];
+  constexpr size_t kNumFlags = 10;
+  using BlockType = Block<uint16_t, 0, kNumFlags>;
 
-  Result<BlockType*> result = BlockType::Init(span(bytes, kN));
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
   ASSERT_EQ(result.status(), OkStatus());
   BlockType* block = *result;
 
@@ -1159,4 +1170,130 @@
   EXPECT_EQ(block->GetFlags(), 0x137U);
 }
 
+TEST(GenericBlockTest, SetAndGetExtraBytes) {
+  constexpr size_t kN = 1024;
+  using BlockType = Block<>;
+  constexpr size_t kExtraN = 4;
+  constexpr std::array<uint8_t, kExtraN> kExtra{0xa1, 0xb2, 0xc3, 0xd4};
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetExtraBytes(as_bytes(span(kExtra)));
+  ConstByteSpan extra = block->GetExtraBytes();
+  EXPECT_EQ(extra.size(), 0U);
+}
+
+TEST(CustomBlockTest, SetAndGetExtraBytes) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kNumExtraBytes = 4;
+  using BlockType = Block<uintptr_t, kNumExtraBytes>;
+  constexpr size_t kExtraN = 4;
+  constexpr std::array<uint8_t, kExtraN> kExtra{0xa1, 0xb2, 0xc3, 0xd4};
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetExtraBytes(as_bytes(span(kExtra)));
+  ConstByteSpan extra = block->GetExtraBytes();
+  EXPECT_EQ(extra.size(), kNumExtraBytes);
+  EXPECT_EQ(std::memcmp(extra.data(), kExtra.data(), kExtraN), 0);
+}
+
+TEST(CustomBlockTest, SetExtraBytesPadsWhenShort) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kNumExtraBytes = 8;
+  using BlockType = Block<uintptr_t, kNumExtraBytes>;
+  constexpr size_t kExtraN = 4;
+  constexpr std::array<uint8_t, kExtraN> kExtra{0xa1, 0xb2, 0xc3, 0xd4};
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetExtraBytes(as_bytes(span(kExtra)));
+  ConstByteSpan extra = block->GetExtraBytes();
+  EXPECT_EQ(extra.size(), kNumExtraBytes);
+  EXPECT_EQ(std::memcmp(extra.data(), kExtra.data(), kExtraN), 0);
+  for (size_t i = kExtraN; i < kNumExtraBytes; ++i) {
+    EXPECT_EQ(size_t(extra[i]), 0U);
+  }
+}
+
+TEST(CustomBlockTest, SetExtraBytesTruncatesWhenLong) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kNumExtraBytes = 2;
+  using BlockType = Block<uintptr_t, kNumExtraBytes>;
+  constexpr size_t kExtraN = 4;
+  constexpr std::array<uint8_t, kExtraN> kExtra{0xa1, 0xb2, 0xc3, 0xd4};
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetExtraBytes(as_bytes(span(kExtra)));
+  ConstByteSpan extra = block->GetExtraBytes();
+  EXPECT_EQ(extra.size(), kNumExtraBytes);
+  EXPECT_EQ(std::memcmp(extra.data(), kExtra.data(), kNumExtraBytes), 0);
+}
+
+TEST(CustomBlockTest, SetAndGetTypedExtra) {
+  constexpr size_t kN = 1024;
+  using BlockType = Block<uintptr_t, sizeof(uint32_t)>;
+  constexpr uint32_t kExtra = 0xa1b2c3d4;
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  block->SetTypedExtra(kExtra);
+  EXPECT_EQ(block->GetTypedExtra<uint32_t>(), kExtra);
+}
+
+TEST(CustomBlockTest, ExtraDataNotCopiedOnSplit) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kSplitN = 512;
+  using BlockType = Block<uintptr_t, sizeof(uint32_t)>;
+  constexpr uint32_t kExtra = 0xa1b2c3d4;
+  // Zero the region so block2's never-written extra bytes have a known value.
+  std::array<std::byte, kN> bytes{};
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block1 = *result;
+  block1->SetTypedExtra(kExtra);
+
+  result = BlockType::Split(block1, kSplitN);
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block2 = *result;
+
+  EXPECT_EQ(block1->GetTypedExtra<uint32_t>(), kExtra);
+  EXPECT_EQ(block2->GetTypedExtra<uint32_t>(), 0U);
+}
+
+TEST(CustomBlockTest, ExtraDataPreservedByMergeNext) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kSplitN = 512;
+  using BlockType = Block<uintptr_t, sizeof(uint32_t)>;
+  constexpr uint32_t kExtra = 0xa1b2c3d4;
+
+  std::array<std::byte, kN> bytes;
+  Result<BlockType*> result = BlockType::Init(span(bytes));
+  ASSERT_EQ(result.status(), OkStatus());
+  BlockType* block = *result;
+
+  result = BlockType::Split(block, kSplitN);
+  ASSERT_EQ(result.status(), OkStatus());
+
+  block->SetTypedExtra(kExtra);
+  EXPECT_EQ(BlockType::MergeNext(block), OkStatus());
+  EXPECT_EQ(block->GetTypedExtra<uint32_t>(), kExtra);
+}
+
 }  // namespace pw::allocator
diff --git a/pw_allocator/public/pw_allocator/block.h b/pw_allocator/public/pw_allocator/block.h
index 4b5e7e5..e4e1fff 100644
--- a/pw_allocator/public/pw_allocator/block.h
+++ b/pw_allocator/public/pw_allocator/block.h
@@ -13,6 +13,7 @@
 // the License.
 #pragma once
 
+#include <climits>
 #include <cstdint>
 #include <cstring>
 
@@ -154,7 +155,7 @@
 /// allocated memory with a small amount of overhead. See
 /// pw_allocator_private/simple_allocator.h for an example.
 ///
-/// Blocks will always be aligned to a `kAlignment boundary. Block sizes will
+/// Blocks will always be aligned to a `kAlignment` boundary. Block sizes will
 /// always be rounded up to a multiple of `kAlignment`.
 ///
 /// The blocks do not encode their size. Instead, they encode the offsets to the
@@ -197,55 +198,45 @@
 /// block matches the previous offset of its next block. The first block in a
 /// list is denoted by having a previous offset of `0`.
 ///
-/// Each block also encodes flags. Builtin flags indicate whether the block is
-/// in use and whether it is the last block in the list. The last block will
-/// still have a next offset that denotes its size.
+/// Each block may also include extra data and custom flags. The amount of extra
+/// data is given in bytes by the `kNumExtraBytes` template parameter.
+/// Additional bytes may be included in the header to keep it aligned to
+/// `kAlignment`.
 ///
-/// Depending on `kMaxSize`, some bits of type `T` may not be needed to
-/// encode an offset. Additional bits of both the previous and next offsets may
-/// be used for setting custom flags.
+/// The custom flags are stored using bits from the offset fields, thereby
+/// decreasing the range of offsets that blocks can address. Up to half of the
+/// offset field may be used as flags, including one built-in flag per offset
+/// field to track `used` and `last`.
 ///
-/// For example, for a `Block<uint32_t, 0x10000>`, on a platform where
-/// `alignof(uint32_t) == 4`, the fully encoded bits would be:
-///
-/// @code{.unparsed}
-/// +-------------------------------------------------------------------------+
-/// | block:                                                                  |
-/// +------------------------------------+------------------------------------+
-/// | .prev_                             | .next_:                            |
-/// +---------------+------+-------------+---------------+------+-------------+
-/// | MSB           |      |         LSB | MSB           |      |         LSB |
-/// | 31.........16 |  15  | 14........0 | 31.........16 |  15  | 14........0 |
-/// | custom_flags1 | used | prev_offset | custom_flags2 | last | next_offset |
-/// +---------------+------+-------------+---------------+------+-------------+
-/// @endcode
-///
-/// @tparam   UintType  Unsigned integral type used to encode offsets and flags.
-/// @tparam   kMaxSize  Largest offset that can be addressed by this block. Bits
-///                     of `UintType` not needed for offsets are available as
-///                     flags.
-template <typename UintType = uintptr_t,
-          size_t kMaxSize = std::numeric_limits<uintptr_t>::max()>
+/// @tparam   OffsetType      Unsigned integral type used to encode offsets and
+///                           flags.
+/// @tparam   kNumExtraBytes  Number of additional **bytes** to add to the block
+///                           header storing custom data.
+/// @tparam   kNumFlags       Number of **bits** of the offset fields to use as
+///                           custom flags.
+template <typename OffsetType = uintptr_t,
+          size_t kNumExtraBytes = 0,
+          size_t kNumFlags = 0>
 class Block final : public BaseBlock {
  public:
-  static_assert(std::is_unsigned_v<UintType>);
-  static_assert(kMaxSize <= std::numeric_limits<UintType>::max());
+  using offset_type = OffsetType;
 
-  static constexpr size_t kCapacity = kMaxSize;
-  static constexpr size_t kHeaderSize = sizeof(Block) + kPoisonOffset;
-  static constexpr size_t kFooterSize = kPoisonOffset;
-  static constexpr size_t kBlockOverhead = kHeaderSize + kFooterSize;
+  static_assert(std::is_unsigned_v<offset_type>);
+  static_assert(kNumFlags < sizeof(offset_type) * CHAR_BIT);
+
   static constexpr size_t kAlignment = alignof(Block);
+  static constexpr size_t kHeaderSize =
+      AlignUp(sizeof(Block) + kNumExtraBytes + kPoisonOffset, kAlignment);
+  static constexpr size_t kFooterSize = AlignUp(kPoisonOffset, kAlignment);
+  static constexpr size_t kBlockOverhead = kHeaderSize + kFooterSize;
 
   /// @brief Creates the first block for a given memory region.
   ///
-  /// @pre The start of the given memory region must be aligned to an
-  /// `kAlignment` boundary.
-  ///
   /// @retval OK                    Returns a block representing the region.
-  /// @retval INVALID_ARGUMENT      The region is unaligned.
+  /// @retval INVALID_ARGUMENT      The region is null.
   /// @retval RESOURCE_EXHAUSTED    The region is too small for a block.
-  /// @retval OUT_OF_RANGE          The region is larger than `kMaxSize`.
+  /// @retval OUT_OF_RANGE          The region is too big to be addressed using
+  ///                               `offset_type`.
   static Result<Block*> Init(ByteSpan region);
 
   /// @returns  A pointer to a `Block`, given a pointer to the start of the
@@ -436,8 +427,9 @@
   /// any others are ignored. Refer to the class level documentation for the
   /// exact bit layout.
   ///
-  /// Custom flags are not copied when a block is split, and are unchanged when
-  /// merging for the block that remains valid after the merge.
+  /// Custom flags are not copied when a block is split. When merging, the
+  /// custom flags are preserved in the block that remains valid after the
+  /// merge.
   ///
   /// If `flags_to_clear` are provided, these bits will be cleared before
   /// setting the `flags_to_set`. As a consequence, if a bit is set in both
@@ -445,10 +437,61 @@
   ///
   /// @param[in]  flags_to_set      Bit flags to enable.
   /// @param[in]  flags_to_clear    Bit flags to disable.
-  void SetFlags(UintType flags_to_set, UintType flags_to_clear = 0);
+  void SetFlags(offset_type flags_to_set, offset_type flags_to_clear = 0);
 
   /// Returns the custom flags previously set on this block.
-  UintType GetFlags();
+  offset_type GetFlags();
+
+  /// Stores extra data in the block.
+  ///
+  /// If the given region is shorter than `kNumExtraBytes`, it will be padded
+  /// with `\x00` bytes. If the given region is longer than `kNumExtraBytes`, it
+  /// will be truncated.
+  ///
+  /// Extra data is not copied when a block is split. When merging, the extra
+  /// data is preserved in the block that remains valid after the merge.
+  ///
+  /// @param[in]  extra             Extra data to store in the block.
+  void SetExtraBytes(ConstByteSpan extra);
+
+  /// Stores extra data in the block from a trivially copyable type.
+  ///
+  /// The type given by template parameter should match the type used to specify
+  /// `kNumExtraBytes`. The value will be treated as a span of bytes and copied
+  /// using `SetExtraBytes(ConstByteSpan)`.
+  template <typename T,
+            std::enable_if_t<std::is_trivially_copyable_v<T> &&
+                                 sizeof(T) == kNumExtraBytes,
+                             int> = 0>
+  void SetTypedExtra(const T& extra) {
+    SetExtraBytes(as_bytes(span(&extra, 1)));
+  }
+
+  /// Returns the extra data from the block.
+  ConstByteSpan GetExtraBytes() const;
+
+  /// Returns the extra data from the block as a default constructible and
+  /// trivially copyable type.
+  ///
+  /// The template parameter should match the type used to specify
+  /// `kNumExtraBytes`. For example:
+  ///
+  /// @code{.cpp}
+  ///   using BlockType = Block<uint16_t, sizeof(Token)>;
+  ///   BlockType* block = ...;
+  ///   block->SetTypedExtra(kMyToken);
+  ///   Token my_token = block->GetTypedExtra<Token>();
+  /// @endcode
+  template <typename T,
+            std::enable_if_t<std::is_default_constructible_v<T> &&
+                                 std::is_trivially_copyable_v<T> &&
+                                 sizeof(T) == kNumExtraBytes,
+                             int> = 0>
+  T GetTypedExtra() const {
+    T result{};
+    std::memcpy(&result, GetExtraBytes().data(), kNumExtraBytes);
+    return result;
+  }
 
   /// @brief Checks if a block is valid.
   ///
@@ -464,15 +507,13 @@
   void CrashIfInvalid();
 
  private:
-  static constexpr UintType kMaxOffset = UintType(kMaxSize / kAlignment);
-  static constexpr size_t kCustomFlagBitsPerField =
-      cpp20::countl_zero(kMaxOffset) - 1;
-  static constexpr size_t kCustomFlagBits = kCustomFlagBitsPerField * 2;
-  static constexpr size_t kOffsetBits = cpp20::bit_width(kMaxOffset);
-  static constexpr UintType kBuiltinFlag = UintType(1) << kOffsetBits;
-  static constexpr UintType kOffsetMask = kBuiltinFlag - 1;
+  static constexpr size_t kCustomFlagBitsPerField = (kNumFlags + 1) / 2;
+  static constexpr size_t kOffsetBits =
+      (sizeof(offset_type) * CHAR_BIT) - (kCustomFlagBitsPerField + 1);
+  static constexpr offset_type kBuiltinFlag = offset_type(1) << kOffsetBits;
+  static constexpr offset_type kOffsetMask = kBuiltinFlag - 1;
   static constexpr size_t kCustomFlagShift = kOffsetBits + 1;
-  static constexpr UintType kCustomFlagMask = ~(kOffsetMask | kBuiltinFlag);
+  static constexpr offset_type kCustomFlagMask = ~(kOffsetMask | kBuiltinFlag);
 
   Block(size_t prev_offset, size_t next_offset);
 
@@ -490,17 +531,18 @@
   BlockStatus CheckStatus() const;
 
   /// Extracts the offset portion from `next_` or `prev_`.
-  static size_t GetOffset(UintType packed) {
+  static size_t GetOffset(offset_type packed) {
     return static_cast<size_t>(packed & kOffsetMask) * kAlignment;
   }
 
   /// Overwrites the offset portion of `next_` or `prev_`.
-  static void SetOffset(UintType& field, size_t offset) {
-    field = (field & ~kOffsetMask) | static_cast<UintType>(offset) / kAlignment;
+  static void SetOffset(offset_type& field, size_t offset) {
+    field =
+        (field & ~kOffsetMask) | static_cast<offset_type>(offset) / kAlignment;
   }
 
-  UintType next_ = 0;
-  UintType prev_ = 0;
+  offset_type next_ = 0;
+  offset_type prev_ = 0;
 
  public:
   // Associated types.
@@ -561,16 +603,19 @@
 
 // Public template method implementations.
 
-template <typename UintType, size_t kMaxSize>
-Result<Block<UintType, kMaxSize>*> Block<UintType, kMaxSize>::Init(
-    ByteSpan region) {
-  if (reinterpret_cast<uintptr_t>(region.data()) % kAlignment != 0) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Result<Block<OffsetType, kNumExtraBytes, kNumFlags>*>
+Block<OffsetType, kNumExtraBytes, kNumFlags>::Init(ByteSpan region) {
+  if (region.data() == nullptr) {
     return Status::InvalidArgument();
   }
-  if (region.size() < kBlockOverhead) {
+  auto addr = reinterpret_cast<uintptr_t>(region.data());
+  auto aligned = AlignUp(addr, kAlignment);
+  if (addr + region.size() <= aligned + kBlockOverhead) {
     return Status::ResourceExhausted();
   }
-  if (kMaxSize < region.size()) {
+  region = region.subspan(aligned - addr);
+  if (GetOffset(std::numeric_limits<offset_type>::max()) < region.size()) {
     return Status::OutOfRange();
   }
   Block* block = AsBlock(0, region);
@@ -579,10 +624,9 @@
   return block;
 }
 
-template <typename UintType, size_t kMaxSize>
-Status Block<UintType, kMaxSize>::AllocFirst(Block*& block,
-                                             size_t inner_size,
-                                             size_t alignment) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Status Block<OffsetType, kNumExtraBytes, kNumFlags>::AllocFirst(
+    Block*& block, size_t inner_size, size_t alignment) {
   if (block->Used()) {
     return Status::FailedPrecondition();
   }
@@ -612,10 +656,9 @@
   return OkStatus();
 }
 
-template <typename UintType, size_t kMaxSize>
-Status Block<UintType, kMaxSize>::AllocLast(Block*& block,
-                                            size_t inner_size,
-                                            size_t alignment) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Status Block<OffsetType, kNumExtraBytes, kNumFlags>::AllocLast(
+    Block*& block, size_t inner_size, size_t alignment) {
   if (block->Used()) {
     return Status::FailedPrecondition();
   }
@@ -644,8 +687,8 @@
   return OkStatus();
 }
 
-template <typename UintType, size_t kMaxSize>
-void Block<UintType, kMaxSize>::Free(Block*& block) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+void Block<OffsetType, kNumExtraBytes, kNumFlags>::Free(Block*& block) {
   block->MarkFree();
   Block* prev = block->Prev();
   if (Block::MergeNext(prev).ok()) {
@@ -654,8 +697,9 @@
   Block::MergeNext(block).IgnoreError();
 }
 
-template <typename UintType, size_t kMaxSize>
-Status Block<UintType, kMaxSize>::Resize(Block*& block, size_t new_inner_size) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Status Block<OffsetType, kNumExtraBytes, kNumFlags>::Resize(
+    Block*& block, size_t new_inner_size) {
   if (!block->Used()) {
     return Status::FailedPrecondition();
   }
@@ -687,9 +731,10 @@
   return status;
 }
 
-template <typename UintType, size_t kMaxSize>
-Result<Block<UintType, kMaxSize>*> Block<UintType, kMaxSize>::Split(
-    Block*& block, size_t new_inner_size) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Result<Block<OffsetType, kNumExtraBytes, kNumFlags>*>
+Block<OffsetType, kNumExtraBytes, kNumFlags>::Split(Block*& block,
+                                                    size_t new_inner_size) {
   if (block->Used()) {
     return Status::FailedPrecondition();
   }
@@ -704,7 +749,7 @@
   size_t prev_offset = GetOffset(block->prev_);
   size_t outer_size1 = aligned_inner_size + kBlockOverhead;
   bool is_last = block->Last();
-  UintType flags = block->GetFlags();
+  offset_type flags = block->GetFlags();
   ByteSpan bytes = AsBytes(std::move(block));
   Block* block1 = AsBlock(prev_offset, bytes.subspan(0, outer_size1));
   Block* block2 = AsBlock(outer_size1, bytes.subspan(outer_size1));
@@ -721,8 +766,8 @@
   return block2;
 }
 
-template <typename UintType, size_t kMaxSize>
-Status Block<UintType, kMaxSize>::MergeNext(Block*& block) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Status Block<OffsetType, kNumExtraBytes, kNumFlags>::MergeNext(Block*& block) {
   if (block == nullptr || block->Last()) {
     return Status::OutOfRange();
   }
@@ -732,7 +777,7 @@
   }
   size_t prev_offset = GetOffset(block->prev_);
   bool is_last = next->Last();
-  UintType flags = block->GetFlags();
+  offset_type flags = block->GetFlags();
   ByteSpan prev_bytes = AsBytes(std::move(block));
   ByteSpan next_bytes = AsBytes(std::move(next));
   size_t next_offset = prev_bytes.size() + next_bytes.size();
@@ -743,20 +788,24 @@
   } else {
     SetOffset(block->Next()->prev_, GetOffset(block->next_));
   }
-  block->SetFlags(flags);
+  if constexpr (kNumFlags > 0) {
+    block->SetFlags(flags);
+  }
   return OkStatus();
 }
 
-template <typename UintType, size_t kMaxSize>
-Block<UintType, kMaxSize>* Block<UintType, kMaxSize>::Next() const {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Block<OffsetType, kNumExtraBytes, kNumFlags>*
+Block<OffsetType, kNumExtraBytes, kNumFlags>::Next() const {
   size_t offset = GetOffset(next_);
   uintptr_t addr = Last() ? 0 : reinterpret_cast<uintptr_t>(this) + offset;
   // See the note in `FromUsableSpace` about memory laundering.
   return std::launder(reinterpret_cast<Block*>(addr));
 }
 
-template <typename UintType, size_t kMaxSize>
-Block<UintType, kMaxSize>* Block<UintType, kMaxSize>::Prev() const {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Block<OffsetType, kNumExtraBytes, kNumFlags>*
+Block<OffsetType, kNumExtraBytes, kNumFlags>::Prev() const {
   size_t offset = GetOffset(prev_);
   uintptr_t addr =
       (offset == 0) ? 0 : reinterpret_cast<uintptr_t>(this) - offset;
@@ -764,55 +813,88 @@
   return std::launder(reinterpret_cast<Block*>(addr));
 }
 
-template <typename UintType, size_t kMaxSize>
-void Block<UintType, kMaxSize>::SetFlags(UintType flags_to_set,
-                                         UintType flags_to_clear) {
-  UintType hi_flags_to_set = flags_to_set >> kCustomFlagBitsPerField;
-  hi_flags_to_set <<= kCustomFlagShift;
-  UintType hi_flags_to_clear = (flags_to_clear >> kCustomFlagBitsPerField)
-                               << kCustomFlagShift;
-  UintType lo_flags_to_set =
-      (flags_to_set & ((UintType(1) << kCustomFlagBitsPerField) - 1))
-      << kCustomFlagShift;
-  UintType lo_flags_to_clear =
-      (flags_to_clear & ((UintType(1) << kCustomFlagBitsPerField) - 1))
-      << kCustomFlagShift;
-  prev_ = (prev_ & ~hi_flags_to_clear) | hi_flags_to_set;
-  next_ = (next_ & ~lo_flags_to_clear) | lo_flags_to_set;
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+void Block<OffsetType, kNumExtraBytes, kNumFlags>::SetFlags(
+    OffsetType flags_to_set, OffsetType flags_to_clear) {
+  if constexpr (kNumFlags > 0) {
+    offset_type hi_flags_to_set = flags_to_set >> kCustomFlagBitsPerField;
+    hi_flags_to_set <<= kCustomFlagShift;
+    offset_type hi_flags_to_clear = (flags_to_clear >> kCustomFlagBitsPerField)
+                                    << kCustomFlagShift;
+    offset_type lo_flags_to_set =
+        (flags_to_set & ((offset_type(1) << kCustomFlagBitsPerField) - 1))
+        << kCustomFlagShift;
+    offset_type lo_flags_to_clear =
+        (flags_to_clear & ((offset_type(1) << kCustomFlagBitsPerField) - 1))
+        << kCustomFlagShift;
+    prev_ = (prev_ & ~hi_flags_to_clear) | hi_flags_to_set;
+    next_ = (next_ & ~lo_flags_to_clear) | lo_flags_to_set;
+  }
 }
 
-template <typename UintType, size_t kMaxSize>
-UintType Block<UintType, kMaxSize>::GetFlags() {
-  UintType hi_flags = (prev_ & kCustomFlagMask) >> kCustomFlagShift;
-  UintType lo_flags = (next_ & kCustomFlagMask) >> kCustomFlagShift;
-  return (hi_flags << kCustomFlagBitsPerField) | lo_flags;
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+OffsetType Block<OffsetType, kNumExtraBytes, kNumFlags>::GetFlags() {
+  if constexpr (kNumFlags > 0) {
+    offset_type hi_flags = (prev_ & kCustomFlagMask) >> kCustomFlagShift;
+    offset_type lo_flags = (next_ & kCustomFlagMask) >> kCustomFlagShift;
+    return (hi_flags << kCustomFlagBitsPerField) | lo_flags;
+  }
+  return 0;
+}
+
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+void Block<OffsetType, kNumExtraBytes, kNumFlags>::SetExtraBytes(
+    ConstByteSpan extra) {
+  if constexpr (kNumExtraBytes > 0) {
+    auto* data = reinterpret_cast<std::byte*>(this) + sizeof(*this);
+    if (kNumExtraBytes <= extra.size()) {
+      std::memcpy(data, extra.data(), kNumExtraBytes);
+    } else {
+      std::memcpy(data, extra.data(), extra.size());
+      std::memset(data + extra.size(), 0, kNumExtraBytes - extra.size());
+    }
+  }
+}
+
+/// Returns the extra data from the block.
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+ConstByteSpan Block<OffsetType, kNumExtraBytes, kNumFlags>::GetExtraBytes()
+    const {
+  if constexpr (kNumExtraBytes > 0) {
+    const auto* data = reinterpret_cast<const std::byte*>(this) + sizeof(*this);
+    return ConstByteSpan{data, kNumExtraBytes};
+  } else {
+    return ConstByteSpan{};
+  }
 }
 
 // Private template method implementations.
 
-template <typename UintType, size_t kMaxSize>
-Block<UintType, kMaxSize>::Block(size_t prev_offset, size_t next_offset)
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Block<OffsetType, kNumExtraBytes, kNumFlags>::Block(size_t prev_offset,
+                                                    size_t next_offset)
     : BaseBlock() {
   SetOffset(prev_, prev_offset);
   SetOffset(next_, next_offset);
 }
 
-template <typename UintType, size_t kMaxSize>
-ByteSpan Block<UintType, kMaxSize>::AsBytes(Block*&& block) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+ByteSpan Block<OffsetType, kNumExtraBytes, kNumFlags>::AsBytes(Block*&& block) {
   size_t block_size = block->OuterSize();
   std::byte* bytes = ::new (std::move(block)) std::byte[block_size];
   return {bytes, block_size};
 }
 
-template <typename UintType, size_t kMaxSize>
-Block<UintType, kMaxSize>* Block<UintType, kMaxSize>::AsBlock(
-    size_t prev_offset, ByteSpan bytes) {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+Block<OffsetType, kNumExtraBytes, kNumFlags>*
+Block<OffsetType, kNumExtraBytes, kNumFlags>::AsBlock(size_t prev_offset,
+                                                      ByteSpan bytes) {
   return ::new (bytes.data()) Block(prev_offset, bytes.size());
 }
 
-template <typename UintType, size_t kMaxSize>
-typename Block<UintType, kMaxSize>::BlockStatus
-Block<UintType, kMaxSize>::CheckStatus() const {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+typename Block<OffsetType, kNumExtraBytes, kNumFlags>::BlockStatus
+Block<OffsetType, kNumExtraBytes, kNumFlags>::CheckStatus() const {
   // Make sure the Block is aligned.
   if (reinterpret_cast<uintptr_t>(this) % kAlignment != 0) {
     return BlockStatus::kMisaligned;
@@ -834,8 +916,8 @@
   return BlockStatus::kValid;
 }
 
-template <typename UintType, size_t kMaxSize>
-void Block<UintType, kMaxSize>::CrashIfInvalid() {
+template <typename OffsetType, size_t kNumExtraBytes, size_t kNumFlags>
+void Block<OffsetType, kNumExtraBytes, kNumFlags>::CrashIfInvalid() {
   uintptr_t addr = reinterpret_cast<uintptr_t>(this);
   switch (CheckStatus()) {
     case kValid:
diff --git a/pw_allocator/public/pw_allocator/split_free_list_allocator.h b/pw_allocator/public/pw_allocator/split_free_list_allocator.h
index 1381985..77f813e 100644
--- a/pw_allocator/public/pw_allocator/split_free_list_allocator.h
+++ b/pw_allocator/public/pw_allocator/split_free_list_allocator.h
@@ -170,18 +170,7 @@
 template <typename BlockType>
 Status SplitFreeListAllocator<BlockType>::Init(ByteSpan region,
                                                size_t threshold) {
-  if (region.data() == nullptr) {
-    return Status::InvalidArgument();
-  }
-  if (BlockType::kCapacity < region.size()) {
-    return Status::OutOfRange();
-  }
-  printf("### (%s:%d)\n", __FILE__, __LINE__);
-  // Blocks need to be aligned. Find the first aligned address, and use as much
-  // of the memory region as possible.
-  auto addr = reinterpret_cast<uintptr_t>(region.data());
-  auto aligned = AlignUp(addr, BlockType::kAlignment);
-  Result<BlockType*> result = BlockType::Init(region.subspan(aligned - addr));
+  Result<BlockType*> result = BlockType::Init(region);
   if (!result.ok()) {
     return result.status();
   }
@@ -195,7 +184,6 @@
   last_free_ = block;
 
   threshold_ = threshold;
-  printf("### (%s:%d)\n", __FILE__, __LINE__);
   return OkStatus();
 }
 
diff --git a/pw_allocator/split_free_list_allocator_test.cc b/pw_allocator/split_free_list_allocator_test.cc
index a8a1ae5..6e739ae 100644
--- a/pw_allocator/split_free_list_allocator_test.cc
+++ b/pw_allocator/split_free_list_allocator_test.cc
@@ -33,7 +33,7 @@
 // considered "small".
 static constexpr size_t kThreshold = 64;
 
-using BlockType = Block<uint16_t, kCapacity>;
+using BlockType = Block<uint16_t>;
 
 // Test case fixture that allows individual tests to cache allocations and
 // release them automatically on tear-down.