Fix alignment when using PacketBuffer reserve space (#19244)

* Fix alignment when using PacketBuffer reserve space

#### Problem

Some UDPEndPoint implementation code assumes that IPPacketInfo
is aligned to 4 bytes, when storing one inside PacketBuffer
reserved space. It would also fail to identify the available
reserve if there was enough space, but less that 3 bytes more
than enough.

See #17213 Re-alignment logic makes incorrect assumptions

#### Change overview

Adds a PacketBuffer::GetReserve<T>() that returns a pointer in
the buffer reserve space suitable in size and alignment for a T,
and uses it for `GetPacketInfo()` in `UDPEndPointImplLwIP` and
`UDPEndPointImplOT`.

#### Testing

Added a unit test, `PacketBufferTest::CheckGetReserve()`.

* Use EnsureReservedSize()
diff --git a/src/inet/UDPEndPointImplLwIP.cpp b/src/inet/UDPEndPointImplLwIP.cpp
index b3ce8a2..a423075 100644
--- a/src/inet/UDPEndPointImplLwIP.cpp
+++ b/src/inet/UDPEndPointImplLwIP.cpp
@@ -471,16 +471,7 @@
 
 IPPacketInfo * UDPEndPointImplLwIP::GetPacketInfo(const System::PacketBufferHandle & aBuffer)
 {
-    if (!aBuffer->EnsureReservedSize(sizeof(IPPacketInfo) + 3))
-    {
-        return nullptr;
-    }
-
-    uintptr_t lStart           = (uintptr_t) aBuffer->Start();
-    uintptr_t lPacketInfoStart = lStart - sizeof(IPPacketInfo);
-
-    // Align to a 4-byte boundary
-    return reinterpret_cast<IPPacketInfo *>(lPacketInfoStart & ~(sizeof(uint32_t) - 1));
+    return aBuffer->GetReserve<IPPacketInfo>();
 }
 
 } // namespace Inet
diff --git a/src/inet/UDPEndPointImplOpenThread.cpp b/src/inet/UDPEndPointImplOpenThread.cpp
index 02f11ba..a9fb437 100644
--- a/src/inet/UDPEndPointImplOpenThread.cpp
+++ b/src/inet/UDPEndPointImplOpenThread.cpp
@@ -34,12 +34,10 @@
 
 namespace {
 // We want to reserve space for an IPPacketInfo in our buffer, but it needs to
-// be 4-byte aligned.  We ensure the alignment by masking off the low bits of
-// the pointer that we get by doing `Start() - sizeof(IPPacketInfo)`.  That
-// might move it backward by up to kPacketInfoAlignmentBytes, so we need to make
-// sure we allocate enough reserved space that this will still be within our
-// buffer.
-constexpr uint16_t kPacketInfoAlignmentBytes = sizeof(uint32_t) - 1;
+// be suitably aligned. That might move it backward by up to
+// kPacketInfoAlignmentBytes, so we need to make sure we allocate enough
+// reserved space that this will still be within our buffer.
+constexpr uint16_t kPacketInfoAlignmentBytes = alignof(IPPacketInfo) - 1;
 constexpr uint16_t kPacketInfoReservedSize   = sizeof(IPPacketInfo) + kPacketInfoAlignmentBytes;
 } // namespace
 
@@ -293,16 +291,7 @@
 
 IPPacketInfo * UDPEndPointImplOT::GetPacketInfo(const System::PacketBufferHandle & aBuffer)
 {
-    if (!aBuffer->EnsureReservedSize(kPacketInfoReservedSize))
-    {
-        return nullptr;
-    }
-
-    uintptr_t lStart           = (uintptr_t) aBuffer->Start();
-    uintptr_t lPacketInfoStart = lStart - sizeof(IPPacketInfo);
-
-    // Align to a 4-byte boundary
-    return reinterpret_cast<IPPacketInfo *>(lPacketInfoStart & ~kPacketInfoAlignmentBytes);
+    return aBuffer->GetReserve<IPPacketInfo>();
 }
 
 } // namespace Inet
diff --git a/src/system/SystemPacketBuffer.cpp b/src/system/SystemPacketBuffer.cpp
index 3f6c136..ce2bc65 100644
--- a/src/system/SystemPacketBuffer.cpp
+++ b/src/system/SystemPacketBuffer.cpp
@@ -401,6 +401,46 @@
     return true;
 }
 
+uint8_t * PacketBuffer::GetReserve(uint16_t aSize, uint16_t aAlignmentMask)
+{
+    // This private utility method requires that `aAlignmentMask` be one less than a power-of-two alignment. This requirement
+    // is satisfied when aAlignmentMask=alignof(T)-1 for some type T, as passed by the public GetReserve<T>().
+
+    // Computing `reserveStart` can't overflow because a valid PacketBuffer must be larger than kStructureSize.
+    const uintptr_t reserveStart = reinterpret_cast<uintptr_t>(this) + kStructureSize;
+    if (reserveStart + aSize < reserveStart)
+    {
+        // Overflow here means the requested size can't possibly fit.
+        return nullptr;
+    }
+
+    const uintptr_t requestStart = (reserveStart + aAlignmentMask) & ~aAlignmentMask;
+    if (requestStart < reserveStart)
+    {
+        // Overflow here means the request can't be satisfied because the alignment is too large.
+        return nullptr;
+    }
+
+    // This cast is safe because the difference is at most `aAlignmentMask`.
+    const uint16_t reserveAlignmentOffset = static_cast<uint16_t>(requestStart - reserveStart);
+
+    // This cast is not safe in itself, but the result is checked.
+    const uint16_t requestSize = static_cast<uint16_t>(reserveAlignmentOffset + aSize);
+    if (requestSize < aSize)
+    {
+        // Overflow here means the requested size can't possibly fit.
+        return nullptr;
+    }
+
+    if (!EnsureReservedSize(requestSize))
+    {
+        // The requested size is too large for the available space.
+        return nullptr;
+    }
+
+    return reinterpret_cast<uint8_t *>(requestStart);
+}
+
 bool PacketBuffer::AlignPayload(uint16_t aAlignBytes)
 {
     if (aAlignBytes == 0)
diff --git a/src/system/SystemPacketBuffer.h b/src/system/SystemPacketBuffer.h
index c61cbcd..61b0714 100644
--- a/src/system/SystemPacketBuffer.h
+++ b/src/system/SystemPacketBuffer.h
@@ -116,7 +116,7 @@
 #if CHIP_SYSTEM_CONFIG_USE_LWIP
     static constexpr uint16_t kStructureSize = LWIP_MEM_ALIGN_SIZE(sizeof(struct ::pbuf));
 #else  // CHIP_SYSTEM_CONFIG_USE_LWIP
-    static constexpr uint16_t kStructureSize         = CHIP_SYSTEM_ALIGN_SIZE(sizeof(::chip::System::pbuf), 4u);
+    static constexpr uint16_t kStructureSize = CHIP_SYSTEM_ALIGN_SIZE(sizeof(::chip::System::pbuf), alignof(::chip::System::pbuf));
 #endif // CHIP_SYSTEM_CONFIG_USE_LWIP
 
 public:
@@ -289,6 +289,23 @@
     CHECK_RETURN_VALUE bool EnsureReservedSize(uint16_t aReservedSize);
 
     /**
+     * Get a pointer to reserved space.
+     *
+     *  Returns a pointer to space in the packet buffer with size and alignment suitable for type T. Payload data is
+     *  moved if necessary to increase the reserve. Due to alignment and padding, the returned space is not necessarily
+     *  adjacent to either the packet buffer header or to the payload.
+     *
+     *  @return \c pointer to the requested reserve if available, \c nullptr if there's not enough room in the buffer.
+     */
+    template <typename T>
+    T * GetReserve()
+    {
+        static_assert(sizeof(T) <= UINT16_MAX, "type too large");
+        static_assert(alignof(T) <= UINT16_MAX, "alignment too large");
+        return reinterpret_cast<T *>(GetReserve(static_cast<uint16_t>(sizeof(T)), static_cast<uint16_t>(alignof(T) - 1)));
+    }
+
+    /**
      * Align the buffer payload on the specified bytes boundary.
      *
      *  Moving the payload in the buffer forward if necessary.
@@ -373,6 +390,7 @@
     static void InternalCheck(const PacketBuffer * buffer);
 #endif
 
+    uint8_t * GetReserve(uint16_t aSize, uint16_t aAlignmentMask);
     void AddRef();
     bool HasSoleOwnership() const { return (this->ref == 1); }
     static void Free(PacketBuffer * aPacket);
diff --git a/src/system/tests/TestSystemPacketBuffer.cpp b/src/system/tests/TestSystemPacketBuffer.cpp
index 6f0ba37..f023fa3 100644
--- a/src/system/tests/TestSystemPacketBuffer.cpp
+++ b/src/system/tests/TestSystemPacketBuffer.cpp
@@ -27,6 +27,7 @@
 #define __STDC_LIMIT_MACROS
 #endif
 
+#include <algorithm>
 #include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -114,6 +115,7 @@
     static void CheckConsumeHead(nlTestSuite * inSuite, void * inContext);
     static void CheckConsume(nlTestSuite * inSuite, void * inContext);
     static void CheckEnsureReservedSize(nlTestSuite * inSuite, void * inContext);
+    static void CheckGetReserve(nlTestSuite * inSuite, void * inContext);
     static void CheckAlignPayload(nlTestSuite * inSuite, void * inContext);
     static void CheckNext(nlTestSuite * inSuite, void * inContext);
     static void CheckLast(nlTestSuite * inSuite, void * inContext);
@@ -136,8 +138,18 @@
 
     static void PrintHandle(const char * tag, const PacketBuffer * buffer)
     {
-        printf("%s %p ref=%u len=%-4u next=%p\n", tag, buffer, buffer ? buffer->ref : 0, buffer ? buffer->len : 0,
-               buffer ? buffer->next : nullptr);
+        if (buffer)
+        {
+            const uint16_t reserved_offset = PacketBuffer::kStructureSize;
+            const uint16_t payload_offset =
+                static_cast<uint16_t>(reinterpret_cast<const char *>(buffer->payload) - reinterpret_cast<const char *>(buffer));
+            printf("%s res@%-4u#%-4u pay@%-4u#%-4u %p next=%p ref=%u\n", tag, reserved_offset, payload_offset - reserved_offset,
+                   payload_offset, buffer->len, buffer, buffer->next, buffer->ref);
+        }
+        else
+        {
+            printf("%s NULL\n", tag);
+        }
     }
     static void PrintHandle(const char * tag, const PacketBufferHandle & handle) { PrintHandle(tag, handle.mBuffer); }
 
@@ -162,9 +174,9 @@
     static void PrintHandle(const char * tag, const BufferConfiguration & config) { PrintHandle(tag, config.handle); }
     static void PrintConfig(const char * tag, const BufferConfiguration & config)
     {
-        printf("%s pay=%-4zu len=%-4u res=%-4u:", tag, config.payload_ptr - config.start_buffer, config.init_len,
-               config.reserved_size);
-        PrintHandle("", config.handle);
+        printf("%s res@%-4u#%-4u pay@%-4zu#%-4u (config)\n", tag, PacketBuffer::kStructureSize, config.reserved_size,
+               config.payload_ptr - config.start_buffer, config.init_len);
+        PrintHandle(tag, config.handle);
     }
 
     PacketBufferTest(TestContext * context);
@@ -1143,6 +1155,139 @@
 }
 
 /**
+ *  Test PacketBuffer::GetReserve() function.
+ *
+ *  Description: This tests the private implementation version of GetReserve(),
+ *  since we can't iterate over types. The tested configurations are custom
+ *  for this test, since the usual ones don't cover the relevant boundary conditions.
+ */
+void PacketBufferTest::CheckGetReserve(nlTestSuite * inSuite, void * inContext)
+{
+    struct TestContext * const theContext = static_cast<struct TestContext *>(inContext);
+    PacketBufferTest * const test         = theContext->test;
+    NL_TEST_ASSERT(inSuite, test->mContext == theContext);
+
+    uint8_t payloads[2 * kBlockSize];
+    for (size_t i = 1; i < sizeof(payloads); ++i)
+    {
+        payloads[i] = static_cast<uint8_t>(random());
+    }
+
+    constexpr uint16_t kMax = PacketBuffer::kMaxSizeWithoutReserve;
+
+    // clang-format off
+    const struct Instance
+    {
+        // PacketBuffer initialization:
+        struct {
+            uint16_t reserve_length;
+            uint16_t payload_length;
+        } init;
+        // GetReserve():
+        struct {
+            uint16_t length;    // Must be a multiple of alignment.
+            uint16_t alignment; // Must be a power of 2.
+        } request;
+        // Expected result:
+        struct {
+            bool success;
+            uint16_t motion;
+        } expect;
+    } instances[] = {
+        // PacketBuffer         GetReserve
+        // Reserve Payload      Length   Align      Expect
+        {  {    1,      1},     {   1,      1},     { true,     0 } },  // Fits without moving payload.
+        {  {    0,      1},     {   1,      1},     { true,     1 } },  // Fits by moving payload 1 byte.
+        {  {    0,   kMax},     {   1,      1},     { false,    0 } },  // No space to move payload.
+        {  {   16,      1},     {  16,      8},     { true,     0 } },  // Fits without moving payload.
+        {  {    9,      1},     {   8,      8},     { true,     0 } },  // Fits without moving payload.
+        {  {    9,      1},     {  16,      8},     { true,     7 } },  // Fits by moving payload 7 bytes.
+    };
+    // clang-format on
+
+    for (auto & instance : instances)
+    {
+        BufferConfiguration config(instance.init.reserve_length);
+        test->PrepareTestBuffer(&config, kRecordHandle | kAllowHandleReuse);
+
+        uint8_t * payload_start = config.handle->Start();
+        memcpy(payload_start, payloads, instance.init.payload_length);
+        config.handle->SetDataLength(instance.init.payload_length);
+        uint16_t available_payload_length = config.handle->AvailableDataLength();
+
+        // Check that the packet was initialized correctly.
+        NL_TEST_ASSERT(inSuite, config.handle->ReservedSize() == instance.init.reserve_length);
+        NL_TEST_ASSERT(inSuite, config.handle->TotalLength() == instance.init.payload_length);
+
+        const uint8_t * const reserve =
+            config.handle->GetReserve(instance.request.length, static_cast<uint16_t>(instance.request.alignment - 1));
+        if (instance.expect.success)
+        {
+            NL_TEST_ASSERT(inSuite, reserve != nullptr);
+
+            // Verify that the payload is intact.
+            NL_TEST_ASSERT(inSuite, config.handle->TotalLength() == instance.init.payload_length);
+            NL_TEST_ASSERT(inSuite, memcmp(config.handle->Start(), payloads, instance.init.payload_length) == 0);
+
+            // Verify that the payload was moved or not.
+            NL_TEST_ASSERT(inSuite, payload_start + instance.expect.motion == config.handle->Start());
+            NL_TEST_ASSERT(inSuite, available_payload_length - instance.expect.motion == config.handle->AvailableDataLength());
+        }
+        else
+        {
+            NL_TEST_ASSERT(inSuite, reserve == nullptr);
+        }
+    }
+
+    // Check the case that the packet buffer starts so close to the end of memory that adding the requested length would overflow
+    // and result in a pointer that incorrectly appears to be before the payload start.
+    //
+    // NB: White-box test! This relies on knowing that the implementation of GetReserve() returns without dereferencing the
+    // pointer in this case.
+    // NB: Implementation-defined test! Merely creating an invalid pointer is allowed to fail (but is NOT ‘undefined behavior’),
+    // although it does not on any currently common platform.
+    PacketBuffer * badPointer = reinterpret_cast<PacketBuffer *>(static_cast<uintptr_t>(-PacketBuffer::kStructureSize - 1));
+    NL_TEST_ASSERT(inSuite, badPointer->GetReserve(1, 1) == nullptr);
+
+    // Check the case that the packet buffer starts so close to the end of memory that the requested alignment would overflow
+    // and result in a pointer that incorrectly appears to be before the payload start.
+    //
+    // NB: White-box test! This relies on knowing that the implementation of GetReserve() returns without dereferencing the
+    // pointer in this case.
+    // NB: Implementation-defined test! Merely creating an invalid pointer is allowed to fail (but is NOT ‘undefined behavior’),
+    // although it does not on any currently common platform.
+    badPointer = reinterpret_cast<PacketBuffer *>(static_cast<uintptr_t>(-PacketBuffer::kStructureSize - 2));
+    NL_TEST_ASSERT(inSuite, badPointer->GetReserve(1, 2) == nullptr);
+
+    // Check the case that the requested alignment is greater than PacketBuffer alignment, so that, depending on the actual
+    // location of the PacketBuffer, the returned pointer may or may not be able to be located immediately after the header.
+    //
+    // NB: White-box test! This can't happen with heap-allocated buffers, since heap allocation returns a maximally aligned
+    // pointer, so we need to allocate and set up the PacketBuffer internal state manually. We construct a pair of packet
+    // buffers pb[] such that one of the two is aligned to (2*alignof(PacketBuffer)) and the other is not.
+    constexpr size_t kHeadersPerBlock    = (PacketBuffer::kBlockSize + sizeof(PacketBuffer) - 1) / sizeof(PacketBuffer);
+    constexpr size_t kOddHeadersPerBlock = (kHeadersPerBlock % 2) == 0 ? kHeadersPerBlock + 1 : kHeadersPerBlock;
+    NL_TEST_ASSERT(inSuite, kOddHeadersPerBlock > 2);
+    PacketBuffer holder[2 * kOddHeadersPerBlock + 1];
+    PacketBuffer * pb[2] = { &holder[0], &holder[kOddHeadersPerBlock] };
+
+    constexpr size_t kBigAlignment = 2 * alignof(PacketBuffer);
+    for (int i = 0; i < 2; ++i)
+    {
+        pb[i]->next    = nullptr;
+        pb[i]->payload = reinterpret_cast<uint8_t *>(pb[i]) + 2 * sizeof(PacketBuffer) + 1;
+        pb[i]->tot_len = pb[i]->len = 1;
+        pb[i]->ref                  = 0;
+#if CHIP_SYSTEM_PACKETBUFFER_FROM_CHIP_HEAP
+        pb[i]->alloc_size = kHeadersPerBlock * sizeof(PacketBuffer);
+#endif
+        const uint8_t * const reserve = pb[i]->GetReserve(1, kBigAlignment - 1);
+        NL_TEST_ASSERT(inSuite, reserve != nullptr);
+        NL_TEST_ASSERT(inSuite, (reinterpret_cast<uintptr_t>(reserve) % kBigAlignment) == 0);
+    }
+}
+
+/**
  *  Test PacketBuffer::AlignPayload() function.
  *
  *  Description: For every buffer-configuration from inContext, create a
@@ -1987,6 +2132,7 @@
     NL_TEST_DEF("PacketBuffer::ConsumeHead",            PacketBufferTest::CheckConsumeHead),
     NL_TEST_DEF("PacketBuffer::Consume",                PacketBufferTest::CheckConsume),
     NL_TEST_DEF("PacketBuffer::EnsureReservedSize",     PacketBufferTest::CheckEnsureReservedSize),
+    NL_TEST_DEF("PacketBuffer::GetReserve",             PacketBufferTest::CheckGetReserve),
     NL_TEST_DEF("PacketBuffer::AlignPayload",           PacketBufferTest::CheckAlignPayload),
     NL_TEST_DEF("PacketBuffer::Next",                   PacketBufferTest::CheckNext),
     NL_TEST_DEF("PacketBuffer::Last",                   PacketBufferTest::CheckLast),