Stop treating BUSY response as fatal when CASE re-attempts are enabled. (#32490)

1. We now communicate the requested delay to the session establishment delegate
   when we get a BUSY response.
2. OperationalSessionSetup, when it has CASE re-attempts enabled, treats a BUSY
   response as a trigger to reattempt, not a fatal error.
3. In CASEServer, set the requested delay to a better value if we have sent
   Sigma2 and are waiting for a response to that.

Fixes https://github.com/project-chip/connectedhomeip/issues/28288
diff --git a/src/app/OperationalSessionSetup.cpp b/src/app/OperationalSessionSetup.cpp
index 179e2a3..6df07ca 100644
--- a/src/app/OperationalSessionSetup.cpp
+++ b/src/app/OperationalSessionSetup.cpp
@@ -430,7 +430,7 @@
     // member instead of having a boolean
     // mTryingNextResultDueToSessionEstablishmentError, so we can recover the
     // error in UpdateDeviceData.
-    if (CHIP_ERROR_TIMEOUT == error)
+    if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
     {
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         // Make a copy of the ReliableMessageProtocolConfig, since our
@@ -480,6 +480,15 @@
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
 }
 
+void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
+{
+#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
+    // Store the requested delay, so that we can use it for scheduling our
+    // retry.
+    mRequestedBusyDelay = requestedDelay;
+#endif
+}
+
 void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
 {
     VerifyOrReturn(mState == State::Connecting,
@@ -705,9 +714,22 @@
     static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
                       (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
                   "Our backoff calculation will overflow.");
-    timerDelay = System::Clock::Seconds16(
+    System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
         static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
                               << min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
+    const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
+    if (responseWasBusy)
+    {
+        if (mRequestedBusyDelay > actualTimerDelay)
+        {
+            actualTimerDelay = mRequestedBusyDelay;
+        }
+
+        // Reset mRequestedBusyDelay now that we have consumed it, so it does
+        // not affect future reattempts not triggered by a busy response.
+        mRequestedBusyDelay = System::Clock::kZero;
+    }
+
     if (mAttemptsDone % 2 == 0)
     {
         // It's possible that the other side received one of our Sigma1 messages
@@ -716,11 +738,22 @@
         // listening for Sigma1 messages again.
         //
         // To handle that, on every other retry, add the amount of time it would
-        // take the other side to time out.
+        // take the other side to time out.  It would be nice if we could rely
+        // on the delay reported in a BUSY response to just tell us that value,
+        // but in practice for old devices BUSY often sends some hardcoded value
+        // that tells us nothing about when the other side will decide it has
+        // timed out.
         auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
-        timerDelay += std::chrono::duration_cast<System::Clock::Seconds16>(additionalTimeout);
+        actualTimerDelay += additionalTimeout;
     }
-    CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(timerDelay, TrySetupAgain, this);
+    timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);
+
+    CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);
+
+    // TODO: If responseWasBusy, should we increment, mRemainingAttempts and
+    // mResolveAttemptsAllowed, since we were explicitly told to retry?  Hard to
+    // tell what consumers expect out of a capped retry count here.
+
     // The cast on count() is needed because the type count() returns might not
     // actually be uint16_t; on some platforms it's int.
     ChipLogProgress(Discovery,
diff --git a/src/app/OperationalSessionSetup.h b/src/app/OperationalSessionSetup.h
index 45b571a..9d24faa 100644
--- a/src/app/OperationalSessionSetup.h
+++ b/src/app/OperationalSessionSetup.h
@@ -227,6 +227,7 @@
     //////////// SessionEstablishmentDelegate Implementation ///////////////
     void OnSessionEstablished(const SessionHandle & session) override;
     void OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage) override;
+    void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) override;
 
     ScopedNodeId GetPeerId() const { return mPeerId; }
 
@@ -319,6 +320,8 @@
 
     uint8_t mResolveAttemptsAllowed = 0;
 
+    System::Clock::Milliseconds16 mRequestedBusyDelay = System::Clock::kZero;
+
     Callback::CallbackDeque mConnectionRetry;
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
 
diff --git a/src/protocols/secure_channel/CASEServer.cpp b/src/protocols/secure_channel/CASEServer.cpp
index 2ad196a..df0984d 100644
--- a/src/protocols/secure_channel/CASEServer.cpp
+++ b/src/protocols/secure_channel/CASEServer.cpp
@@ -90,9 +90,31 @@
             // Handshake wasn't stuck, send the busy status report and let the existing handshake continue.
 
             // A successful CASE handshake can take several seconds and some may time out (30 seconds or more).
-            // TODO: Come up with better estimate: https://github.com/project-chip/connectedhomeip/issues/28288
-            // For now, setting minimum wait time to 5000 milliseconds.
-            CHIP_ERROR err = SendBusyStatusReport(ec, System::Clock::Milliseconds16(5000));
+
+            System::Clock::Milliseconds16 delay = System::Clock::kZero;
+            if (GetSession().GetState() == CASESession::State::kSentSigma2)
+            {
+                // The delay should be however long we think it will take for
+                // that to time out.
+                auto sigma2Timeout = CASESession::ComputeSigma2ResponseTimeout(GetSession().GetRemoteMRPConfig());
+                if (sigma2Timeout < System::Clock::Milliseconds16::max())
+                {
+                    delay = std::chrono::duration_cast<System::Clock::Milliseconds16>(sigma2Timeout);
+                }
+                else
+                {
+                    // Avoid overflow issues, just wait for as long as we can to
+                    // get close to our expected Sigma2 timeout.
+                    delay = System::Clock::Milliseconds16::max();
+                }
+            }
+            else
+            {
+                // For now, setting minimum wait time to 5000 milliseconds if we
+                // have no other information.
+                delay = System::Clock::Milliseconds16(5000);
+            }
+            CHIP_ERROR err = SendBusyStatusReport(ec, delay);
             if (err != CHIP_NO_ERROR)
             {
                 ChipLogError(Inet, "Failed to send the busy status report, err:%" CHIP_ERROR_FORMAT, err.Format());
diff --git a/src/protocols/secure_channel/CASESession.cpp b/src/protocols/secure_channel/CASESession.cpp
index 30d1f71..a9b6248 100644
--- a/src/protocols/secure_channel/CASESession.cpp
+++ b/src/protocols/secure_channel/CASESession.cpp
@@ -1966,7 +1966,8 @@
     Finish();
 }
 
-CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+                                              Optional<uintptr_t> protocolData)
 {
     CHIP_ERROR err = CHIP_NO_ERROR;
     switch (protocolCode)
@@ -1981,6 +1982,10 @@
 
     case kProtocolCodeBusy:
         err = CHIP_ERROR_BUSY;
+        if (protocolData.HasValue())
+        {
+            mDelegate->OnResponderBusy(System::Clock::Milliseconds16(static_cast<uint16_t>(protocolData.Value())));
+        }
         break;
 
     default:
diff --git a/src/protocols/secure_channel/CASESession.h b/src/protocols/secure_channel/CASESession.h
index cb7f3c7..9e41f6c 100644
--- a/src/protocols/secure_channel/CASESession.h
+++ b/src/protocols/secure_channel/CASESession.h
@@ -272,7 +272,8 @@
                                       const ByteSpan & skInfo, const ByteSpan & nonce);
 
     void OnSuccessStatusReport() override;
-    CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
+    CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+                                     Optional<uintptr_t> protocolData) override;
 
     void AbortPendingEstablish(CHIP_ERROR err);
 
diff --git a/src/protocols/secure_channel/PASESession.cpp b/src/protocols/secure_channel/PASESession.cpp
index 50b1861..40dc677 100644
--- a/src/protocols/secure_channel/PASESession.cpp
+++ b/src/protocols/secure_channel/PASESession.cpp
@@ -759,7 +759,8 @@
     Finish();
 }
 
-CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+                                              Optional<uintptr_t> protocolData)
 {
     CHIP_ERROR err = CHIP_NO_ERROR;
     switch (protocolCode)
diff --git a/src/protocols/secure_channel/PASESession.h b/src/protocols/secure_channel/PASESession.h
index 393d3a6..e270baf 100644
--- a/src/protocols/secure_channel/PASESession.h
+++ b/src/protocols/secure_channel/PASESession.h
@@ -203,7 +203,8 @@
     CHIP_ERROR HandleMsg3(System::PacketBufferHandle && msg);
 
     void OnSuccessStatusReport() override;
-    CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
+    CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+                                     Optional<uintptr_t> protocolData) override;
 
     void Finish();
 
diff --git a/src/protocols/secure_channel/PairingSession.h b/src/protocols/secure_channel/PairingSession.h
index 8ed9f26..f49dbf7 100644
--- a/src/protocols/secure_channel/PairingSession.h
+++ b/src/protocols/secure_channel/PairingSession.h
@@ -26,6 +26,7 @@
 #pragma once
 
 #include <lib/core/CHIPError.h>
+#include <lib/core/Optional.h>
 #include <lib/core/TLV.h>
 #include <messaging/ExchangeContext.h>
 #include <messaging/SessionParameters.h>
@@ -129,7 +130,11 @@
 
     void SetPeerSessionId(uint16_t id) { mPeerSessionId.SetValue(id); }
     virtual void OnSuccessStatusReport() {}
-    virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+
+    // Handle a failure StatusReport message from the server.  protocolData will
+    // depend on exactly what the generalCode/protocolCode are.
+    virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+                                             Optional<uintptr_t> protocolData)
     {
         return CHIP_ERROR_INTERNAL;
     }
@@ -174,6 +179,7 @@
             return CHIP_NO_ERROR;
         }
 
+        Optional<uintptr_t> protocolData;
         if (report.GetGeneralCode() == Protocols::SecureChannel::GeneralStatusCode::kBusy &&
             report.GetProtocolCode() == Protocols::SecureChannel::kProtocolCodeBusy)
         {
@@ -189,15 +195,15 @@
                 }
                 else
                 {
-                    // TODO: CASE: Notify minimum wait time to clients on receiving busy status report #28290
                     ChipLogProgress(SecureChannel, "Received busy status report with minimum wait time: %u ms", minimumWaitTime);
+                    protocolData.Emplace(minimumWaitTime);
                 }
             }
         }
 
         // It's very important that we propagate the return value from
         // OnFailureStatusReport out to the caller.  Make sure we return it directly.
-        return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode());
+        return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode(), protocolData);
     }
 
     /**
diff --git a/src/protocols/secure_channel/SessionEstablishmentDelegate.h b/src/protocols/secure_channel/SessionEstablishmentDelegate.h
index a50949e..c0ba56d 100644
--- a/src/protocols/secure_channel/SessionEstablishmentDelegate.h
+++ b/src/protocols/secure_channel/SessionEstablishmentDelegate.h
@@ -25,6 +25,7 @@
 
 #pragma once
 
+#include <system/SystemClock.h>
 #include <system/SystemPacketBuffer.h>
 #include <transport/Session.h>
 #include <transport/raw/MessageHeader.h>
@@ -80,6 +81,15 @@
      */
     virtual void OnSessionEstablished(const SessionHandle & session) {}
 
+    /**
+     * Called when the responder has responded with a "busy" status code and
+     * provided a requested delay.
+     *
+     * This call will be followed by an OnSessionEstablishmentError with
+     * CHIP_ERROR_BUSY as the error.
+     */
+    virtual void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) {}
+
     virtual ~SessionEstablishmentDelegate() {}
 };