Stop treating BUSY response as fatal when CASE re-attempts are enabled. (#32490)
1. We now communicate the requested delay to the session establishment delegate
when we get a BUSY response.
2. OperationalSessionSetup, when it has CASE re-attempts enabled, treats a BUSY
response as a trigger to reattempt, not a fatal error.
3. In CASEServer, set the requested delay to a better value if we have sent
Sigma2 and are waiting for a response to that.
Fixes https://github.com/project-chip/connectedhomeip/issues/28288
diff --git a/src/app/OperationalSessionSetup.cpp b/src/app/OperationalSessionSetup.cpp
index 179e2a3..6df07ca 100644
--- a/src/app/OperationalSessionSetup.cpp
+++ b/src/app/OperationalSessionSetup.cpp
@@ -430,7 +430,7 @@
// member instead of having a boolean
// mTryingNextResultDueToSessionEstablishmentError, so we can recover the
// error in UpdateDeviceData.
- if (CHIP_ERROR_TIMEOUT == error)
+ if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Make a copy of the ReliableMessageProtocolConfig, since our
@@ -480,6 +480,15 @@
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
+void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
+{
+#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
+ // Store the requested delay, so that we can use it for scheduling our
+ // retry.
+ mRequestedBusyDelay = requestedDelay;
+#endif
+}
+
void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
{
VerifyOrReturn(mState == State::Connecting,
@@ -705,9 +714,22 @@
static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
(1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
"Our backoff calculation will overflow.");
- timerDelay = System::Clock::Seconds16(
+ System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
<< min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
+ const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
+ if (responseWasBusy)
+ {
+ if (mRequestedBusyDelay > actualTimerDelay)
+ {
+ actualTimerDelay = mRequestedBusyDelay;
+ }
+
+ // Reset mRequestedBusyDelay now that we have consumed it, so it does
+ // not affect future reattempts not triggered by a busy response.
+ mRequestedBusyDelay = System::Clock::kZero;
+ }
+
if (mAttemptsDone % 2 == 0)
{
// It's possible that the other side received one of our Sigma1 messages
@@ -716,11 +738,22 @@
// listening for Sigma1 messages again.
//
// To handle that, on every other retry, add the amount of time it would
- // take the other side to time out.
+ // take the other side to time out. It would be nice if we could rely
+ // on the delay reported in a BUSY response to just tell us that value,
+ // but in practice for old devices BUSY often sends some hardcoded value
+ // that tells us nothing about when the other side will decide it has
+ // timed out.
auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
- timerDelay += std::chrono::duration_cast<System::Clock::Seconds16>(additionalTimeout);
+ actualTimerDelay += additionalTimeout;
}
- CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(timerDelay, TrySetupAgain, this);
+ timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);
+
+ CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);
+
+ // TODO: If responseWasBusy, should we increment, mRemainingAttempts and
+ // mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to
+ // tell what consumers expect out of a capped retry count here.
+
// The cast on count() is needed because the type count() returns might not
// actually be uint16_t; on some platforms it's int.
ChipLogProgress(Discovery,
diff --git a/src/app/OperationalSessionSetup.h b/src/app/OperationalSessionSetup.h
index 45b571a..9d24faa 100644
--- a/src/app/OperationalSessionSetup.h
+++ b/src/app/OperationalSessionSetup.h
@@ -227,6 +227,7 @@
//////////// SessionEstablishmentDelegate Implementation ///////////////
void OnSessionEstablished(const SessionHandle & session) override;
void OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage) override;
+ void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) override;
ScopedNodeId GetPeerId() const { return mPeerId; }
@@ -319,6 +320,8 @@
uint8_t mResolveAttemptsAllowed = 0;
+ System::Clock::Milliseconds16 mRequestedBusyDelay = System::Clock::kZero;
+
Callback::CallbackDeque mConnectionRetry;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
diff --git a/src/protocols/secure_channel/CASEServer.cpp b/src/protocols/secure_channel/CASEServer.cpp
index 2ad196a..df0984d 100644
--- a/src/protocols/secure_channel/CASEServer.cpp
+++ b/src/protocols/secure_channel/CASEServer.cpp
@@ -90,9 +90,31 @@
// Handshake wasn't stuck, send the busy status report and let the existing handshake continue.
// A successful CASE handshake can take several seconds and some may time out (30 seconds or more).
- // TODO: Come up with better estimate: https://github.com/project-chip/connectedhomeip/issues/28288
- // For now, setting minimum wait time to 5000 milliseconds.
- CHIP_ERROR err = SendBusyStatusReport(ec, System::Clock::Milliseconds16(5000));
+
+ System::Clock::Milliseconds16 delay = System::Clock::kZero;
+ if (GetSession().GetState() == CASESession::State::kSentSigma2)
+ {
+ // The delay should be however long we think it will take for
+ // that to time out.
+ auto sigma2Timeout = CASESession::ComputeSigma2ResponseTimeout(GetSession().GetRemoteMRPConfig());
+ if (sigma2Timeout < System::Clock::Milliseconds16::max())
+ {
+ delay = std::chrono::duration_cast<System::Clock::Milliseconds16>(sigma2Timeout);
+ }
+ else
+ {
+ // Avoid overflow issues, just wait for as long as we can to
+ // get close to our expected Sigma2 timeout.
+ delay = System::Clock::Milliseconds16::max();
+ }
+ }
+ else
+ {
+ // For now, setting minimum wait time to 5000 milliseconds if we
+ // have no other information.
+ delay = System::Clock::Milliseconds16(5000);
+ }
+ CHIP_ERROR err = SendBusyStatusReport(ec, delay);
if (err != CHIP_NO_ERROR)
{
ChipLogError(Inet, "Failed to send the busy status report, err:%" CHIP_ERROR_FORMAT, err.Format());
diff --git a/src/protocols/secure_channel/CASESession.cpp b/src/protocols/secure_channel/CASESession.cpp
index 30d1f71..a9b6248 100644
--- a/src/protocols/secure_channel/CASESession.cpp
+++ b/src/protocols/secure_channel/CASESession.cpp
@@ -1966,7 +1966,8 @@
Finish();
}
-CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+ Optional<uintptr_t> protocolData)
{
CHIP_ERROR err = CHIP_NO_ERROR;
switch (protocolCode)
@@ -1981,6 +1982,10 @@
case kProtocolCodeBusy:
err = CHIP_ERROR_BUSY;
+ if (protocolData.HasValue())
+ {
+ mDelegate->OnResponderBusy(System::Clock::Milliseconds16(static_cast<uint16_t>(protocolData.Value())));
+ }
break;
default:
diff --git a/src/protocols/secure_channel/CASESession.h b/src/protocols/secure_channel/CASESession.h
index cb7f3c7..9e41f6c 100644
--- a/src/protocols/secure_channel/CASESession.h
+++ b/src/protocols/secure_channel/CASESession.h
@@ -272,7 +272,8 @@
const ByteSpan & skInfo, const ByteSpan & nonce);
void OnSuccessStatusReport() override;
- CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
+ CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+ Optional<uintptr_t> protocolData) override;
void AbortPendingEstablish(CHIP_ERROR err);
diff --git a/src/protocols/secure_channel/PASESession.cpp b/src/protocols/secure_channel/PASESession.cpp
index 50b1861..40dc677 100644
--- a/src/protocols/secure_channel/PASESession.cpp
+++ b/src/protocols/secure_channel/PASESession.cpp
@@ -759,7 +759,8 @@
Finish();
}
-CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+ Optional<uintptr_t> protocolData)
{
CHIP_ERROR err = CHIP_NO_ERROR;
switch (protocolCode)
diff --git a/src/protocols/secure_channel/PASESession.h b/src/protocols/secure_channel/PASESession.h
index 393d3a6..e270baf 100644
--- a/src/protocols/secure_channel/PASESession.h
+++ b/src/protocols/secure_channel/PASESession.h
@@ -203,7 +203,8 @@
CHIP_ERROR HandleMsg3(System::PacketBufferHandle && msg);
void OnSuccessStatusReport() override;
- CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
+ CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+ Optional<uintptr_t> protocolData) override;
void Finish();
diff --git a/src/protocols/secure_channel/PairingSession.h b/src/protocols/secure_channel/PairingSession.h
index 8ed9f26..f49dbf7 100644
--- a/src/protocols/secure_channel/PairingSession.h
+++ b/src/protocols/secure_channel/PairingSession.h
@@ -26,6 +26,7 @@
#pragma once
#include <lib/core/CHIPError.h>
+#include <lib/core/Optional.h>
#include <lib/core/TLV.h>
#include <messaging/ExchangeContext.h>
#include <messaging/SessionParameters.h>
@@ -129,7 +130,11 @@
void SetPeerSessionId(uint16_t id) { mPeerSessionId.SetValue(id); }
virtual void OnSuccessStatusReport() {}
- virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
+
+ // Handle a failure StatusReport message from the server. protocolData will
+ // depend on exactly what the generalCode/protocolCode are.
+ virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
+ Optional<uintptr_t> protocolData)
{
return CHIP_ERROR_INTERNAL;
}
@@ -174,6 +179,7 @@
return CHIP_NO_ERROR;
}
+ Optional<uintptr_t> protocolData;
if (report.GetGeneralCode() == Protocols::SecureChannel::GeneralStatusCode::kBusy &&
report.GetProtocolCode() == Protocols::SecureChannel::kProtocolCodeBusy)
{
@@ -189,15 +195,15 @@
}
else
{
- // TODO: CASE: Notify minimum wait time to clients on receiving busy status report #28290
ChipLogProgress(SecureChannel, "Received busy status report with minimum wait time: %u ms", minimumWaitTime);
+ protocolData.Emplace(minimumWaitTime);
}
}
}
// It's very important that we propagate the return value from
// OnFailureStatusReport out to the caller. Make sure we return it directly.
- return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode());
+ return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode(), protocolData);
}
/**
diff --git a/src/protocols/secure_channel/SessionEstablishmentDelegate.h b/src/protocols/secure_channel/SessionEstablishmentDelegate.h
index a50949e..c0ba56d 100644
--- a/src/protocols/secure_channel/SessionEstablishmentDelegate.h
+++ b/src/protocols/secure_channel/SessionEstablishmentDelegate.h
@@ -25,6 +25,7 @@
#pragma once
+#include <system/SystemClock.h>
#include <system/SystemPacketBuffer.h>
#include <transport/Session.h>
#include <transport/raw/MessageHeader.h>
@@ -80,6 +81,15 @@
*/
virtual void OnSessionEstablished(const SessionHandle & session) {}
+ /**
+ * Called when the responder has responded with a "busy" status code and
+ * provided a requested delay.
+ *
+ * This call will be followed by an OnSessionEstablishmentError with
+ * CHIP_ERROR_BUSY as the error.
+ */
+ virtual void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) {}
+
virtual ~SessionEstablishmentDelegate() {}
};