src/app/OperationalSessionSetup.cpp - third_party/github/project-chip/connectedhomeip - Git at Google

 /*
  *
  *    Copyright (c) 2020-2021 Project CHIP Authors
  *    All rights reserved.
  *
  *    Licensed under the Apache License, Version 2.0 (the "License");
  *    you may not use this file except in compliance with the License.
  *    You may obtain a copy of the License at
  *
  *        http://www.apache.org/licenses/LICENSE-2.0
  *
  *    Unless required by applicable law or agreed to in writing, software
  *    distributed under the License is distributed on an "AS IS" BASIS,
  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *    See the License for the specific language governing permissions and
  *    limitations under the License.
  */

 /**
  *  @file
  *    This file contains the implementation of the OperationalSessionSetup
  *    class. The objects of this class will be used by Controller applications
  *    to interact with CHIP devices. The class provides the mechanism to
  *    establish the secure session over which messages are sent to and
  *    received from the corresponding CHIP devices.
  */

 #include <app/OperationalSessionSetup.h>

 #include <app/CASEClient.h>
 #include <app/InteractionModelEngine.h>
 #include <transport/SecureSession.h>

 #include <lib/address_resolve/AddressResolve.h>
 #include <lib/core/CHIPCore.h>
 #include <lib/core/CHIPEncoding.h>
 #include <lib/dnssd/Resolver.h>
 #include <lib/support/CodeUtils.h>
 #include <lib/support/logging/CHIPLogging.h>
 #include <system/SystemClock.h>
 #include <system/SystemLayer.h>
 #include <tracing/metric_event.h>

 using namespace chip::Callback;
 using chip::AddressResolve::NodeLookupRequest;
 using chip::AddressResolve::Resolver;
 using chip::AddressResolve::ResolveResult;
 using namespace chip::Tracing;

 namespace chip {

 /**
  * Switch the session-setup state machine to aTargetState.
  *
  * Requesting the state we are already in does nothing at all (no logging, no
  * side effects).  On a real transition this logs the change, cancels any
  * pending retry timer when leaving WaitingForRetry (only when automatic CASE
  * retries are compiled in), and releases the CASE client unless the new state
  * is Connecting.
  */
 void OperationalSessionSetup::MoveToState(State aTargetState)
 {
     if (mState == aTargetState)
     {
         // Already there; nothing to log, cancel or release.
         return;
     }

     ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
                   mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
                   to_underlying(aTargetState));

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     // A scheduled reattempt only makes sense while we sit in WaitingForRetry.
     if (mState == State::WaitingForRetry)
     {
         CancelSessionSetupReattempt();
     }
 #endif

     mState = aTargetState;

     // The CASE client is only held on to while a session is being established.
     if (mState != State::Connecting)
     {
         CleanupCASEClient();
     }
 }

 /**
  * Try to reuse a CASE session that the session manager already has for our peer.
  *
  * Only attempted in the NeedsAddress, ResolvingAddress, HasAddress and
  * WaitingForRetry states.  When a matching session is found, its peer address
  * is copied into mDeviceAddress and the session is grabbed into mSecureSession.
  *
  * @return true if an existing session was found and successfully grabbed,
  *         false otherwise.  This method does not change mState.
  */
 bool OperationalSessionSetup::AttachToExistingSecureSession()
 {
     const bool stateAllowsAttach = (mState == State::NeedsAddress) || (mState == State::ResolvingAddress) ||
         (mState == State::HasAddress) || (mState == State::WaitingForRetry);
     if (!stateAllowsAttach)
     {
         return false;
     }

     auto existingSession = mInitParams.sessionManager->FindSecureSessionForNode(
         mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability);
     if (!existingSession.HasValue())
     {
         return false;
     }

     ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
                     ChipLogValueX64(mPeerId.GetNodeId()));

     // Remember where the peer is before taking a reference on the session.
     auto & foundHandle = existingSession.Value();
     mDeviceAddress     = foundHandle->AsSecureSession()->GetPeerAddress();

     // Grab() reports whether the holder actually took the session.
     return mSecureSession.Grab(foundHandle);
 }

 /**
  * Start (or join) session establishment with the peer.
  *
  * The supplied callbacks are always queued first.  Depending on the current
  * state we then either reuse an existing secure session, start address
  * resolution, or simply wait for the attempt that is already in flight.
  * The queued callbacks are dispatched from here only when the outcome is
  * already known: on an error, or when a secure session is available.
  *
  * @param onConnection               invoked when a session is available (may be null)
  * @param onFailure                  invoked with the error on failure (may be null)
  * @param onSetupFailure             invoked with detailed failure info on failure (may be null)
  * @param transportPayloadCapability payload capability to use when looking up / setting up the session
  */
 void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
                                       Callback::Callback<OnDeviceConnectionFailure> * onFailure,
                                       Callback::Callback<OnSetupFailure> * onSetupFailure,
                                       TransportPayloadCapability transportPayloadCapability)
 {
     mTransportPayloadCapability = transportPayloadCapability;

     // Queue the caller's callbacks unconditionally.  Any failure below is
     // reported through the queue (which defensively also covers callbacks
     // left over from an earlier call, although that should not happen).
     EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure);

     CHIP_ERROR status     = CHIP_NO_ERROR;
     bool sessionAvailable = false;

     switch (mState)
     {
     case State::NeedsAddress:
         sessionAvailable = AttachToExistingSecureSession();
         if (sessionAvailable)
         {
             break;
         }

         // The lookup may report a result synchronously, so switch state
         // before starting it.
         MoveToState(State::ResolvingAddress);
         status = LookupPeerAddress();
         if (status != CHIP_NO_ERROR)
         {
             // No lookup is running after all; undo the state change.
             MoveToState(State::NeedsAddress);
         }
         break;

     case State::ResolvingAddress:
     case State::WaitingForRetry:
         // An attempt is already pending; only short-circuit if a session
         // happens to exist by now.
         sessionAvailable = AttachToExistingSecureSession();
         break;

     case State::HasAddress:
         sessionAvailable = AttachToExistingSecureSession();
         if (!sessionAvailable)
         {
             // We should never actually be observed in State::HasAddress:
             // the same call that moves us there either continues on to
             // State::Connecting or dequeues the callbacks with an error
             // (releasing this object) before another Connect can arrive.
             status = CHIP_ERROR_INCORRECT_STATE;
         }
         break;

     case State::Connecting:
         // Session establishment is in progress; the queued callbacks will
         // be notified when it completes.
         break;

     case State::SecureConnected:
         sessionAvailable = true;
         break;

     case State::Uninitialized:
     default:
         status = CHIP_ERROR_INCORRECT_STATE;
         break;
     }

     if (sessionAvailable)
     {
         MoveToState(State::SecureConnected);
     }

     // An error and an available session are not expected at the same time;
     // either one means the outcome is known and callbacks must be dispatched.
     const bool outcomeKnown = sessionAvailable || (status != CHIP_NO_ERROR);
     if (!outcomeKnown)
     {
         return;
     }

     DequeueConnectionCallbacks(status);
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
     // Keep this the last statement of the function.
 }

 /**
  * Convenience overload of Connect for callers that only supply the simple
  * connection-failure callback.  Forwards to the four-argument Connect with a
  * null OnSetupFailure callback.
  */
 void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
                                       Callback::Callback<OnDeviceConnectionFailure> * onFailure,
                                       TransportPayloadCapability transportPayloadCapability)
 {
     // No detailed setup-failure callback for this overload.
     Connect(onConnection, onFailure, nullptr, transportPayloadCapability);
 }

 /**
  * Convenience overload of Connect for callers that only supply the detailed
  * setup-failure callback.  Forwards to the four-argument Connect with a null
  * OnDeviceConnectionFailure callback.
  */
 void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
                                       Callback::Callback<OnSetupFailure> * onSetupFailure,
                                       TransportPayloadCapability transportPayloadCapability)
 {
     // No simple connection-failure callback for this overload.
     Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability);
 }

 /**
  * Consume one address-resolution result for our peer.
  *
  * Stores the resolved address, pushes it to all existing sessions for the
  * peer, and then either completes (when we are only performing an address
  * update) or starts session establishment with that address.  If starting
  * session establishment fails, the resolver is asked for its next result;
  * if there is none, the queued callbacks are notified of the failure.
  *
  * Several paths below end in DequeueConnectionCallbacks, after which `this`
  * must not be touched.
  *
  * @param result the resolved address and MRP configuration for the peer
  */
 void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result)
 {
     auto & config = result.mrpRemoteConfig;
     auto addr     = result.address;
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     // Make sure to clear out our reason for trying the next result first thing,
     // so it does not stick around in various error cases.
     bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError;
     mTryingNextResultDueToSessionEstablishmentError     = false;
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

     // Results arriving while uninitialized are silently ignored.
     if (mState == State::Uninitialized)
     {
         return;
     }

 #if CHIP_DETAIL_LOGGING
     char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize];
     addr.ToString(peerAddrBuff);

     ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d",
                   mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast<int>(mState));
 #endif

     mDeviceAddress = addr;

     // Initialize CASE session state with any MRP parameters that DNS-SD has provided.
     // It can be overridden by CASE session protocol messages that include MRP parameters.
     if (mCASEClient)
     {
         mCASEClient->SetRemoteMRPIntervals(config);
     }

     // Results are only expected while we are resolving; anything else is
     // treated as a fatal state error for this setup attempt.
     if (mState != State::ResolvingAddress)
     {
         ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
         DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
         // Do not touch `this` instance anymore; it has been destroyed in
         // DequeueConnectionCallbacks.
         return;
     }

     MoveToState(State::HasAddress);
     mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr);

     if (mPerformingAddressUpdate)
     {
         // Nothing else to do here.
         DequeueConnectionCallbacks(CHIP_NO_ERROR);
         // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
         return;
     }

     CHIP_ERROR err = EstablishConnection(result);
     LogErrorOnFailure(err);
     if (err == CHIP_NO_ERROR)
     {
         // We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue
         // the state machine forward.
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         if (tryingNextResultDueToSessionEstablishmentError)
         {
             // Our retry has already been kicked off, so claim 0 delay until it
             // starts.  The flag is set by OnSessionEstablishmentError, which
             // takes that path for both CHIP_ERROR_TIMEOUT and CHIP_ERROR_BUSY.
             // NOTE(review): the original error is not stored anywhere, so
             // CHIP_ERROR_TIMEOUT is reported to the retry handlers in both
             // cases -- confirm this is intended.
             NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero);
         }
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         return;
     }

     // Move to the ResolvingAddress state, in case we have more results,
     // since we expect to receive results in that state.  Pretend like we moved
     // on directly to this address from whatever triggered us to try this result
     // (so restore mTryingNextResultDueToSessionEstablishmentError to the value
     // it had at the start of this function).
     MoveToState(State::ResolvingAddress);
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError;
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
     {
         // No need to NotifyRetryHandlers, since we never actually spent any
         // time trying the previous result.  Whatever work we need to do has
         // been handled by our recursive OnNodeAddressResolved callback.  Make
         // sure not to touch `this` under here, because it might have been
         // deleted by OnNodeAddressResolved.
         return;
     }

     // No need to reset mTryingNextResultDueToSessionEstablishmentError here,
     // because we're about to delete ourselves.

     // Report the EstablishConnection failure, since no further result is available.
     DequeueConnectionCallbacks(err);
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
 }

 /**
  * Allocate a CASE client and start session establishment towards mDeviceAddress.
  *
  * When TCP endpoints are compiled in and a large-payload session was
  * requested, the peer must advertise TCP server support; the device address
  * is then switched to the TCP transport.
  *
  * @param result resolve result providing the peer's MRP configuration and
  *               TCP server support flag
  *
  * @return CHIP_NO_ERROR once establishment has started (state is now
  *         Connecting), CHIP_ERROR_INTERNAL if a large-payload session was
  *         requested but the peer has no TCP server, CHIP_ERROR_NO_MEMORY if
  *         no CASE client could be allocated, or the error returned by
  *         CASEClient::EstablishSession.
  */
 CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result)
 {
     const auto & remoteMrpConfig = result.mrpRemoteConfig;

 #if INET_CONFIG_ENABLE_TCP_ENDPOINT
     if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload)
     {
         if (!result.supportsTcpServer)
         {
             // A large-payload session cannot be set up without TCP support on the peer.
             ChipLogError(
                 Discovery,
                 "LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId,
                 ChipLogValueScopedNodeId(mPeerId));
             return CHIP_ERROR_INTERNAL;
         }

         // Large payloads are carried over TCP.
         mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp);
     }
 #endif

     mCASEClient = mClientPool->Allocate();
     VerifyOrReturnError(mCASEClient != nullptr, CHIP_ERROR_NO_MEMORY);

     MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
     CHIP_ERROR establishStatus = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, remoteMrpConfig, this);
     if (establishStatus == CHIP_NO_ERROR)
     {
         MoveToState(State::Connecting);
         return CHIP_NO_ERROR;
     }

     // Establishment never started: close the metric and give the client back.
     MATTER_LOG_METRIC_END(kMetricDeviceCASESession, establishStatus);
     CleanupCASEClient();
     return establishStatus;
 }

 /**
  * Add one set of caller-provided callbacks to mCallbacks, the list that is
  * later drained by DequeueConnectionCallbacks.
  */
 void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback<OnDeviceConnected> * onConnection,
                                                          Callback::Callback<OnDeviceConnectionFailure> * onFailure,
                                                          Callback::Callback<OnSetupFailure> * onSetupFailure)
 {
     mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure);
 }

 /**
  * Drain all queued connection callbacks and notify them of the final outcome.
  *
  * Everything the notifications need is copied out of this object first,
  * because with ReleaseBehavior::Release the release delegate is asked to
  * release this object before any callback runs.
  *
  * @param error           CHIP_NO_ERROR to report success, otherwise the failure to report
  * @param stage           session-establishment stage passed on to setup-failure callbacks
  * @param releaseBehavior whether to hand this object to the release delegate
  */
 void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage,
                                                          ReleaseBehavior releaseBehavior)
 {
     // We expect that we only have callbacks if we are not performing just address update.
     VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty());

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     // Cancel every registered retry handler so none of those cancelables
     // keeps pointing at us once we are gone.
     for (auto * retryHandler = mConnectionRetry.First(); retryHandler != nullptr; retryHandler = mConnectionRetry.First())
     {
         retryHandler->Cancel();
     }
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

     // Snapshot everything the notifications will need.
     SuccessFailureCallbackList pendingCallbacks;
     pendingCallbacks.EnqueueTakeAll(mCallbacks);

     auto * exchangeManager                = mInitParams.exchangeMgr;
     Optional<SessionHandle> sessionHandle = mSecureSession.Get();
     ScopedNodeId peer                     = mPeerId;

 #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
     System::Clock::Milliseconds16 busyDelay = mRequestedBusyDelay;
 #else
     System::Clock::Milliseconds16 busyDelay = System::Clock::kZero;
 #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP

     if (releaseBehavior == ReleaseBehavior::Release)
     {
         VerifyOrDie(mReleaseDelegate != nullptr);
         mReleaseDelegate->ReleaseSession(this);
     }

     // DO NOT touch any members of this object after this point.  It's dead.
     NotifyConnectionCallbacks(pendingCallbacks, error, stage, peer, exchangeManager, sessionHandle, busyDelay);
 }

 /**
  * Invoke every callback set in `ready` with the outcome of session setup.
  *
  * While `error` is CHIP_NO_ERROR the success callback of each entry is
  * called.  If a success callback leaves the session inactive, the remaining
  * entries are notified of CHIP_ERROR_CONNECTION_ABORTED instead.  On error,
  * both failure callbacks of an entry are called when present.
  *
  * This function only uses its arguments, so it is safe to call after the
  * owning object has been released.
  *
  * @param ready                 callbacks to drain
  * @param error                 outcome to report
  * @param stage                 establishment stage reported to setup-failure callbacks
  * @param peerId                peer the outcome refers to
  * @param exchangeMgr           exchange manager handed to success callbacks (must be non-null on success)
  * @param optionalSessionHandle session handed to success callbacks (must hold an active session on success)
  * @param requestedBusyDelay    delay reported with CHIP_ERROR_BUSY when busy handling is compiled in
  */
 void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error,
                                                         SessionEstablishmentStage stage, const ScopedNodeId & peerId,
                                                         Messaging::ExchangeManager * exchangeMgr,
                                                         const Optional<SessionHandle> & optionalSessionHandle,
                                                         System::Clock::Milliseconds16 requestedBusyDelay)
 {
     Callback::Callback<OnDeviceConnected> * successCb              = nullptr;
     Callback::Callback<OnDeviceConnectionFailure> * failureCb      = nullptr;
     Callback::Callback<OnSetupFailure> * detailedFailureCb         = nullptr;

     while (ready.Take(successCb, failureCb, detailedFailureCb))
     {
         if (error != CHIP_NO_ERROR)
         {
             // Failure path: notify whichever failure callbacks this entry has.
             if (failureCb != nullptr)
             {
                 failureCb->mCall(failureCb->mContext, peerId, error);
             }
             if (detailedFailureCb != nullptr)
             {
                 ConnnectionFailureInfo failureInfo(peerId, error, stage);
 #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
                 if (error == CHIP_ERROR_BUSY)
                 {
                     failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay);
                 }
 #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
                 detailedFailureCb->mCall(detailedFailureCb->mContext, failureInfo);
             }
             continue;
         }

         // Success path.
         VerifyOrDie(exchangeMgr);
         VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession());
         if (successCb == nullptr)
         {
             continue;
         }

         successCb->mCall(successCb->mContext, *exchangeMgr, optionalSessionHandle.Value());

         // That successful call might have made the session inactive.  If so,
         // no further success callbacks may run: there is no active session
         // for them, putting it in a holder would fail, and using such a
         // holder as if it had a session would crash.  Later entries get the
         // failure path instead.
         if (optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession())
         {
             continue;
         }

         ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session",
                      ChipLogValueScopedNodeId(peerId));
         error = CHIP_ERROR_CONNECTION_ABORTED;
     }
 }

 /**
  * Handle a failed session-establishment attempt.
  *
  * Ignored (with an error log) unless we are in State::Connecting.  For
  * CHIP_ERROR_TIMEOUT and CHIP_ERROR_BUSY we first ask the resolver for its
  * next result and, when automatic CASE retries are compiled in and attempts
  * remain, schedule a delayed reattempt.  In every other case the queued
  * callbacks are notified of the failure, after which `this` must not be
  * touched.
  *
  * @param error the reason session establishment failed
  * @param stage the establishment stage at which it failed
  */
 void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage)
 {
     VerifyOrReturn(mState == State::Connecting,
                    ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting"));

     // If this condition ever changes, we may need to store the error in a
     // member instead of having a boolean
     // mTryingNextResultDueToSessionEstablishmentError, so we can recover the
     // error in UpdateDeviceData.
     if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
     {
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         // Make a copy of the ReliableMessageProtocolConfig, since our
         // mCASEClient is about to go away once we change state.
         ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals();
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

         // Move to the ResolvingAddress state, in case we have more results,
         // since we expect to receive results in that state.
         MoveToState(State::ResolvingAddress);
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         // Tell UpdateDeviceData that the next result is being tried because
         // of this establishment error.
         mTryingNextResultDueToSessionEstablishmentError = true;
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
         {
             // Whatever work we needed to do has been handled by our
             // OnNodeAddressResolved callback.  Make sure not to touch `this`
             // under here, because it might have been deleted by
             // OnNodeAddressResolved.
             return;
         }
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         // There was no further result, so the flag must not linger.
         mTryingNextResultDueToSessionEstablishmentError = false;
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

         // Moving back to the Connecting state would be a bit of a lie, since we
         // don't have an mCASEClient.  Just go back to NeedsAddress, since
         // that's really where we are now.
         MoveToState(State::NeedsAddress);

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
         if (mRemainingAttempts > 0)
         {
             System::Clock::Seconds16 reattemptDelay;
             CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay);
             if (err == CHIP_NO_ERROR)
             {
                 // The retry timer is armed; wait for it and let the retry
                 // handlers know how long that will take.
                 MoveToState(State::WaitingForRetry);
                 NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay);
                 return;
             }
             // Scheduling failed: fall through and report the original error.
         }
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     }

     // Session failed to be established. This is when discovery is also stopped
     MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
     MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);

     DequeueConnectionCallbacks(error, stage);
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
 }

 /**
  * Record the delay a busy responder asked us to wait.
  *
  * The value is only stored when automatic CASE retries or busy handling for
  * operational session setup are compiled in; otherwise this is a no-op.
  *
  * @param requestedDelay the delay requested by the responder
  */
 void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
 {
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
     // Store the requested delay, so that we can use it for scheduling our
     // retry or communicate it to our API consumer.
     mRequestedBusyDelay = requestedDelay;
 #endif
 }

 /**
  * Handle successful session establishment.
  *
  * Ignored (with an error log) unless we are in State::Connecting.  Closes the
  * discovery and CASE metrics, takes a reference on the new session and
  * notifies the queued callbacks.  If the session cannot be grabbed, the
  * callbacks are notified of CHIP_ERROR_INCORRECT_STATE instead.
  *
  * @param session the newly established session
  */
 void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
 {
     if (mState != State::Connecting)
     {
         ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting");
         return;
     }

     // The session is up, which also marks the end of discovery.
     MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);
     MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);

     if (mSecureSession.Grab(session))
     {
         MoveToState(State::SecureConnected);
         DequeueConnectionCallbacks(CHIP_NO_ERROR);
         // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
         return;
     }

     // The session was not usable.  Still dispatch (an error), because that is
     // what releases this object; skipping it would leak.
     DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
 }

 /**
  * Return the CASE client, if we hold one, to the client pool and forget it.
  * Safe to call when no client is held.
  */
 void OperationalSessionSetup::CleanupCASEClient()
 {
     if (mCASEClient == nullptr)
     {
         // Nothing allocated, nothing to give back.
         return;
     }

     mClientPool->Release(mCASEClient);
     mCASEClient = nullptr;
 }

 /**
  * Tear down this session setup object.
  *
  * Cancels an address lookup that is still active (without invoking its
  * failure callback), returns any CASE client to the pool, cancels a pending
  * retry timer when automatic CASE retries are compiled in, and finally
  * notifies any still-queued callbacks of CHIP_ERROR_CANCELLED without asking
  * the release delegate to release us (we are already being destroyed).
  */
 OperationalSessionSetup::~OperationalSessionSetup()
 {
     if (mAddressLookupHandle.IsActive())
     {
         ChipLogDetail(Discovery,
                       "OperationalSessionSetup[%u:" ChipLogFormatX64
                       "]: Cancelling incomplete address resolution as device is being deleted.",
                       mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));

         // Skip cancel callback since the destructor is being called, so we assume that this object is
         // obviously not used anymore
         CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip);
         if (err != CHIP_NO_ERROR)
         {
             ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format());
         }
     }

     // Make sure we don't leak the CASE client.  Go through the shared helper
     // so that mCASEClient is also reset: the remaining teardown steps below
     // still run member functions on this object and must not see a pointer
     // to a client that has already been returned to the pool.
     CleanupCASEClient();

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     CancelSessionSetupReattempt();
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

     DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease);
 }

 /**
  * Start operational address resolution for our peer.
  *
  * When automatic CASE retries are compiled in, the attempt counters are
  * updated first (each saturating at its bound).  If a lookup is already
  * active, no new one is started and CHIP_NO_ERROR is returned.
  *
  * @return CHIP_NO_ERROR if a lookup is (already or now) in progress,
  *         CHIP_ERROR_INVALID_FABRIC_INDEX if our fabric is not in the fabric
  *         table, or the error returned by the resolver's LookupNode.
  */
 CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
 {
 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     // Account for this attempt.  The three counters are independent and
     // each one saturates instead of wrapping.
     if (mAttemptsDone < UINT8_MAX)
     {
         ++mAttemptsDone;
     }
     if (mRemainingAttempts > 0)
     {
         --mRemainingAttempts;
     }
     if (mResolveAttemptsAllowed > 0)
     {
         --mResolveAttemptsAllowed;
     }
     MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

     // NOTE: This is public API that can be used to update our stored peer
     // address even when we are in State::SecureConnected, so this method
     // deliberately makes no MoveToState calls.
     if (mAddressLookupHandle.IsActive())
     {
         ChipLogProgress(Discovery,
                         "OperationalSessionSetup[%u:" ChipLogFormatX64
                         "]: Operational node lookup already in progress. Will NOT start a new one.",
                         mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
         return CHIP_NO_ERROR;
     }

     // We can get here several times (multiple discovered addresses, retries).
     // The metric backend copes with that and treats the earliest occurrence
     // as the start of the event.
     MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);

     auto const * fabric = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
     VerifyOrReturnError(fabric != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);

     // The resolver identifies the node by compressed fabric id + node id.
     PeerId lookupTarget(fabric->GetCompressedFabricId(), mPeerId.GetNodeId());
     NodeLookupRequest lookupRequest(lookupTarget);

     return Resolver::Instance().LookupNode(lookupRequest, mAddressLookupHandle);
 }

 void OperationalSessionSetup::PerformAddressUpdate()
 {
     if (mPerformingAddressUpdate)
     {
         // We are already in the middle of a lookup from a previous call to
         // PerformAddressUpdate. In that case we will just exit right away as
         // we are already looking to update the results from the previous lookup.
         return;
     }

     // We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state.
     VerifyOrDie(mState == State::NeedsAddress);

     // We are doing an address lookup whether we have an active session for this peer or not.
     mPerformingAddressUpdate = true;
     MoveToState(State::ResolvingAddress);
     CHIP_ERROR err = LookupPeerAddress();
     if (err != CHIP_NO_ERROR)
     {
         ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
         DequeueConnectionCallbacks(err);
         // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
         return;
     }
 }

 /**
  * Address-resolution success callback.  Passes the result on to
  * UpdateDeviceData; peerId is not used here.
  *
  * `this` may have been destroyed by the time UpdateDeviceData returns.
  */
 void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result)
 {
     UpdateDeviceData(result);
 }

 /**
  * Address-resolution failure callback.
  *
  * When automatic CASE retries are compiled in, we are still resolving, and
  * resolve attempts remain, the lookup is restarted and retry handlers are
  * told to expect roughly another minute.  Otherwise the discovery metric is
  * closed and the queued callbacks are notified of `reason`, which releases
  * this object.
  *
  * @param peerId unused
  * @param reason why resolution failed
  */
 void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason)
 {
     ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
                  mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
     // In a mode where CASE would generally be retried, also retry operational
     // discovery while we are allowed to.  This copes more gracefully with
     // broken networks where multicast DNS does not work and only the initial
     // unicast DNS-SD queries get answered.
     //
     // The state check guards against something odd having happened in the
     // meantime such that we are no longer resolving an address.
     const bool canRetryDiscovery = (mState == State::ResolvingAddress) && (mResolveAttemptsAllowed > 0);
     if (canRetryDiscovery)
     {
         ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed);

         // Undo the bookkeeping of the previous LookupPeerAddress call on the
         // generic attempt counters, so the counters used by the actual CASE
         // retry logic are not disturbed by discovery retries.
         if (mAttemptsDone > 0)
         {
             --mAttemptsDone;
         }
         if (mRemainingAttempts < UINT8_MAX)
         {
             ++mRemainingAttempts;
         }

         MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);

         if (LookupPeerAddress() == CHIP_NO_ERROR)
         {
             // Consumers need to hear that resolution will take longer, but
             // the resolver does not expose how long.  Use one minute to be
             // on the safe side.
             using namespace chip::System::Clock::Literals;
             NotifyRetryHandlers(reason, 60_s16);
             return;
         }
     }
 #endif

     MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);

     // No members need updating here: the call below releases `this`.
     DequeueConnectionCallbacks(reason);
     // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
 }

 #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
 /**
  * Raise the number of attempts we are allowed to make.
  *
  * A count of 0 is ignored.  If we are not in State::NeedsAddress, an attempt
  * is already under way, so one is subtracted from the requested count.  The
  * remaining-attempts and resolve-attempts budgets are only ever increased by
  * this call, never lowered.
  *
  * @param attemptCount total number of attempts the caller wants to allow
  */
 void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
 {
     VerifyOrReturn(attemptCount != 0);

     const bool attemptInProgress = (mState != State::NeedsAddress);
     if (attemptInProgress)
     {
         // The attempt currently running counts against the requested total.
         attemptCount--;
     }

     if (mRemainingAttempts < attemptCount)
     {
         mRemainingAttempts = attemptCount;
     }

     if (mResolveAttemptsAllowed < attemptCount)
     {
         mResolveAttemptsAllowed = attemptCount;
     }
 }

 /**
  * Arm a timer that will retry session setup (via TrySetupAgain) after a
  * backoff delay.
  *
  * The delay starts from the configured initial delay, doubled per completed
  * attempt up to the configured maximum backoff.  It is raised to the delay a
  * busy responder requested, if that is larger, and on every even-numbered
  * attempt the local Sigma2 response timeout is added on top.
  *
  * Requires mRemainingAttempts > 0 and mAttemptsDone > 0 (both enforced with
  * VerifyOrDie).  Moves the state machine to State::NeedsAddress.
  *
  * @param[out] timerDelay the chosen delay converted to seconds; not written
  *                        when CHIP_ERROR_INCORRECT_STATE is returned
  *
  * @return CHIP_ERROR_INCORRECT_STATE if no session manager or system layer is
  *         reachable, otherwise the result of starting the timer.
  */
 CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay)
 {
     VerifyOrDie(mRemainingAttempts > 0);
     // Try again, but not if things are in shutdown such that we can't get
     // to a system layer, and not if we've run out of attempts.
     if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
     {
         return CHIP_ERROR_INCORRECT_STATE;
     }

     MoveToState(State::NeedsAddress);
     // Stop exponential backoff before our delays get too large.
     //
     // Note that mAttemptsDone is always > 0 here, because we have
     // just finished one attempt.
     VerifyOrDie(mAttemptsDone > 0);
     // Guarantee at compile time that the largest shifted delay still fits in 16 bits.
     static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
                       (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
                   "Our backoff calculation will overflow.");
     // Base delay: initial delay * 2^(attempts done - 1), with the exponent capped.
     System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
         static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
                               << min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
     // A non-zero stored busy delay means the responder told us it was busy.
     const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
     if (responseWasBusy)
     {
         // Never retry sooner than the responder asked for.
         if (mRequestedBusyDelay > actualTimerDelay)
         {
             actualTimerDelay = mRequestedBusyDelay;
         }

         // Reset mRequestedBusyDelay now that we have consumed it, so it does
         // not affect future reattempts not triggered by a busy response.
         mRequestedBusyDelay = System::Clock::kZero;
     }

     if (mAttemptsDone % 2 == 0)
     {
         // It's possible that the other side received one of our Sigma1 messages
         // and then failed to get its Sigma2 back to us.  If that's the case, it
         // will be waiting for that Sigma2 to time out before it starts
         // listening for Sigma1 messages again.
         //
         // To handle that, on every other retry, add the amount of time it would
         // take the other side to time out.  It would be nice if we could rely
         // on the delay reported in a BUSY response to just tell us that value,
         // but in practice for old devices BUSY often sends some hardcoded value
         // that tells us nothing about when the other side will decide it has
         // timed out.
         //
         // Unfortunately, we do not have the MRP config for the other side here,
         // but in practice if the other side is using its local config to
         // compute Sigma2 response timeouts, then it's also returning useful
         // values with BUSY, so we will wait long enough.
         auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
         actualTimerDelay += additionalTimeout;
     }
     // Report the delay to the caller in whole seconds; the timer itself uses
     // the unconverted value.
     timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);

     CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);

     // TODO: If responseWasBusy, should we increment, mRemainingAttempts and
     // mResolveAttemptsAllowed, since we were explicitly told to retry?  Hard to
     // tell what consumers expect out of a capped retry count here.

     // The cast on count() is needed because the type count() returns might not
     // actually be uint16_t; on some platforms it's int.
     ChipLogProgress(Discovery,
                     "OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
                     mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
     return err;
 }

 // Cancels the pending TrySetupAgain timer (if any) that was registered with
 // this object as its context.
 void OperationalSessionSetup::CancelSessionSetupReattempt()
 {
     // Reaching the system layer requires going through the session manager.
     // If either one is unavailable we have no means of cancelling the timer;
     // the expectation is that this only happens once everything has been
     // torn down, in which case the timer is not going to fire anyway.
     auto * const sessionMgr = mInitParams.exchangeMgr->GetSessionManager();
     if (sessionMgr == nullptr)
     {
         return;
     }

     auto * const layer = sessionMgr->SystemLayer();
     if (layer == nullptr)
     {
         return;
     }

     layer->CancelTimer(TrySetupAgain, this);
 }

 // Timer callback used for session setup reattempts.
 //
 // `state` is the OperationalSessionSetup instance that scheduled the timer.
 // The instance is moved back to the ResolvingAddress state and a new peer
 // address lookup is started.  `systemLayer` is not used.
 void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
 {
     auto * self = static_cast<OperationalSessionSetup *>(state);

     self->MoveToState(State::ResolvingAddress);

     const CHIP_ERROR lookupStatus = self->LookupPeerAddress();
     if (lookupStatus != CHIP_NO_ERROR)
     {
         // The lookup could not even be started, so give up and report the
         // failure to everyone waiting on this session setup.
         self->DequeueConnectionCallbacks(lookupStatus);
         // `self` must not be touched past this point: it has been destroyed
         // in DequeueConnectionCallbacks.
     }
 }

 // Registers `onRetry` in the list of handlers to be notified about
 // connection retries.
 void OperationalSessionSetup::AddRetryHandler(Callback::Callback<OnDeviceConnectionRetry> * onRetry)
 {
     // Cancel() hands back the Cancelable for this callback; that is what gets
     // queued on our retry-handler list.
     auto * retryEntry = onRetry->Cancel();
     mConnectionRetry.Enqueue(retryEntry);
 }

 // Notifies the retry handlers about `error`, passing along an estimate of how
 // long it will take to find out whether the upcoming retry failed.
 //
 // The estimate is the Sigma1 response timeout computed from `remoteMrpConfig`
 // (in whole seconds), plus one second, plus `retryDelay`.
 void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig,
                                                   System::Clock::Seconds16 retryDelay)
 {
     using namespace chip::System::Clock::Literals;

     // How long we are likely to wait before detecting that the retry itself
     // has failed.
     const System::Clock::Timeout sigma1Timeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig);
     const auto sigma1TimeoutSecs                = std::chrono::duration_cast<System::Clock::Seconds16>(sigma1Timeout);

     // The conversion to seconds may have dropped a fractional part of
     // sigma1Timeout; pad by 1 second to account for that.
     const auto estimate = sigma1TimeoutSecs + 1_s16 + retryDelay;
     NotifyRetryHandlers(error, estimate);
 }

 // Calls every registered retry handler with our peer id, `error` and
 // `timeoutEstimate`, keeping each handler registered unless it was canceled
 // while being called.
 void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate)
 {
     // Care is needed here.  A handler may, in theory, destroy the Callback
     // object it was invoked through.  In the success/failure paths that is
     // dealt with by simply emptying the handler list while notifying, but a
     // retry handler may have to be called again on a later retry, so it has
     // to stay registered.
     //
     // The approach taken is:
     //
     // 1) Take a snapshot of the registered handlers first.  A handler that
     //    calls AddRetryHandler for some other handler then does not change
     //    the set of handlers notified by this call.
     //
     // 2) Before notifying a handler, move it onto a list of its own.  If the
     //    handler is canceled during the notification, that list ends up
     //    empty, which is how we detect the cancellation.
     //
     // 3) A handler that was not canceled is put back on our handler list so
     //    that it is notified again on future retries.

     Cancelable pendingHandlers;
     mConnectionRetry.DequeueAll(pendingHandlers);

     // Always take the current head of the snapshot; every iteration removes
     // that head, so the loop ends once the snapshot points back at itself.
     for (auto * head = pendingHandlers.mNext; head != &pendingHandlers; head = pendingHandlers.mNext)
     {
         auto * handler = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(head);

         Callback::CallbackDeque inFlight;
         inFlight.Enqueue(handler->Cancel());

         handler->mCall(handler->mContext, mPeerId, error, timeoutEstimate);

         const bool canceledDuringCall = (inFlight.mNext == &inFlight);
         if (canceledDuringCall)
         {
             // The handler is no longer on its private list; do not touch it
             // again and do not re-register it.
             continue;
         }

         // The call left the handler in place, so it is still supposed to be
         // registered with us.
         AddRetryHandler(handler);
     }
 }
 #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

 } // namespace chip