| /* |
| * |
| * Copyright (c) 2020-2021 Project CHIP Authors |
| * All rights reserved. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /** |
| * @file |
| * This file contains implementation of Device class. The objects of this |
| * class will be used by Controller applications to interact with CHIP |
| * devices. The class provides mechanism to construct, send and receive |
| * messages to and from the corresponding CHIP devices. |
| */ |
| |
| #include <app/OperationalSessionSetup.h> |
| |
| #include <app/CASEClient.h> |
| #include <app/InteractionModelEngine.h> |
| #include <transport/SecureSession.h> |
| |
| #include <lib/address_resolve/AddressResolve.h> |
| #include <lib/core/CHIPCore.h> |
| #include <lib/core/CHIPEncoding.h> |
| #include <lib/dnssd/Resolver.h> |
| #include <lib/support/CodeUtils.h> |
| #include <lib/support/logging/CHIPLogging.h> |
| #include <system/SystemClock.h> |
| #include <system/SystemLayer.h> |
| #include <tracing/metric_event.h> |
| |
| using namespace chip::Callback; |
| using chip::AddressResolve::NodeLookupRequest; |
| using chip::AddressResolve::Resolver; |
| using chip::AddressResolve::ResolveResult; |
| using namespace chip::Tracing; |
| |
| namespace chip { |
| |
| void OperationalSessionSetup::MoveToState(State aTargetState) |
| { |
| if (mState != aTargetState) |
| { |
| ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d", |
| mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState), |
| to_underlying(aTargetState)); |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (mState == State::WaitingForRetry) |
| { |
| CancelSessionSetupReattempt(); |
| } |
| #endif |
| |
| mState = aTargetState; |
| |
| if (aTargetState != State::Connecting) |
| { |
| CleanupCASEClient(); |
| } |
| } |
| } |
| |
| bool OperationalSessionSetup::AttachToExistingSecureSession() |
| { |
| VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress || |
| mState == State::WaitingForRetry, |
| false); |
| |
| auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode( |
| mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability); |
| if (!sessionHandle.HasValue()) |
| return false; |
| |
| ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(), |
| ChipLogValueX64(mPeerId.GetNodeId())); |
| |
| mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress(); |
| if (!mSecureSession.Grab(sessionHandle.Value())) |
| return false; |
| |
| return true; |
| } |
| |
| void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection, |
| Callback::Callback<OnDeviceConnectionFailure> * onFailure, |
| Callback::Callback<OnSetupFailure> * onSetupFailure, |
| TransportPayloadCapability transportPayloadCapability) |
| { |
| CHIP_ERROR err = CHIP_NO_ERROR; |
| bool isConnected = false; |
| |
| mTransportPayloadCapability = transportPayloadCapability; |
| // |
| // Always enqueue our user provided callbacks into our callback list. |
| // If anything goes wrong below, we'll trigger failures (including any queued from |
| // a previous iteration which in theory shouldn't happen, but this is written to be more defensive) |
| // |
| EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure); |
| |
| switch (mState) |
| { |
| case State::Uninitialized: |
| err = CHIP_ERROR_INCORRECT_STATE; |
| break; |
| |
| case State::NeedsAddress: |
| isConnected = AttachToExistingSecureSession(); |
| if (!isConnected) |
| { |
| // LookupPeerAddress could perhaps call back with a result |
| // synchronously, so do our state update first. |
| MoveToState(State::ResolvingAddress); |
| err = LookupPeerAddress(); |
| if (err != CHIP_NO_ERROR) |
| { |
| // Roll back the state change, since we are presumably not in |
| // the middle of a lookup. |
| MoveToState(State::NeedsAddress); |
| } |
| } |
| |
| break; |
| |
| case State::ResolvingAddress: |
| case State::WaitingForRetry: |
| isConnected = AttachToExistingSecureSession(); |
| break; |
| |
| case State::HasAddress: |
| isConnected = AttachToExistingSecureSession(); |
| if (!isConnected) |
| { |
| // We should not actually every be in be in State::HasAddress. This |
| // is because in the same call that we moved to State::HasAddress |
| // we either move to State::Connecting or call |
| // DequeueConnectionCallbacks with an error thus releasing |
| // ourselves before any call would reach this section of code. |
| err = CHIP_ERROR_INCORRECT_STATE; |
| } |
| |
| break; |
| |
| case State::Connecting: |
| break; |
| |
| case State::SecureConnected: |
| isConnected = true; |
| break; |
| |
| default: |
| err = CHIP_ERROR_INCORRECT_STATE; |
| } |
| |
| if (isConnected) |
| { |
| MoveToState(State::SecureConnected); |
| } |
| |
| // |
| // Dequeue all our callbacks on either encountering an error |
| // or if we successfully connected. Both should not be set |
| // simultaneously. |
| // |
| if (err != CHIP_NO_ERROR || isConnected) |
| { |
| DequeueConnectionCallbacks(err); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| // While it is odd to have an explicit return here at the end of the function, we do so |
| // as a precaution in case someone later on adds something to the end of this function. |
| return; |
| } |
| } |
| |
| void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection, |
| Callback::Callback<OnDeviceConnectionFailure> * onFailure, |
| TransportPayloadCapability transportPayloadCapability) |
| { |
| Connect(onConnection, onFailure, nullptr, transportPayloadCapability); |
| } |
| |
| void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection, |
| Callback::Callback<OnSetupFailure> * onSetupFailure, |
| TransportPayloadCapability transportPayloadCapability) |
| { |
| Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability); |
| } |
| |
| void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result) |
| { |
| auto & config = result.mrpRemoteConfig; |
| auto addr = result.address; |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| // Make sure to clear out our reason for trying the next result first thing, |
| // so it does not stick around in various error cases. |
| bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError; |
| mTryingNextResultDueToSessionEstablishmentError = false; |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| if (mState == State::Uninitialized) |
| { |
| return; |
| } |
| |
| #if CHIP_DETAIL_LOGGING |
| char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize]; |
| addr.ToString(peerAddrBuff); |
| |
| ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d", |
| mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast<int>(mState)); |
| #endif |
| |
| mDeviceAddress = addr; |
| |
| // Initialize CASE session state with any MRP parameters that DNS-SD has provided. |
| // It can be overridden by CASE session protocol messages that include MRP parameters. |
| if (mCASEClient) |
| { |
| mCASEClient->SetRemoteMRPIntervals(config); |
| } |
| |
| if (mState != State::ResolvingAddress) |
| { |
| ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state"); |
| DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE); |
| // Do not touch `this` instance anymore; it has been destroyed in |
| // DequeueConnectionCallbacks. |
| return; |
| } |
| |
| MoveToState(State::HasAddress); |
| mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr); |
| |
| if (mPerformingAddressUpdate) |
| { |
| // Nothing else to do here. |
| DequeueConnectionCallbacks(CHIP_NO_ERROR); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| return; |
| } |
| |
| CHIP_ERROR err = EstablishConnection(result); |
| LogErrorOnFailure(err); |
| if (err == CHIP_NO_ERROR) |
| { |
| // We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue |
| // the state machine forward. |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (tryingNextResultDueToSessionEstablishmentError) |
| { |
| // Our retry has already been kicked off, so claim 0 delay until it |
| // starts. We only reach this from OnSessionEstablishmentError when |
| // the error is CHIP_ERROR_TIMEOUT. |
| NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero); |
| } |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| return; |
| } |
| |
| // Move to the ResolvingAddress state, in case we have more results, |
| // since we expect to receive results in that state. Pretend like we moved |
| // on directly to this address from whatever triggered us to try this result |
| // (so restore mTryingNextResultDueToSessionEstablishmentError to the value |
| // it had at the start of this function). |
| MoveToState(State::ResolvingAddress); |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError; |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) |
| { |
| // No need to NotifyRetryHandlers, since we never actually spent any |
| // time trying the previous result. Whatever work we need to do has |
| // been handled by our recursive OnNodeAddressResolved callback. Make |
| // sure not to touch `this` under here, because it might have been |
| // deleted by OnNodeAddressResolved. |
| return; |
| } |
| |
| // No need to reset mTryingNextResultDueToSessionEstablishmentError here, |
| // because we're about to delete ourselves. |
| |
| DequeueConnectionCallbacks(err); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| } |
| |
| CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result) |
| { |
| auto & config = result.mrpRemoteConfig; |
| #if INET_CONFIG_ENABLE_TCP_ENDPOINT |
| if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload) |
| { |
| if (result.supportsTcpServer) |
| { |
| // Set the transport type for carrying large payloads |
| mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp); |
| } |
| else |
| { |
| // we should not set the large payload while the TCP support is not enabled |
| ChipLogError( |
| Discovery, |
| "LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId, |
| ChipLogValueScopedNodeId(mPeerId)); |
| return CHIP_ERROR_INTERNAL; |
| } |
| } |
| #endif |
| |
| mCASEClient = mClientPool->Allocate(); |
| VerifyOrReturnError(mCASEClient != nullptr, CHIP_ERROR_NO_MEMORY); |
| |
| MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession); |
| CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this); |
| if (err != CHIP_NO_ERROR) |
| { |
| MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err); |
| CleanupCASEClient(); |
| return err; |
| } |
| |
| MoveToState(State::Connecting); |
| |
| return CHIP_NO_ERROR; |
| } |
| |
| void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback<OnDeviceConnected> * onConnection, |
| Callback::Callback<OnDeviceConnectionFailure> * onFailure, |
| Callback::Callback<OnSetupFailure> * onSetupFailure) |
| { |
| mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure); |
| } |
| |
| void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage, |
| ReleaseBehavior releaseBehavior) |
| { |
| // We expect that we only have callbacks if we are not performing just address update. |
| VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty()); |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| // Clear out mConnectionRetry, so that those cancelables are not holding |
| // pointers to us, since we're about to go away. |
| while (auto * cb = mConnectionRetry.First()) |
| { |
| cb->Cancel(); |
| } |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| // Gather up state we will need for our notifications. |
| SuccessFailureCallbackList readyCallbacks; |
| readyCallbacks.EnqueueTakeAll(mCallbacks); |
| auto * exchangeMgr = mInitParams.exchangeMgr; |
| Optional<SessionHandle> optionalSessionHandle = mSecureSession.Get(); |
| ScopedNodeId peerId = mPeerId; |
| System::Clock::Milliseconds16 requestedBusyDelay = |
| #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP |
| mRequestedBusyDelay; |
| #else |
| System::Clock::kZero; |
| #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP |
| |
| if (releaseBehavior == ReleaseBehavior::Release) |
| { |
| VerifyOrDie(mReleaseDelegate != nullptr); |
| mReleaseDelegate->ReleaseSession(this); |
| } |
| |
| // DO NOT touch any members of this object after this point. It's dead. |
| NotifyConnectionCallbacks(readyCallbacks, error, stage, peerId, exchangeMgr, optionalSessionHandle, requestedBusyDelay); |
| } |
| |
| void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error, |
| SessionEstablishmentStage stage, const ScopedNodeId & peerId, |
| Messaging::ExchangeManager * exchangeMgr, |
| const Optional<SessionHandle> & optionalSessionHandle, |
| System::Clock::Milliseconds16 requestedBusyDelay) |
| { |
| Callback::Callback<OnDeviceConnected> * onConnected; |
| Callback::Callback<OnDeviceConnectionFailure> * onConnectionFailure; |
| Callback::Callback<OnSetupFailure> * onSetupFailure; |
| while (ready.Take(onConnected, onConnectionFailure, onSetupFailure)) |
| { |
| if (error == CHIP_NO_ERROR) |
| { |
| VerifyOrDie(exchangeMgr); |
| VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession()); |
| if (onConnected != nullptr) |
| { |
| onConnected->mCall(onConnected->mContext, *exchangeMgr, optionalSessionHandle.Value()); |
| |
| // That sucessful call might have made the session inactive. If it did, then we should |
| // not call any more success callbacks, since we do not in fact have an active session |
| // for them, and if they try to put the session in a holder that will fail, and then |
| // trying to use the holder as if it has a session will crash. |
| if (!optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession()) |
| { |
| ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session", |
| ChipLogValueScopedNodeId(peerId)); |
| error = CHIP_ERROR_CONNECTION_ABORTED; |
| } |
| } |
| } |
| else // error |
| { |
| if (onConnectionFailure != nullptr) |
| { |
| onConnectionFailure->mCall(onConnectionFailure->mContext, peerId, error); |
| } |
| if (onSetupFailure != nullptr) |
| { |
| ConnectionFailureInfo failureInfo(peerId, error, stage); |
| #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP |
| if (error == CHIP_ERROR_BUSY) |
| { |
| failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay); |
| } |
| #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP |
| onSetupFailure->mCall(onSetupFailure->mContext, failureInfo); |
| } |
| } |
| } |
| } |
| |
| void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage) |
| { |
| VerifyOrReturn(mState == State::Connecting, |
| ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting")); |
| |
| // If this condition ever changes, we may need to store the error in a |
| // member instead of having a boolean |
| // mTryingNextResultDueToSessionEstablishmentError, so we can recover the |
| // error in UpdateDeviceData. |
| if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error) |
| { |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| // Make a copy of the ReliableMessageProtocolConfig, since our |
| // mCaseClient is about to go away once we change state. |
| ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals(); |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| // Move to the ResolvingAddress state, in case we have more results, |
| // since we expect to receive results in that state. |
| MoveToState(State::ResolvingAddress); |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| mTryingNextResultDueToSessionEstablishmentError = true; |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) |
| { |
| // Whatever work we needed to do has been handled by our |
| // OnNodeAddressResolved callback. Make sure not to touch `this` |
| // under here, because it might have been deleted by |
| // OnNodeAddressResolved. |
| return; |
| } |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| mTryingNextResultDueToSessionEstablishmentError = false; |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| // Moving back to the Connecting state would be a bit of a lie, since we |
| // don't have an mCASEClient. Just go back to NeedsAddress, since |
| // that's really where we are now. |
| MoveToState(State::NeedsAddress); |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (mRemainingAttempts > 0) |
| { |
| System::Clock::Seconds16 reattemptDelay; |
| CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay); |
| if (err == CHIP_NO_ERROR) |
| { |
| MoveToState(State::WaitingForRetry); |
| NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay); |
| return; |
| } |
| } |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| } |
| |
| // Session failed to be established. This is when discovery is also stopped |
| MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error); |
| MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error); |
| |
| DequeueConnectionCallbacks(error, stage); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| } |
| |
| void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) |
| { |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP |
| // Store the requested delay, so that we can use it for scheduling our |
| // retry or communicate it to our API consumer. |
| mRequestedBusyDelay = requestedDelay; |
| #endif |
| } |
| |
| void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session) |
| { |
| VerifyOrReturn(mState == State::Connecting, |
| ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting")); |
| |
| // Session has been established. This is when discovery is also stopped |
| MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR); |
| |
| MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR); |
| |
| if (!mSecureSession.Grab(session)) |
| { |
| // Got an invalid session, just dispatch an error. We have to do this |
| // so we don't leak. |
| DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE); |
| |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| return; |
| } |
| |
| MoveToState(State::SecureConnected); |
| |
| DequeueConnectionCallbacks(CHIP_NO_ERROR); |
| } |
| |
| void OperationalSessionSetup::CleanupCASEClient() |
| { |
| if (mCASEClient) |
| { |
| mClientPool->Release(mCASEClient); |
| mCASEClient = nullptr; |
| } |
| } |
| |
| OperationalSessionSetup::~OperationalSessionSetup() |
| { |
| if (mAddressLookupHandle.IsActive()) |
| { |
| ChipLogDetail(Discovery, |
| "OperationalSessionSetup[%u:" ChipLogFormatX64 |
| "]: Cancelling incomplete address resolution as device is being deleted.", |
| mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId())); |
| |
| // Skip cancel callback since the destructor is being called, so we assume that this object is |
| // obviously not used anymore |
| CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip); |
| if (err != CHIP_NO_ERROR) |
| { |
| ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format()); |
| } |
| } |
| |
| if (mCASEClient) |
| { |
| // Make sure we don't leak it. |
| mClientPool->Release(mCASEClient); |
| } |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| CancelSessionSetupReattempt(); |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease); |
| } |
| |
| CHIP_ERROR OperationalSessionSetup::LookupPeerAddress() |
| { |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| if (mRemainingAttempts > 0) |
| { |
| --mRemainingAttempts; |
| } |
| if (mAttemptsDone < UINT8_MAX) |
| { |
| ++mAttemptsDone; |
| } |
| if (mResolveAttemptsAllowed > 0) |
| { |
| --mResolveAttemptsAllowed; |
| } |
| MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone); |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| // NOTE: This is public API that can be used to update our stored peer |
| // address even when we are in State::Connected, so we do not make any |
| // MoveToState calls in this method. |
| if (mAddressLookupHandle.IsActive()) |
| { |
| ChipLogProgress(Discovery, |
| "OperationalSessionSetup[%u:" ChipLogFormatX64 |
| "]: Operational node lookup already in progress. Will NOT start a new one.", |
| mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId())); |
| return CHIP_NO_ERROR; |
| } |
| |
| // This code can be reached multiple times, if we discover multiple addresses or do retries. |
| // The metric backend can handle this and always picks the earliest occurrence as the start of the event. |
| MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery); |
| |
| auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex()); |
| VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX); |
| |
| PeerId peerId(fabricInfo->GetCompressedFabricId(), mPeerId.GetNodeId()); |
| |
| NodeLookupRequest request(peerId); |
| |
| return Resolver::Instance().LookupNode(request, mAddressLookupHandle); |
| } |
| |
| void OperationalSessionSetup::PerformAddressUpdate() |
| { |
| if (mPerformingAddressUpdate) |
| { |
| // We are already in the middle of a lookup from a previous call to |
| // PerformAddressUpdate. In that case we will just exit right away as |
| // we are already looking to update the results from the previous lookup. |
| return; |
| } |
| |
| // We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state. |
| VerifyOrDie(mState == State::NeedsAddress); |
| |
| // We are doing an address lookup whether we have an active session for this peer or not. |
| mPerformingAddressUpdate = true; |
| MoveToState(State::ResolvingAddress); |
| CHIP_ERROR err = LookupPeerAddress(); |
| if (err != CHIP_NO_ERROR) |
| { |
| ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format()); |
| DequeueConnectionCallbacks(err); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| return; |
| } |
| } |
| |
| void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result) |
| { |
| UpdateDeviceData(result); |
| } |
| |
| void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason) |
| { |
| ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT, |
| mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format()); |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| // If we're in a mode where we would generally retry CASE, retry operational |
| // discovery if we're allowed to. That allows us to more-gracefully handle broken networks |
| // where multicast DNS does not actually work and hence only the initial |
| // unicast DNS-SD queries get a response. |
| // |
| // We check for State::ResolvingAddress just in case in the meantime |
| // something weird happened and we are no longer trying to resolve an |
| // address. |
| if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0) |
| { |
| ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed); |
| |
| // Pretend like our previous attempt (i.e. call to LookupPeerAddress) |
| // has not happened for purposes of the generic attempt counters, so we |
| // don't mess up the counters for our actual CASE retry logic. |
| if (mRemainingAttempts < UINT8_MAX) |
| { |
| ++mRemainingAttempts; |
| } |
| if (mAttemptsDone > 0) |
| { |
| --mAttemptsDone; |
| } |
| |
| MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone); |
| |
| CHIP_ERROR err = LookupPeerAddress(); |
| if (err == CHIP_NO_ERROR) |
| { |
| // We need to notify our consumer that the resolve will take more |
| // time, but we don't actually know how much time it will take, |
| // because the resolver does not expose that information. Just use |
| // one minute to be safe. |
| using namespace chip::System::Clock::Literals; |
| NotifyRetryHandlers(reason, 60_s16); |
| return; |
| } |
| } |
| #endif |
| |
| MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason); |
| |
| // No need to modify any variables in `this` since call below releases `this`. |
| DequeueConnectionCallbacks(reason); |
| // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| } |
| |
| #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount) |
| { |
| if (attemptCount == 0) |
| { |
| // Nothing to do. |
| return; |
| } |
| |
| if (mState != State::NeedsAddress) |
| { |
| // We're in the middle of an attempt already, so decrement attemptCount |
| // by 1 to account for that. |
| --attemptCount; |
| } |
| |
| if (attemptCount > mRemainingAttempts) |
| { |
| mRemainingAttempts = attemptCount; |
| } |
| |
| if (attemptCount > mResolveAttemptsAllowed) |
| { |
| mResolveAttemptsAllowed = attemptCount; |
| } |
| } |
| |
| CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay) |
| { |
| VerifyOrDie(mRemainingAttempts > 0); |
| // Try again, but not if things are in shutdown such that we can't get |
| // to a system layer, and not if we've run out of attempts. |
| if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()) |
| { |
| return CHIP_ERROR_INCORRECT_STATE; |
| } |
| |
| MoveToState(State::NeedsAddress); |
| // Stop exponential backoff before our delays get too large. |
| // |
| // Note that mAttemptsDone is always > 0 here, because we have |
| // just finished one attempt. |
| VerifyOrDie(mAttemptsDone > 0); |
| static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >= |
| (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF), |
| "Our backoff calculation will overflow."); |
| System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16( |
| static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS |
| << std::min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF))); |
| const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero; |
| if (responseWasBusy) |
| { |
| if (mRequestedBusyDelay > actualTimerDelay) |
| { |
| actualTimerDelay = mRequestedBusyDelay; |
| } |
| |
| // Reset mRequestedBusyDelay now that we have consumed it, so it does |
| // not affect future reattempts not triggered by a busy response. |
| mRequestedBusyDelay = System::Clock::kZero; |
| } |
| |
| if (mAttemptsDone % 2 == 0) |
| { |
| // It's possible that the other side received one of our Sigma1 messages |
| // and then failed to get its Sigma2 back to us. If that's the case, it |
| // will be waiting for that Sigma2 to time out before it starts |
| // listening for Sigma1 messages again. |
| // |
| // To handle that, on every other retry, add the amount of time it would |
| // take the other side to time out. It would be nice if we could rely |
| // on the delay reported in a BUSY response to just tell us that value, |
| // but in practice for old devices BUSY often sends some hardcoded value |
| // that tells us nothing about when the other side will decide it has |
| // timed out. |
| // |
| // Unfortunately, we do not have the MRP config for the other side here, |
| // but in practice if the other side is using its local config to |
| // compute Sigma2 response timeouts, then it's also returning useful |
| // values with BUSY, so we will wait long enough. |
| auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig())); |
| actualTimerDelay += additionalTimeout; |
| } |
| timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay); |
| |
| CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this); |
| |
| // TODO: If responseWasBusy, should we increment, mRemainingAttempts and |
| // mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to |
| // tell what consumers expect out of a capped retry count here. |
| |
| // The cast on count() is needed because the type count() returns might not |
| // actually be uint16_t; on some platforms it's int. |
| ChipLogProgress(Discovery, |
| "OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT, |
| mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format()); |
| return err; |
| } |
| |
| void OperationalSessionSetup::CancelSessionSetupReattempt() |
| { |
| // If we can't get a system layer, there is no way for us to cancel things |
| // at this point, but hopefully that's because everything is torn down |
| // anyway and hence the timer will not fire. |
| auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager(); |
| VerifyOrReturn(sessionManager != nullptr); |
| |
| auto * systemLayer = sessionManager->SystemLayer(); |
| VerifyOrReturn(systemLayer != nullptr); |
| |
| systemLayer->CancelTimer(TrySetupAgain, this); |
| } |
| |
| void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state) |
| { |
| auto * self = static_cast<OperationalSessionSetup *>(state); |
| |
| self->MoveToState(State::ResolvingAddress); |
| CHIP_ERROR err = self->LookupPeerAddress(); |
| if (err == CHIP_NO_ERROR) |
| { |
| return; |
| } |
| |
| // Give up; we could not start a lookup. |
| self->DequeueConnectionCallbacks(err); |
| // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks. |
| } |
| |
| void OperationalSessionSetup::AddRetryHandler(Callback::Callback<OnDeviceConnectionRetry> * onRetry) |
| { |
| mConnectionRetry.Enqueue(onRetry->Cancel()); |
| } |
| |
| void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig, |
| System::Clock::Seconds16 retryDelay) |
| { |
| // Compute the time we are likely to need to detect that the retry has |
| // failed. |
| System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig); |
| auto timeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(messageTimeout); |
| // Add 1 second in case we had fractional milliseconds in messageTimeout. |
| using namespace chip::System::Clock::Literals; |
| NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay); |
| } |
| |
| void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate) |
| { |
| // We have to be very careful here: Calling into these handlers might in |
| // theory destroy the Callback objects involved, but unlike the |
| // succcess/failure cases we don't want to just clear the handlers from our |
| // list when we are calling them, because we might need to call a given |
| // handler more than once. |
| // |
| // To handle this we: |
| // |
| // 1) Snapshot the list of handlers up front, so if any of the handlers |
| // triggers an AddRetryHandler with some other handler that does not |
| // affect the list we plan to notify here. |
| // |
| // 2) When planning to notify a handler move it to a new list that contains |
| // just that handler. This way if it gets canceled as part of the |
| // notification we can tell it has been canceled. |
| // |
| // 3) If notifying the handler does not cancel it, add it back to our list |
| // of handlers so we will notify it on future retries. |
| |
| Cancelable retryHandlerListSnapshot; |
| mConnectionRetry.DequeueAll(retryHandlerListSnapshot); |
| |
| while (retryHandlerListSnapshot.mNext != &retryHandlerListSnapshot) |
| { |
| auto * cb = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(retryHandlerListSnapshot.mNext); |
| |
| Callback::CallbackDeque currentCallbackHolder; |
| currentCallbackHolder.Enqueue(cb->Cancel()); |
| |
| cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate); |
| |
| if (currentCallbackHolder.mNext != ¤tCallbackHolder) |
| { |
| // Callback has not been canceled as part of the call, so is still |
| // supposed to be registered with us. |
| AddRetryHandler(cb); |
| } |
| } |
| } |
| #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES |
| |
| } // namespace chip |