blob: 156003dec8ffd5d20bacec1f2e4f37b60863f3ae [file] [log] [blame]
/*
*
* Copyright (c) 2020-2021 Project CHIP Authors
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file
* This file contains implementation of Device class. The objects of this
* class will be used by Controller applications to interact with CHIP
* devices. The class provides mechanism to construct, send and receive
* messages to and from the corresponding CHIP devices.
*/
#include <app/OperationalSessionSetup.h>
#include <app/CASEClient.h>
#include <app/InteractionModelEngine.h>
#include <transport/SecureSession.h>
#include <lib/address_resolve/AddressResolve.h>
#include <lib/core/CHIPCore.h>
#include <lib/core/CHIPEncoding.h>
#include <lib/dnssd/Resolver.h>
#include <lib/support/CodeUtils.h>
#include <lib/support/logging/CHIPLogging.h>
#include <system/SystemClock.h>
#include <system/SystemLayer.h>
#include <tracing/metric_event.h>
using namespace chip::Callback;
using chip::AddressResolve::NodeLookupRequest;
using chip::AddressResolve::Resolver;
using chip::AddressResolve::ResolveResult;
using namespace chip::Tracing;
namespace chip {
void OperationalSessionSetup::MoveToState(State aTargetState)
{
if (mState != aTargetState)
{
ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
to_underlying(aTargetState));
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mState == State::WaitingForRetry)
{
CancelSessionSetupReattempt();
}
#endif
mState = aTargetState;
if (aTargetState != State::Connecting)
{
CleanupCASEClient();
}
}
}
bool OperationalSessionSetup::AttachToExistingSecureSession()
{
VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress ||
mState == State::WaitingForRetry,
false);
auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode(
mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability);
if (!sessionHandle.HasValue())
return false;
ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
ChipLogValueX64(mPeerId.GetNodeId()));
mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress();
if (!mSecureSession.Grab(sessionHandle.Value()))
return false;
return true;
}
void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnDeviceConnectionFailure> * onFailure,
Callback::Callback<OnSetupFailure> * onSetupFailure,
TransportPayloadCapability transportPayloadCapability)
{
CHIP_ERROR err = CHIP_NO_ERROR;
bool isConnected = false;
mTransportPayloadCapability = transportPayloadCapability;
//
// Always enqueue our user provided callbacks into our callback list.
// If anything goes wrong below, we'll trigger failures (including any queued from
// a previous iteration which in theory shouldn't happen, but this is written to be more defensive)
//
EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure);
switch (mState)
{
case State::Uninitialized:
err = CHIP_ERROR_INCORRECT_STATE;
break;
case State::NeedsAddress:
isConnected = AttachToExistingSecureSession();
if (!isConnected)
{
// LookupPeerAddress could perhaps call back with a result
// synchronously, so do our state update first.
MoveToState(State::ResolvingAddress);
err = LookupPeerAddress();
if (err != CHIP_NO_ERROR)
{
// Roll back the state change, since we are presumably not in
// the middle of a lookup.
MoveToState(State::NeedsAddress);
}
}
break;
case State::ResolvingAddress:
case State::WaitingForRetry:
isConnected = AttachToExistingSecureSession();
break;
case State::HasAddress:
isConnected = AttachToExistingSecureSession();
if (!isConnected)
{
// We should not actually every be in be in State::HasAddress. This
// is because in the same call that we moved to State::HasAddress
// we either move to State::Connecting or call
// DequeueConnectionCallbacks with an error thus releasing
// ourselves before any call would reach this section of code.
err = CHIP_ERROR_INCORRECT_STATE;
}
break;
case State::Connecting:
break;
case State::SecureConnected:
isConnected = true;
break;
default:
err = CHIP_ERROR_INCORRECT_STATE;
}
if (isConnected)
{
MoveToState(State::SecureConnected);
}
//
// Dequeue all our callbacks on either encountering an error
// or if we successfully connected. Both should not be set
// simultaneously.
//
if (err != CHIP_NO_ERROR || isConnected)
{
DequeueConnectionCallbacks(err);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
// While it is odd to have an explicit return here at the end of the function, we do so
// as a precaution in case someone later on adds something to the end of this function.
return;
}
}
void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnDeviceConnectionFailure> * onFailure,
TransportPayloadCapability transportPayloadCapability)
{
Connect(onConnection, onFailure, nullptr, transportPayloadCapability);
}
void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnSetupFailure> * onSetupFailure,
TransportPayloadCapability transportPayloadCapability)
{
Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability);
}
void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result)
{
auto & config = result.mrpRemoteConfig;
auto addr = result.address;
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Make sure to clear out our reason for trying the next result first thing,
// so it does not stick around in various error cases.
bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError;
mTryingNextResultDueToSessionEstablishmentError = false;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mState == State::Uninitialized)
{
return;
}
#if CHIP_DETAIL_LOGGING
char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize];
addr.ToString(peerAddrBuff);
ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d",
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast<int>(mState));
#endif
mDeviceAddress = addr;
// Initialize CASE session state with any MRP parameters that DNS-SD has provided.
// It can be overridden by CASE session protocol messages that include MRP parameters.
if (mCASEClient)
{
mCASEClient->SetRemoteMRPIntervals(config);
}
if (mState != State::ResolvingAddress)
{
ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
// Do not touch `this` instance anymore; it has been destroyed in
// DequeueConnectionCallbacks.
return;
}
MoveToState(State::HasAddress);
mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr);
if (mPerformingAddressUpdate)
{
// Nothing else to do here.
DequeueConnectionCallbacks(CHIP_NO_ERROR);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
return;
}
CHIP_ERROR err = EstablishConnection(result);
LogErrorOnFailure(err);
if (err == CHIP_NO_ERROR)
{
// We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue
// the state machine forward.
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (tryingNextResultDueToSessionEstablishmentError)
{
// Our retry has already been kicked off, so claim 0 delay until it
// starts. We only reach this from OnSessionEstablishmentError when
// the error is CHIP_ERROR_TIMEOUT.
NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero);
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
return;
}
// Move to the ResolvingAddress state, in case we have more results,
// since we expect to receive results in that state. Pretend like we moved
// on directly to this address from whatever triggered us to try this result
// (so restore mTryingNextResultDueToSessionEstablishmentError to the value
// it had at the start of this function).
MoveToState(State::ResolvingAddress);
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
{
// No need to NotifyRetryHandlers, since we never actually spent any
// time trying the previous result. Whatever work we need to do has
// been handled by our recursive OnNodeAddressResolved callback. Make
// sure not to touch `this` under here, because it might have been
// deleted by OnNodeAddressResolved.
return;
}
// No need to reset mTryingNextResultDueToSessionEstablishmentError here,
// because we're about to delete ourselves.
DequeueConnectionCallbacks(err);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result)
{
auto & config = result.mrpRemoteConfig;
#if INET_CONFIG_ENABLE_TCP_ENDPOINT
if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload)
{
if (result.supportsTcpServer)
{
// Set the transport type for carrying large payloads
mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp);
}
else
{
// we should not set the large payload while the TCP support is not enabled
ChipLogError(
Discovery,
"LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId,
ChipLogValueScopedNodeId(mPeerId));
return CHIP_ERROR_INTERNAL;
}
}
#endif
mCASEClient = mClientPool->Allocate();
ReturnErrorCodeIf(mCASEClient == nullptr, CHIP_ERROR_NO_MEMORY);
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this);
if (err != CHIP_NO_ERROR)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
CleanupCASEClient();
return err;
}
MoveToState(State::Connecting);
return CHIP_NO_ERROR;
}
void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback<OnDeviceConnected> * onConnection,
Callback::Callback<OnDeviceConnectionFailure> * onFailure,
Callback::Callback<OnSetupFailure> * onSetupFailure)
{
mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure);
}
void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage,
ReleaseBehavior releaseBehavior)
{
// We expect that we only have callbacks if we are not performing just address update.
VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty());
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Clear out mConnectionRetry, so that those cancelables are not holding
// pointers to us, since we're about to go away.
while (auto * cb = mConnectionRetry.First())
{
cb->Cancel();
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Gather up state we will need for our notifications.
SuccessFailureCallbackList readyCallbacks;
readyCallbacks.EnqueueTakeAll(mCallbacks);
auto * exchangeMgr = mInitParams.exchangeMgr;
Optional<SessionHandle> optionalSessionHandle = mSecureSession.Get();
ScopedNodeId peerId = mPeerId;
System::Clock::Milliseconds16 requestedBusyDelay =
#if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
mRequestedBusyDelay;
#else
System::Clock::kZero;
#endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
if (releaseBehavior == ReleaseBehavior::Release)
{
VerifyOrDie(mReleaseDelegate != nullptr);
mReleaseDelegate->ReleaseSession(this);
}
// DO NOT touch any members of this object after this point. It's dead.
NotifyConnectionCallbacks(readyCallbacks, error, stage, peerId, exchangeMgr, optionalSessionHandle, requestedBusyDelay);
}
void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error,
SessionEstablishmentStage stage, const ScopedNodeId & peerId,
Messaging::ExchangeManager * exchangeMgr,
const Optional<SessionHandle> & optionalSessionHandle,
System::Clock::Milliseconds16 requestedBusyDelay)
{
Callback::Callback<OnDeviceConnected> * onConnected;
Callback::Callback<OnDeviceConnectionFailure> * onConnectionFailure;
Callback::Callback<OnSetupFailure> * onSetupFailure;
while (ready.Take(onConnected, onConnectionFailure, onSetupFailure))
{
if (error == CHIP_NO_ERROR)
{
VerifyOrDie(exchangeMgr);
VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession());
if (onConnected != nullptr)
{
onConnected->mCall(onConnected->mContext, *exchangeMgr, optionalSessionHandle.Value());
// That sucessful call might have made the session inactive. If it did, then we should
// not call any more success callbacks, since we do not in fact have an active session
// for them, and if they try to put the session in a holder that will fail, and then
// trying to use the holder as if it has a session will crash.
if (!optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession())
{
ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session",
ChipLogValueScopedNodeId(peerId));
error = CHIP_ERROR_CONNECTION_ABORTED;
}
}
}
else // error
{
if (onConnectionFailure != nullptr)
{
onConnectionFailure->mCall(onConnectionFailure->mContext, peerId, error);
}
if (onSetupFailure != nullptr)
{
ConnnectionFailureInfo failureInfo(peerId, error, stage);
#if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
if (error == CHIP_ERROR_BUSY)
{
failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay);
}
#endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
onSetupFailure->mCall(onSetupFailure->mContext, failureInfo);
}
}
}
}
void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage)
{
VerifyOrReturn(mState == State::Connecting,
ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting"));
// If this condition ever changes, we may need to store the error in a
// member instead of having a boolean
// mTryingNextResultDueToSessionEstablishmentError, so we can recover the
// error in UpdateDeviceData.
if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Make a copy of the ReliableMessageProtocolConfig, since our
// mCaseClient is about to go away once we change state.
ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals();
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Move to the ResolvingAddress state, in case we have more results,
// since we expect to receive results in that state.
MoveToState(State::ResolvingAddress);
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
mTryingNextResultDueToSessionEstablishmentError = true;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
{
// Whatever work we needed to do has been handled by our
// OnNodeAddressResolved callback. Make sure not to touch `this`
// under here, because it might have been deleted by
// OnNodeAddressResolved.
return;
}
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
mTryingNextResultDueToSessionEstablishmentError = false;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Moving back to the Connecting state would be a bit of a lie, since we
// don't have an mCASEClient. Just go back to NeedsAddress, since
// that's really where we are now.
MoveToState(State::NeedsAddress);
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mRemainingAttempts > 0)
{
System::Clock::Seconds16 reattemptDelay;
CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay);
if (err == CHIP_NO_ERROR)
{
MoveToState(State::WaitingForRetry);
NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay);
return;
}
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
}
// Session failed to be established. This is when discovery is also stopped
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);
DequeueConnectionCallbacks(error, stage);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
// Store the requested delay, so that we can use it for scheduling our
// retry or communicate it to our API consumer.
mRequestedBusyDelay = requestedDelay;
#endif
}
void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
{
VerifyOrReturn(mState == State::Connecting,
ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting"));
// Session has been established. This is when discovery is also stopped
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);
if (!mSecureSession.Grab(session))
{
// Got an invalid session, just dispatch an error. We have to do this
// so we don't leak.
DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
return;
}
MoveToState(State::SecureConnected);
DequeueConnectionCallbacks(CHIP_NO_ERROR);
}
void OperationalSessionSetup::CleanupCASEClient()
{
if (mCASEClient)
{
mClientPool->Release(mCASEClient);
mCASEClient = nullptr;
}
}
OperationalSessionSetup::~OperationalSessionSetup()
{
if (mAddressLookupHandle.IsActive())
{
ChipLogDetail(Discovery,
"OperationalSessionSetup[%u:" ChipLogFormatX64
"]: Cancelling incomplete address resolution as device is being deleted.",
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
// Skip cancel callback since the destructor is being called, so we assume that this object is
// obviously not used anymore
CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip);
if (err != CHIP_NO_ERROR)
{
ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format());
}
}
if (mCASEClient)
{
// Make sure we don't leak it.
mClientPool->Release(mCASEClient);
}
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
CancelSessionSetupReattempt();
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease);
}
CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
if (mRemainingAttempts > 0)
{
--mRemainingAttempts;
}
if (mAttemptsDone < UINT8_MAX)
{
++mAttemptsDone;
}
if (mResolveAttemptsAllowed > 0)
{
--mResolveAttemptsAllowed;
}
MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// NOTE: This is public API that can be used to update our stored peer
// address even when we are in State::Connected, so we do not make any
// MoveToState calls in this method.
if (mAddressLookupHandle.IsActive())
{
ChipLogProgress(Discovery,
"OperationalSessionSetup[%u:" ChipLogFormatX64
"]: Operational node lookup already in progress. Will NOT start a new one.",
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
return CHIP_NO_ERROR;
}
// This code can be reached multiple times, if we discover multiple addresses or do retries.
// The metric backend can handle this and always picks the earliest occurrence as the start of the event.
MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);
auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);
PeerId peerId(fabricInfo->GetCompressedFabricId(), mPeerId.GetNodeId());
NodeLookupRequest request(peerId);
return Resolver::Instance().LookupNode(request, mAddressLookupHandle);
}
void OperationalSessionSetup::PerformAddressUpdate()
{
if (mPerformingAddressUpdate)
{
// We are already in the middle of a lookup from a previous call to
// PerformAddressUpdate. In that case we will just exit right away as
// we are already looking to update the results from the previous lookup.
return;
}
// We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state.
VerifyOrDie(mState == State::NeedsAddress);
// We are doing an address lookup whether we have an active session for this peer or not.
mPerformingAddressUpdate = true;
MoveToState(State::ResolvingAddress);
CHIP_ERROR err = LookupPeerAddress();
if (err != CHIP_NO_ERROR)
{
ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
DequeueConnectionCallbacks(err);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
return;
}
}
void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result)
{
UpdateDeviceData(result);
}
void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason)
{
ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// If we're in a mode where we would generally retry CASE, retry operational
// discovery if we're allowed to. That allows us to more-gracefully handle broken networks
// where multicast DNS does not actually work and hence only the initial
// unicast DNS-SD queries get a response.
//
// We check for State::ResolvingAddress just in case in the meantime
// something weird happened and we are no longer trying to resolve an
// address.
if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0)
{
ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed);
// Pretend like our previous attempt (i.e. call to LookupPeerAddress)
// has not happened for purposes of the generic attempt counters, so we
// don't mess up the counters for our actual CASE retry logic.
if (mRemainingAttempts < UINT8_MAX)
{
++mRemainingAttempts;
}
if (mAttemptsDone > 0)
{
--mAttemptsDone;
}
MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
CHIP_ERROR err = LookupPeerAddress();
if (err == CHIP_NO_ERROR)
{
// We need to notify our consumer that the resolve will take more
// time, but we don't actually know how much time it will take,
// because the resolver does not expose that information. Just use
// one minute to be safe.
using namespace chip::System::Clock::Literals;
NotifyRetryHandlers(reason, 60_s16);
return;
}
}
#endif
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);
// No need to modify any variables in `this` since call below releases `this`.
DequeueConnectionCallbacks(reason);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
{
if (attemptCount == 0)
{
// Nothing to do.
return;
}
if (mState != State::NeedsAddress)
{
// We're in the middle of an attempt already, so decrement attemptCount
// by 1 to account for that.
--attemptCount;
}
if (attemptCount > mRemainingAttempts)
{
mRemainingAttempts = attemptCount;
}
if (attemptCount > mResolveAttemptsAllowed)
{
mResolveAttemptsAllowed = attemptCount;
}
}
CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay)
{
VerifyOrDie(mRemainingAttempts > 0);
// Try again, but not if things are in shutdown such that we can't get
// to a system layer, and not if we've run out of attempts.
if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
{
return CHIP_ERROR_INCORRECT_STATE;
}
MoveToState(State::NeedsAddress);
// Stop exponential backoff before our delays get too large.
//
// Note that mAttemptsDone is always > 0 here, because we have
// just finished one attempt.
VerifyOrDie(mAttemptsDone > 0);
static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
(1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
"Our backoff calculation will overflow.");
System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
<< min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
if (responseWasBusy)
{
if (mRequestedBusyDelay > actualTimerDelay)
{
actualTimerDelay = mRequestedBusyDelay;
}
// Reset mRequestedBusyDelay now that we have consumed it, so it does
// not affect future reattempts not triggered by a busy response.
mRequestedBusyDelay = System::Clock::kZero;
}
if (mAttemptsDone % 2 == 0)
{
// It's possible that the other side received one of our Sigma1 messages
// and then failed to get its Sigma2 back to us. If that's the case, it
// will be waiting for that Sigma2 to time out before it starts
// listening for Sigma1 messages again.
//
// To handle that, on every other retry, add the amount of time it would
// take the other side to time out. It would be nice if we could rely
// on the delay reported in a BUSY response to just tell us that value,
// but in practice for old devices BUSY often sends some hardcoded value
// that tells us nothing about when the other side will decide it has
// timed out.
//
// Unfortunately, we do not have the MRP config for the other side here,
// but in practice if the other side is using its local config to
// compute Sigma2 response timeouts, then it's also returning useful
// values with BUSY, so we will wait long enough.
auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
actualTimerDelay += additionalTimeout;
}
timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);
CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);
// TODO: If responseWasBusy, should we increment, mRemainingAttempts and
// mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to
// tell what consumers expect out of a capped retry count here.
// The cast on count() is needed because the type count() returns might not
// actually be uint16_t; on some platforms it's int.
ChipLogProgress(Discovery,
"OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
return err;
}
void OperationalSessionSetup::CancelSessionSetupReattempt()
{
// If we can't get a system layer, there is no way for us to cancel things
// at this point, but hopefully that's because everything is torn down
// anyway and hence the timer will not fire.
auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager();
VerifyOrReturn(sessionManager != nullptr);
auto * systemLayer = sessionManager->SystemLayer();
VerifyOrReturn(systemLayer != nullptr);
systemLayer->CancelTimer(TrySetupAgain, this);
}
void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
{
auto * self = static_cast<OperationalSessionSetup *>(state);
self->MoveToState(State::ResolvingAddress);
CHIP_ERROR err = self->LookupPeerAddress();
if (err == CHIP_NO_ERROR)
{
return;
}
// Give up; we could not start a lookup.
self->DequeueConnectionCallbacks(err);
// Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
void OperationalSessionSetup::AddRetryHandler(Callback::Callback<OnDeviceConnectionRetry> * onRetry)
{
mConnectionRetry.Enqueue(onRetry->Cancel());
}
void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig,
System::Clock::Seconds16 retryDelay)
{
// Compute the time we are likely to need to detect that the retry has
// failed.
System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig);
auto timeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(messageTimeout);
// Add 1 second in case we had fractional milliseconds in messageTimeout.
using namespace chip::System::Clock::Literals;
NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay);
}
void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate)
{
// We have to be very careful here: Calling into these handlers might in
// theory destroy the Callback objects involved, but unlike the
// succcess/failure cases we don't want to just clear the handlers from our
// list when we are calling them, because we might need to call a given
// handler more than once.
//
// To handle this we:
//
// 1) Snapshot the list of handlers up front, so if any of the handlers
// triggers an AddRetryHandler with some other handler that does not
// affect the list we plan to notify here.
//
// 2) When planning to notify a handler move it to a new list that contains
// just that handler. This way if it gets canceled as part of the
// notification we can tell it has been canceled.
//
// 3) If notifying the handler does not cancel it, add it back to our list
// of handlers so we will notify it on future retries.
Cancelable retryHandlerListSnapshot;
mConnectionRetry.DequeueAll(retryHandlerListSnapshot);
while (retryHandlerListSnapshot.mNext != &retryHandlerListSnapshot)
{
auto * cb = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(retryHandlerListSnapshot.mNext);
Callback::CallbackDeque currentCallbackHolder;
currentCallbackHolder.Enqueue(cb->Cancel());
cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate);
if (currentCallbackHolder.mNext != &currentCallbackHolder)
{
// Callback has not been canceled as part of the call, so is still
// supposed to be registered with us.
AddRetryHandler(cb);
}
}
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
} // namespace chip