Get the commissioner reusable more quickly after StopPairing() (#32473)

* Get the commissioner reusable more quickly after StopPairing()

Don't need to keep state and the CommissioneeDeviceProxy around; instead hold
on to just the Session and evict it when the disarm request succeeds or fails.

* Tweaks from review
diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp
index 35c616a..0b384a3 100644
--- a/src/controller/CHIPDeviceController.cpp
+++ b/src/controller/CHIPDeviceController.cpp
@@ -1658,13 +1658,30 @@
     commissioner->CommissioningStageComplete(error);
 }
 
+static GeneralCommissioning::Commands::ArmFailSafe::Type DisarmFailsafeRequest()
+{
+    GeneralCommissioning::Commands::ArmFailSafe::Type request;
+    request.expiryLengthSeconds = 0; // Expire immediately.
+    request.breadcrumb          = 0;
+    return request;
+}
+
+static void MarkForEviction(const Optional<SessionHandle> & session)
+{
+    if (session.HasValue())
+    {
+        session.Value()->AsSecureSession()->MarkForEviction();
+    }
+}
+
 void DeviceCommissioner::CleanupCommissioning(DeviceProxy * proxy, NodeId nodeId, const CompletionStatus & completionStatus)
 {
+    // At this point, proxy == mDeviceBeingCommissioned, nodeId == mDeviceBeingCommissioned->GetDeviceId()
+
     mCommissioningCompletionStatus = completionStatus;
 
     if (completionStatus.err == CHIP_NO_ERROR)
     {
-
         CommissioneeDeviceProxy * commissionee = FindCommissioneeDevice(nodeId);
         if (commissionee != nullptr)
         {
@@ -1674,13 +1691,40 @@
         CommissioningStageComplete(CHIP_NO_ERROR);
         SendCommissioningCompleteCallbacks(nodeId, mCommissioningCompletionStatus);
     }
-    else if (completionStatus.failedStage.HasValue() && completionStatus.failedStage.Value() >= kWiFiNetworkSetup &&
-             completionStatus.err != CHIP_ERROR_CANCELLED)
+    else if (completionStatus.err == CHIP_ERROR_CANCELLED)
+    {
+        // If we're cleaning up because cancellation has been requested via StopPairing(), expire the failsafe
+        // in the background and reset our state synchronously, so a new commissioning attempt can be started.
+        CommissioneeDeviceProxy * commissionee = FindCommissioneeDevice(nodeId);
+        SessionHolder session((commissionee == proxy) ? commissionee->DetachSecureSession().Value()
+                                                      : proxy->GetSecureSession().Value());
+
+        auto request     = DisarmFailsafeRequest();
+        auto onSuccessCb = [session](const app::ConcreteCommandPath & aPath, const app::StatusIB & aStatus,
+                                     const decltype(request)::ResponseType & responseData) {
+            ChipLogProgress(Controller, "Failsafe disarmed");
+            MarkForEviction(session.Get());
+        };
+        auto onFailureCb = [session](CHIP_ERROR aError) {
+            ChipLogProgress(Controller, "Ignoring failure to disarm failsafe: %" CHIP_ERROR_FORMAT, aError.Format());
+            MarkForEviction(session.Get());
+        };
+
+        ChipLogProgress(Controller, "Disarming failsafe on device %p in background", proxy);
+        CHIP_ERROR err = InvokeCommandRequest(proxy->GetExchangeManager(), session.Get().Value(), kRootEndpointId, request,
+                                              onSuccessCb, onFailureCb);
+        if (err != CHIP_NO_ERROR)
+        {
+            ChipLogError(Controller, "Failed to send command to disarm fail-safe: %" CHIP_ERROR_FORMAT, err.Format());
+        }
+
+        CleanupDoneAfterError();
+    }
+    else if (completionStatus.failedStage.HasValue() && completionStatus.failedStage.Value() >= kWiFiNetworkSetup)
     {
         // If we were already doing network setup, we need to retain the pase session and start again from network setup stage.
         // We do not need to reset the failsafe here because we want to keep everything on the device up to this point, so just
-        // send the completion callbacks (see "Commissioning Flows Error Handling" in the spec). This does not apply if
-        // we're cleaning up because cancellation has been requested via StopPairing().
+        // send the completion callbacks (see "Commissioning Flows Error Handling" in the spec).
         CommissioningStageComplete(CHIP_NO_ERROR);
         SendCommissioningCompleteCallbacks(nodeId, mCommissioningCompletionStatus);
     }
@@ -1689,21 +1733,14 @@
         // If we've failed somewhere in the early stages (or we don't have a failedStage specified), we need to start from the
         // beginning. However, because some of the commands can only be sent once per arm-failsafe, we also need to force a reset on
         // the failsafe so we can start fresh on the next attempt.
-        GeneralCommissioning::Commands::ArmFailSafe::Type request;
-        request.expiryLengthSeconds = 0; // Expire immediately.
-        request.breadcrumb          = 0;
-        ChipLogProgress(Controller, "Expiring failsafe on proxy %p", proxy);
-        mDeviceBeingCommissioned = proxy;
-        // We actually want to do the same thing on success or failure because we're already in a failure state
-        CHIP_ERROR err = SendCommissioningCommand(proxy, request, OnDisarmFailsafe, OnDisarmFailsafeFailure, kRootEndpointId,
-                                                  /* timeout = */ NullOptional);
+        ChipLogProgress(Controller, "Disarming failsafe on device %p", proxy);
+        auto request   = DisarmFailsafeRequest();
+        CHIP_ERROR err = SendCommissioningCommand(proxy, request, OnDisarmFailsafe, OnDisarmFailsafeFailure, kRootEndpointId);
         if (err != CHIP_NO_ERROR)
         {
-            // We won't get any async callbacks here, so just pretend like the
-            // command errored out async.
+            // We won't get any async callbacks here, so just pretend like the command errored out async.
             ChipLogError(Controller, "Failed to send command to disarm fail-safe: %" CHIP_ERROR_FORMAT, err.Format());
-            DisarmDone();
-            return;
+            CleanupDoneAfterError();
         }
     }
 }
@@ -1713,17 +1750,17 @@
 {
     ChipLogProgress(Controller, "Failsafe disarmed");
     DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
-    commissioner->DisarmDone();
+    commissioner->CleanupDoneAfterError();
 }
 
 void DeviceCommissioner::OnDisarmFailsafeFailure(void * context, CHIP_ERROR error)
 {
     ChipLogProgress(Controller, "Ignoring failure to disarm failsafe: %" CHIP_ERROR_FORMAT, error.Format());
     DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
-    commissioner->DisarmDone();
+    commissioner->CleanupDoneAfterError();
 }
 
-void DeviceCommissioner::DisarmDone()
+void DeviceCommissioner::CleanupDoneAfterError()
 {
     // If someone nulled out our mDeviceBeingCommissioned, there's nothing else
     // to do here.
@@ -1735,13 +1772,15 @@
 
     // Signal completion - this will reset mDeviceBeingCommissioned.
     CommissioningStageComplete(CHIP_NO_ERROR);
-    SendCommissioningCompleteCallbacks(nodeId, mCommissioningCompletionStatus);
 
     // If we've disarmed the failsafe, it's because we're starting again, so kill the pase connection.
     if (commissionee != nullptr)
     {
         ReleaseCommissioneeDevice(commissionee);
     }
+
+    // Invoke callbacks last, after we have cleared up all state.
+    SendCommissioningCompleteCallbacks(nodeId, mCommissioningCompletionStatus);
 }
 
 void DeviceCommissioner::SendCommissioningCompleteCallbacks(NodeId nodeId, const CompletionStatus & completionStatus)
@@ -2572,13 +2611,11 @@
                         params.GetCompletionStatus().err.AsString());
     }
 
-    // For now, we ignore errors coming in from the device since not all commissioning clusters are implemented on the device
-    // side.
     mCommissioningStage      = step;
     mCommissioningDelegate   = delegate;
     mDeviceBeingCommissioned = proxy;
-    // TODO: Extend timeouts to the DAC and Opcert requests.
 
+    // TODO: Extend timeouts to the DAC and Opcert requests.
     // TODO(cecille): We probably want something better than this for breadcrumbs.
     uint64_t breadcrumb = static_cast<uint64_t>(step);
 
diff --git a/src/controller/CHIPDeviceController.h b/src/controller/CHIPDeviceController.h
index ee4ac09..87d31db 100644
--- a/src/controller/CHIPDeviceController.h
+++ b/src/controller/CHIPDeviceController.h
@@ -908,7 +908,7 @@
     static void OnDisarmFailsafe(void * context,
                                  const app::Clusters::GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType & data);
     static void OnDisarmFailsafeFailure(void * context, CHIP_ERROR error);
-    void DisarmDone();
+    void CleanupDoneAfterError();
     static void OnArmFailSafeExtendedForDeviceAttestation(
         void * context, const chip::app::Clusters::GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType & data);
     static void OnFailedToExtendedArmFailSafeDeviceAttestation(void * context, CHIP_ERROR error);
@@ -961,7 +961,7 @@
     CHIP_ERROR SendCommissioningCommand(DeviceProxy * device, const RequestObjectT & request,
                                         CommandResponseSuccessCallback<typename RequestObjectT::ResponseType> successCb,
                                         CommandResponseFailureCallback failureCb, EndpointId endpoint,
-                                        Optional<System::Clock::Timeout> timeout);
+                                        Optional<System::Clock::Timeout> timeout = NullOptional);
     void SendCommissioningReadRequest(DeviceProxy * proxy, Optional<System::Clock::Timeout> timeout,
                                       app::AttributePathParams * readPaths, size_t readPathsSize);
     void CancelCommissioningInteractions();
diff --git a/src/controller/CommissioneeDeviceProxy.cpp b/src/controller/CommissioneeDeviceProxy.cpp
index 48ba4e4..376685a 100644
--- a/src/controller/CommissioneeDeviceProxy.cpp
+++ b/src/controller/CommissioneeDeviceProxy.cpp
@@ -66,6 +66,15 @@
     mPairing.Clear();
 }
 
+chip::Optional<SessionHandle> CommissioneeDeviceProxy::DetachSecureSession()
+{
+    auto session = mSecureSession.Get();
+    mSecureSession.Release();
+    mState = ConnectionState::NotConnected;
+    mPairing.Clear();
+    return session;
+}
+
 CHIP_ERROR CommissioneeDeviceProxy::UpdateDeviceData(const Transport::PeerAddress & addr,
                                                      const ReliableMessageProtocolConfig & config)
 {
diff --git a/src/controller/CommissioneeDeviceProxy.h b/src/controller/CommissioneeDeviceProxy.h
index fa9aa17..715601b 100644
--- a/src/controller/CommissioneeDeviceProxy.h
+++ b/src/controller/CommissioneeDeviceProxy.h
@@ -105,6 +105,11 @@
      */
     void CloseSession();
 
+    /**
+     *  Detaches the underlying session (if any) from this proxy and returns it.
+     */
+    chip::Optional<SessionHandle> DetachSecureSession();
+
     void Disconnect() override { CloseSession(); }
 
     /**