[Darwin] MTRDevice in-memory cache needs to be resilient to data store returning nil (#34182)

* [Darwin] MTRDevice in-memory cache needs to be resilient to data store returning nil

* Added tests and address PR comment

* Fixed log string

* Additional logging

* Fixed SubscriptionCallback object handling on invalidate and subscription reset

* Fixed error string for MTRErrorTests/testErrorSourcePaths
diff --git a/src/darwin/Framework/CHIP/MTRDevice.mm b/src/darwin/Framework/CHIP/MTRDevice.mm
index 93f9639..05bc9f0 100644
--- a/src/darwin/Framework/CHIP/MTRDevice.mm
+++ b/src/darwin/Framework/CHIP/MTRDevice.mm
@@ -908,12 +908,17 @@
     _reattemptingSubscription = NO;
 
     [_deviceController asyncDispatchToMatterQueue:^{
+        MTR_LOG("%@ invalidate disconnecting ReadClient and SubscriptionCallback", self);
+
         // Destroy the read client and callback (has to happen on the Matter
         // queue, to avoid deleting objects that are being referenced), to
         // tear down the subscription.  We will get no more callbacks from
         // the subscrption after this point.
         std::lock_guard lock(self->_lock);
         self->_currentReadClient = nullptr;
+        if (self->_currentSubscriptionCallback) {
+            delete self->_currentSubscriptionCallback;
+        }
         self->_currentSubscriptionCallback = nullptr;
 
         [self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
@@ -940,6 +945,7 @@
 // whether it might be.
 - (void)_triggerResubscribeWithReason:(NSString *)reason nodeLikelyReachable:(BOOL)nodeLikelyReachable
 {
+    MTR_LOG("%@ _triggerResubscribeWithReason called with reason %@", self, reason);
     assertChipStackLockedByCurrentThread();
 
     // We might want to trigger a resubscribe on our existing ReadClient.  Do
@@ -1235,6 +1241,12 @@
 - (void)_handleSubscriptionError:(NSError *)error
 {
     std::lock_guard lock(_lock);
+    [self _doHandleSubscriptionError:error];
+}
+
+- (void)_doHandleSubscriptionError:(NSError *)error
+{
+    os_unfair_lock_assert_owner(&_lock);
 
     [self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
     _unreportedEvents = nil;
@@ -1400,6 +1412,12 @@
 - (void)_handleSubscriptionReset:(NSNumber * _Nullable)retryDelay
 {
     std::lock_guard lock(_lock);
+    [self _doHandleSubscriptionReset:retryDelay];
+}
+
+- (void)_doHandleSubscriptionReset:(NSNumber * _Nullable)retryDelay
+{
+    os_unfair_lock_assert_owner(&_lock);
 
     // If we are here, then either we failed to establish initial CASE, or we
     // failed to send the initial SubscribeRequest message, or our ReadClient
@@ -1471,7 +1489,7 @@
         return;
     }
 
-    MTR_LOG("%@ reattempting subscription", self);
+    MTR_LOG("%@ reattempting subscription with reason %@", self, reason);
     self.reattemptingSubscription = NO;
     [self _setupSubscriptionWithReason:reason];
 }
@@ -2100,6 +2118,22 @@
 }
 #endif
 
+- (void)_reconcilePersistedClustersWithStorage
+{
+    os_unfair_lock_assert_owner(&self->_lock);
+
+    NSMutableSet * clusterPathsToRemove = [NSMutableSet set];
+    for (MTRClusterPath * clusterPath in _persistedClusters) {
+        MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
+        if (!data) {
+            [clusterPathsToRemove addObject:clusterPath];
+        }
+    }
+
+    MTR_LOG_ERROR("%@ Storage missing %lu / %lu clusters - reconciling in-memory records", self, static_cast<unsigned long>(clusterPathsToRemove.count), static_cast<unsigned long>(_persistedClusters.count));
+    [_persistedClusters minusSet:clusterPathsToRemove];
+}
+
 - (nullable MTRDeviceClusterData *)_clusterDataForPath:(MTRClusterPath *)clusterPath
 {
     os_unfair_lock_assert_owner(&self->_lock);
@@ -2132,8 +2166,16 @@
 
     // Page in the stored value for the data.
     MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
+    MTR_LOG("%@ cluster path %@ cache miss - load from storage success %@", self, clusterPath, YES_NO(data));
     if (data != nil) {
         [_persistedClusterData setObject:data forKey:clusterPath];
+    } else {
+        // If clusterPath is in _persistedClusters and the data store returns nil for it, then the in-memory cache is now not dependable, and subscription should be reset and reestablished to reload cache from device
+
+        // First make sure _persistedClusters is consistent with storage, so repeated calls don't immediately re-trigger this
+        [self _reconcilePersistedClustersWithStorage];
+
+        [self _resetSubscriptionWithReasonString:[NSString stringWithFormat:@"Data store has no data for cluster %@", clusterPath]];
     }
 
     return data;
@@ -2303,13 +2345,43 @@
     }
 }
 
+- (void)_resetSubscriptionWithReasonString:(NSString *)reasonString
+{
+    os_unfair_lock_assert_owner(&self->_lock);
+    MTR_LOG_ERROR("%@ %@ - resetting subscription", self, reasonString);
+
+    [_deviceController asyncDispatchToMatterQueue:^{
+        MTR_LOG("%@ subscription reset disconnecting ReadClient and SubscriptionCallback", self);
+
+        std::lock_guard lock(self->_lock);
+        self->_currentReadClient = nullptr;
+        if (self->_currentSubscriptionCallback) {
+            delete self->_currentSubscriptionCallback;
+        }
+        self->_currentSubscriptionCallback = nullptr;
+
+        [self _doHandleSubscriptionError:nil];
+        // Use nil reset delay so that this keeps existing backoff timing
+        [self _doHandleSubscriptionReset:nil];
+    }
+                                     errorHandler:nil];
+}
+
+#ifdef DEBUG
+- (void)unitTestResetSubscription
+{
+    std::lock_guard lock(self->_lock);
+    [self _resetSubscriptionWithReasonString:@"Unit test reset subscription"];
+}
+#endif
+
 // assume lock is held
 - (void)_setupSubscriptionWithReason:(NSString *)reason
 {
     os_unfair_lock_assert_owner(&self->_lock);
 
     if (![self _subscriptionsAllowed]) {
-        MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription", self);
+        MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription (reason: %@)", self, reason);
         return;
     }
 
@@ -2328,6 +2400,7 @@
 
     // for now just subscribe once
     if (!NeedToStartSubscriptionSetup(_internalDeviceState)) {
+        MTR_LOG("%@ setupSubscription: no need to subscribe due to internal state %lu (reason: %@)", self, static_cast<unsigned long>(_internalDeviceState), reason);
         return;
     }
 
@@ -3758,14 +3831,21 @@
 }
 
 #ifdef DEBUG
-- (MTRDeviceClusterData *)_getClusterDataForPath:(MTRClusterPath *)path
+- (MTRDeviceClusterData *)unitTestGetClusterDataForPath:(MTRClusterPath *)path
 {
     std::lock_guard lock(_lock);
 
     return [[self _clusterDataForPath:path] copy];
 }
 
-- (BOOL)_clusterHasBeenPersisted:(MTRClusterPath *)path
+- (NSSet<MTRClusterPath *> *)unitTestGetPersistedClusters
+{
+    std::lock_guard lock(_lock);
+
+    return [_persistedClusters copy];
+}
+
+- (BOOL)unitTestClusterHasBeenPersisted:(MTRClusterPath *)path
 {
     std::lock_guard lock(_lock);
 
diff --git a/src/darwin/Framework/CHIP/MTRDeviceController.mm b/src/darwin/Framework/CHIP/MTRDeviceController.mm
index 0f263d7..2e4bb6d 100644
--- a/src/darwin/Framework/CHIP/MTRDeviceController.mm
+++ b/src/darwin/Framework/CHIP/MTRDeviceController.mm
@@ -272,7 +272,7 @@
             concurrentSubscriptionPoolSize = 1;
         }
 
-        MTR_LOG("Setting up pool size of MTRDeviceController with: %lu", static_cast<unsigned long>(concurrentSubscriptionPoolSize));
+        MTR_LOG("%@ Setting up pool size of MTRDeviceController with: %lu", self, static_cast<unsigned long>(concurrentSubscriptionPoolSize));
 
         _concurrentSubscriptionPool = [[MTRAsyncWorkQueue alloc] initWithContext:self width:concurrentSubscriptionPoolSize];
 
@@ -283,6 +283,11 @@
     return self;
 }
 
+- (NSString *)description
+{
+    return [NSString stringWithFormat:@"<MTRDeviceController: %p uuid %@>", self, _uniqueIdentifier];
+}
+
 - (BOOL)isRunning
 {
     return _cppCommissioner != nullptr;
@@ -290,6 +295,7 @@
 
 - (void)shutdown
 {
+    MTR_LOG("%@ shutdown called", self);
     if (_cppCommissioner == nullptr) {
         // Already shut down.
         return;
@@ -393,7 +399,7 @@
 {
     __block BOOL commissionerInitialized = NO;
     if ([self isRunning]) {
-        MTR_LOG_ERROR("Unexpected duplicate call to startup");
+        MTR_LOG_ERROR("%@ Unexpected duplicate call to startup", self);
         return NO;
     }
 
@@ -404,24 +410,24 @@
 
         if (startupParams.vendorID == nil || [startupParams.vendorID unsignedShortValue] == chip::VendorId::Common) {
             // Shouldn't be using the "standard" vendor ID for actual devices.
-            MTR_LOG_ERROR("%@ is not a valid vendorID to initialize a device controller with", startupParams.vendorID);
+            MTR_LOG_ERROR("%@ %@ is not a valid vendorID to initialize a device controller with", self, startupParams.vendorID);
             return;
         }
 
         if (startupParams.operationalCertificate == nil && startupParams.nodeID == nil) {
-            MTR_LOG_ERROR("Can't start a controller if we don't know what node id it is");
+            MTR_LOG_ERROR("%@ Can't start a controller if we don't know what node id it is", self);
             return;
         }
 
         if ([startupParams keypairsMatchCertificates] == NO) {
-            MTR_LOG_ERROR("Provided keypairs do not match certificates");
+            MTR_LOG_ERROR("%@ Provided keypairs do not match certificates", self);
             return;
         }
 
         if (startupParams.operationalCertificate != nil && startupParams.operationalKeypair == nil
             && (!startupParams.fabricIndex.HasValue()
                 || !startupParams.keystore->HasOpKeypairForFabric(startupParams.fabricIndex.Value()))) {
-            MTR_LOG_ERROR("Have no operational keypair for our operational certificate");
+            MTR_LOG_ERROR("%@ Have no operational keypair for our operational certificate", self);
             return;
         }
 
@@ -584,9 +590,12 @@
 
         self->_storedFabricIndex = fabricIdx;
         commissionerInitialized = YES;
+
+        MTR_LOG("%@ startup succeeded for nodeID 0x%016llX", self, self->_cppCommissioner->GetNodeId());
     });
 
     if (commissionerInitialized == NO) {
+        MTR_LOG_ERROR("%@ startup failed", self);
         [self cleanupAfterStartup];
         return NO;
     }
@@ -597,7 +606,7 @@
     // above.
     if (![self setOperationalCertificateIssuer:startupParams.operationalCertificateIssuer
                                          queue:startupParams.operationalCertificateIssuerQueue]) {
-        MTR_LOG_ERROR("operationalCertificateIssuer and operationalCertificateIssuerQueue must both be nil or both be non-nil");
+        MTR_LOG_ERROR("%@ operationalCertificateIssuer and operationalCertificateIssuerQueue must both be nil or both be non-nil", self);
         [self cleanupAfterStartup];
         return NO;
     }
@@ -605,14 +614,14 @@
     if (_controllerDataStore) {
         // If the storage delegate supports the bulk read API, then a dictionary of nodeID => cluster data dictionary would be passed to the handler. Otherwise this would be a no-op, and stored attributes for MTRDevice objects will be loaded lazily in -deviceForNodeID:.
         [_controllerDataStore fetchAttributeDataForAllDevices:^(NSDictionary<NSNumber *, NSDictionary<MTRClusterPath *, MTRDeviceClusterData *> *> * _Nonnull clusterDataByNode) {
-            MTR_LOG("Loaded attribute values for %lu nodes from storage for controller uuid %@", static_cast<unsigned long>(clusterDataByNode.count), self->_uniqueIdentifier);
+            MTR_LOG("%@ Loaded attribute values for %lu nodes from storage for controller uuid %@", self, static_cast<unsigned long>(clusterDataByNode.count), self->_uniqueIdentifier);
 
             std::lock_guard lock(self->_deviceMapLock);
             NSMutableArray * deviceList = [NSMutableArray array];
             for (NSNumber * nodeID in clusterDataByNode) {
                 NSDictionary * clusterData = clusterDataByNode[nodeID];
                 MTRDevice * device = [self _setupDeviceForNodeID:nodeID prefetchedClusterData:clusterData];
-                MTR_LOG("Loaded %lu cluster data from storage for %@", static_cast<unsigned long>(clusterData.count), device);
+                MTR_LOG("%@ Loaded %lu cluster data from storage for %@", self, static_cast<unsigned long>(clusterData.count), device);
 
                 [deviceList addObject:device];
             }
@@ -623,7 +632,7 @@
             // Note that this is just an optimization to avoid throwing the information away and immediately
             // re-reading it from storage.
             dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t) (kSecondsToWaitBeforeAPIClientRetainsMTRDevice * NSEC_PER_SEC)), dispatch_get_main_queue(), ^{
-                MTR_LOG("MTRDeviceController: un-retain devices loaded at startup %lu", static_cast<unsigned long>(deviceList.count));
+                MTR_LOG("%@ un-retain devices loaded at startup %lu", self, static_cast<unsigned long>(deviceList.count));
             });
         }];
     }
@@ -638,7 +647,7 @@
 
     NSNumber * nodeID = [self syncRunOnWorkQueueWithReturnValue:block error:nil];
     if (!nodeID) {
-        MTR_LOG_ERROR("A controller has no node id if it has not been started");
+        MTR_LOG_ERROR("%@ A controller has no node id if it has not been started", self);
     }
 
     return nodeID;
@@ -707,7 +716,7 @@
                                             newNodeID:(NSNumber *)newNodeID
                                                 error:(NSError * __autoreleasing *)error
 {
-    MTR_LOG("Setting up commissioning session for already-discovered device %@ and device ID 0x%016llX with setup payload %@", discoveredDevice, newNodeID.unsignedLongLongValue, payload);
+    MTR_LOG("%@ Setting up commissioning session for already-discovered device %@ and device ID 0x%016llX with setup payload %@", self, discoveredDevice, newNodeID.unsignedLongLongValue, payload);
 
     [[MTRMetricsCollector sharedInstance] resetMetrics];
 
@@ -965,8 +974,7 @@
     };
 
     MTRBaseDevice * device = [self syncRunOnWorkQueueWithReturnValue:block error:error];
-    MTR_LOG("Getting device being commissioned with node ID 0x%016llX: %@ (error: %@)",
-        nodeID.unsignedLongLongValue, device, (error ? *error : nil));
+    MTR_LOG("%@ Getting device being commissioned with node ID 0x%016llX: %@ (error: %@)", self, nodeID.unsignedLongLongValue, device, (error ? *error : nil));
     return device;
 }
 
@@ -996,7 +1004,7 @@
     } else if (_controllerDataStore) {
         // Load persisted cluster data if they exist.
         NSDictionary * clusterData = [_controllerDataStore getStoredClusterDataForNodeID:nodeID];
-        MTR_LOG("Loaded %lu cluster data from storage for %@", static_cast<unsigned long>(clusterData.count), deviceToReturn);
+        MTR_LOG("%@ Loaded %lu cluster data from storage for %@", self, static_cast<unsigned long>(clusterData.count), deviceToReturn);
         if (clusterData.count) {
             [deviceToReturn setPersistedClusterData:clusterData];
         }
@@ -1035,7 +1043,7 @@
         [deviceToRemove invalidate];
         [_nodeIDToDeviceMap removeObjectForKey:nodeID];
     } else {
-        MTR_LOG_ERROR("Error: Cannot remove device %p with nodeID %llu", device, nodeID.unsignedLongLongValue);
+        MTR_LOG_ERROR("%@ Error: Cannot remove device %p with nodeID %llu", self, device, nodeID.unsignedLongLongValue);
     }
 }
 
@@ -1143,7 +1151,7 @@
     }
 
     if (![endpoint associateWithController:self]) {
-        MTR_LOG_ERROR("Failed to associate MTRServerEndpoint with MTRDeviceController");
+        MTR_LOG_ERROR("%@ Failed to associate MTRServerEndpoint with MTRDeviceController", self);
         [_factory removeServerEndpoint:endpoint];
         return NO;
     }
@@ -1151,11 +1159,11 @@
     [self asyncDispatchToMatterQueue:^() {
         [self->_serverEndpoints addObject:endpoint];
         [endpoint registerMatterEndpoint];
-        MTR_LOG("Added server endpoint %u to controller %@", static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue),
+        MTR_LOG("%@ Added server endpoint %u to controller %@", self, static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue),
             self->_uniqueIdentifier);
     }
         errorHandler:^(NSError * error) {
-            MTR_LOG_ERROR("Unexpected failure dispatching to Matter queue on running controller in addServerEndpoint, adding endpoint %u",
+            MTR_LOG_ERROR("%@ Unexpected failure dispatching to Matter queue on running controller in addServerEndpoint, adding endpoint %u", self,
                 static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue));
         }];
     return YES;
@@ -1179,7 +1187,7 @@
     // tearing it down.
     [self asyncDispatchToMatterQueue:^() {
         [self removeServerEndpointOnMatterQueue:endpoint];
-        MTR_LOG("Removed server endpoint %u from controller %@", static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue),
+        MTR_LOG("%@ Removed server endpoint %u from controller %@", self, static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue),
             self->_uniqueIdentifier);
         if (queue != nil && completion != nil) {
             dispatch_async(queue, completion);
@@ -1187,7 +1195,7 @@
     }
         errorHandler:^(NSError * error) {
             // Error means we got shut down, so the endpoint is removed now.
-            MTR_LOG("controller %@ already shut down, so endpoint %u has already been removed", self->_uniqueIdentifier,
+            MTR_LOG("%@ controller already shut down, so endpoint %u has already been removed", self,
                 static_cast<chip::EndpointId>(endpoint.endpointID.unsignedLongLongValue));
             if (queue != nil && completion != nil) {
                 dispatch_async(queue, completion);
@@ -1212,7 +1220,7 @@
         return NO;
     }
 
-    MTR_LOG_ERROR("Error: %@", logMsg);
+    MTR_LOG_ERROR("%@ Error: %@", self, logMsg);
 
     [self cleanup];
 
@@ -1233,7 +1241,7 @@
         return NO;
     }
 
-    MTR_LOG_ERROR("Error(%" CHIP_ERROR_FORMAT "): %@", errorCode.Format(), logMsg);
+    MTR_LOG_ERROR("Error(%" CHIP_ERROR_FORMAT "): %@ %@", errorCode.Format(), self, logMsg);
 
     return YES;
 }
@@ -1244,7 +1252,7 @@
         return NO;
     }
 
-    MTR_LOG_ERROR("Error(%" CHIP_ERROR_FORMAT "): %s", errorCode.Format(), [logMsg UTF8String]);
+    MTR_LOG_ERROR("Error(%" CHIP_ERROR_FORMAT "): %@ %s", errorCode.Format(), self, [logMsg UTF8String]);
     if (error) {
         *error = [MTRError errorForCHIPErrorCode:errorCode];
     }
@@ -1882,7 +1890,7 @@
 - (BOOL)openPairingWindow:(uint64_t)deviceID duration:(NSUInteger)duration error:(NSError * __autoreleasing *)error
 {
     if (duration > UINT16_MAX) {
-        MTR_LOG_ERROR("Error: Duration %lu is too large. Max value %d", static_cast<unsigned long>(duration), UINT16_MAX);
+        MTR_LOG_ERROR("%@ Error: Duration %lu is too large. Max value %d", self, static_cast<unsigned long>(duration), UINT16_MAX);
         if (error) {
             *error = [MTRError errorForCHIPErrorCode:CHIP_ERROR_INVALID_INTEGER_VALUE];
         }
@@ -1908,7 +1916,7 @@
                                  error:(NSError * __autoreleasing *)error
 {
     if (duration > UINT16_MAX) {
-        MTR_LOG_ERROR("Error: Duration %lu is too large. Max value %d", static_cast<unsigned long>(duration), UINT16_MAX);
+        MTR_LOG_ERROR("%@ Error: Duration %lu is too large. Max value %d", self, static_cast<unsigned long>(duration), UINT16_MAX);
         if (error) {
             *error = [MTRError errorForCHIPErrorCode:CHIP_ERROR_INVALID_INTEGER_VALUE];
         }
@@ -1916,7 +1924,7 @@
     }
 
     if (discriminator > 0xfff) {
-        MTR_LOG_ERROR("Error: Discriminator %lu is too large. Max value %d", static_cast<unsigned long>(discriminator), 0xfff);
+        MTR_LOG_ERROR("%@ Error: Discriminator %lu is too large. Max value %d", self, static_cast<unsigned long>(discriminator), 0xfff);
         if (error) {
             *error = [MTRError errorForCHIPErrorCode:CHIP_ERROR_INVALID_INTEGER_VALUE];
         }
@@ -1927,7 +1935,7 @@
     MATTER_LOG_METRIC_SCOPE(kMetricOpenPairingWindow, errorCode);
 
     if (!chip::CanCastTo<uint32_t>(setupPIN) || !chip::SetupPayload::IsValidSetupPIN(static_cast<uint32_t>(setupPIN))) {
-        MTR_LOG_ERROR("Error: Setup pin %lu is not valid", static_cast<unsigned long>(setupPIN));
+        MTR_LOG_ERROR("%@ Error: Setup pin %lu is not valid", self, static_cast<unsigned long>(setupPIN));
         errorCode = CHIP_ERROR_INVALID_INTEGER_VALUE;
         if (error) {
             *error = [MTRError errorForCHIPErrorCode:errorCode];
@@ -1949,11 +1957,11 @@
         std::string outCode;
 
         if (CHIP_NO_ERROR != (errorCode = generator.payloadDecimalStringRepresentation(outCode))) {
-            MTR_LOG_ERROR("Failed to get decimal setup code");
+            MTR_LOG_ERROR("%@ Failed to get decimal setup code", self);
             return nil;
         }
 
-        MTR_LOG_ERROR("Setup code is %s", outCode.c_str());
+        MTR_LOG_ERROR("%@ Setup code is %s", self, outCode.c_str());
         return [NSString stringWithCString:outCode.c_str() encoding:[NSString defaultCStringEncoding]];
     };
 
diff --git a/src/darwin/Framework/CHIP/MTRDeviceControllerDataStore.mm b/src/darwin/Framework/CHIP/MTRDeviceControllerDataStore.mm
index a328b2a..8d57a5b 100644
--- a/src/darwin/Framework/CHIP/MTRDeviceControllerDataStore.mm
+++ b/src/darwin/Framework/CHIP/MTRDeviceControllerDataStore.mm
@@ -1011,6 +1011,7 @@
         BOOL endpointIndexModified = NO;
         NSMutableArray<NSNumber *> * endpointIndexToStore;
         if (endpointIndex) {
+            MTR_LOG("No entry found for endpointIndex @ node 0x%016llX - creating", nodeID.unsignedLongLongValue);
             endpointIndexToStore = [endpointIndex mutableCopy];
         } else {
             endpointIndexToStore = [NSMutableArray array];
@@ -1029,6 +1030,7 @@
             BOOL clusterIndexModified = NO;
             NSMutableArray<NSNumber *> * clusterIndexToStore;
             if (clusterIndex) {
+                MTR_LOG("No entry found for clusterIndex @ node 0x%016llX endpoint %u - creating", nodeID.unsignedLongLongValue, endpointID.unsignedShortValue);
                 clusterIndexToStore = [clusterIndex mutableCopy];
             } else {
                 clusterIndexToStore = [NSMutableArray array];
@@ -1074,6 +1076,7 @@
         NSArray<NSNumber *> * nodeIndexToStore = nil;
         if (!nodeIndex) {
             // Ensure node index exists
+            MTR_LOG("No entry found for for nodeIndex - creating for node 0x%016llX", nodeID.unsignedLongLongValue);
             nodeIndexToStore = [NSArray arrayWithObject:nodeID];
         } else if (![nodeIndex containsObject:nodeID]) {
             nodeIndexToStore = [nodeIndex arrayByAddingObject:nodeID];
diff --git a/src/darwin/Framework/CHIPTests/MTRPerControllerStorageTests.m b/src/darwin/Framework/CHIPTests/MTRPerControllerStorageTests.m
index 4c7375c..db8bd30 100644
--- a/src/darwin/Framework/CHIPTests/MTRPerControllerStorageTests.m
+++ b/src/darwin/Framework/CHIPTests/MTRPerControllerStorageTests.m
@@ -2834,7 +2834,7 @@
                 MTRClusterPath * path = [MTRClusterPath clusterPathWithEndpointID:testEndpoint clusterID:cluster];
 
                 if ([cluster isEqualToNumber:@(MTRClusterIDTypeIdentifyID)]) {
-                    MTRDeviceClusterData * data = [device _getClusterDataForPath:path];
+                    MTRDeviceClusterData * data = [device unitTestGetClusterDataForPath:path];
                     XCTAssertNotNil(data);
                     XCTAssertNotNil(data.attributes);
 
@@ -2897,7 +2897,7 @@
                 MTRClusterPath * path = [MTRClusterPath clusterPathWithEndpointID:testEndpoint clusterID:cluster];
 
                 if ([cluster isEqualToNumber:@(MTRClusterIDTypeIdentifyID)]) {
-                    MTRDeviceClusterData * data = [device _getClusterDataForPath:path];
+                    MTRDeviceClusterData * data = [device unitTestGetClusterDataForPath:path];
                     XCTAssertNotNil(data);
                     XCTAssertNotNil(data.attributes);
 
@@ -2937,4 +2937,128 @@
     XCTAssertFalse([controller isRunning]);
 }
 
+// Run the test here since detectLeaks is set, and subscription reset needs to not cause leaks
+- (void)testMTRDeviceResetSubscription
+{
+    __auto_type * storageDelegate = [[MTRTestPerControllerStorageWithBulkReadWrite alloc] initWithControllerID:[NSUUID UUID]];
+
+    __auto_type * factory = [MTRDeviceControllerFactory sharedInstance];
+    XCTAssertNotNil(factory);
+
+    __auto_type queue = dispatch_get_main_queue();
+
+    __auto_type * rootKeys = [[MTRTestKeys alloc] init];
+    XCTAssertNotNil(rootKeys);
+
+    __auto_type * operationalKeys = [[MTRTestKeys alloc] init];
+    XCTAssertNotNil(operationalKeys);
+
+    NSNumber * nodeID = @(333);
+    NSNumber * fabricID = @(444);
+
+    NSError * error;
+
+    MTRPerControllerStorageTestsCertificateIssuer * certificateIssuer;
+    MTRDeviceController * controller = [self startControllerWithRootKeys:rootKeys
+                                                         operationalKeys:operationalKeys
+                                                                fabricID:fabricID
+                                                                  nodeID:nodeID
+                                                                 storage:storageDelegate
+                                                                   error:&error
+                                                       certificateIssuer:&certificateIssuer];
+    XCTAssertNil(error);
+    XCTAssertNotNil(controller);
+    XCTAssertTrue([controller isRunning]);
+
+    XCTAssertEqualObjects(controller.controllerNodeID, nodeID);
+
+    // Now commission the device, to test that that works.
+    NSNumber * deviceID = @(22);
+    certificateIssuer.nextNodeID = deviceID;
+    [self commissionWithController:controller newNodeID:deviceID];
+
+    // We should have established CASE using our operational key.
+    XCTAssertEqual(operationalKeys.signatureCount, 1);
+
+    __auto_type * device = [MTRDevice deviceWithNodeID:deviceID controller:controller];
+    __auto_type * delegate = [[MTRDeviceTestDelegate alloc] init];
+
+    XCTestExpectation * subscriptionExpectation1 = [self expectationWithDescription:@"Subscription has been set up 1"];
+
+    delegate.onReportEnd = ^{
+        [subscriptionExpectation1 fulfill];
+    };
+
+    [device setDelegate:delegate queue:queue];
+
+    [self waitForExpectations:@[ subscriptionExpectation1 ] timeout:60];
+
+    // Test 1: test that subscription reset works
+
+    XCTestExpectation * subscriptionExpectation2 = [self expectationWithDescription:@"Subscription has been set up 2"];
+
+    __weak __auto_type weakDelegate = delegate;
+    delegate.onReportEnd = ^{
+        [subscriptionExpectation2 fulfill];
+        // reset callback so expectation not fulfilled twice
+        __strong __auto_type strongDelegate = weakDelegate;
+        strongDelegate.onReportEnd = nil;
+    };
+
+    // clear cluster data before reset
+    NSUInteger attributeCountBeforeReset = [device unitTestAttributeCount];
+    [device unitTestClearClusterData];
+
+    [device unitTestResetSubscription];
+
+    [self waitForExpectations:@[ subscriptionExpectation2 ] timeout:60];
+
+    // check that in-memory cache has recovered
+    NSUInteger attributeCountAfterReset = [device unitTestAttributeCount];
+    XCTAssertEqual(attributeCountBeforeReset, attributeCountAfterReset);
+
+    // Test 2: simulate a cache purge and loss of storage, to see:
+    //  * that subscription reestablishes
+    //  * the cache is restored
+    [device unitTestClearClusterData];
+    [controller.controllerDataStore clearAllStoredClusterData];
+
+    NSDictionary<MTRClusterPath *, MTRDeviceClusterData *> * storedClusterData = [controller.controllerDataStore getStoredClusterDataForNodeID:deviceID];
+    XCTAssertEqual(storedClusterData.count, 0);
+
+    XCTestExpectation * subscriptionExpectation3 = [self expectationWithDescription:@"Subscription has been set up 3"];
+    delegate.onReportEnd = ^{
+        [subscriptionExpectation3 fulfill];
+        // reset callback so expectation not fulfilled twice
+        __strong __auto_type strongDelegate = weakDelegate;
+        strongDelegate.onReportEnd = nil;
+    };
+
+    // now get list of clusters, and call clusterDataForPath: to trigger the reset
+    NSSet<MTRClusterPath *> * persistedClusters = [device unitTestGetPersistedClusters];
+    MTRDeviceClusterData * data = [device unitTestGetClusterDataForPath:persistedClusters.anyObject];
+    XCTAssertNil(data);
+
+    // Also call clusterDataForPath: repeatedly to verify in logs that subscription is reset only once
+    for (MTRClusterPath * path in persistedClusters) {
+        MTRDeviceClusterData * data = [device unitTestGetClusterDataForPath:path];
+        (void) data; // do not assert nil because subscription may happen during this time and already fill in the cache
+    }
+
+    [self waitForExpectations:@[ subscriptionExpectation3 ] timeout:60];
+
+    // Verify that after report ends all the cluster data is back
+    for (MTRClusterPath * path in persistedClusters) {
+        MTRDeviceClusterData * data = [device unitTestGetClusterDataForPath:path];
+        XCTAssertNotNil(data);
+    }
+
+    // Reset our commissionee.
+    __auto_type * baseDevice = [MTRBaseDevice deviceWithNodeID:deviceID controller:controller];
+    ResetCommissionee(baseDevice, queue, self, kTimeoutInSeconds);
+
+    [controller shutdown];
+    XCTAssertFalse([controller isRunning]);
+}
+
 @end
diff --git a/src/darwin/Framework/CHIPTests/TestHelpers/MTRTestDeclarations.h b/src/darwin/Framework/CHIPTests/TestHelpers/MTRTestDeclarations.h
index 0705cb8..46d6c61 100644
--- a/src/darwin/Framework/CHIPTests/TestHelpers/MTRTestDeclarations.h
+++ b/src/darwin/Framework/CHIPTests/TestHelpers/MTRTestDeclarations.h
@@ -47,8 +47,6 @@
 
 @interface MTRDevice (Test)
 - (BOOL)_attributeDataValue:(NSDictionary *)one isEqualToDataValue:(NSDictionary *)theOther;
-- (MTRDeviceClusterData *)_getClusterDataForPath:(MTRClusterPath *)path;
-- (BOOL)_clusterHasBeenPersisted:(MTRClusterPath *)path;
 - (NSMutableArray<NSNumber *> *)arrayOfNumbersFromAttributeValue:(MTRDeviceDataValueDictionary)dataDictionary;
 @end
 
@@ -79,6 +77,10 @@
     deviceReportingExcessivelyIntervalThreshold:(NSTimeInterval)deviceReportingExcessivelyIntervalThreshold;
 - (void)unitTestSetMostRecentReportTimes:(NSMutableArray<NSDate *> *)mostRecentReportTimes;
 - (NSUInteger)unitTestNonnullDelegateCount;
+- (void)unitTestResetSubscription;
+- (MTRDeviceClusterData *)unitTestGetClusterDataForPath:(MTRClusterPath *)path;
+- (NSSet<MTRClusterPath *> *)unitTestGetPersistedClusters;
+- (BOOL)unitTestClusterHasBeenPersisted:(MTRClusterPath *)path;
 @end
 #endif