Operational Discovery with Continuous Query Support for Linux  (#33402)

* Add operational discovery and continuous query support in linux platform mdns

* Restyled by whitespace

* Restyled by clang-format

* Updated as per feedback comments

* Updated as per review feedback

* Restyled by whitespace

* Restyled by clang-format

* Updated as per review feedback

* Updated as per new review feedback

* Restyled by whitespace

* Restyled by clang-format

* Updated as per new review feedback and fixed builds

* Updated as per suggestions

* Fix comment spelling.

---------

Co-authored-by: Restyled.io <commits@restyled.io>
Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>
diff --git a/src/lib/dnssd/Discovery_ImplPlatform.cpp b/src/lib/dnssd/Discovery_ImplPlatform.cpp
index f4a524f..a1b28b7 100644
--- a/src/lib/dnssd/Discovery_ImplPlatform.cpp
+++ b/src/lib/dnssd/Discovery_ImplPlatform.cpp
@@ -50,13 +50,33 @@
     }
 
     DiscoveredNodeData nodeData;
-    result->ToDiscoveredNodeData(addresses, nodeData);
+
+    result->ToDiscoveredCommissionNodeData(addresses, nodeData);
 
     nodeData.Get<CommissionNodeData>().LogDetail();
     discoveryContext->OnNodeDiscovered(nodeData);
     discoveryContext->Release();
 }
 
+static void HandleNodeOperationalBrowse(void * context, DnssdService * result, CHIP_ERROR error)
+{
+    DiscoveryContext * discoveryContext = static_cast<DiscoveryContext *>(context);
+
+    if (error != CHIP_NO_ERROR)
+    {
+        discoveryContext->Release();
+        return;
+    }
+
+    DiscoveredNodeData nodeData;
+
+    result->ToDiscoveredOperationalNodeBrowseData(nodeData);
+
+    nodeData.Get<OperationalNodeBrowseData>().LogDetail();
+    discoveryContext->OnNodeDiscovered(nodeData);
+    discoveryContext->Release();
+}
+
 static void HandleNodeBrowse(void * context, DnssdService * services, size_t servicesSize, bool finalBrowse, CHIP_ERROR error)
 {
     DiscoveryContext * discoveryContext = static_cast<DiscoveryContext *>(context);
@@ -75,8 +95,16 @@
 
         auto & ipAddress = services[i].mAddress;
 
-        // Check if SRV, TXT and AAAA records were received in DNS responses
-        if (strlen(services[i].mHostName) == 0 || services[i].mTextEntrySize == 0 || !ipAddress.has_value())
+        // mType(service name) exactly matches with operational service name
+        bool isOperationalBrowse = strcmp(services[i].mType, kOperationalServiceName) == 0;
+
+        // For operational browse result we currently don't need IP address hence skip resolution and handle differently.
+        if (isOperationalBrowse)
+        {
+            HandleNodeOperationalBrowse(context, &services[i], error);
+        }
+        // check whether SRV, TXT and AAAA records were received in DNS responses
+        else if (strlen(services[i].mHostName) == 0 || services[i].mTextEntrySize == 0 || !ipAddress.has_value())
         {
             ChipDnssdResolve(&services[i], services[i].mInterface, HandleNodeResolve, context);
         }
@@ -340,7 +368,15 @@
     impl->mOperationalDelegate->OnOperationalNodeResolved(nodeData);
 }
 
-void DnssdService::ToDiscoveredNodeData(const Span<Inet::IPAddress> & addresses, DiscoveredNodeData & nodeData)
+void DnssdService::ToDiscoveredOperationalNodeBrowseData(DiscoveredNodeData & nodeData)
+{
+    nodeData.Set<OperationalNodeBrowseData>();
+
+    ExtractIdFromInstanceName(mName, &nodeData.Get<OperationalNodeBrowseData>().peerId);
+    nodeData.Get<OperationalNodeBrowseData>().hasZeroTTL = (mTtlSeconds == 0);
+}
+
+void DnssdService::ToDiscoveredCommissionNodeData(const Span<Inet::IPAddress> & addresses, DiscoveredNodeData & nodeData)
 {
     nodeData.Set<CommissionNodeData>();
     auto & discoveredData = nodeData.Get<CommissionNodeData>();
@@ -746,6 +782,31 @@
     return error;
 }
 
+CHIP_ERROR DiscoveryImplPlatform::DiscoverOperational(DiscoveryFilter filter, DiscoveryContext & context)
+{
+    ReturnErrorOnFailure(InitImpl());
+    StopDiscovery(context);
+
+    char serviceName[kMaxOperationalServiceNameSize];
+    ReturnErrorOnFailure(MakeServiceTypeName(serviceName, sizeof(serviceName), filter, DiscoveryType::kOperational));
+
+    intptr_t browseIdentifier;
+    // Increase the reference count of the context to keep it alive until HandleNodeBrowse is called back.
+    CHIP_ERROR error = ChipDnssdBrowse(serviceName, DnssdServiceProtocol::kDnssdProtocolTcp, Inet::IPAddressType::kAny,
+                                       Inet::InterfaceId::Null(), HandleNodeBrowse, context.Retain(), &browseIdentifier);
+
+    if (error == CHIP_NO_ERROR)
+    {
+        context.SetBrowseIdentifier(browseIdentifier);
+    }
+    else
+    {
+        context.Release();
+    }
+
+    return error;
+}
+
 CHIP_ERROR DiscoveryImplPlatform::StartDiscovery(DiscoveryType type, DiscoveryFilter filter, DiscoveryContext & context)
 {
     switch (type)
@@ -755,7 +816,7 @@
     case DiscoveryType::kCommissionerNode:
         return DiscoverCommissioners(filter, context);
     case DiscoveryType::kOperational:
-        return CHIP_ERROR_NOT_IMPLEMENTED;
+        return DiscoverOperational(filter, context);
     default:
         return CHIP_ERROR_INVALID_ARGUMENT;
     }
diff --git a/src/lib/dnssd/Discovery_ImplPlatform.h b/src/lib/dnssd/Discovery_ImplPlatform.h
index 34fedf8..d51d5e7 100644
--- a/src/lib/dnssd/Discovery_ImplPlatform.h
+++ b/src/lib/dnssd/Discovery_ImplPlatform.h
@@ -55,6 +55,7 @@
     void NodeIdResolutionNoLongerNeeded(const PeerId & peerId) override;
     CHIP_ERROR DiscoverCommissionableNodes(DiscoveryFilter filter, DiscoveryContext & context);
     CHIP_ERROR DiscoverCommissioners(DiscoveryFilter filter, DiscoveryContext & context);
+    CHIP_ERROR DiscoverOperational(DiscoveryFilter filter, DiscoveryContext & context);
     CHIP_ERROR StartDiscovery(DiscoveryType type, DiscoveryFilter filter, DiscoveryContext & context) override;
     CHIP_ERROR StopDiscovery(DiscoveryContext & context) override;
     CHIP_ERROR ReconfirmRecord(const char * hostname, Inet::IPAddress address, Inet::InterfaceId interfaceId) override;
diff --git a/src/lib/dnssd/ServiceNaming.cpp b/src/lib/dnssd/ServiceNaming.cpp
index 25bdfbf..8ec86a1 100644
--- a/src/lib/dnssd/ServiceNaming.cpp
+++ b/src/lib/dnssd/ServiceNaming.cpp
@@ -162,6 +162,10 @@
         {
             requiredSize = snprintf(buffer, bufferLen, kCommissionerServiceName);
         }
+        else if (type == DiscoveryType::kOperational)
+        {
+            requiredSize = snprintf(buffer, bufferLen, kOperationalServiceName);
+        }
         else
         {
             return CHIP_ERROR_NOT_IMPLEMENTED;
diff --git a/src/lib/dnssd/platform/Dnssd.h b/src/lib/dnssd/platform/Dnssd.h
index 02fb851..e1e4405 100644
--- a/src/lib/dnssd/platform/Dnssd.h
+++ b/src/lib/dnssd/platform/Dnssd.h
@@ -81,7 +81,8 @@
     // Time to live in seconds. Per rfc6762 section 10, because we have a hostname, our default TTL is 120 seconds
     uint32_t mTtlSeconds = 120;
 
-    void ToDiscoveredNodeData(const Span<Inet::IPAddress> & addresses, DiscoveredNodeData & nodeData);
+    void ToDiscoveredCommissionNodeData(const Span<Inet::IPAddress> & addresses, DiscoveredNodeData & nodeData);
+    void ToDiscoveredOperationalNodeBrowseData(DiscoveredNodeData & nodeData);
 };
 
 /**
diff --git a/src/lib/shell/commands/Dns.cpp b/src/lib/shell/commands/Dns.cpp
index 65b3f7b..50be591 100644
--- a/src/lib/shell/commands/Dns.cpp
+++ b/src/lib/shell/commands/Dns.cpp
@@ -261,6 +261,13 @@
     return sResolverProxy.DiscoverOperationalNodes(filter);
 }
 
+CHIP_ERROR BrowseStopHandler(int argc, char ** argv)
+{
+    streamer_printf(streamer_get(), "Stopping browse...\r\n");
+
+    return sResolverProxy.StopDiscovery();
+}
+
 } // namespace
 
 void RegisterDnsCommands()
@@ -270,6 +277,8 @@
           "Browse Matter commissionables. Usage: dns browse commissionable [subtype]" },
         { &BrowseCommissionerHandler, "commissioner", "Browse Matter commissioners. Usage: dns browse commissioner [subtype]" },
         { &BrowseOperationalHandler, "operational", "Browse Matter operational nodes. Usage: dns browse operational" },
+        { &BrowseStopHandler, "stop", "Stop ongoing browse. Usage: dns browse stop" },
+
     };
 
     static constexpr Command subCommands[] = {
diff --git a/src/platform/Darwin/DnssdContexts.cpp b/src/platform/Darwin/DnssdContexts.cpp
index 7dc5956..fc68a5d 100644
--- a/src/platform/Darwin/DnssdContexts.cpp
+++ b/src/platform/Darwin/DnssdContexts.cpp
@@ -607,7 +607,16 @@
     {
         auto delegate = static_cast<DiscoverNodeDelegate *>(context);
         DiscoveredNodeData nodeData;
-        service.ToDiscoveredNodeData(addresses, nodeData);
+
+        // Check whether mType (service name) exactly matches with operational service name
+        if (strcmp(service.mType, kOperationalServiceName) == 0)
+        {
+            service.ToDiscoveredOperationalNodeBrowseData(nodeData);
+        }
+        else
+        {
+            service.ToDiscoveredCommissionNodeData(addresses, nodeData);
+        }
         delegate->OnNodeDiscovered(nodeData);
     }
     else
diff --git a/src/platform/Linux/DnssdImpl.cpp b/src/platform/Linux/DnssdImpl.cpp
index 800064f..40b7c89 100644
--- a/src/platform/Linux/DnssdImpl.cpp
+++ b/src/platform/Linux/DnssdImpl.cpp
@@ -611,9 +611,9 @@
     {
         avahiInterface = AVAHI_IF_UNSPEC;
     }
-    browseContext->mInterface     = avahiInterface;
-    browseContext->mProtocol      = GetFullType(type, protocol);
-    browseContext->mBrowseRetries = 0;
+    browseContext->mInterface         = avahiInterface;
+    browseContext->mProtocol          = GetFullType(type, protocol);
+    browseContext->mReceivedAllCached = false;
     browseContext->mStopped.store(false);
 
     browser = avahi_service_browser_new(mClient, avahiInterface, AVAHI_PROTO_UNSPEC, browseContext->mProtocol.c_str(), nullptr,
@@ -686,23 +686,22 @@
     }
 }
 
-void MdnsAvahi::BrowseRetryCallback(chip::System::Layer * aLayer, void * appState)
+void MdnsAvahi::InvokeDelegateOrCleanUp(BrowseContext * context, AvahiServiceBrowser * browser)
 {
-    BrowseContext * context = static_cast<BrowseContext *>(appState);
-    // Don't schedule anything new if we've stopped.
-    if (context->mStopped.load())
+    // If we were already asked to stop, no need to send a callback - no one is listening.
+    if (!context->mStopped.load())
     {
-        chip::Platform::Delete(context);
-        return;
+        // since this is continuous browse, finalBrowse will always be false.
+        context->mCallback(context->mContext, context->mServices.data(), context->mServices.size(), false, CHIP_NO_ERROR);
+
+        // Clearing records/services already passed to application through delegate. Keeping it may cause
+        // duplicates in next query / retry attempt as currently found will also come again from cache.
+        context->mServices.clear();
     }
-    AvahiServiceBrowser * newBrowser =
-        avahi_service_browser_new(context->mInstance->mClient, context->mInterface, AVAHI_PROTO_UNSPEC, context->mProtocol.c_str(),
-                                  nullptr, static_cast<AvahiLookupFlags>(0), HandleBrowse, context);
-    if (newBrowser == nullptr)
+    else
     {
-        // If we failed to create the browser, this browse context is effectively done. We need to call the final callback and
-        // delete the context.
-        context->mCallback(context->mContext, context->mServices.data(), context->mServices.size(), true, CHIP_NO_ERROR);
+        // browse is stopped, so free browse handle and context
+        avahi_service_browser_free(browser);
         chip::Platform::Delete(context);
     }
 }
@@ -722,6 +721,13 @@
         break;
     case AVAHI_BROWSER_NEW:
         ChipLogProgress(DeviceLayer, "Avahi browse: cache new");
+        if (context->mStopped.load())
+        {
+            // browse is stopped, so free browse handle and context
+            avahi_service_browser_free(browser);
+            chip::Platform::Delete(context);
+            break;
+        }
         if (strcmp("local", domain) == 0)
         {
             DnssdService service = {};
@@ -738,41 +744,53 @@
             }
             service.mType[kDnssdTypeMaxSize] = 0;
             context->mServices.push_back(service);
+            if (context->mReceivedAllCached)
+            {
+                InvokeDelegateOrCleanUp(context, browser);
+            }
         }
         break;
     case AVAHI_BROWSER_ALL_FOR_NOW: {
         ChipLogProgress(DeviceLayer, "Avahi browse: all for now");
-        bool needRetries = context->mBrowseRetries++ < kMaxBrowseRetries && !context->mStopped.load();
-        // If we were already asked to stop, no need to send a callback - no one is listening.
-        if (!context->mStopped.load())
-        {
-            context->mCallback(context->mContext, context->mServices.data(), context->mServices.size(), !needRetries,
-                               CHIP_NO_ERROR);
-        }
-        avahi_service_browser_free(browser);
-        if (needRetries)
-        {
-            context->mNextRetryDelay *= 2;
-            // Hand the ownership of the context over to the timer. It will either schedule a new browse on the context,
-            // triggering this function, or it will delete and not reschedule (if stopped).
-            DeviceLayer::SystemLayer().StartTimer(context->mNextRetryDelay / 2, BrowseRetryCallback, context);
-        }
-        else
-        {
-            // We didn't schedule a timer, so we're responsible for deleting the context
-            chip::Platform::Delete(context);
-        }
+        context->mReceivedAllCached = true;
+
+        InvokeDelegateOrCleanUp(context, browser);
         break;
     }
     case AVAHI_BROWSER_REMOVE:
         ChipLogProgress(DeviceLayer, "Avahi browse: remove");
         if (strcmp("local", domain) == 0)
         {
-            context->mServices.erase(
-                std::remove_if(context->mServices.begin(), context->mServices.end(), [name, type](const DnssdService & service) {
-                    return strcmp(name, service.mName) == 0 && type == GetFullType(service.mType, service.mProtocol);
-                }));
+            // don't attempt to erase if vector has been cleared
+            if (context->mServices.size())
+            {
+                context->mServices.erase(std::remove_if(
+                    context->mServices.begin(), context->mServices.end(), [name, type](const DnssdService & service) {
+                        return strcmp(name, service.mName) == 0 && type == GetFullType(service.mType, service.mProtocol);
+                    }));
+            }
+
+            if (context->mReceivedAllCached)
+            {
+                DnssdService service = {};
+
+                Platform::CopyString(service.mName, name);
+                CopyTypeWithoutProtocol(service.mType, type);
+                service.mProtocol      = GetProtocolInType(type);
+                service.mAddressType   = context->mAddressType;
+                service.mTransportType = ToAddressType(protocol);
+                service.mInterface     = Inet::InterfaceId::Null();
+                if (interface != AVAHI_IF_UNSPEC)
+                {
+                    service.mInterface = static_cast<chip::Inet::InterfaceId>(interface);
+                }
+                service.mTtlSeconds = 0;
+
+                context->mServices.push_back(service);
+                InvokeDelegateOrCleanUp(context, browser);
+            }
         }
+
         break;
     case AVAHI_BROWSER_CACHE_EXHAUSTED:
         ChipLogProgress(DeviceLayer, "Avahi browse: cache exhausted");
diff --git a/src/platform/Linux/DnssdImpl.h b/src/platform/Linux/DnssdImpl.h
index c66a8c2..f1de8db 100644
--- a/src/platform/Linux/DnssdImpl.h
+++ b/src/platform/Linux/DnssdImpl.h
@@ -131,11 +131,11 @@
         void * mContext;
         Inet::IPAddressType mAddressType;
         std::vector<DnssdService> mServices;
-        size_t mBrowseRetries;
+        bool mReceivedAllCached;
         AvahiIfIndex mInterface;
         std::string mProtocol;
-        chip::System::Clock::Timeout mNextRetryDelay = chip::System::Clock::Seconds16(1);
         std::atomic_bool mStopped{ false };
+        AvahiServiceBrowser * mBrowser;
     };
 
     struct ResolveContext
@@ -181,7 +181,7 @@
     static void HandleBrowse(AvahiServiceBrowser * broswer, AvahiIfIndex interface, AvahiProtocol protocol, AvahiBrowserEvent event,
                              const char * name, const char * type, const char * domain, AvahiLookupResultFlags flags,
                              void * userdata);
-    static void BrowseRetryCallback(chip::System::Layer * aLayer, void * appState);
+    static void InvokeDelegateOrCleanUp(BrowseContext * context, AvahiServiceBrowser * browser);
     static void HandleResolve(AvahiServiceResolver * resolver, AvahiIfIndex interface, AvahiProtocol protocol,
                               AvahiResolverEvent event, const char * name, const char * type, const char * domain,
                               const char * host_name, const AvahiAddress * address, uint16_t port, AvahiStringList * txt,