Use FaultInjection to help with IDM_1_3 automation testing (#32045)

* Add FaultInjection to help with IDM_1_3 automation testing

* Restyled by whitespace

* Restyled by clang-format

* Small fixes

* Small change

* Fix CI

* Quick fix CI

* Address PR comments

* Quick Fix

* Restyled by clang-format

* Fix CI

* Fix CI

* use maybe_unused instead of (void)

* Apply suggestions from code review

Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>

* Address PR comments

* Restyled by whitespace

* Restyled by clang-format

* Address PR comment

---------

Co-authored-by: Restyled.io <commits@restyled.io>
Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>
diff --git a/scripts/build/build/targets.py b/scripts/build/build/targets.py
index 771c92b..af1bd45 100755
--- a/scripts/build/build/targets.py
+++ b/scripts/build/build/targets.py
@@ -152,6 +152,7 @@
     target.AppendModifier('nodeps', enable_ble=False, enable_wifi=False, enable_thread=False,
                           crypto_library=HostCryptoLibrary.MBEDTLS, use_clang=True).ExceptIfRe('-(clang|noble|boringssl|mbedtls)')
 
+    target.AppendModifier('nlfaultinject', use_nl_fault_injection=True)
     target.AppendModifier('platform-mdns', use_platform_mdns=True)
     target.AppendModifier('minmdns-verbose', minmdns_high_verbosity=True)
     target.AppendModifier('libnl', minmdns_address_policy="libnl")
diff --git a/scripts/build/builders/host.py b/scripts/build/builders/host.py
index 7b7a02d..a6ad8f3 100644
--- a/scripts/build/builders/host.py
+++ b/scripts/build/builders/host.py
@@ -295,7 +295,7 @@
                  enable_ipv4=True, enable_ble=True, enable_wifi=True,
                  enable_thread=True, use_tsan=False, use_asan=False, use_ubsan=False,
                  separate_event_loop=True, fuzzing_type: HostFuzzingType = HostFuzzingType.NONE, use_clang=False,
-                 interactive_mode=True, extra_tests=False, use_platform_mdns=False, enable_rpcs=False,
+                 interactive_mode=True, extra_tests=False, use_nl_fault_injection=False, use_platform_mdns=False, enable_rpcs=False,
                  use_coverage=False, use_dmalloc=False, minmdns_address_policy=None,
                  minmdns_high_verbosity=False, imgui_ui=False, crypto_library: HostCryptoLibrary = None,
                  enable_test_event_triggers=None):
@@ -368,6 +368,9 @@
                 # so setting clang is not correct
                 raise Exception('Fake host board is always gcc (not clang)')
 
+        if use_nl_fault_injection:
+            self.extra_gn_options.append('chip_with_nlfaultinjection=true')
+
         if minmdns_address_policy:
             if use_platform_mdns:
                 raise Exception('Address policy applies to minmdns only')
diff --git a/scripts/build/testdata/all_targets_linux_x64.txt b/scripts/build/testdata/all_targets_linux_x64.txt
index 6a0b733..3c6a2d3 100644
--- a/scripts/build/testdata/all_targets_linux_x64.txt
+++ b/scripts/build/testdata/all_targets_linux_x64.txt
@@ -10,7 +10,7 @@
 esp32-{m5stack,c3devkit,devkitc,qemu}-{all-clusters,all-clusters-minimal,energy-management,ota-provider,ota-requestor,shell,light,lock,bridge,temperature-measurement,ota-requestor,tests}[-rpc][-ipv6only][-tracing]
 genio-lighting-app
 linux-fake-tests[-mbedtls][-boringssl][-asan][-tsan][-ubsan][-libfuzzer][-ossfuzz][-coverage][-dmalloc][-clang]
-linux-{x64,arm64}-{rpc-console,all-clusters,all-clusters-minimal,chip-tool,thermostat,java-matter-controller,kotlin-matter-controller,minmdns,light,lock,shell,ota-provider,ota-requestor,simulated-app1,simulated-app2,python-bindings,tv-app,tv-casting-app,bridge,tests,chip-cert,address-resolve-tool,contact-sensor,dishwasher,microwave-oven,refrigerator,rvc,air-purifier,lit-icd,air-quality-sensor,network-manager,energy-management}[-nodeps][-platform-mdns][-minmdns-verbose][-libnl][-same-event-loop][-no-interactive][-ipv6only][-no-ble][-no-wifi][-no-thread][-mbedtls][-boringssl][-asan][-tsan][-ubsan][-libfuzzer][-ossfuzz][-coverage][-dmalloc][-clang][-test][-rpc][-with-ui][-evse-test-event]
+linux-{x64,arm64}-{rpc-console,all-clusters,all-clusters-minimal,chip-tool,thermostat,java-matter-controller,kotlin-matter-controller,minmdns,light,lock,shell,ota-provider,ota-requestor,simulated-app1,simulated-app2,python-bindings,tv-app,tv-casting-app,bridge,tests,chip-cert,address-resolve-tool,contact-sensor,dishwasher,microwave-oven,refrigerator,rvc,air-purifier,lit-icd,air-quality-sensor,network-manager,energy-management}[-nodeps][-nlfaultinject][-platform-mdns][-minmdns-verbose][-libnl][-same-event-loop][-no-interactive][-ipv6only][-no-ble][-no-wifi][-no-thread][-mbedtls][-boringssl][-asan][-tsan][-ubsan][-libfuzzer][-ossfuzz][-coverage][-dmalloc][-clang][-test][-rpc][-with-ui][-evse-test-event]
 linux-x64-efr32-test-runner[-clang]
 imx-{chip-tool,lighting-app,thermostat,all-clusters-app,all-clusters-minimal-app,ota-provider-app}[-release]
 infineon-psoc6-{lock,light,all-clusters,all-clusters-minimal}[-ota][-updateimage]
diff --git a/src/app/CommandHandler.cpp b/src/app/CommandHandler.cpp
index 6831a84..d0e5db9 100644
--- a/src/app/CommandHandler.cpp
+++ b/src/app/CommandHandler.cpp
@@ -214,6 +214,7 @@
         SetGroupRequest(true);
     }
 
+    // When updating this code, please remember to make corresponding changes to TestOnlyInvokeCommandRequestWithFaultsInjected.
     VerifyOrReturnError(invokeRequestMessage.GetSuppressResponse(&mSuppressResponse) == CHIP_NO_ERROR, Status::InvalidAction);
     VerifyOrReturnError(invokeRequestMessage.GetTimedRequest(&mTimedRequest) == CHIP_NO_ERROR, Status::InvalidAction);
     VerifyOrReturnError(invokeRequestMessage.GetInvokeRequests(&invokeRequests) == CHIP_NO_ERROR, Status::InvalidAction);
@@ -911,6 +912,135 @@
     ChipLogDetail(DataManagement, "Command handler moving to [%10.10s]", GetStateStr());
 }
 
+#if CHIP_WITH_NLFAULTINJECTION
+
+namespace {
+
+CHIP_ERROR TestOnlyExtractCommandPathFromNextInvokeRequest(TLV::TLVReader & invokeRequestsReader,
+                                                           ConcreteCommandPath & concretePath)
+{
+    ReturnErrorOnFailure(invokeRequestsReader.Next(TLV::AnonymousTag())); // step onto the next anonymous-tagged element
+    CommandDataIB::Parser requestIB;
+    ReturnErrorOnFailure(requestIB.Init(invokeRequestsReader));
+    CommandPathIB::Parser requestPath;
+    ReturnErrorOnFailure(requestIB.GetPath(&requestPath));
+    return requestPath.GetConcreteCommandPath(concretePath); // fills concretePath; the result goes back to the caller
+}
+
+[[maybe_unused]] const char * GetFaultInjectionTypeStr(CommandHandler::NlFaultInjectionType faultType)
+{ // Returns the description of faultType that is logged when the fault is injected.
+    switch (faultType)
+    {
+    case CommandHandler::NlFaultInjectionType::SeparateResponseMessages:
+        return "Each response will be sent in a separate InvokeResponseMessage. The order of responses will be the same as the "
+               "original request.";
+    case CommandHandler::NlFaultInjectionType::SeparateResponseMessagesAndInvertedResponseOrder:
+        return "Each response will be sent in a separate InvokeResponseMessage. The order of responses will be reversed from the "
+               "original request.";
+    case CommandHandler::NlFaultInjectionType::SkipSecondResponse:
+        return "Single InvokeResponseMessages. Dropping response to second request";
+    }
+    VerifyOrDieWithMsg(false, DataManagement, "TH Failure: Unexpected fault type"); // faultType matched no case above
+}
+
+} // anonymous namespace
+
+// Test-only stand-in for normal invoke handling: checks that the request in `payload` (received on `ec`) carries exactly
+// two commands and answers each with Status::Failure, shaped by `faultType`. Code is duplicated from other sections on
+// purpose, so that each failed check dies with its own clear "TH Failure"/"DUT Failure" message for troubleshooting.
+void CommandHandler::TestOnlyInvokeCommandRequestWithFaultsInjected(Messaging::ExchangeContext * ec,
+                                                                    System::PacketBufferHandle && payload, bool isTimedInvoke,
+                                                                    NlFaultInjectionType faultType)
+{
+    VerifyOrDieWithMsg(ec != nullptr, DataManagement, "TH Failure: Incoming exchange context should not be null");
+    VerifyOrDieWithMsg(mState == State::Idle, DataManagement, "TH Failure: state should be Idle, issue with TH");
+
+    ChipLogProgress(DataManagement, "Response to InvokeRequestMessage overridden by fault injection");
+    ChipLogProgress(DataManagement, "   Injecting the following response: %s", GetFaultInjectionTypeStr(faultType));
+
+    mResponseSender.SetExchangeContext(ec);
+    Handle workHandle(this);
+    mResponseSender.WillSendMessage();
+    VerifyOrDieWithMsg(!mResponseSender.IsForGroup(), DataManagement, "DUT Failure: Unexpected Group Command");
+
+    System::PacketBufferTLVReader reader;
+    InvokeRequestMessage::Parser invokeRequestMessage;
+    InvokeRequests::Parser invokeRequests;
+    reader.Init(std::move(payload)); // the reader takes over the request buffer
+    VerifyOrDieWithMsg(invokeRequestMessage.Init(reader) == CHIP_NO_ERROR, DataManagement,
+                       "TH Failure: Failed 'invokeRequestMessage.Init(reader)'");
+#if CHIP_CONFIG_IM_PRETTY_PRINT
+    invokeRequestMessage.PrettyPrint();
+#endif
+
+    VerifyOrDieWithMsg(invokeRequestMessage.GetSuppressResponse(&mSuppressResponse) == CHIP_NO_ERROR, DataManagement,
+                       "DUT Failure: Mandatory SuppressResponse field missing");
+    VerifyOrDieWithMsg(invokeRequestMessage.GetTimedRequest(&mTimedRequest) == CHIP_NO_ERROR, DataManagement,
+                       "DUT Failure: Mandatory TimedRequest field missing");
+    VerifyOrDieWithMsg(invokeRequestMessage.GetInvokeRequests(&invokeRequests) == CHIP_NO_ERROR, DataManagement,
+                       "DUT Failure: Mandatory InvokeRequests field missing");
+    VerifyOrDieWithMsg(mTimedRequest == isTimedInvoke, DataManagement,
+                       "DUT Failure: TimedRequest value in message mismatches action");
+
+    {
+        InvokeRequestMessage::Parser validationInvokeRequestMessage = invokeRequestMessage; // validate a copy of the parser
+        VerifyOrDieWithMsg(ValidateInvokeRequestMessageAndBuildRegistry(validationInvokeRequestMessage) == CHIP_NO_ERROR,
+                           DataManagement, "DUT Failure: InvokeRequestMessage contents were invalid");
+    }
+
+    TLV::TLVReader invokeRequestsReader;
+    invokeRequests.GetReader(&invokeRequestsReader);
+
+    size_t commandCount = 0;
+    VerifyOrDieWithMsg(TLV::Utilities::Count(invokeRequestsReader, commandCount, false /* recurse */) == CHIP_NO_ERROR,
+                       DataManagement,
+                       "TH Failure: Failed to get the length of InvokeRequests after InvokeRequestMessage validation");
+
+    // The command count check (specifically for a count of 2) is tied to IDM_1_3. This may need adjustment for
+    // compatibility with future test plans.
+    VerifyOrDieWithMsg(commandCount == 2, DataManagement, "DUT Failure: We were strictly expecting exactly 2 InvokeRequests");
+    mReserveSpaceForMoreChunkMessages = true;
+
+    {
+        // Response path is the same as request path since we are replying with a failure message.
+        ConcreteCommandPath concreteResponsePath1;
+        ConcreteCommandPath concreteResponsePath2;
+        VerifyOrDieWithMsg(
+            TestOnlyExtractCommandPathFromNextInvokeRequest(invokeRequestsReader, concreteResponsePath1) == CHIP_NO_ERROR,
+            DataManagement, "DUT Failure: Issues encountered while extracting the ConcreteCommandPath from the first request");
+        VerifyOrDieWithMsg(
+            TestOnlyExtractCommandPathFromNextInvokeRequest(invokeRequestsReader, concreteResponsePath2) == CHIP_NO_ERROR,
+            DataManagement, "DUT Failure: Issues encountered while extracting the ConcreteCommandPath from the second request");
+
+        if (faultType == NlFaultInjectionType::SeparateResponseMessagesAndInvertedResponseOrder)
+        {
+            ConcreteCommandPath temp(concreteResponsePath1); // swap the two paths to invert the response order
+            concreteResponsePath1 = concreteResponsePath2;
+            concreteResponsePath2 = temp;
+        }
+
+        VerifyOrDieWithMsg(FallibleAddStatus(concreteResponsePath1, Status::Failure) == CHIP_NO_ERROR, DataManagement,
+                           "TH Failure: Error adding the first InvokeResponse");
+        if (faultType == NlFaultInjectionType::SeparateResponseMessages ||
+            faultType == NlFaultInjectionType::SeparateResponseMessagesAndInvertedResponseOrder)
+        {
+            VerifyOrDieWithMsg(FinalizeInvokeResponseMessageAndPrepareNext() == CHIP_NO_ERROR, DataManagement,
+                               "TH Failure: Failed to create second InvokeResponseMessage");
+        }
+        if (faultType != NlFaultInjectionType::SkipSecondResponse) // SkipSecondResponse never answers the second request
+        {
+            VerifyOrDieWithMsg(FallibleAddStatus(concreteResponsePath2, Status::Failure) == CHIP_NO_ERROR, DataManagement,
+                               "TH Failure: Error adding the second InvokeResponse");
+        }
+    }
+
+    VerifyOrDieWithMsg(invokeRequestsReader.Next() == CHIP_END_OF_TLV, DataManagement,
+                       "DUT Failure: Unexpected TLV ending of InvokeRequests");
+    VerifyOrDieWithMsg(invokeRequestMessage.ExitContainer() == CHIP_NO_ERROR, DataManagement,
+                       "DUT Failure: InvokeRequestMessage TLV is not properly terminated");
+}
+#endif // CHIP_WITH_NLFAULTINJECTION
+
 } // namespace app
 } // namespace chip
 
diff --git a/src/app/CommandHandler.h b/src/app/CommandHandler.h
index b90cb82..f7acd70 100644
--- a/src/app/CommandHandler.h
+++ b/src/app/CommandHandler.h
@@ -429,6 +429,35 @@
         return mResponseSender.GetSubjectDescriptor();
     }
 
+#if CHIP_WITH_NLFAULTINJECTION
+
+    enum class NlFaultInjectionType : uint8_t
+    {
+        SeparateResponseMessages,                         // one InvokeResponseMessage per response, in request order
+        SeparateResponseMessagesAndInvertedResponseOrder, // one InvokeResponseMessage per response, in reversed order
+        SkipSecondResponse                                // single InvokeResponseMessage; second request gets no response
+    };
+
+    /**
+     * @brief Sends InvokeResponseMessages with injected faults for certification testing.
+     *
+     * The Test Harness (TH) uses this to simulate various server response behaviors,
+     * ensuring the Device Under Test (DUT) handles responses per specification.
+     *
+     * This function strictly validates the DUT's InvokeRequestMessage against the test plan (exactly two
+     * InvokeRequests, not a group command). If deviations occur, the TH terminates with a detailed error message.
+     *
+     * @param ec Exchange context for sending InvokeResponseMessages to the client. Must not be null.
+     * @param payload Payload of the incoming InvokeRequestMessage from the client; it is moved from.
+     * @param isTimedInvoke Indicates whether the interaction is timed; must match the message's TimedRequest field.
+     * @param faultType The specific type of fault to inject into the response.
+     */
+    // TODO(#30453): After refactoring CommandHandler for better unit testability, create a
+    // unit test specifically for the fault injection behavior.
+    void TestOnlyInvokeCommandRequestWithFaultsInjected(Messaging::ExchangeContext * ec, System::PacketBufferHandle && payload,
+                                                        bool isTimedInvoke, NlFaultInjectionType faultType);
+#endif // CHIP_WITH_NLFAULTINJECTION
+
 private:
     friend class TestCommandInteraction;
     friend class CommandHandler::Handle;
diff --git a/src/app/ConcreteCommandPath.h b/src/app/ConcreteCommandPath.h
index 5020aa0..42845b6 100644
--- a/src/app/ConcreteCommandPath.h
+++ b/src/app/ConcreteCommandPath.h
@@ -33,6 +33,8 @@
         ConcreteClusterPath(aEndpointId, aClusterId), mCommandId(aCommandId)
     {}
 
+    ConcreteCommandPath() : ConcreteClusterPath(kInvalidEndpointId, kInvalidClusterId), mCommandId(kInvalidCommandId) {} // invalid
+
     bool operator==(const ConcreteCommandPath & aOther) const
     {
         return ConcreteClusterPath::operator==(aOther) && (mCommandId == aOther.mCommandId);
diff --git a/src/app/InteractionModelEngine.cpp b/src/app/InteractionModelEngine.cpp
index 565cddd..ddc8a98 100644
--- a/src/app/InteractionModelEngine.cpp
+++ b/src/app/InteractionModelEngine.cpp
@@ -35,6 +35,7 @@
 #include <app/util/endpoint-config-api.h>
 #include <lib/core/Global.h>
 #include <lib/core/TLVUtilities.h>
+#include <lib/support/CHIPFaultInjection.h>
 #include <lib/support/CodeUtils.h>
 #include <lib/support/FibonacciUtils.h>
 
@@ -411,6 +412,21 @@
         ChipLogProgress(InteractionModel, "no resource for Invoke interaction");
         return Status::Busy;
     }
+    CHIP_FAULT_INJECT(
+        FaultInjection::kFault_IMInvoke_SeparateResponses,
+        commandHandler->TestOnlyInvokeCommandRequestWithFaultsInjected(
+            apExchangeContext, std::move(aPayload), aIsTimedInvoke, CommandHandler::NlFaultInjectionType::SeparateResponseMessages);
+        return Status::Success;);
+    CHIP_FAULT_INJECT(FaultInjection::kFault_IMInvoke_SeparateResponsesInvertResponseOrder,
+                      commandHandler->TestOnlyInvokeCommandRequestWithFaultsInjected(
+                          apExchangeContext, std::move(aPayload), aIsTimedInvoke,
+                          CommandHandler::NlFaultInjectionType::SeparateResponseMessagesAndInvertedResponseOrder);
+                      return Status::Success;);
+    CHIP_FAULT_INJECT(
+        FaultInjection::kFault_IMInvoke_SkipSecondResponse,
+        commandHandler->TestOnlyInvokeCommandRequestWithFaultsInjected(apExchangeContext, std::move(aPayload), aIsTimedInvoke,
+                                                                       CommandHandler::NlFaultInjectionType::SkipSecondResponse);
+        return Status::Success;);
     commandHandler->OnInvokeCommandRequest(apExchangeContext, aPayloadHeader, std::move(aPayload), aIsTimedInvoke);
     return Status::Success;
 }
diff --git a/src/lib/support/CHIPFaultInjection.cpp b/src/lib/support/CHIPFaultInjection.cpp
index fc50021..15cb5a4 100644
--- a/src/lib/support/CHIPFaultInjection.cpp
+++ b/src/lib/support/CHIPFaultInjection.cpp
@@ -35,9 +35,21 @@
 static class nl::FaultInjection::Manager sChipFaultInMgr;
 static const nl::FaultInjection::Name sManagerName  = "chip";
 static const nl::FaultInjection::Name sFaultNames[] = {
-    "AllocExchangeContext", "DropIncomingUDPMsg",   "DropOutgoingUDPMsg", "AllocBinding", "SendAlarm",
-    "HandleAlarm",          "FuzzExchangeHeaderTx", "RMPDoubleTx",        "RMPSendError", "BDXBadBlockCounter",
-    "BDXAllocTransfer",     "CASEKeyConfirm",       "SecMgrBusy",
+    "AllocExchangeContext",
+    "DropIncomingUDPMsg",
+    "DropOutgoingUDPMsg",
+    "AllocBinding",
+    "SendAlarm",
+    "HandleAlarm",
+    "FuzzExchangeHeaderTx",
+    "RMPDoubleTx",
+    "RMPSendError",
+    "BDXBadBlockCounter",
+    "BDXAllocTransfer",
+    "SecMgrBusy",
+    "IMInvoke_SeparateResponses",
+    "IMInvoke_SeparateResponsesInvertResponseOrder",
+    "IMInvoke_SkipSecondResponse",
 #if CONFIG_NETWORK_LAYER_BLE
     "CHIPOBLESend",
 #endif // CONFIG_NETWORK_LAYER_BLE
diff --git a/src/lib/support/CHIPFaultInjection.h b/src/lib/support/CHIPFaultInjection.h
index 9fe78ae..44e582b 100644
--- a/src/lib/support/CHIPFaultInjection.h
+++ b/src/lib/support/CHIPFaultInjection.h
@@ -26,6 +26,11 @@
 #include <lib/core/CHIPConfig.h>
 
 #if CHIP_WITH_NLFAULTINJECTION
+#ifdef NDEBUG
+// TODO(#30453): After fixing the issue where CHIP_WITH_NLFAULTINJECTION is seemingly enabled on release builds,
+// uncomment the line below.
+// static_assert(false, "CHIP_WITH_NLFAULTINJECTION should NOT be enabled on release build");
+#endif
 
 #include <nlfaultinjection.hpp>
 
@@ -57,12 +62,25 @@
     kFault_BDXBadBlockCounter,   /**< Corrupt the BDX Block Counter in the BDX BlockSend or BlockEOF message about to be sent */
     kFault_BDXAllocTransfer,     /**< Fail the allocation of a BDXTransfer object */
     kFault_SecMgrBusy,           /**< Trigger a WEAVE_ERROR_SECURITY_MANAGER_BUSY when starting an authentication session */
+    kFault_IMInvoke_SeparateResponses, /**< Validate incoming InvokeRequestMessage contains exactly 2 valid commands and respond
+                                        with 2 InvokeResponseMessages */
+    kFault_IMInvoke_SeparateResponsesInvertResponseOrder, /**< Validate incoming InvokeRequestMessage contains exactly 2 valid
+                                        commands and respond with 2 InvokeResponseMessages where the response order is inverted
+                                        compared to the request order */
+    kFault_IMInvoke_SkipSecondResponse, /**< Validate incoming InvokeRequestMessage contains exactly 2 valid commands and respond
+                                        with 1 InvokeResponseMessage, dropping the response to the second request */
 #if CONFIG_NETWORK_LAYER_BLE
     kFault_CHIPOBLESend, /**< Inject a GATT error when sending the first fragment of a chip message over BLE */
 #endif                   // CONFIG_NETWORK_LAYER_BLE
     kFault_NumItems,
 } Id;
 
+static_assert(kFault_IMInvoke_SeparateResponses == 12, "Test plan specification and automation code relies on this value being 12");
+static_assert(kFault_IMInvoke_SeparateResponsesInvertResponseOrder == 13,
+              "Test plan specification and automation code relies on this value being 13");
+static_assert(kFault_IMInvoke_SkipSecondResponse == 14,
+              "Test plan specification and automation code relies on this value being 14");
+
 DLL_EXPORT nl::FaultInjection::Manager & GetManager();
 
 /**