Bring python yaml test parser of octet string inline with what javascript codegen is doing (#23670)

* Fix octet string parsing in yaml test parser

Old parsing had issue since str.encode for utf-8 character would be
improperly convert. For example `\xff` would become `b'\xc3\xbf'` when
we wanted it to be `b'\xff`.

Co-authored-by: Tennessee Carmel-Veilleux <tennessee.carmelveilleux@gmail.com>

* Address PR comments

* run regen

* Address PR comments

Co-authored-by: Tennessee Carmel-Veilleux <tennessee.carmelveilleux@gmail.com>
diff --git a/src/app/tests/suites/TestCluster.yaml b/src/app/tests/suites/TestCluster.yaml
index f84068e..ddf4eb1 100644
--- a/src/app/tests/suites/TestCluster.yaml
+++ b/src/app/tests/suites/TestCluster.yaml
@@ -749,7 +749,8 @@
       command: "readAttribute"
       attribute: "octet_string"
       response:
-          value: "\r\n\xff\"\xa0"
+          # This is the properly 'hex:...' version of "\r\n\xff\"\xa0"
+          value: "hex:0d0aff22a0"
 
     - label: "Write attribute OCTET_STRING"
       command: "writeAttribute"
diff --git a/src/controller/python/chip/yaml/format_converter.py b/src/controller/python/chip/yaml/format_converter.py
index 11e38f8..f6402c5 100644
--- a/src/controller/python/chip/yaml/format_converter.py
+++ b/src/controller/python/chip/yaml/format_converter.py
@@ -20,9 +20,26 @@
 from chip.tlv import uint, float32
 import enum
 from chip.yaml.errors import ValidationError
+import binascii
 
 
-_HEX_PREFIX = 'hex:'
+def convert_yaml_octet_string_to_bytes(s: str) -> bytes:
+    """Convert YAML octet string body to bytes, handling any c-style hex escapes (e.g. \x5a) and hex: prefix"""
+    # Step 1: handle explicit "hex:" prefix
+    if s.startswith('hex:'):
+        return binascii.unhexlify(s[4:])
+
+    # Step 2: convert non-hex-prefixed to bytes
+    # TODO(#23669): This does not properly support utf8 octet strings. We mimic
+    # javascript codegen behavior. Behavior of javascript is:
+    #   * Octet string character >= u+0200 errors out.
+    #   * Any character greater than 0xFF has the upper bytes chopped off.
+    as_bytes = [ord(c) for c in s]
+
+    if any([value > 0x200 for value in as_bytes]):
+        raise ValueError('Unsupported char in octet string %r' % as_bytes)
+    accumulated_hex = ''.join([f"{(v & 0xFF):02x}" for v in as_bytes])
+    return binascii.unhexlify(accumulated_hex)
 
 
 def convert_name_value_pair_to_dict(arg_values):
@@ -118,9 +135,7 @@
         return field_type(field_value)
     # YAML treats bytes as strings. Convert to a byte string.
     elif (field_type == bytes and type(field_value) != bytes):
-        if isinstance(field_value, str) and field_value.startswith(_HEX_PREFIX):
-            return bytes.fromhex(field_value[len(_HEX_PREFIX):])
-        return str.encode(field_value)
+        return convert_yaml_octet_string_to_bytes(field_value)
     # By default, just return the field_value casted to field_type.
     else:
         return field_type(field_value)
diff --git a/src/controller/python/test/unit_tests/test_yaml_format_converter.py b/src/controller/python/test/unit_tests/test_yaml_format_converter.py
new file mode 100644
index 0000000..fa46705
--- /dev/null
+++ b/src/controller/python/test/unit_tests/test_yaml_format_converter.py
@@ -0,0 +1,52 @@
+#
+#    Copyright (c) 2022 Project CHIP Authors
+#    All rights reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+from chip.yaml.format_converter import convert_yaml_octet_string_to_bytes
+from binascii import unhexlify
+import unittest
+
+
+class TestOctetStringYamlDecode(unittest.TestCase):
+    def test_common_cases(self):
+        self.assertEqual(convert_yaml_octet_string_to_bytes("hex:aa55"), unhexlify("aa55"))
+        self.assertEqual(convert_yaml_octet_string_to_bytes("hex:"), unhexlify(""))
+        self.assertEqual(convert_yaml_octet_string_to_bytes("hex:AA55"), unhexlify("aa55"))
+
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0\xaa\x55"), unhexlify("30aa55"))
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0\xAA\x55"), unhexlify("30aa55"))
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0\xAa\x55"), unhexlify("30aa55"))
+
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0hex:"), b"0hex:")
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0hex:A"), b"0hex:A")
+        self.assertEqual(convert_yaml_octet_string_to_bytes("0hex:AA55"), b"0hex:AA55")
+
+        self.assertEqual(convert_yaml_octet_string_to_bytes("AA55"), b"AA55")
+        self.assertEqual(convert_yaml_octet_string_to_bytes("AA\n\r\t55"), unhexlify("41410a0d093535"))
+        # TODO(#23669): After utf8 is properly supported expected result is unhexlify("c3a9c3a90a0a")
+        self.assertEqual(convert_yaml_octet_string_to_bytes("\xC3\xA9é\n\n"), unhexlify("c3a9e90a0a"))
+
+        # Partial hex nibble
+        with self.assertRaises(ValueError):
+            convert_yaml_octet_string_to_bytes("hex:aa5")
+
+
+def main():
+    unittest.main()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/zzz_generated/chip-tool/zap-generated/test/Commands.h b/zzz_generated/chip-tool/zap-generated/test/Commands.h
index e9f1930..796a968 100644
--- a/zzz_generated/chip-tool/zap-generated/test/Commands.h
+++ b/zzz_generated/chip-tool/zap-generated/test/Commands.h
@@ -48621,7 +48621,7 @@
                 chip::ByteSpan value;
                 VerifyOrReturn(CheckDecodeValue(chip::app::DataModel::Decode(*data, value)));
                 VerifyOrReturn(CheckValueAsString("octetString", value,
-                                                  chip::ByteSpan(chip::Uint8::from_const_char("\015\012\377\042\240"), 5)));
+                                                  chip::ByteSpan(chip::Uint8::from_const_char("\x0d\x0a\xff\x22\xa0"), 5)));
             }
             break;
         case 114:
diff --git a/zzz_generated/darwin-framework-tool/zap-generated/test/Commands.h b/zzz_generated/darwin-framework-tool/zap-generated/test/Commands.h
index 3fb1a5c..2b90c66 100644
--- a/zzz_generated/darwin-framework-tool/zap-generated/test/Commands.h
+++ b/zzz_generated/darwin-framework-tool/zap-generated/test/Commands.h
@@ -77591,7 +77591,7 @@
             {
                 id actualValue = value;
                 VerifyOrReturn(CheckValueAsString(
-                    "octet_string", actualValue, [[NSData alloc] initWithBytes:"\015\012\377\042\240" length:5]));
+                    "octet_string", actualValue, [[NSData alloc] initWithBytes:"\x0d\x0a\xff\x22\xa0" length:5]));
             }
 
             NextTest();