pw_protobuf: Distinguish between string and bytes fields

Use a MessageField bit to indicate that the field is a string field, as
opposed to a bytes field. Dedicating a bit to one specific type is an
inefficient use of the field_info_ bits, but there were bits to spare
and this was the simplest way to do this. If more field_info_ bits are
needed in the future, a few of the existing fields could be consolidated
into a single 4-bit field_type that specifies that precise type (e.g.
int32, fixed64, message, etc.).

It is necessary to distinguish between string and bytes in order to
switch from using pw::Vector<char> to pw::InlineString<> for string
fields.

This change also updates C++ codegen to include /*attr_name=*/ comments
for bool arguments to improve readability.

Change-Id: I31a918e8716f330eefddcf22b844d693d382b688
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/110231
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Armando Montanez <amontanez@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_protobuf/codegen_message_test.cc b/pw_protobuf/codegen_message_test.cc
index f23471d..a9df4bc 100644
--- a/pw_protobuf/codegen_message_test.cc
+++ b/pw_protobuf/codegen_message_test.cc
@@ -1265,6 +1265,7 @@
        false,
        false,
        false,
+       false,
        0,
        sizeof(KeyValuePair::Message) * 2,
        {}},
@@ -1866,6 +1867,7 @@
        false,
        false,
        false,
+       false,
        0,
        sizeof(KeyValuePair::Message) * 2,
        {}},
diff --git a/pw_protobuf/public/pw_protobuf/internal/codegen.h b/pw_protobuf/public/pw_protobuf/internal/codegen.h
index 34f4478..eed264f 100644
--- a/pw_protobuf/public/pw_protobuf/internal/codegen.h
+++ b/pw_protobuf/public/pw_protobuf/internal/codegen.h
@@ -61,6 +61,7 @@
                          WireType wire_type,
                          size_t elem_size,
                          VarintType varint_type,
+                         bool is_string,
                          bool is_fixed_size,
                          bool is_repeated,
                          bool is_optional,
@@ -73,7 +74,7 @@
             static_cast<unsigned int>(wire_type) << kWireTypeShift |
             elem_size << kElemSizeShift |
             static_cast<unsigned int>(varint_type) << kVarintTypeShift |
-            is_fixed_size << kIsFixedSizeShift |
+            is_string << kIsStringShift | is_fixed_size << kIsFixedSizeShift |
             is_repeated << kIsRepeatedShift | is_optional << kIsOptionalShift |
             use_callback << kUseCallbackShift | field_size << kFieldSizeShift),
         field_offset_(field_offset),
@@ -91,6 +92,9 @@
     return static_cast<VarintType>((field_info_ >> kVarintTypeShift) &
                                    kVarintTypeMask);
   }
+  constexpr bool is_string() const {
+    return (field_info_ >> kIsStringShift) & 1;
+  }
   constexpr bool is_fixed_size() const {
     return (field_info_ >> kIsFixedSizeShift) & 1;
   }
@@ -117,25 +121,34 @@
 
  private:
   // field_info_ packs multiple fields into a single word as follows:
+  //
   //   wire_type      : 3
   //   varint_type    : 2
+  //   is_string      : 1
   //   is_fixed_size  : 1
   //   is_repeated    : 1
   //   use_callback   : 1
   //   -
   //   elem_size      : 4
   //   is_optional    : 1
-  //   [unused space] : 3
+  //   [unused space] : 2
   //   -
   //   field_size     : 16
+  //
+  // The protobuf field type is spread among a few fields (wire_type,
+  // varint_type, is_string, elem_size). The exact field type (e.g. int32, bool,
+  // message, etc.), from which all of that information can be derived, can be
+  // represented in 4 bits. If more bits are needed in the future, these could
+  // be consolidated into a single field type enum.
   static constexpr unsigned int kWireTypeShift = 29u;
   static constexpr unsigned int kWireTypeMask = (1u << 3) - 1;
   static constexpr unsigned int kVarintTypeShift = 27u;
   static constexpr unsigned int kVarintTypeMask = (1u << 2) - 1;
-  static constexpr unsigned int kIsFixedSizeShift = 26u;
-  static constexpr unsigned int kIsRepeatedShift = 25u;
-  static constexpr unsigned int kUseCallbackShift = 24u;
-  static constexpr unsigned int kElemSizeShift = 20u;
+  static constexpr unsigned int kIsStringShift = 26u;
+  static constexpr unsigned int kIsFixedSizeShift = 25u;
+  static constexpr unsigned int kIsRepeatedShift = 24u;
+  static constexpr unsigned int kUseCallbackShift = 23u;
+  static constexpr unsigned int kElemSizeShift = 19u;
   static constexpr unsigned int kElemSizeMask = (1u << 4) - 1;
   static constexpr unsigned int kIsOptionalShift = 16u;
   static constexpr unsigned int kFieldSizeShift = 0u;
diff --git a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
index bc815bf..55116b1 100644
--- a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
+++ b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
@@ -349,6 +349,10 @@
         """
         raise NotImplementedError()
 
+    def is_string(self) -> bool:  # pylint: disable=no-self-use
+        """True if this field is a string field (as opposed to bytes)."""
+        return False
+
     def use_callback(self) -> bool:  # pylint: disable=no-self-use
         """Returns whether the decoder should use a callback."""
         options = self._field.options()
@@ -361,6 +365,9 @@
         return (self._field.is_optional() and self.max_size() == 0
                 and self.wire_type() != 'kDelimited')
 
+    def is_repeated(self) -> bool:
+        return self._field.is_repeated()
+
     def max_size(self) -> int:
         """Returns the maximum size of the field."""
         if self._field.is_repeated():
@@ -370,7 +377,7 @@
 
         return 0
 
-    def fixed_size(self) -> bool:
+    def is_fixed_size(self) -> bool:
         """Returns whether the decoder should use a fixed sized field."""
         if self._field.is_repeated():
             options = self._field.options()
@@ -399,7 +406,7 @@
             return (self.type_name(from_root), self.name())
 
         # Fixed size fields use std::array.
-        if self.fixed_size():
+        if self.is_fixed_size():
             return ('std::array<{}, {}>'.format(self.type_name(from_root),
                                                 max_size), self.name())
 
@@ -420,6 +427,10 @@
     def _elem_size_table_entry(self) -> str:
         return 'sizeof({})'.format(self.type_name())
 
+    def _bool_attr(self, attr: str) -> str:
+        """C++ string for a bool argument that includes the argument name."""
+        return f'/*{attr}=*/{bool(getattr(self, attr)())}'.lower()
+
     def table_entry(self) -> List[str]:
         """Table entry."""
         return [
@@ -427,10 +438,11 @@
             self._wire_type_table_entry(),
             self._elem_size_table_entry(),
             self._varint_type_table_entry(),
-            'true' if self.fixed_size() else 'false',
-            'true' if self._field.is_repeated() else 'false',
-            'true' if self.is_optional() else 'false',
-            'true' if self.use_callback() else 'false',
+            self._bool_attr('is_string'),
+            self._bool_attr('is_fixed_size'),
+            self._bool_attr('is_repeated'),
+            self._bool_attr('is_optional'),
+            self._bool_attr('use_callback'),
             'offsetof(Message, {})'.format(self.name()),
             'sizeof(Message::{})'.format(self.name()),
             self.sub_table(),
@@ -1493,7 +1505,7 @@
 
         return 0
 
-    def fixed_size(self) -> bool:
+    def is_fixed_size(self) -> bool:
         if not self._field.is_repeated():
             options = self._field.options()
             assert options is not None
@@ -1562,12 +1574,15 @@
 
         return 0
 
-    def fixed_size(self) -> bool:
+    def is_fixed_size(self) -> bool:
         return False
 
     def wire_type(self) -> str:
         return 'kDelimited'
 
+    def is_string(self) -> bool:
+        return True
+
     def _size_fn(self) -> str:
         # This uses the WithoutValue method to ensure that the maximum length
         # of the delimited field size varint is used. This accounts for scratch