pw_protobuf: Make maximum varint size configurable

This adds a configuration option to set the number of bytes reserved for
nested message size varints when encoding a protobuf. A simple mapping
of varint size -> integer type is temporarily added to support the
stack-based encoder.

Change-Id: I2947fc3c9bc3f77262a040620517c8b1b0a89760
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/39582
Reviewed-by: Ewout van Bekkum <ewout@google.com>
Reviewed-by: Wyatt Hepler <hepler@google.com>
Commit-Queue: Alexei Frolov <frolv@google.com>
diff --git a/pw_protobuf/BUILD b/pw_protobuf/BUILD
index 54b4d3a..40edf6d 100644
--- a/pw_protobuf/BUILD
+++ b/pw_protobuf/BUILD
@@ -23,6 +23,12 @@
 licenses(["notice"])  # Apache License 2.0
 
 pw_cc_library(
+    name = "config",
+    hdrs = ["public/pw_protobuf/config.h"],
+    includes = ["public"],
+)
+
+pw_cc_library(
     name = "pw_protobuf",
     srcs = [
         "decoder.cc",
@@ -39,6 +45,7 @@
     ],
     includes = ["public"],
     deps = [
+        ":config",
         "//pw_span",
         "//pw_status",
         "//pw_varint",
@@ -81,6 +88,14 @@
     ],
 )
 
+# TODO(frolv): Figure out how to add facade tests to Bazel.
+filegroup(
+    name = "varint_size_test",
+    srcs = [
+        "varint_size_test.cc",
+    ],
+)
+
 # TODO(frolv): Figure out what to do about size reports in Bazel.
 filegroup(
     name = "size_reports",
diff --git a/pw_protobuf/BUILD.gn b/pw_protobuf/BUILD.gn
index 562850f..ceb9947 100644
--- a/pw_protobuf/BUILD.gn
+++ b/pw_protobuf/BUILD.gn
@@ -1,4 +1,4 @@
-# Copyright 2020 The Pigweed Authors
+# Copyright 2021 The Pigweed Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 # use this file except in compliance with the License. You may obtain a copy of
@@ -15,19 +15,36 @@
 import("//build_overrides/pigweed.gni")
 
 import("$dir_pw_build/input_group.gni")
+import("$dir_pw_build/module_config.gni")
 import("$dir_pw_build/target_types.gni")
 import("$dir_pw_docgen/docs.gni")
 import("$dir_pw_fuzzer/fuzzer.gni")
 import("$dir_pw_protobuf_compiler/proto.gni")
+import("$dir_pw_unit_test/facade_test.gni")
 import("$dir_pw_unit_test/test.gni")
 
-config("default_config") {
+declare_args() {
+  # The build target that overrides the default configuration options for this
+  # module. This should point to a source set that provides defines through a
+  # public config (which may -include a file or add defines directly).
+  pw_protobuf_CONFIG = pw_build_DEFAULT_MODULE_CONFIG
+}
+
+config("public_include_path") {
   include_dirs = [ "public" ]
 }
 
+pw_source_set("config") {
+  public = [ "public/pw_protobuf/config.h" ]
+  public_configs = [ ":public_include_path" ]
+  public_deps = [ pw_protobuf_CONFIG ]
+  visibility = [ ":*" ]
+}
+
 pw_source_set("pw_protobuf") {
-  public_configs = [ ":default_config" ]
+  public_configs = [ ":public_include_path" ]
   public_deps = [
+    ":config",
     dir_pw_bytes,
     dir_pw_result,
     dir_pw_status,
@@ -66,6 +83,7 @@
     ":encoder_test",
     ":encoder_fuzzer",
     ":find_test",
+    ":varint_size_test",
   ]
 }
 
@@ -89,6 +107,24 @@
   sources = [ "codegen_test.cc" ]
 }
 
+config("one_byte_varint") {
+  defines = [ "PW_PROTOBUF_CFG_MAX_VARINT_SIZE=1" ]
+  visibility = [ ":*" ]
+}
+
+pw_source_set("varint_size_test_config") {
+  public_configs = [ ":one_byte_varint" ]
+  visibility = [ ":*" ]
+}
+
+pw_facade_test("varint_size_test") {
+  build_args = {
+    pw_protobuf_CONFIG = ":varint_size_test_config"
+  }
+  deps = [ ":pw_protobuf" ]
+  sources = [ "varint_size_test.cc" ]
+}
+
 pw_proto_library("common_protos") {
   sources = [ "pw_protobuf_protos/common.proto" ]
 }
diff --git a/pw_protobuf/CMakeLists.txt b/pw_protobuf/CMakeLists.txt
index 44ed7dc..8d67047 100644
--- a/pw_protobuf/CMakeLists.txt
+++ b/pw_protobuf/CMakeLists.txt
@@ -15,14 +15,57 @@
 include($ENV{PW_ROOT}/pw_build/pigweed.cmake)
 include($ENV{PW_ROOT}/pw_protobuf_compiler/proto.cmake)
 
-pw_auto_add_simple_module(pw_protobuf
+pw_add_module_library(pw_protobuf
+  SOURCES
+    decoder.cc
+    encoder.cc
+    find.cc
   PUBLIC_DEPS
     pw_bytes
     pw_result
     pw_status
     pw_varint
-  TEST_DEPS
+)
+
+pw_add_test(pw_protobuf.decoder_test
+  SOURCES
+    decoder_test.cc
+  DEPS
+    pw_protobuf
+  GROUPS
+    modules
+    pw_protobuf
+)
+
+pw_add_test(pw_protobuf.encoder_test
+  SOURCES
+    encoder_test.cc
+  DEPS
+    pw_protobuf
+  GROUPS
+    modules
+    pw_protobuf
+)
+
+pw_add_test(pw_protobuf.find_test
+  SOURCES
+    find_test.cc
+  DEPS
+    pw_protobuf
+  GROUPS
+    modules
+    pw_protobuf
+)
+
+pw_add_test(pw_protobuf.codegen_test
+  SOURCES
+    codegen_test.cc
+  DEPS
+    pw_protobuf
     pw_protobuf.codegen_test_protos.pwpb
+  GROUPS
+    modules
+    pw_protobuf
 )
 
 pw_proto_library(pw_protobuf.codegen_test_protos
diff --git a/pw_protobuf/docs.rst b/pw_protobuf/docs.rst
index ffc13bc..5a02779 100644
--- a/pw_protobuf/docs.rst
+++ b/pw_protobuf/docs.rst
@@ -27,6 +27,32 @@
 specific protobuf messages. The code generation integrates with Pigweed's GN
 build system.
 
+Configuration
+=============
+``pw_protobuf`` supports the following configuration options.
+
+* ``PW_PROTOBUF_CFG_MAX_VARINT_SIZE``:
+  When encoding nested messages, the number of bytes to reserve for the varint
+  submessage length. Nested messages are limited in size to the maximum value
+  that can be varint-encoded into this reserved space.
+
+  The values that can be set, and their corresponding maximum submessage
+  lengths, are outlined below.
+
+  +-------------------+----------------------------------------+
+  | MAX_VARINT_SIZE   | Maximum submessage length              |
+  +===================+========================================+
+  | 1 byte            | 127                                    |
+  +-------------------+----------------------------------------+
+  | 2 bytes           | 16,383 or < 16KiB                      |
+  +-------------------+----------------------------------------+
+  | 3 bytes           | 2,097,151 or < 2048KiB                 |
+  +-------------------+----------------------------------------+
+  | 4 bytes (default) | 268,435,455 or < 256MiB                |
+  +-------------------+----------------------------------------+
+  | 5 bytes           | 4,294,967,295 or < 4GiB (max uint32_t) |
+  +-------------------+----------------------------------------+
+
 Usage
 =====
 ``pw_protobuf`` splits wire format encoding and decoding operations. Links to
diff --git a/pw_protobuf/encoder.cc b/pw_protobuf/encoder.cc
index 807a78b..6ca6f6d 100644
--- a/pw_protobuf/encoder.cc
+++ b/pw_protobuf/encoder.cc
@@ -1,4 +1,4 @@
-// Copyright 2019 The Pigweed Authors
+// Copyright 2021 The Pigweed Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy of
@@ -14,14 +14,15 @@
 
 #include "pw_protobuf/encoder.h"
 
+#include <limits>
+
 namespace pw::protobuf {
 
 Status Encoder::WriteUint64(uint32_t field_number, uint64_t value) {
   std::byte* original_cursor = cursor_;
   WriteFieldKey(field_number, WireType::kVarint);
-  Status status = WriteVarint(value);
-  IncreaseParentSize(cursor_ - original_cursor);
-  return status;
+  WriteVarint(value);
+  return IncreaseParentSize(cursor_ - original_cursor);
 }
 
 // Encodes a base-128 varint to the buffer.
@@ -90,7 +91,7 @@
   }
 
   // Update parent size with the written key.
-  IncreaseParentSize(cursor_ - original_cursor);
+  PW_TRY(IncreaseParentSize(cursor_ - original_cursor));
 
   union {
     std::byte* cursor;
@@ -121,7 +122,7 @@
   // Update the parent's size with how much total space the child will take
   // after its size field is varint encoded.
   SizeType child_size = *blob_stack_[--depth_];
-  IncreaseParentSize(child_size + VarintSizeBytes(child_size));
+  PW_TRY(IncreaseParentSize(child_size + VarintSizeBytes(child_size)));
 
   // Encode the child
   if (Status status = EncodeFrom(blob_count_ - 1).status(); !status.ok()) {
@@ -188,4 +189,28 @@
   return Result<ConstByteSpan>(buffer_.first(EncodedSize()));
 }
 
+Status Encoder::IncreaseParentSize(size_t size_bytes) {
+  if (!encode_status_.ok()) {
+    return encode_status_;
+  }
+
+  if (depth_ == 0) {
+    return OkStatus();
+  }
+
+  size_t current_size = *blob_stack_[depth_ - 1];
+
+  constexpr size_t max_size =
+      std::min(varint::MaxValueInBytes(sizeof(SizeType)),
+               static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+
+  if (size_bytes > max_size || current_size > max_size - size_bytes) {
+    encode_status_ = Status::OutOfRange();
+    return encode_status_;
+  }
+
+  *blob_stack_[depth_ - 1] = current_size + size_bytes;
+  return OkStatus();
+}
+
 }  // namespace pw::protobuf
diff --git a/pw_protobuf/public/pw_protobuf/config.h b/pw_protobuf/public/pw_protobuf/config.h
new file mode 100644
index 0000000..c7a12eb
--- /dev/null
+++ b/pw_protobuf/public/pw_protobuf/config.h
@@ -0,0 +1,56 @@
+// Copyright 2021 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// Configuration macros for the protobuf module.
+#pragma once
+
+#include <cstddef>
+
+// When encoding nested messages, the number of bytes to reserve for the varint
+// submessage length. Nested messages are limited in size to the maximum value
+// that can be varint-encoded into this reserved space.
+//
+// The values that can be set, and their corresponding maximum submessage
+// lengths, are outlined below.
+//
+//   1 byte  => 127
+//   2 bytes => 16,383 or < 16KiB
+//   3 bytes => 2,097,151 or < 2048KiB
+//   4 bytes => 268,435,455 or < 256MiB
+//   5 bytes => 4,294,967,295 or < 4GiB (max uint32_t)
+//
+#ifndef PW_PROTOBUF_CFG_MAX_VARINT_SIZE
+#define PW_PROTOBUF_CFG_MAX_VARINT_SIZE 4
+#endif  // PW_PROTOBUF_MAX_VARINT_SIZE
+
+static_assert(PW_PROTOBUF_CFG_MAX_VARINT_SIZE > 0 &&
+              PW_PROTOBUF_CFG_MAX_VARINT_SIZE <= 5);
+
+namespace pw::protobuf::config {
+
+inline constexpr size_t kMaxVarintSize = PW_PROTOBUF_CFG_MAX_VARINT_SIZE;
+
+// TODO(frolv): This converts the configured varint length to the legacy encoder
+// SizeType. Remove this with the encoder rewrite.
+#if PW_PROTOBUF_CFG_MAX_VARINT_SIZE == 1
+using SizeType = uint8_t;
+#elif PW_PROTOBUF_CFG_MAX_VARINT_SIZE == 2
+using SizeType = uint16_t;
+#elif PW_PROTOBUF_CFG_MAX_VARINT_SIZE <= 4
+using SizeType = uint32_t;
+#else
+using SizeType = uint64_t;
+#endif
+
+}  // namespace pw::protobuf::config
diff --git a/pw_protobuf/public/pw_protobuf/encoder.h b/pw_protobuf/public/pw_protobuf/encoder.h
index 51c335d..fbf9acf 100644
--- a/pw_protobuf/public/pw_protobuf/encoder.h
+++ b/pw_protobuf/public/pw_protobuf/encoder.h
@@ -1,4 +1,4 @@
-// Copyright 2019 The Pigweed Authors
+// Copyright 2021 The Pigweed Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy of
@@ -18,9 +18,10 @@
 #include <span>
 
 #include "pw_bytes/span.h"
+#include "pw_protobuf/config.h"
 #include "pw_protobuf/wire_format.h"
 #include "pw_result/result.h"
-#include "pw_status/status.h"
+#include "pw_status/try.h"
 #include "pw_varint/varint.h"
 
 namespace pw::protobuf {
@@ -28,10 +29,7 @@
 // A streaming protobuf encoder which encodes to a user-specified buffer.
 class Encoder {
  public:
-  // TODO(frolv): Right now, only one intermediate size is supported. However,
-  // this can be wasteful, as it requires 4 or 8 bytes of space per nested
-  // message. This can be templated to minimize the overhead.
-  using SizeType = size_t;
+  using SizeType = config::SizeType;
 
   constexpr Encoder(ByteSpan buffer,
                     std::span<SizeType*> locations,
@@ -144,9 +142,8 @@
   Status WriteFixed32(uint32_t field_number, uint32_t value) {
     std::byte* original_cursor = cursor_;
     WriteFieldKey(field_number, WireType::kFixed32);
-    Status status = WriteRawBytes(value);
-    IncreaseParentSize(cursor_ - original_cursor);
-    return status;
+    WriteRawBytes(value);
+    return IncreaseParentSize(cursor_ - original_cursor);
   }
 
   // Writes a repeated fixed32 field using packed encoding.
@@ -159,9 +156,8 @@
   Status WriteFixed64(uint32_t field_number, uint64_t value) {
     std::byte* original_cursor = cursor_;
     WriteFieldKey(field_number, WireType::kFixed64);
-    Status status = WriteRawBytes(value);
-    IncreaseParentSize(cursor_ - original_cursor);
-    return status;
+    WriteRawBytes(value);
+    return IncreaseParentSize(cursor_ - original_cursor);
   }
 
   // Writes a repeated fixed64 field using packed encoding.
@@ -198,9 +194,8 @@
                   "Float and uint32_t are not the same size");
     std::byte* original_cursor = cursor_;
     WriteFieldKey(field_number, WireType::kFixed32);
-    Status status = WriteRawBytes(value);
-    IncreaseParentSize(cursor_ - original_cursor);
-    return status;
+    WriteRawBytes(value);
+    return IncreaseParentSize(cursor_ - original_cursor);
   }
 
   // Writes a repeated float field using packed encoding.
@@ -215,9 +210,8 @@
                   "Double and uint64_t are not the same size");
     std::byte* original_cursor = cursor_;
     WriteFieldKey(field_number, WireType::kFixed64);
-    Status status = WriteRawBytes(value);
-    IncreaseParentSize(cursor_ - original_cursor);
-    return status;
+    WriteRawBytes(value);
+    return IncreaseParentSize(cursor_ - original_cursor);
   }
 
   // Writes a repeated double field using packed encoding.
@@ -231,9 +225,8 @@
     std::byte* original_cursor = cursor_;
     WriteFieldKey(field_number, WireType::kDelimited);
     WriteVarint(value.size_bytes());
-    Status status = WriteRawBytes(value.data(), value.size_bytes());
-    IncreaseParentSize(cursor_ - original_cursor);
-    return status;
+    WriteRawBytes(value.data(), value.size_bytes());
+    return IncreaseParentSize(cursor_ - original_cursor);
   }
 
   // Writes a proto string key-value pair.
@@ -330,17 +323,13 @@
         WriteVarint(value);
       }
     }
-    IncreaseParentSize(cursor_ - original_cursor);
+    PW_TRY(IncreaseParentSize(cursor_ - original_cursor));
 
     return Pop();
   }
 
   // Adds to the parent proto's size field in the buffer.
-  void IncreaseParentSize(size_t bytes) {
-    if (depth_ > 0) {
-      *blob_stack_[depth_ - 1] += bytes;
-    }
-  }
+  Status IncreaseParentSize(size_t size_bytes);
 
   // Returns the size of `n` encoded as a varint.
   size_t VarintSizeBytes(uint64_t n) {
@@ -382,8 +371,8 @@
   NestedEncoder& operator=(const NestedEncoder& other) = delete;
 
  private:
-  std::array<size_t*, kMaxBlobs> blobs_;
-  std::array<size_t*, kMaxNestedDepth> stack_;
+  std::array<Encoder::SizeType*, kMaxBlobs> blobs_;
+  std::array<Encoder::SizeType*, kMaxNestedDepth> stack_;
 };
 
 // Explicit template argument deduction to hide warnings.
diff --git a/pw_protobuf/varint_size_test.cc b/pw_protobuf/varint_size_test.cc
new file mode 100644
index 0000000..246b265
--- /dev/null
+++ b/pw_protobuf/varint_size_test.cc
@@ -0,0 +1,73 @@
+// Copyright 2021 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "gtest/gtest.h"
+#include "pw_bytes/array.h"
+#include "pw_protobuf/encoder.h"
+
+namespace pw::protobuf {
+namespace {
+
+TEST(Encoder, SizeTypeIsConfigured) {
+  static_assert(sizeof(Encoder::SizeType) == sizeof(uint8_t));
+}
+
+TEST(Encoder, NestedWriteSmallerThanVarintSize) {
+  std::array<std::byte, 256> buffer;
+  NestedEncoder<2, 2> encoder(buffer);
+
+  encoder.Push(1);
+  // 1 byte key + 1 byte size + 125 byte value = 127 byte nested length.
+  EXPECT_EQ(encoder.WriteBytes(2, bytes::Initialized<125>(0xaa)), OkStatus());
+  encoder.Pop();
+
+  auto result = encoder.Encode();
+  EXPECT_EQ(result.status(), OkStatus());
+}
+
+TEST(Encoder, NestedWriteLargerThanVarintSizeReturnsOutOfRange) {
+  std::array<std::byte, 256> buffer;
+  NestedEncoder<2, 2> encoder(buffer);
+
+  // Try to write a larger nested message than the max nested varint value.
+  encoder.Push(1);
+  // 1 byte key + 1 byte size + 126 byte value = 128 byte nested length.
+  EXPECT_EQ(encoder.WriteBytes(2, bytes::Initialized<126>(0xaa)),
+            Status::OutOfRange());
+  EXPECT_EQ(encoder.WriteUint32(3, 42), Status::OutOfRange());
+  encoder.Pop();
+
+  auto result = encoder.Encode();
+  EXPECT_EQ(result.status(), Status::OutOfRange());
+}
+
+TEST(Encoder, NestedMessageLargerThanVarintSizeReturnsOutOfRange) {
+  std::array<std::byte, 256> buffer;
+  NestedEncoder<2, 2> encoder(buffer);
+
+  // Try to write a larger nested message than the max nested varint value as
+  // multiple smaller writes.
+  encoder.Push(1);
+  EXPECT_EQ(encoder.WriteBytes(2, bytes::Initialized<60>(0xaa)), OkStatus());
+  EXPECT_EQ(encoder.WriteBytes(3, bytes::Initialized<60>(0xaa)), OkStatus());
+  EXPECT_EQ(encoder.WriteBytes(4, bytes::Initialized<60>(0xaa)),
+            Status::OutOfRange());
+  encoder.Pop();
+
+  auto result = encoder.Encode();
+  EXPECT_EQ(result.status(), Status::OutOfRange());
+}
+
+}  // namespace
+}  // namespace pw::protobuf