Reduce stack use for certain arithmetic operations.
diff --git a/runtime/cpp/emboss_arithmetic.h b/runtime/cpp/emboss_arithmetic.h
index dd570ba..9bb209a 100644
--- a/runtime/cpp/emboss_arithmetic.h
+++ b/runtime/cpp/emboss_arithmetic.h
@@ -49,12 +49,37 @@
// can be used directly for each operation, but casting may be required in
// between operations.
+
+// AllKnown(...) returns true if all of its arguments are Known(). The base
+// case is no arguments.
inline constexpr bool AllKnown() { return true; }
-template <typename T, typename... RestT>
-inline constexpr bool AllKnown(T value, RestT... rest) {
- return value.Known() && AllKnown(rest...);
-}
+// The rest of AllKnown() could be:
+//
+// template <typename T, typename... RestT>
+// inline constexpr bool AllKnown(T v, RestT... rest) {
+// return v.Known() && AllKnown(rest...);
+// }
+//
+// ... unfortunately, some compilers do not optimize this well, and it ends
+// up using linear stack space instead of constant stack space; for complex
+// structs on systems with limited stack (such as typical microcontrollers),
+// this can cause methods like Ok() to blow the stack.
+//
+// The C++14 solution would be to use a std::initializer_list and iterate over
+// the arguments. Unfortunately, C++11 std::initializer_list is not
+// constexpr, and C++11 constexpr does not allow iteration.
+//
+// Instead, for "small" numbers of arguments (up to 64, at time of writing), we
+// have generated overloads of the form:
+//
+// template <typename T0, ... typename TN>
+// inline constexpr bool AllKnown(T0 v0, ... TN vN) {
+// return v0.Known() && ... && vN.Known();
+// }
+//
+// This reduces stack frames by ~64x.
+# include "emboss_arithmetic_all_known_generated.inc"
// MaybeDo implements the logic of checking for known values, unwrapping the
// known values, passing the unwrapped values to OperatorT, and then rewrapping
@@ -150,26 +175,68 @@
// MaximumOperation is a bit more complex, in order to handle the variable
// number of parameters.
struct MaximumOperation {
+ // Maximum of 1 element is just itself.
template <typename T>
static inline constexpr T Do(T arg) {
- // Base case for recursive template.
return arg;
}
- // Ideally, this would only use template<typename T>, but C++11 requires a
- // full variadic template or C-style variadic function in order to accept a
- // variable number of arguments. C-style variadic functions have no intrinsic
- // way of figuring out how many arguments they receive, so we have to use a
- // variadic template.
+ // The rest of MaximumOperation::Do could be:
//
- // The static_assert ensures that all arguments are actually the same type.
- template <typename T1, typename T2, typename... T>
- static inline constexpr T1 Do(T1 l, T2 r, T... rest) {
+ // template <typename T, typename... RestT>
+ // static inline constexpr T Do(T v0, T v1, RestT... rest) {
+ // return Do(v0 < v1 ? v1 : v0, rest...);
+ // }
+ //
+ // ... unfortunately, some compilers do not optimize this well, and it ends
+ // up using linear stack space instead of constant stack space; for complex
+ // structs on systems with limited stack (such as typical microcontrollers),
+ // this can cause methods like Ok() to blow the stack.
+ //
+ // The C++14 solution would be to use a std::initializer_list and iterate over
+ // the arguments. Unfortunately, C++11 std::initializer_list is not
+ // constexpr, and C++11 constexpr does not allow iteration.
+ //
+ // Instead, we have a small number of hand-written overloads and a large
+ // number (61, at time of writing) of generated overloads, which use O(lg(N))
+ // stack for "small" numbers of arguments (128 or fewer, at time of writing),
+ // and O(N) stack for more arguments, but with a much, much smaller constant
+ // multiplier: one additional stack frame per 64 arguments, instead of one
+ // per argument.
+
+ // Maximum of 2-4 elements are special-cased.
+ template <typename T>
+ static inline constexpr T Do(T v0, T v1) {
// C++11 std::max is not constexpr, so we can't just call it.
- static_assert(::std::is_same<T1, T2>::value,
- "Expected Do to be called with a proper intermediate type.");
- return Do(l < r ? r : l, rest...);
+ return v0 < v1 ? v1 : v0;
}
+
+ template <typename T>
+ static inline constexpr T Do(T v0, T v1, T v2) {
+ return Do(v0 < v1 ? v1 : v0, v2);
+ }
+
+ template <typename T>
+ static inline constexpr T Do(T v0, T v1, T v2, T v3) {
+ return Do(v0 < v1 ? v1 : v0, v2 < v3 ? v3 : v2);
+ }
+
+ // The remaining overloads (5+ arguments) are generated by a script and
+ // #included, so that they do not clutter the hand-written code.
+ //
+ // They are of the form:
+ //
+ // template <typename T>
+ // static inline constexpr Do(T v0, ... T vN, T vN_plus_1, ... T v2N) {
+ // return Do(Do(v0, ... vN), Do(vN_plus_1, ... v2N));
+ // }
+ //
+ // In each case, they cut their argument lists in half, calling Do(Do(first
+ // half), Do(second half)).
+ //
+ // Note that, if there are enough arguments, this still falls back onto
+ // linear-stack-space recursion.
+# include "emboss_arithmetic_maximum_operation_generated.inc"
};
//// Special operations, where either un-Known() operands do not always result