blob: d50bd99f9c421b9a41cb910e71cfec6dd23c8cb4 [file] [log] [blame]
/* Amalgamated source file */
#include "ruby-upb.h"
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This is where we define macros used across upb.
*
* All of these macros are undef'd in port_undef.inc to avoid leaking them to
* users.
*
* The correct usage is:
*
* #include "upb/foobar.h"
* #include "upb/baz.h"
*
* // MUST be last included header.
* #include "upb/port_def.inc"
*
* // Code for this file.
* // <...>
*
* // Can be omitted for .c files, required for .h.
* #include "upb/port_undef.inc"
*
* This file is private and must not be included by users!
*/
#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \
(defined(_MSC_VER) && _MSC_VER >= 1900))
#error upb requires C99 or C++11 or MSVC >= 2015.
#endif
#include <stdint.h>
#include <stddef.h>
#if UINTPTR_MAX == 0xffffffff
#define UPB_SIZE(size32, size64) size32
#else
#define UPB_SIZE(size32, size64) size64
#endif
/* If we always read/write as a consistent type to each address, this shouldn't
* violate aliasing.
*/
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
*UPB_PTR_AT(msg, case_offset, int) == case_val \
? *UPB_PTR_AT(msg, offset, fieldtype) \
: default
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
*UPB_PTR_AT(msg, case_offset, int) = case_val; \
*UPB_PTR_AT(msg, offset, fieldtype) = value;
#define UPB_MAPTYPE_STRING 0
/* UPB_INLINE: inline if possible, emit standalone code if required. */
#ifdef __cplusplus
#define UPB_INLINE inline
#elif defined (__GNUC__) || defined(__clang__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static
#endif
#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16)
#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
/* Hints to the compiler about likely/unlikely branches. */
#if defined (__GNUC__) || defined(__clang__)
#define UPB_LIKELY(x) __builtin_expect((x),1)
#define UPB_UNLIKELY(x) __builtin_expect((x),0)
#else
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#endif
/* Macros for function attributes on compilers that support them. */
#ifdef __GNUC__
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
#elif defined(_MSC_VER)
#define UPB_NOINLINE
#define UPB_FORCEINLINE
#define UPB_NORETURN __declspec(noreturn)
#define UPB_PRINTF(str, first_vararg)
#else /* !defined(__GNUC__) */
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#define UPB_PRINTF(str, first_vararg)
#endif
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_UNUSED(var) (void)var
/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
*/
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable()
#elif defined _MSC_VER
#define UPB_ASSUME(expr) if (!(expr)) __assume(0)
#else
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
/* UPB_ASSERT(): in release mode, we use the expression without letting it be
* evaluated. This prevents "unused variable" warnings. */
#ifdef NDEBUG
#define UPB_ASSERT(expr) do {} while (false && (expr))
#else
#define UPB_ASSERT(expr) assert(expr)
#endif
#if defined(__GNUC__) || defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#endif
/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */
#ifdef __APPLE__
#define UPB_SETJMP(buf) _setjmp(buf)
#define UPB_LONGJMP(buf, val) _longjmp(buf, val)
#else
#define UPB_SETJMP(buf) setjmp(buf)
#define UPB_LONGJMP(buf, val) longjmp(buf, val)
#endif
/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */
#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr))
/* Configure whether fasttable is switched on or not. *************************/
#ifdef __has_attribute
#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
#define UPB_HAS_ATTRIBUTE(x) 0
#endif
#if UPB_HAS_ATTRIBUTE(musttail)
#define UPB_MUSTTAIL __attribute__((musttail))
#else
#define UPB_MUSTTAIL
#endif
#undef UPB_HAS_ATTRIBUTE
/* This check is not fully robust: it does not require that we have "musttail"
* support available. We need tail calls to avoid consuming arbitrary amounts
* of stack space.
*
* GCC/Clang can mostly be trusted to generate tail calls as long as
* optimization is enabled, but, debug builds will not generate tail calls
* unless "musttail" is available.
*
* We should probably either:
* 1. require that the compiler supports musttail.
* 2. add some fallback code for when musttail isn't available (ie. return
* instead of tail calling). This is safe and portable, but this comes at
* a CPU cost.
*/
#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__)
#define UPB_FASTTABLE_SUPPORTED 1
#else
#define UPB_FASTTABLE_SUPPORTED 0
#endif
/* define UPB_ENABLE_FASTTABLE to force fast table support.
* This is useful when we want to ensure we are really getting fasttable,
* for example for testing or benchmarking. */
#if defined(UPB_ENABLE_FASTTABLE)
#if !UPB_FASTTABLE_SUPPORTED
#error fasttable is x86-64/ARM64 only and requires GCC or Clang.
#endif
#define UPB_FASTTABLE 1
/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
* This is useful for releasing code that might be used on multiple platforms,
* for example the PHP or Ruby C extensions. */
#elif defined(UPB_TRY_ENABLE_FASTTABLE)
#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
#else
#define UPB_FASTTABLE 0
#endif
/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
* degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */
#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE)
#define UPB_FASTTABLE_INIT(...)
#else
#define UPB_FASTTABLE_INIT(...) __VA_ARGS__
#endif
#undef UPB_FASTTABLE_SUPPORTED
/* ASAN poisoning (for arena) *************************************************/
#if defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 1
#ifdef __cplusplus
extern "C" {
#endif
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#define UPB_POISON_MEMORY_REGION(addr, size) \
__asan_poison_memory_region((addr), (size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
__asan_unpoison_memory_region((addr), (size))
#else
#define UPB_ASAN 0
#define UPB_POISON_MEMORY_REGION(addr, size) \
((void)(addr), (void)(size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
((void)(addr), (void)(size))
#endif
/* Disable proto2 arena behavior (TEMPORARY) **********************************/
#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
#else
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
#endif
/** upb/decode.c ************************************************************/
#include <setjmp.h>
#include <string.h>
/* Must be last. */
/* Maps descriptor type -> elem_size_lg2. */
static const uint8_t desctype_to_elem_size_lg2[] = {
-1, /* invalid descriptor type */
3, /* DOUBLE */
2, /* FLOAT */
3, /* INT64 */
3, /* UINT64 */
2, /* INT32 */
3, /* FIXED64 */
2, /* FIXED32 */
0, /* BOOL */
UPB_SIZE(3, 4), /* STRING */
UPB_SIZE(2, 3), /* GROUP */
UPB_SIZE(2, 3), /* MESSAGE */
UPB_SIZE(3, 4), /* BYTES */
2, /* UINT32 */
2, /* ENUM */
2, /* SFIXED32 */
3, /* SFIXED64 */
2, /* SINT32 */
3, /* SINT64 */
};
/* Maps descriptor type -> upb map size. */
static const uint8_t desctype_to_mapsize[] = {
-1, /* invalid descriptor type */
8, /* DOUBLE */
4, /* FLOAT */
8, /* INT64 */
8, /* UINT64 */
4, /* INT32 */
8, /* FIXED64 */
4, /* FIXED32 */
1, /* BOOL */
UPB_MAPTYPE_STRING, /* STRING */
sizeof(void*), /* GROUP */
sizeof(void*), /* MESSAGE */
UPB_MAPTYPE_STRING, /* BYTES */
4, /* UINT32 */
4, /* ENUM */
4, /* SFIXED32 */
8, /* SFIXED64 */
4, /* SINT32 */
8, /* SINT64 */
};
static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) |
(1 << kUpb_FieldType_Fixed32) |
(1 << kUpb_FieldType_SFixed32);
static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) |
(1 << kUpb_FieldType_Fixed64) |
(1 << kUpb_FieldType_SFixed64);
/* Three fake field types for MessageSet. */
#define TYPE_MSGSET_ITEM 19
#define TYPE_MSGSET_TYPE_ID 20
#define TYPE_COUNT 20
/* Op: an action to be performed for a wire-type/field-type combination. */
#define OP_UNKNOWN -1 /* Unknown field. */
#define OP_MSGSET_ITEM -2
#define OP_MSGSET_TYPEID -3
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Scalar fields use only ops above. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
static const int8_t varint_ops[] = {
OP_UNKNOWN, /* field not found */
OP_UNKNOWN, /* DOUBLE */
OP_UNKNOWN, /* FLOAT */
OP_SCALAR_LG2(3), /* INT64 */
OP_SCALAR_LG2(3), /* UINT64 */
OP_SCALAR_LG2(2), /* INT32 */
OP_UNKNOWN, /* FIXED64 */
OP_UNKNOWN, /* FIXED32 */
OP_SCALAR_LG2(0), /* BOOL */
OP_UNKNOWN, /* STRING */
OP_UNKNOWN, /* GROUP */
OP_UNKNOWN, /* MESSAGE */
OP_UNKNOWN, /* BYTES */
OP_SCALAR_LG2(2), /* UINT32 */
OP_ENUM, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_SCALAR_LG2(2), /* SINT32 */
OP_SCALAR_LG2(3), /* SINT64 */
OP_UNKNOWN, /* MSGSET_ITEM */
OP_MSGSET_TYPEID, /* MSGSET TYPEID */
};
static const int8_t delim_ops[] = {
/* For non-repeated field type. */
OP_UNKNOWN, /* field not found */
OP_UNKNOWN, /* DOUBLE */
OP_UNKNOWN, /* FLOAT */
OP_UNKNOWN, /* INT64 */
OP_UNKNOWN, /* UINT64 */
OP_UNKNOWN, /* INT32 */
OP_UNKNOWN, /* FIXED64 */
OP_UNKNOWN, /* FIXED32 */
OP_UNKNOWN, /* BOOL */
OP_STRING, /* STRING */
OP_UNKNOWN, /* GROUP */
OP_SUBMSG, /* MESSAGE */
OP_BYTES, /* BYTES */
OP_UNKNOWN, /* UINT32 */
OP_UNKNOWN, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_UNKNOWN, /* SINT32 */
OP_UNKNOWN, /* SINT64 */
OP_UNKNOWN, /* MSGSET_ITEM */
OP_UNKNOWN, /* MSGSET TYPEID */
/* For repeated field type. */
OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
OP_VARPCK_LG2(3), /* REPEATED INT64 */
OP_VARPCK_LG2(3), /* REPEATED UINT64 */
OP_VARPCK_LG2(2), /* REPEATED INT32 */
OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
OP_VARPCK_LG2(0), /* REPEATED BOOL */
OP_STRING, /* REPEATED STRING */
OP_SUBMSG, /* REPEATED GROUP */
OP_SUBMSG, /* REPEATED MESSAGE */
OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_PACKED_ENUM, /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
OP_VARPCK_LG2(2), /* REPEATED SINT32 */
OP_VARPCK_LG2(3), /* REPEATED SINT64 */
/* Omitting MSGSET_*, because we never emit a repeated msgset type */
};
typedef union {
bool bool_val;
uint32_t uint32_val;
uint64_t uint64_val;
uint32_t size;
} wireval;
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
const upb_MiniTable* layout);
UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) {
assert(status != kUpb_DecodeStatus_Ok);
UPB_LONGJMP(d->err, status);
}
const char* fastdecode_err(upb_Decoder* d, int status) {
assert(status != kUpb_DecodeStatus_Ok);
UPB_LONGJMP(d->err, status);
return NULL;
}
static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) {
if (!decode_verifyutf8_inl(buf, len))
decode_err(d, kUpb_DecodeStatus_BadUtf8);
}
static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
bool need_realloc = arr->size - arr->len < elem;
if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
return need_realloc;
}
typedef struct {
const char* ptr;
uint64_t val;
} decode_vret;
UPB_NOINLINE
static decode_vret decode_longvarint64(const char* ptr, uint64_t val) {
decode_vret ret = {NULL, 0};
uint64_t byte;
int i;
for (i = 1; i < 10; i++) {
byte = (uint8_t)ptr[i];
val += (byte - 1) << (i * 7);
if (!(byte & 0x80)) {
ret.ptr = ptr + i + 1;
ret.val = val;
return ret;
}
}
return ret;
}
UPB_FORCEINLINE
static const char* decode_varint64(upb_Decoder* d, const char* ptr,
uint64_t* val) {
uint64_t byte = (uint8_t)*ptr;
if (UPB_LIKELY((byte & 0x80) == 0)) {
*val = byte;
return ptr + 1;
} else {
decode_vret res = decode_longvarint64(ptr, byte);
if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
*val = res.val;
return res.ptr;
}
}
UPB_FORCEINLINE
static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) {
uint64_t byte = (uint8_t)*ptr;
if (UPB_LIKELY((byte & 0x80) == 0)) {
*val = byte;
return ptr + 1;
} else {
const char* start = ptr;
decode_vret res = decode_longvarint64(ptr, byte);
if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
return decode_err(d, kUpb_DecodeStatus_Malformed);
}
*val = res.val;
return res.ptr;
}
}
static void decode_munge_int32(wireval* val) {
if (!_upb_IsLittleEndian()) {
/* The next stage will memcpy(dst, &val, 4) */
val->uint32_val = val->uint64_val;
}
}
static void decode_munge(int type, wireval* val) {
switch (type) {
case kUpb_FieldType_Bool:
val->bool_val = val->uint64_val != 0;
break;
case kUpb_FieldType_SInt32: {
uint32_t n = val->uint64_val;
val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
break;
}
case kUpb_FieldType_SInt64: {
uint64_t n = val->uint64_val;
val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
break;
}
case kUpb_FieldType_Int32:
case kUpb_FieldType_UInt32:
case kUpb_FieldType_Enum:
decode_munge_int32(val);
break;
}
}
static upb_Message* decode_newsubmsg(upb_Decoder* d,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field) {
const upb_MiniTable* subl = subs[field->submsg_index].submsg;
return _upb_Message_New_inl(subl, &d->arena);
}
UPB_NOINLINE
const char* decode_isdonefallback(upb_Decoder* d, const char* ptr,
int overrun) {
int status;
ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
if (ptr == NULL) {
return decode_err(d, status);
}
return ptr;
}
static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size,
upb_StringView* str) {
if (d->options & kUpb_DecodeOption_AliasString) {
str->data = ptr;
} else {
char* data = upb_Arena_Malloc(&d->arena, size);
if (!data) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
memcpy(data, ptr, size);
str->data = data;
}
str->size = size;
return ptr + size;
}
UPB_FORCEINLINE
static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr,
upb_Message* submsg,
const upb_MiniTable* subl, int size) {
int saved_delta = decode_pushlimit(d, ptr, size);
if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
ptr = decode_msg(d, ptr, submsg, subl);
if (d->end_group != DECODE_NOGROUP)
return decode_err(d, kUpb_DecodeStatus_Malformed);
decode_poplimit(d, ptr, saved_delta);
d->depth++;
return ptr;
}
UPB_FORCEINLINE
static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr,
upb_Message* submsg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field, int size) {
return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
size);
}
UPB_FORCEINLINE
static const char* decode_group(upb_Decoder* d, const char* ptr,
upb_Message* submsg, const upb_MiniTable* subl,
uint32_t number) {
if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
if (decode_isdone(d, &ptr)) {
return decode_err(d, kUpb_DecodeStatus_Malformed);
}
ptr = decode_msg(d, ptr, submsg, subl);
if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
d->end_group = DECODE_NOGROUP;
d->depth++;
return ptr;
}
UPB_FORCEINLINE
static const char* decode_togroup(upb_Decoder* d, const char* ptr,
upb_Message* submsg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field) {
const upb_MiniTable* subl = subs[field->submsg_index].submsg;
return decode_group(d, ptr, submsg, subl, field->number);
}
static char* encode_varint32(uint32_t val, char* ptr) {
do {
uint8_t byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
*(ptr++) = byte;
} while (val);
return ptr;
}
static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
uint32_t val1, uint32_t val2) {
char buf[20];
char* end = buf;
end = encode_varint32(val1, end);
end = encode_varint32(val2, end);
if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) {
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
}
UPB_NOINLINE
static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
upb_Message* msg, const upb_MiniTable_Enum* e,
const upb_MiniTable_Field* field,
uint32_t v) {
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if ((uint32_t)e->values[i] == v) return true;
}
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
// just re-encode the tag and value here.
uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
upb_Decode_AddUnknownVarints(d, msg, tag, v);
return false;
}
UPB_FORCEINLINE
static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg,
const upb_MiniTable_Enum* e,
const upb_MiniTable_Field* field, wireval* val) {
uint32_t v = val->uint32_val;
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
return decode_checkenum_slow(d, ptr, msg, e, field, v);
}
UPB_NOINLINE
static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr,
upb_Message* msg, upb_Array* arr,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field,
wireval* val) {
const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
arr->len++;
memcpy(mem, val, 4);
return ptr;
}
UPB_FORCEINLINE
static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr,
upb_Array* arr, wireval* val,
const upb_MiniTable_Field* field,
int lg2) {
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
// Length isn't a round multiple of elem size.
return decode_err(d, kUpb_DecodeStatus_Malformed);
}
decode_reserve(d, arr, count);
void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
// Note: if/when the decoder supports multi-buffer input, we will need to
// handle buffer seams here.
if (_upb_IsLittleEndian()) {
memcpy(mem, ptr, val->size);
ptr += val->size;
} else {
const char* end = ptr + val->size;
char* dst = mem;
while (ptr < end) {
if (lg2 == 2) {
uint32_t val;
memcpy(&val, ptr, sizeof(val));
val = _upb_BigEndian_Swap32(val);
memcpy(dst, &val, sizeof(val));
} else {
UPB_ASSERT(lg2 == 3);
uint64_t val;
memcpy(&val, ptr, sizeof(val));
val = _upb_BigEndian_Swap64(val);
memcpy(dst, &val, sizeof(val));
}
ptr += 1 << lg2;
dst += 1 << lg2;
}
}
return ptr;
}
UPB_FORCEINLINE
static const char* decode_varint_packed(upb_Decoder* d, const char* ptr,
upb_Array* arr, wireval* val,
const upb_MiniTable_Field* field,
int lg2) {
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
UPB_NOINLINE
static const char* decode_enum_packed(upb_Decoder* d, const char* ptr,
upb_Message* msg, upb_Array* arr,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field,
wireval* val) {
const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge_int32(&elem);
if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
continue;
}
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
}
arr->len++;
memcpy(out, &elem, 4);
out += 4;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
static const char* decode_toarray(upb_Decoder* d, const char* ptr,
upb_Message* msg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field,
wireval* val, int op) {
upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void);
upb_Array* arr = *arrp;
void* mem;
if (arr) {
decode_reserve(d, arr, 1);
} else {
size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
arr = _upb_Array_New(&d->arena, 4, lg2);
if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
*arrp = arr;
}
switch (op) {
case OP_SCALAR_LG2(0):
case OP_SCALAR_LG2(2):
case OP_SCALAR_LG2(3):
/* Append scalar value. */
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
arr->len++;
memcpy(mem, val, 1 << op);
return ptr;
case OP_STRING:
decode_verifyutf8(d, ptr, val->size);
/* Fallthrough. */
case OP_BYTES: {
/* Append bytes. */
upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->len;
arr->len++;
return decode_readstr(d, ptr, val->size, str);
}
case OP_SUBMSG: {
/* Append submessage / group. */
upb_Message* submsg = decode_newsubmsg(d, subs, field);
*UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void*), upb_Message*) =
submsg;
arr->len++;
if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
return decode_togroup(d, ptr, submsg, subs, field);
} else {
return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
}
}
case OP_FIXPCK_LG2(2):
case OP_FIXPCK_LG2(3):
return decode_fixed_packed(d, ptr, arr, val, field,
op - OP_FIXPCK_LG2(0));
case OP_VARPCK_LG2(0):
case OP_VARPCK_LG2(2):
case OP_VARPCK_LG2(3):
return decode_varint_packed(d, ptr, arr, val, field,
op - OP_VARPCK_LG2(0));
case OP_ENUM:
return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
case OP_PACKED_ENUM:
return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
default:
UPB_UNREACHABLE();
}
}
static const char* decode_tomap(upb_Decoder* d, const char* ptr,
upb_Message* msg, const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field,
wireval* val) {
upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
upb_Map* map = *map_p;
upb_MapEntry ent;
const upb_MiniTable* entry = subs[field->submsg_index].submsg;
if (!map) {
/* Lazily create map. */
const upb_MiniTable_Field* key_field = &entry->fields[0];
const upb_MiniTable_Field* val_field = &entry->fields[1];
char key_size = desctype_to_mapsize[key_field->descriptortype];
char val_size = desctype_to_mapsize[val_field->descriptortype];
UPB_ASSERT(key_field->offset == 0);
UPB_ASSERT(val_field->offset == sizeof(upb_StringView));
map = _upb_Map_New(&d->arena, key_size, val_size);
*map_p = map;
}
/* Parse map entry. */
memset(&ent, 0, sizeof(ent));
if (entry->fields[1].descriptortype == kUpb_FieldType_Message ||
entry->fields[1].descriptortype == kUpb_FieldType_Group) {
/* Create proactively to handle the case where it doesn't appear. */
ent.v.val =
upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena));
}
const char* start = ptr;
ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
// check if ent had any unknown fields
size_t size;
upb_Message_GetUnknown(&ent.k, &size);
if (size != 0) {
uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
} else {
_upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
}
return ptr;
}
static const char* decode_tomsg(upb_Decoder* d, const char* ptr,
upb_Message* msg, const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field, wireval* val,
int op) {
void* mem = UPB_PTR_AT(msg, field->offset, void);
int type = field->descriptortype;
if (UPB_UNLIKELY(op == OP_ENUM) &&
!decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
val)) {
return ptr;
}
/* Set presence if necessary. */
if (field->presence > 0) {
_upb_sethas_field(msg, field);
} else if (field->presence < 0) {
/* Oneof case */
uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
if (op == OP_SUBMSG && *oneof_case != field->number) {
memset(mem, 0, sizeof(void*));
}
*oneof_case = field->number;
}
/* Store into message. */
switch (op) {
case OP_SUBMSG: {
upb_Message** submsgp = mem;
upb_Message* submsg = *submsgp;
if (!submsg) {
submsg = decode_newsubmsg(d, subs, field);
*submsgp = submsg;
}
if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) {
ptr = decode_togroup(d, ptr, submsg, subs, field);
} else {
ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
}
break;
}
case OP_STRING:
decode_verifyutf8(d, ptr, val->size);
/* Fallthrough. */
case OP_BYTES:
return decode_readstr(d, ptr, val->size, mem);
case OP_SCALAR_LG2(3):
memcpy(mem, val, 8);
break;
case OP_ENUM:
case OP_SCALAR_LG2(2):
memcpy(mem, val, 4);
break;
case OP_SCALAR_LG2(0):
memcpy(mem, val, 1);
break;
default:
UPB_UNREACHABLE();
}
return ptr;
}
UPB_NOINLINE
const char* decode_checkrequired(upb_Decoder* d, const char* ptr,
const upb_Message* msg,
const upb_MiniTable* l) {
assert(l->required_count);
if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
return ptr;
}
uint64_t msg_head;
memcpy(&msg_head, msg, 8);
msg_head = _upb_BigEndian_Swap64(msg_head);
if (upb_MiniTable_requiredmask(l) & ~msg_head) {
d->missing_required = true;
}
return ptr;
}
UPB_FORCEINLINE
static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr,
upb_Message* msg,
const upb_MiniTable* layout) {
#if UPB_FASTTABLE
if (layout && layout->table_mask != (unsigned char)-1) {
uint16_t tag = fastdecode_loadtag(*ptr);
intptr_t table = decode_totable(layout);
*ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
return true;
}
#endif
return false;
}
static const char* decode_msgset(upb_Decoder* d, const char* ptr,
upb_Message* msg,
const upb_MiniTable* layout) {
// We create a temporary upb_MiniTable here and abuse its fields as temporary
// storage, to avoid creating lots of MessageSet-specific parsing code-paths:
// 1. We store 'layout' in item_layout.subs. We will need this later as
// a key to look up extensions for this MessageSet.
// 2. We use item_layout.fields as temporary storage to store the extension
// we
// found when parsing the type id.
upb_MiniTable item_layout = {
.subs = (const upb_MiniTable_Sub[]){{.submsg = layout}},
.fields = NULL,
.size = 0,
.field_count = 0,
.ext = kUpb_ExtMode_IsMessageSet_ITEM,
.dense_below = 0,
.table_mask = -1};
return decode_group(d, ptr, msg, &item_layout, 1);
}
static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d,
const upb_MiniTable* l,
uint32_t field_number,
int* last_field_index) {
static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0};
if (l == NULL) return &none;
size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
if (idx < l->dense_below) {
/* Fastest case: index into dense fields. */
goto found;
}
if (l->dense_below < l->field_count) {
/* Linear search non-dense fields. Resume scanning from last_field_index
* since fields are usually in order. */
int last = *last_field_index;
for (idx = last; idx < l->field_count; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
for (idx = l->dense_below; idx < last; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
}
if (d->extreg) {
switch (l->ext) {
case kUpb_ExtMode_Extendable: {
const upb_MiniTable_Extension* ext =
_upb_extreg_get(d->extreg, l, field_number);
if (ext) return &ext->field;
break;
}
case kUpb_ExtMode_IsMessageSet:
if (field_number == _UPB_MSGSET_ITEM) {
static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
return &item;
}
break;
case kUpb_ExtMode_IsMessageSet_ITEM:
switch (field_number) {
case _UPB_MSGSET_TYPEID: {
static upb_MiniTable_Field type_id = {
0, 0, 0, 0, TYPE_MSGSET_TYPE_ID, 0};
return &type_id;
}
case _UPB_MSGSET_MESSAGE:
if (l->fields) {
// We saw type_id previously and succeeded in looking up msg.
return l->fields;
} else {
// TODO: out of order MessageSet.
// This is a very rare case: all serializers will emit in-order
// MessageSets. To hit this case there has to be some kind of
// re-ordering proxy. We should eventually handle this case, but
// not today.
}
break;
}
}
}
return &none; /* Unknown field. */
found:
UPB_ASSERT(l->fields[idx].number == field_number);
*last_field_index = idx;
return &l->fields[idx];
}
UPB_FORCEINLINE
static const char* decode_wireval(upb_Decoder* d, const char* ptr,
const upb_MiniTable_Field* field,
int wire_type, wireval* val, int* op) {
switch (wire_type) {
case kUpb_WireType_Varint:
ptr = decode_varint64(d, ptr, &val->uint64_val);
*op = varint_ops[field->descriptortype];
decode_munge(field->descriptortype, val);
return ptr;
case kUpb_WireType_32Bit:
memcpy(&val->uint32_val, ptr, 4);
val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val);
*op = OP_SCALAR_LG2(2);
if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
*op = OP_UNKNOWN;
}
return ptr + 4;
case kUpb_WireType_64Bit:
memcpy(&val->uint64_val, ptr, 8);
val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val);
*op = OP_SCALAR_LG2(3);
if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
*op = OP_UNKNOWN;
}
return ptr + 8;
case kUpb_WireType_Delimited: {
int ndx = field->descriptortype;
uint64_t size;
if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT;
ptr = decode_varint64(d, ptr, &size);
if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) {
break; /* Length overflow. */
}
*op = delim_ops[ndx];
val->size = size;
return ptr;
}
case kUpb_WireType_StartGroup:
val->uint32_val = field->number;
if (field->descriptortype == kUpb_FieldType_Group) {
*op = OP_SUBMSG;
} else if (field->descriptortype == TYPE_MSGSET_ITEM) {
*op = OP_MSGSET_ITEM;
} else {
*op = OP_UNKNOWN;
}
return ptr;
default:
break;
}
return decode_err(d, kUpb_DecodeStatus_Malformed);
}
UPB_FORCEINLINE
static const char* decode_known(upb_Decoder* d, const char* ptr,
upb_Message* msg, const upb_MiniTable* layout,
const upb_MiniTable_Field* field, int op,
wireval* val) {
const upb_MiniTable_Sub* subs = layout->subs;
uint8_t mode = field->mode;
if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
const upb_MiniTable_Extension* ext_layout =
(const upb_MiniTable_Extension*)field;
upb_Message_Extension* ext =
_upb_Message_Getorcreateext(msg, ext_layout, &d->arena);
if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
msg = &ext->data;
subs = &ext->ext->sub;
}
switch (mode & kUpb_FieldMode_Mask) {
case kUpb_FieldMode_Array:
return decode_toarray(d, ptr, msg, subs, field, val, op);
case kUpb_FieldMode_Map:
return decode_tomap(d, ptr, msg, subs, field, val);
case kUpb_FieldMode_Scalar:
return decode_tomsg(d, ptr, msg, subs, field, val, op);
default:
UPB_UNREACHABLE();
}
}
static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) {
uint32_t seen = 0;
do {
ptr--;
seen <<= 7;
seen |= *ptr & 0x7f;
} while (seen != val);
return ptr;
}
static const char* decode_unknown(upb_Decoder* d, const char* ptr,
upb_Message* msg, int field_number,
int wire_type, wireval val) {
if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);
// Since unknown fields are the uncommon case, we do a little extra work here
// to walk backwards through the buffer to find the field start. This frees
// up a register in the fast paths (when the field is known), which leads to
// significant speedups in benchmarks.
const char* start = ptr;
if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
if (msg) {
switch (wire_type) {
case kUpb_WireType_Varint:
case kUpb_WireType_Delimited:
start--;
while (start[-1] & 0x80) start--;
break;
case kUpb_WireType_32Bit:
start -= 4;
break;
case kUpb_WireType_64Bit:
start -= 8;
break;
default:
break;
}
assert(start == d->debug_valstart);
uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
start = decode_reverse_skip_varint(start, tag);
assert(start == d->debug_tagstart);
if (wire_type == kUpb_WireType_StartGroup) {
d->unknown = start;
d->unknown_msg = msg;
ptr = decode_group(d, ptr, NULL, NULL, field_number);
start = d->unknown;
d->unknown_msg = NULL;
d->unknown = NULL;
}
if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
} else if (wire_type == kUpb_WireType_StartGroup) {
ptr = decode_group(d, ptr, NULL, NULL, field_number);
}
return ptr;
}
UPB_NOINLINE
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
const upb_MiniTable* layout) {
int last_field_index = 0;
#if UPB_FASTTABLE
// The first time we want to skip fast dispatch, because we may have just been
// invoked by the fast parser to handle a case that it bailed on.
if (!decode_isdone(d, &ptr)) goto nofast;
#endif
while (!decode_isdone(d, &ptr)) {
uint32_t tag;
const upb_MiniTable_Field* field;
int field_number;
int wire_type;
wireval val;
int op;
if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
#if UPB_FASTTABLE
nofast:
#endif
#ifndef NDEBUG
d->debug_tagstart = ptr;
#endif
UPB_ASSERT(ptr < d->limit_ptr);
ptr = decode_tag(d, ptr, &tag);
field_number = tag >> 3;
wire_type = tag & 7;
#ifndef NDEBUG
d->debug_valstart = ptr;
#endif
if (wire_type == kUpb_WireType_EndGroup) {
d->end_group = field_number;
return ptr;
}
field = decode_findfield(d, layout, field_number, &last_field_index);
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
if (op >= 0) {
ptr = decode_known(d, ptr, msg, layout, field, op, &val);
} else {
switch (op) {
case OP_UNKNOWN:
ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
break;
case OP_MSGSET_ITEM:
ptr = decode_msgset(d, ptr, msg, layout);
break;
case OP_MSGSET_TYPEID: {
const upb_MiniTable_Extension* ext = _upb_extreg_get(
d->extreg, layout->subs[0].submsg, val.uint64_val);
if (ext) ((upb_MiniTable*)layout)->fields = &ext->field;
break;
}
}
}
}
return UPB_UNLIKELY(layout && layout->required_count)
? decode_checkrequired(d, ptr, msg, layout)
: ptr;
}
const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr,
upb_Message* msg, intptr_t table,
uint64_t hasbits, uint64_t data) {
(void)data;
*(uint32_t*)msg |= hasbits;
return decode_msg(d, ptr, msg, decode_totablep(table));
}
static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf,
void* msg, const upb_MiniTable* l) {
if (!decode_tryfastdispatch(d, &buf, msg, l)) {
decode_msg(d, buf, msg, l);
}
if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
return kUpb_DecodeStatus_Ok;
}
upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
const upb_MiniTable* l,
const upb_ExtensionRegistry* extreg, int options,
upb_Arena* arena) {
upb_Decoder state;
unsigned depth = (unsigned)options >> 16;
if (size <= 16) {
memset(&state.patch, 0, 32);
if (size) memcpy(&state.patch, buf, size);
buf = state.patch;
state.end = buf + size;
state.limit = 0;
options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
} else {
state.end = buf + size - 16;
state.limit = 16;
}
state.extreg = extreg;
state.limit_ptr = state.end;
state.unknown_msg = NULL;
state.depth = depth ? depth : 64;
state.end_group = DECODE_NOGROUP;
state.options = (uint16_t)options;
state.missing_required = false;
state.arena.head = arena->head;
state.arena.last_size = arena->last_size;
state.arena.cleanup_metadata = arena->cleanup_metadata;
state.arena.parent = arena;
upb_DecodeStatus status = UPB_SETJMP(state.err);
if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
status = decode_top(&state, buf, msg, l);
}
arena->head.ptr = state.arena.head.ptr;
arena->head.end = state.arena.head.end;
arena->cleanup_metadata = state.arena.cleanup_metadata;
return status;
}
#undef OP_UNKNOWN
#undef OP_SKIP
#undef OP_SCALAR_LG2
#undef OP_FIXPCK_LG2
#undef OP_VARPCK_LG2
#undef OP_STRING
#undef OP_BYTES
#undef OP_SUBMSG
/** upb/encode.c ************************************************************/
/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
#include <setjmp.h>
#include <string.h>
/* Must be last. */
#define UPB_PB_VARINT_MAX_LEN 10
UPB_NOINLINE
static size_t encode_varint64(uint64_t val, char* buf) {
size_t i = 0;
do {
uint8_t byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
buf[i++] = byte;
} while (val);
return i;
}
static uint32_t encode_zz32(int32_t n) {
return ((uint32_t)n << 1) ^ (n >> 31);
}
static uint64_t encode_zz64(int64_t n) {
return ((uint64_t)n << 1) ^ (n >> 63);
}
typedef struct {
jmp_buf err;
upb_alloc* alloc;
char *buf, *ptr, *limit;
int options;
int depth;
_upb_mapsorter sorter;
} upb_encstate;
static size_t upb_roundup_pow2(size_t bytes) {
size_t ret = 128;
while (ret < bytes) {
ret *= 2;
}
return ret;
}
UPB_NORETURN static void encode_err(upb_encstate* e) { UPB_LONGJMP(e->err, 1); }
UPB_NOINLINE
static void encode_growbuffer(upb_encstate* e, size_t bytes) {
size_t old_size = e->limit - e->buf;
size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
char* new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
if (!new_buf) encode_err(e);
/* We want previous data at the end, realloc() put it at the beginning. */
if (old_size > 0) {
memmove(new_buf + new_size - old_size, e->buf, old_size);
}
e->ptr = new_buf + new_size - (e->limit - e->ptr);
e->limit = new_buf + new_size;
e->buf = new_buf;
e->ptr -= bytes;
}
/* Call to ensure that at least "bytes" bytes are available for writing at
* e->ptr. Returns false if the bytes could not be allocated. */
UPB_FORCEINLINE
static void encode_reserve(upb_encstate* e, size_t bytes) {
if ((size_t)(e->ptr - e->buf) < bytes) {
encode_growbuffer(e, bytes);
return;
}
e->ptr -= bytes;
}
/* Writes the given bytes to the buffer, handling reserve/advance. */
static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
if (len == 0) return; /* memcpy() with zero size is UB */
encode_reserve(e, len);
memcpy(e->ptr, data, len);
}
static void encode_fixed64(upb_encstate* e, uint64_t val) {
val = _upb_BigEndian_Swap64(val);
encode_bytes(e, &val, sizeof(uint64_t));
}
static void encode_fixed32(upb_encstate* e, uint32_t val) {
val = _upb_BigEndian_Swap32(val);
encode_bytes(e, &val, sizeof(uint32_t));
}
UPB_NOINLINE
static void encode_longvarint(upb_encstate* e, uint64_t val) {
size_t len;
char* start;
encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
len = encode_varint64(val, e->ptr);
start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
memmove(start, e->ptr, len);
e->ptr = start;
}
UPB_FORCEINLINE
static void encode_varint(upb_encstate* e, uint64_t val) {
if (val < 128 && e->ptr != e->buf) {
--e->ptr;
*e->ptr = val;
} else {
encode_longvarint(e, val);
}
}
static void encode_double(upb_encstate* e, double d) {
uint64_t u64;
UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
memcpy(&u64, &d, sizeof(uint64_t));
encode_fixed64(e, u64);
}
static void encode_float(upb_encstate* e, float d) {
uint32_t u32;
UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
memcpy(&u32, &d, sizeof(uint32_t));
encode_fixed32(e, u32);
}
static void encode_tag(upb_encstate* e, uint32_t field_number,
uint8_t wire_type) {
encode_varint(e, (field_number << 3) | wire_type);
}
static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
size_t elem_size, uint32_t tag) {
size_t bytes = arr->len * elem_size;
const char* data = _upb_array_constptr(arr);
const char* ptr = data + bytes - elem_size;
if (tag || !_upb_IsLittleEndian()) {
while (true) {
if (elem_size == 4) {
uint32_t val;
memcpy(&val, ptr, sizeof(val));
val = _upb_BigEndian_Swap32(val);
encode_bytes(e, &val, elem_size);
} else {
UPB_ASSERT(elem_size == 8);
uint64_t val;
memcpy(&val, ptr, sizeof(val));
val = _upb_BigEndian_Swap64(val);
encode_bytes(e, &val, elem_size);
}
if (tag) encode_varint(e, tag);
if (ptr == data) break;
ptr -= elem_size;
}
} else {
encode_bytes(e, data, bytes);
}
}
static void encode_message(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable* m, size_t* size);
static void encode_scalar(upb_encstate* e, const void* _field_mem,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* f) {
const char* field_mem = _field_mem;
int wire_type;
#define CASE(ctype, type, wtype, encodeval) \
{ \
ctype val = *(ctype*)field_mem; \
encode_##type(e, encodeval); \
wire_type = wtype; \
break; \
}
switch (f->descriptortype) {
case kUpb_FieldType_Double:
CASE(double, double, kUpb_WireType_64Bit, val);
case kUpb_FieldType_Float:
CASE(float, float, kUpb_WireType_32Bit, val);
case kUpb_FieldType_Int64:
case kUpb_FieldType_UInt64:
CASE(uint64_t, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_UInt32:
CASE(uint32_t, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_Int32:
case kUpb_FieldType_Enum:
CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
case kUpb_FieldType_SFixed64:
case kUpb_FieldType_Fixed64:
CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
case kUpb_FieldType_Fixed32:
case kUpb_FieldType_SFixed32:
CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
case kUpb_FieldType_Bool:
CASE(bool, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_SInt32:
CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
case kUpb_FieldType_SInt64:
CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
case kUpb_FieldType_String:
case kUpb_FieldType_Bytes: {
upb_StringView view = *(upb_StringView*)field_mem;
encode_bytes(e, view.data, view.size);
encode_varint(e, view.size);
wire_type = kUpb_WireType_Delimited;
break;
}
case kUpb_FieldType_Group: {
size_t size;
void* submsg = *(void**)field_mem;
const upb_MiniTable* subm = subs[f->submsg_index].submsg;
if (submsg == NULL) {
return;
}
if (--e->depth == 0) encode_err(e);
encode_tag(e, f->number, kUpb_WireType_EndGroup);
encode_message(e, submsg, subm, &size);
wire_type = kUpb_WireType_StartGroup;
e->depth++;
break;
}
case kUpb_FieldType_Message: {
size_t size;
void* submsg = *(void**)field_mem;
const upb_MiniTable* subm = subs[f->submsg_index].submsg;
if (submsg == NULL) {
return;
}
if (--e->depth == 0) encode_err(e);
encode_message(e, submsg, subm, &size);
encode_varint(e, size);
wire_type = kUpb_WireType_Delimited;
e->depth++;
break;
}
default:
UPB_UNREACHABLE();
}
#undef CASE
encode_tag(e, f->number, wire_type);
}
static void encode_array(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* f) {
const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
bool packed = f->mode & kUpb_LabelFlags_IsPacked;
size_t pre_len = e->limit - e->ptr;
if (arr == NULL || arr->len == 0) {
return;
}
#define VARINT_CASE(ctype, encode) \
{ \
const ctype* start = _upb_array_constptr(arr); \
const ctype* ptr = start + arr->len; \
uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
do { \
ptr--; \
encode_varint(e, encode); \
if (tag) encode_varint(e, tag); \
} while (ptr != start); \
} \
break;
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
switch (f->descriptortype) {
case kUpb_FieldType_Double:
encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
break;
case kUpb_FieldType_Float:
encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
break;
case kUpb_FieldType_SFixed64:
case kUpb_FieldType_Fixed64:
encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
break;
case kUpb_FieldType_Fixed32:
case kUpb_FieldType_SFixed32:
encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
break;
case kUpb_FieldType_Int64:
case kUpb_FieldType_UInt64:
VARINT_CASE(uint64_t, *ptr);
case kUpb_FieldType_UInt32:
VARINT_CASE(uint32_t, *ptr);
case kUpb_FieldType_Int32:
case kUpb_FieldType_Enum:
VARINT_CASE(int32_t, (int64_t)*ptr);
case kUpb_FieldType_Bool:
VARINT_CASE(bool, *ptr);
case kUpb_FieldType_SInt32:
VARINT_CASE(int32_t, encode_zz32(*ptr));
case kUpb_FieldType_SInt64:
VARINT_CASE(int64_t, encode_zz64(*ptr));
case kUpb_FieldType_String:
case kUpb_FieldType_Bytes: {
const upb_StringView* start = _upb_array_constptr(arr);
const upb_StringView* ptr = start + arr->len;
do {
ptr--;
encode_bytes(e, ptr->data, ptr->size);
encode_varint(e, ptr->size);
encode_tag(e, f->number, kUpb_WireType_Delimited);
} while (ptr != start);
return;
}
case kUpb_FieldType_Group: {
const void* const* start = _upb_array_constptr(arr);
const void* const* ptr = start + arr->len;
const upb_MiniTable* subm = subs[f->submsg_index].submsg;
if (--e->depth == 0) encode_err(e);
do {
size_t size;
ptr--;
encode_tag(e, f->number, kUpb_WireType_EndGroup);
encode_message(e, *ptr, subm, &size);
encode_tag(e, f->number, kUpb_WireType_StartGroup);
} while (ptr != start);
e->depth++;
return;
}
case kUpb_FieldType_Message: {
const void* const* start = _upb_array_constptr(arr);
const void* const* ptr = start + arr->len;
const upb_MiniTable* subm = subs[f->submsg_index].submsg;
if (--e->depth == 0) encode_err(e);
do {
size_t size;
ptr--;
encode_message(e, *ptr, subm, &size);
encode_varint(e, size);
encode_tag(e, f->number, kUpb_WireType_Delimited);
} while (ptr != start);
e->depth++;
return;
}
}
#undef VARINT_CASE
if (packed) {
encode_varint(e, e->limit - e->ptr - pre_len);
encode_tag(e, f->number, kUpb_WireType_Delimited);
}
}
static void encode_mapentry(upb_encstate* e, uint32_t number,
const upb_MiniTable* layout,
const upb_MapEntry* ent) {
const upb_MiniTable_Field* key_field = &layout->fields[0];
const upb_MiniTable_Field* val_field = &layout->fields[1];
size_t pre_len = e->limit - e->ptr;
size_t size;
encode_scalar(e, &ent->v, layout->subs, val_field);
encode_scalar(e, &ent->k, layout->subs, key_field);
size = (e->limit - e->ptr) - pre_len;
encode_varint(e, size);
encode_tag(e, number, kUpb_WireType_Delimited);
}
static void encode_map(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* f) {
const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
const upb_MiniTable* layout = subs[f->submsg_index].submsg;
UPB_ASSERT(layout->field_count == 2);
if (map == NULL) return;
if (e->options & kUpb_Encode_Deterministic) {
_upb_sortedmap sorted;
_upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
&sorted);
upb_MapEntry ent;
while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
encode_mapentry(e, f->number, layout, &ent);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
} else {
upb_strtable_iter i;
upb_strtable_begin(&i, &map->table);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_StringView key = upb_strtable_iter_key(&i);
const upb_value val = upb_strtable_iter_value(&i);
upb_MapEntry ent;
_upb_map_fromkey(key, &ent.k, map->key_size);
_upb_map_fromvalue(val, &ent.v, map->val_size);
encode_mapentry(e, f->number, layout, &ent);
}
}
}
static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* f) {
if (f->presence == 0) {
/* Proto3 presence or map/array. */
const void* mem = UPB_PTR_AT(msg, f->offset, void);
switch (f->mode >> kUpb_FieldRep_Shift) {
case kUpb_FieldRep_1Byte: {
char ch;
memcpy(&ch, mem, 1);
return ch != 0;
}
#if UINTPTR_MAX == 0xffffffff
case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_4Byte: {
uint32_t u32;
memcpy(&u32, mem, 4);
return u32 != 0;
}
#if UINTPTR_MAX != 0xffffffff
case kUpb_FieldRep_Pointer:
#endif
case kUpb_FieldRep_8Byte: {
uint64_t u64;
memcpy(&u64, mem, 8);
return u64 != 0;
}
case kUpb_FieldRep_StringView: {
const upb_StringView* str = (const upb_StringView*)mem;
return str->size != 0;
}
default:
UPB_UNREACHABLE();
}
} else if (f->presence > 0) {
/* Proto2 presence: hasbit. */
return _upb_hasbit_field(msg, f);
} else {
/* Field is in a oneof. */
return _upb_getoneofcase_field(msg, f) == f->number;
}
}
static void encode_field(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable_Sub* subs,
const upb_MiniTable_Field* field) {
switch (upb_FieldMode_Get(field)) {
case kUpb_FieldMode_Array:
encode_array(e, msg, subs, field);
break;
case kUpb_FieldMode_Map:
encode_map(e, msg, subs, field);
break;
case kUpb_FieldMode_Scalar:
encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
break;
default:
UPB_UNREACHABLE();
}
}
/* message MessageSet {
* repeated group Item = 1 {
* required int32 type_id = 2;
* required string message = 3;
* }
* } */
static void encode_msgset_item(upb_encstate* e,
const upb_Message_Extension* ext) {
size_t size;
encode_tag(e, 1, kUpb_WireType_EndGroup);
encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
encode_varint(e, size);
encode_tag(e, 3, kUpb_WireType_Delimited);
encode_varint(e, ext->ext->field.number);
encode_tag(e, 2, kUpb_WireType_Varint);
encode_tag(e, 1, kUpb_WireType_StartGroup);
}
static void encode_message(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable* m, size_t* size) {
size_t pre_len = e->limit - e->ptr;
if ((e->options & kUpb_Encode_CheckRequired) && m->required_count) {
uint64_t msg_head;
memcpy(&msg_head, msg, 8);
msg_head = _upb_BigEndian_Swap64(msg_head);
if (upb_MiniTable_requiredmask(m) & ~msg_head) {
encode_err(e);
}
}
if ((e->options & kUpb_Encode_SkipUnknown) == 0) {
size_t unknown_size;
const char* unknown = upb_Message_GetUnknown(msg, &unknown_size);
if (unknown) {
encode_bytes(e, unknown, unknown_size);
}
}
if (m->ext != kUpb_ExtMode_NonExtendable) {
/* Encode all extensions together. Unlike C++, we do not attempt to keep
* these in field number order relative to normal fields or even to each
* other. */
size_t ext_count;
const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count);
if (ext_count) {
const upb_Message_Extension* end = ext + ext_count;
for (; ext != end; ext++) {
if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) {
encode_msgset_item(e, ext);
} else {
encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
}
}
}
}
if (m->field_count) {
const upb_MiniTable_Field* f = &m->fields[m->field_count];
const upb_MiniTable_Field* first = &m->fields[0];
while (f != first) {
f--;
if (encode_shouldencode(e, msg, m->subs, f)) {
encode_field(e, msg, m->subs, f);
}
}
}
*size = (e->limit - e->ptr) - pre_len;
}
char* upb_Encode(const void* msg, const upb_MiniTable* l, int options,
upb_Arena* arena, size_t* size) {
upb_encstate e;
unsigned depth = (unsigned)options >> 16;
e.alloc = upb_Arena_Alloc(arena);
e.buf = NULL;
e.limit = NULL;
e.ptr = NULL;
e.depth = depth ? depth : 64;
e.options = options;
_upb_mapsorter_init(&e.sorter);
char* ret = NULL;
if (UPB_SETJMP(e.err)) {
*size = 0;
ret = NULL;
} else {
encode_message(&e, msg, l, size);
*size = e.limit - e.ptr;
if (*size == 0) {
static char ch;
ret = &ch;
} else {
UPB_ASSERT(e.ptr);
ret = e.ptr;
}
}
_upb_mapsorter_destroy(&e.sorter);
return ret;
}
/** upb/msg.c ************************************************************/
/** upb_Message
* *******************************************************************/
static const size_t overhead = sizeof(upb_Message_InternalData);
static const upb_Message_Internal* upb_Message_Getinternal_const(
const upb_Message* msg) {
ptrdiff_t size = sizeof(upb_Message_Internal);
return (upb_Message_Internal*)((char*)msg - size);
}
upb_Message* _upb_Message_New(const upb_MiniTable* l, upb_Arena* a) {
return _upb_Message_New_inl(l, a);
}
void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) {
void* mem = UPB_PTR_AT(msg, -sizeof(upb_Message_Internal), char);
memset(mem, 0, upb_msg_sizeof(l));
}
static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
upb_Message_Internal* in = upb_Message_Getinternal(msg);
if (!in->internal) {
/* No internal data, allocate from scratch. */
size_t size = UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
upb_Message_InternalData* internal = upb_Arena_Malloc(arena, size);
if (!internal) return false;
internal->size = size;
internal->unknown_end = overhead;
internal->ext_begin = size;
in->internal = internal;
} else if (in->internal->ext_begin - in->internal->unknown_end < need) {
/* Internal data is too small, reallocate. */
size_t new_size = _upb_Log2CeilingSize(in->internal->size + need);
size_t ext_bytes = in->internal->size - in->internal->ext_begin;
size_t new_ext_begin = new_size - ext_bytes;
upb_Message_InternalData* internal =
upb_Arena_Realloc(arena, in->internal, in->internal->size, new_size);
if (!internal) return false;
if (ext_bytes) {
/* Need to move extension data to the end. */
char* ptr = (char*)internal;
memmove(ptr + new_ext_begin, ptr + internal->ext_begin, ext_bytes);
}
internal->ext_begin = new_ext_begin;
internal->size = new_size;
in->internal = internal;
}
UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
return true;
}
bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
upb_Arena* arena) {
if (!realloc_internal(msg, len, arena)) return false;
upb_Message_Internal* in = upb_Message_Getinternal(msg);
memcpy(UPB_PTR_AT(in->internal, in->internal->unknown_end, char), data, len);
in->internal->unknown_end += len;
return true;
}
void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) {
upb_Message_Internal* in = upb_Message_Getinternal(msg);
if (in->internal) {
in->internal->unknown_end = overhead;
}
}
const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) {
const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
if (in->internal) {
*len = in->internal->unknown_end - overhead;
return (char*)(in->internal + 1);
} else {
*len = 0;
return NULL;
}
}
const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
size_t* count) {
const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
if (in->internal) {
*count = (in->internal->size - in->internal->ext_begin) /
sizeof(upb_Message_Extension);
return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
} else {
*count = 0;
return NULL;
}
}
const upb_Message_Extension* _upb_Message_Getext(
const upb_Message* msg, const upb_MiniTable_Extension* e) {
size_t n;
const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n);
/* For now we use linear search exclusively to find extensions. If this
* becomes an issue due to messages with lots of extensions, we can introduce
* a table of some sort. */
for (size_t i = 0; i < n; i++) {
if (ext[i].ext == e) {
return &ext[i];
}
}
return NULL;
}
void _upb_Message_Clearext(upb_Message* msg,
const upb_MiniTable_Extension* ext_l) {
upb_Message_Internal* in = upb_Message_Getinternal(msg);
if (!in->internal) return;
const upb_Message_Extension* base =
UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
upb_Message_Extension* ext =
(upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
if (ext) {
*ext = *base;
in->internal->ext_begin += sizeof(upb_Message_Extension);
}
}
upb_Message_Extension* _upb_Message_Getorcreateext(
upb_Message* msg, const upb_MiniTable_Extension* e, upb_Arena* arena) {
upb_Message_Extension* ext =
(upb_Message_Extension*)_upb_Message_Getext(msg, e);
if (ext) return ext;
if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
upb_Message_Internal* in = upb_Message_Getinternal(msg);
in->internal->ext_begin -= sizeof(upb_Message_Extension);
ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
memset(ext, 0, sizeof(upb_Message_Extension));
ext->ext = e;
return ext;
}
size_t upb_Message_ExtensionCount(const upb_Message* msg) {
size_t count;
_upb_Message_Getexts(msg, &count);
return count;
}
/** upb_Array *****************************************************************/
bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena) {
size_t new_size = UPB_MAX(arr->size, 4);
int elem_size_lg2 = arr->data & 7;
size_t old_bytes = arr->size << elem_size_lg2;
size_t new_bytes;
void* ptr = _upb_array_ptr(arr);
/* Log2 ceiling of size. */
while (new_size < min_size) new_size *= 2;
new_bytes = new_size << elem_size_lg2;
ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);
if (!ptr) {
return false;
}
arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
arr->size = new_size;
return true;
}
static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2,
upb_Arena* arena) {
upb_Array* arr = *arr_ptr;
if (!arr) {
arr = _upb_Array_New(arena, 4, elem_size_lg2);
if (!arr) return NULL;
*arr_ptr = arr;
}
return arr;
}
void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size,
int elem_size_lg2, upb_Arena* arena) {
upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
return arr && _upb_Array_Resize(arr, size, arena) ? _upb_array_ptr(arr)
: NULL;
}
bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value,
int elem_size_lg2, upb_Arena* arena) {
upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
if (!arr) return false;
size_t elems = arr->len;
if (!_upb_Array_Resize(arr, elems + 1, arena)) {
return false;
}
char* data = _upb_array_ptr(arr);
memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
return true;
}
/** upb_Map *******************************************************************/
upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) {
upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map));
if (!map) {
return NULL;
}
upb_strtable_init(&map->table, 4, a);
map->key_size = key_size;
map->val_size = value_size;
return map;
}
static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key,
void* b_key, size_t size) {
const upb_tabent* const* a = _a;
const upb_tabent* const* b = _b;
upb_StringView a_tabkey = upb_tabstrview((*a)->key);
upb_StringView b_tabkey = upb_tabstrview((*b)->key);
_upb_map_fromkey(a_tabkey, a_key, size);
_upb_map_fromkey(b_tabkey, b_key, size);
}
#define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1))
static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
int64_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
return UPB_COMPARE_INTEGERS(a, b);
}
static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
uint64_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
return UPB_COMPARE_INTEGERS(a, b);
}
static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
int32_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
return UPB_COMPARE_INTEGERS(a, b);
}
static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
uint32_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
return UPB_COMPARE_INTEGERS(a, b);
}
static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
bool a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
return UPB_COMPARE_INTEGERS(a, b);
}
static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
upb_StringView a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
size_t common_size = UPB_MIN(a.size, b.size);
int cmp = memcmp(a.data, b.data, common_size);
if (cmp) return -cmp;
return UPB_COMPARE_INTEGERS(a.size, b.size);
}
#undef UPB_COMPARE_INTEGERS
bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
const upb_Map* map, _upb_sortedmap* sorted) {
int map_size = _upb_Map_Size(map);
sorted->start = s->size;
sorted->pos = sorted->start;
sorted->end = sorted->start + map_size;
/* Grow s->entries if necessary. */
if (sorted->end > s->cap) {
s->cap = _upb_Log2CeilingSize(sorted->end);
s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
if (!s->entries) return false;
}
s->size = sorted->end;
/* Copy non-empty entries from the table to s->entries. */
upb_tabent const** dst = &s->entries[sorted->start];
const upb_tabent* src = map->table.t.entries;
const upb_tabent* end = src + upb_table_size(&map->table.t);
for (; src < end; src++) {
if (!upb_tabent_isempty(src)) {
*dst = src;
dst++;
}
}
UPB_ASSERT(dst == &s->entries[sorted->end]);
/* Sort entries according to the key type. */
int (*compar)(const void*, const void*);
switch (key_type) {
case kUpb_FieldType_Int64:
case kUpb_FieldType_SFixed64:
case kUpb_FieldType_SInt64:
compar = _upb_mapsorter_cmpi64;
break;
case kUpb_FieldType_UInt64:
case kUpb_FieldType_Fixed64:
compar = _upb_mapsorter_cmpu64;
break;
case kUpb_FieldType_Int32:
case kUpb_FieldType_SInt32:
case kUpb_FieldType_SFixed32:
case kUpb_FieldType_Enum:
compar = _upb_mapsorter_cmpi32;
break;
case kUpb_FieldType_UInt32:
case kUpb_FieldType_Fixed32:
compar = _upb_mapsorter_cmpu32;
break;
case kUpb_FieldType_Bool:
compar = _upb_mapsorter_cmpbool;
break;
case kUpb_FieldType_String:
case kUpb_FieldType_Bytes:
compar = _upb_mapsorter_cmpstr;
break;
default:
UPB_UNREACHABLE();
}
qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
return true;
}
/** upb_ExtensionRegistry
* ****************************************************************/
struct upb_ExtensionRegistry {
upb_Arena* arena;
upb_strtable exts; /* Key is upb_MiniTable* concatenated with fieldnum. */
};
#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))
static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
memcpy(buf, &l, sizeof(l));
memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum));
}
upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) {
upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r));
if (!r) return NULL;
r->arena = arena;
if (!upb_strtable_init(&r->exts, 8, arena)) return NULL;
return r;
}
bool _upb_extreg_add(upb_ExtensionRegistry* r,
const upb_MiniTable_Extension** e, size_t count) {
char buf[EXTREG_KEY_SIZE];
const upb_MiniTable_Extension** start = e;
const upb_MiniTable_Extension** end = UPB_PTRADD(e, count);
for (; e < end; e++) {
const upb_MiniTable_Extension* ext = *e;
extreg_key(buf, ext->extendee, ext->field.number);
if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,
upb_value_constptr(ext), r->arena)) {
goto failure;
}
}
return true;
failure:
/* Back out the entries previously added. */
for (end = e, e = start; e < end; e++) {
const upb_MiniTable_Extension* ext = *e;
extreg_key(buf, ext->extendee, ext->field.number);
upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL);
}
return false;
}
const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r,
const upb_MiniTable* l,
uint32_t num) {
char buf[EXTREG_KEY_SIZE];
upb_value v;
extreg_key(buf, l, num);
if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) {
return upb_value_getconstptr(v);
} else {
return NULL;
}
}
/** upb/table.c ************************************************************/
/*
* upb_table Implementation
*
* Implementation is heavily inspired by Lua's ltable.c.
*/
#include <string.h>
/* Must be last. */
#define UPB_MAXARRSIZE 16 /* 64k. */
/* From Chromium. */
#define ARRAY_SIZE(x) \
((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
static const double MAX_LOAD = 0.85;
/* The minimum utilization of the array part of a mixed hash/array table. This
* is a speed/memory-usage tradeoff (though it's not straightforward because of
* cache effects). The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
static upb_value _upb_value_val(uint64_t val) {
upb_value ret;
_upb_value_setval(&ret, val);
return ret;
}
static int log2ceil(uint64_t v) {
int ret = 0;
bool pow2 = is_pow2(v);
while (v >>= 1) ret++;
ret = pow2 ? ret : ret + 1; /* Ceiling. */
return UPB_MIN(UPB_MAXARRSIZE, ret);
}
char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
size_t n;
char* p;
/* Prevent overflow errors. */
if (len == SIZE_MAX) return NULL;
/* Always null-terminate, even if binary data; but don't rely on the input to
* have a null-terminating byte since it may be a raw binary buffer. */
n = len + 1;
p = upb_Arena_Malloc(a, n);
if (p) {
memcpy(p, s, len);
p[len] = 0;
}
return p;
}
/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
uintptr_t num;
struct {
const char* str;
size_t len;
} str;
} lookupkey_t;
static lookupkey_t strkey2(const char* str, size_t len) {
lookupkey_t k;
k.str.str = str;
k.str.len = len;
return k;
}
static lookupkey_t intkey(uintptr_t key) {
lookupkey_t k;
k.num = key;
return k;
}
typedef uint32_t hashfunc_t(upb_tabkey key);
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
/* Base table (shared code) ***************************************************/
static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }
static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
return t->entries + (hash & t->mask);
}
static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }
static bool isfull(upb_table* t) { return t->count == t->max_count; }
static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
size_t bytes;
t->count = 0;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
t->max_count = upb_table_size(t) * MAX_LOAD;
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = upb_Arena_Malloc(a, bytes);
if (!t->entries) return false;
memset(t->entries, 0, bytes);
} else {
t->entries = NULL;
}
return true;
}
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
upb_tabent* begin = t->entries;
upb_tabent* end = begin + upb_table_size(t);
for (e = e + 1; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
for (e = begin; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
UPB_ASSERT(false);
return NULL;
}
static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
return (upb_tabent*)upb_getentry(t, hash);
}
static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
uint32_t hash, eqlfunc_t* eql) {
const upb_tabent* e;
if (t->size_lg2 == 0) return NULL;
e = upb_getentry(t, hash);
if (upb_tabent_isempty(e)) return NULL;
while (1) {
if (eql(e->key, key)) return e;
if ((e = e->next) == NULL) return NULL;
}
}
static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
uint32_t hash, eqlfunc_t* eql) {
return (upb_tabent*)findentry(t, key, hash, eql);
}
static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
uint32_t hash, eqlfunc_t* eql) {
const upb_tabent* e = findentry(t, key, hash, eql);
if (e) {
if (v) {
_upb_value_setval(v, e->val.val);
}
return true;
} else {
return false;
}
}
/* The given key must not already exist in the table. */
static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
upb_value val, uint32_t hash, hashfunc_t* hashfunc,
eqlfunc_t* eql) {
upb_tabent* mainpos_e;
upb_tabent* our_e;
UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
t->count++;
mainpos_e = getentry_mutable(t, hash);
our_e = mainpos_e;
if (upb_tabent_isempty(mainpos_e)) {
/* Our main position is empty; use it. */
our_e->next = NULL;
} else {
/* Collision. */
upb_tabent* new_e = emptyent(t, mainpos_e);
/* Head of collider's chain. */
upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
/* Existing ent is in its main position (it has the same hash as us, and
* is the head of our chain). Insert to new ent and append to this chain.
*/
new_e->next = mainpos_e->next;
mainpos_e->next = new_e;
our_e = new_e;
} else {
/* Existing ent is not in its main position (it is a node in some other
* chain). This implies that no existing ent in the table has our hash.
* Evict it (updating its chain) and use its ent for head of our chain. */
*new_e = *mainpos_e; /* copies next. */
while (chain->next != mainpos_e) {
chain = (upb_tabent*)chain->next;
UPB_ASSERT(chain);
}
chain->next = new_e;
our_e = mainpos_e;
our_e->next = NULL;
}
}
our_e->key = tabkey;
our_e->val.val = val.val;
UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
upb_tabent* chain = getentry_mutable(t, hash);
if (upb_tabent_isempty(chain)) return false;
if (eql(chain->key, key)) {
/* Element to remove is at the head of its chain. */
t->count--;
if (val) _upb_value_setval(val, chain->val.val);
if (removed) *removed = chain->key;
if (chain->next) {
upb_tabent* move = (upb_tabent*)chain->next;
*chain = *move;
move->key = 0; /* Make the slot empty. */
} else {
chain->key = 0; /* Make the slot empty. */
}
return true;
} else {
/* Element to remove is either in a non-head position or not in the
* table. */
while (chain->next && !eql(chain->next->key, key)) {
chain = (upb_tabent*)chain->next;
}
if (chain->next) {
/* Found element to remove. */
upb_tabent* rm = (upb_tabent*)chain->next;
t->count--;
if (val) _upb_value_setval(val, chain->next->val.val);
if (removed) *removed = rm->key;
rm->key = 0; /* Make the slot empty. */
chain->next = rm->next;
return true;
} else {
/* Element to remove is not in the table. */
return false;
}
}
}
static size_t next(const upb_table* t, size_t i) {
do {
if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
} while (upb_tabent_isempty(&t->entries[i]));
return i;
}
static size_t begin(const upb_table* t) { return next(t, -1); }
/* upb_strtable ***************************************************************/
/* A simple "subclass" of upb_table that only adds a hash function for strings.
*/
static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
uint32_t len = (uint32_t)k2.str.len;
char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
if (str == NULL) return 0;
memcpy(str, &len, sizeof(uint32_t));
if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
str[sizeof(uint32_t) + k2.str.len] = '\0';
return (uintptr_t)str;
}
/* Adapted from ABSL's wyhash. */
static uint64_t UnalignedLoad64(const void* p) {
uint64_t val;
memcpy(&val, p, 8);
return val;
}
static uint32_t UnalignedLoad32(const void* p) {
uint32_t val;
memcpy(&val, p, 4);
return val;
}
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#endif
/* Computes a * b, returning the low 64 bits of the result and storing the high
* 64 bits in |*high|. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
__uint128_t p = v0;
p *= v1;
*out_high = (uint64_t)(p >> 64);
return (uint64_t)p;
#elif defined(_MSC_VER) && defined(_M_X64)
return _umul128(v0, v1, out_high);
#else
uint64_t a32 = v0 >> 32;
uint64_t a00 = v0 & 0xffffffff;
uint64_t b32 = v1 >> 32;
uint64_t b00 = v1 & 0xffffffff;
uint64_t high = a32 * b32;
uint64_t low = a00 * b00;
uint64_t mid1 = a32 * b00;
uint64_t mid2 = a00 * b32;
low += (mid1 << 32) + (mid2 << 32);
// Omit carry bit, for mixing we do not care about exact numerical precision.
high += (mid1 >> 32) + (mid2 >> 32);
*out_high = high;
return low;
#endif
}
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
uint64_t high;
uint64_t low = upb_umul128(v0, v1, &high);
return low ^ high;
}
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
const uint64_t salt[]) {
const uint8_t* ptr = (const uint8_t*)data;
uint64_t starting_length = (uint64_t)len;
uint64_t current_state = seed ^ salt[0];
if (len > 64) {
// If we have more than 64 bytes, we're going to handle chunks of 64
// bytes at a time. We're going to build up two separate hash states
// which we will then hash together.
uint64_t duplicated_state = current_state;
do {
uint64_t a = UnalignedLoad64(ptr);
uint64_t b = UnalignedLoad64(ptr + 8);
uint64_t c = UnalignedLoad64(ptr + 16);
uint64_t d = UnalignedLoad64(ptr + 24);
uint64_t e = UnalignedLoad64(ptr + 32);
uint64_t f = UnalignedLoad64(ptr + 40);
uint64_t g = UnalignedLoad64(ptr + 48);
uint64_t h = UnalignedLoad64(ptr + 56);
uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
current_state = (cs0 ^ cs1);
uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
duplicated_state = (ds0 ^ ds1);
ptr += 64;
len -= 64;
} while (len > 64);
current_state = current_state ^ duplicated_state;
}
// We now have a data `ptr` with at most 64 bytes and the current state
// of the hashing state machine stored in current_state.
while (len > 16) {
uint64_t a = UnalignedLoad64(ptr);
uint64_t b = UnalignedLoad64(ptr + 8);
current_state = WyhashMix(a ^ salt[1], b ^ current_state);
ptr += 16;
len -= 16;
}
// We now have a data `ptr` with at most 16 bytes.
uint64_t a = 0;
uint64_t b = 0;
if (len > 8) {
// When we have at least 9 and at most 16 bytes, set A to the first 64
// bits of the input and B to the last 64 bits of the input. Yes, they will
// overlap in the middle if we are working with less than the full 16
// bytes.
a = UnalignedLoad64(ptr);
b = UnalignedLoad64(ptr + len - 8);
} else if (len > 3) {
// If we have at least 4 and at most 8 bytes, set A to the first 32
// bits and B to the last 32 bits.
a = UnalignedLoad32(ptr);
b = UnalignedLoad32(ptr + len - 4);
} else if (len > 0) {
// If we have at least 1 and at most 3 bytes, read all of the provided
// bits into A, with some adjustments.
a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
b = 0;
} else {
a = 0;
b = 0;
}
uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t z = salt[1] ^ starting_length;
return WyhashMix(w, z);
}
const uint64_t kWyhashSalt[5] = {
0x243F6A8885A308D3ULL, 0