| /* Amalgamated source file */ |
| #include "ruby-upb.h" |
| /* |
| * Copyright (c) 2009-2021, Google LLC |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of Google LLC nor the |
| * names of its contributors may be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| * This is where we define macros used across upb. |
| * |
| * All of these macros are undef'd in port_undef.inc to avoid leaking them to |
| * users. |
| * |
| * The correct usage is: |
| * |
| * #include "upb/foobar.h" |
| * #include "upb/baz.h" |
| * |
| * // MUST be last included header. |
| * #include "upb/port_def.inc" |
| * |
| * // Code for this file. |
| * // <...> |
| * |
| * // Can be omitted for .c files, required for .h. |
| * #include "upb/port_undef.inc" |
| * |
| * This file is private and must not be included by users! |
| */ |
| |
/* upb requires C99 (mixed declarations, designated initializers), C++11, or
 * MSVC 2015+, which is the first MSVC with usable C99 support. */
#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
      (defined(__cplusplus) && __cplusplus >= 201103L) ||           \
      (defined(_MSC_VER) && _MSC_VER >= 1900))
#error upb requires C99 or C++11 or MSVC >= 2015.
#endif

#include <stdint.h>
#include <stddef.h>

/* UPB_SIZE(size32, size64): pick a value by pointer width.  Generated code
 * uses this for field offsets/sizes that differ between 32- and 64-bit
 * targets (e.g. anything containing pointers or string views). */
#if UINTPTR_MAX == 0xffffffff
#define UPB_SIZE(size32, size64) size32
#else
#define UPB_SIZE(size32, size64) size64
#endif
| |
/* If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 */
/* UPB_PTR_AT(msg, ofs, type): typed pointer to the byte at `msg + ofs`. */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* Read a oneof member: yields `default` unless the stored case tag at
 * `case_offset` equals `case_val`, in which case the field at `offset` is
 * read with type `fieldtype`. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  *UPB_PTR_AT(msg, case_offset, int) == case_val                           \
      ? *UPB_PTR_AT(msg, offset, fieldtype)                                \
      : default

/* Write a oneof member and its case tag.
 * NOTE(review): expands to two statements with no do/while(0) wrapper, so it
 * is unsafe in an unbraced `if`/`else` body — presumably all generated
 * callers use it as a full statement; confirm before changing call sites. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  *UPB_PTR_AT(msg, case_offset, int) = case_val;                             \
  *UPB_PTR_AT(msg, offset, fieldtype) = value;

/* Sentinel element size meaning "string view" in desctype_to_mapsize. */
#define UPB_MAPTYPE_STRING 0

/* UPB_INLINE: inline if possible, emit standalone code if required. */
#ifdef __cplusplus
#define UPB_INLINE inline
#elif defined (__GNUC__) || defined(__clang__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static
#endif

/* Round `size` up/down to a multiple of `align` (uses integer division, so
 * `align` need not be a power of two). */
#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
/* Arena allocations are rounded up to 16-byte alignment. */
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16)
/* Portable alignof: offset of `member` placed right after a lone char. */
#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
| |
/* Hints to the compiler about likely/unlikely branches. */
#if defined (__GNUC__) || defined(__clang__)
#define UPB_LIKELY(x) __builtin_expect((x),1)
#define UPB_UNLIKELY(x) __builtin_expect((x),0)
#else
/* No-op fallbacks for compilers without __builtin_expect. */
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#endif

/* Macros for function attributes on compilers that support them. */
#ifdef __GNUC__
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
/* UPB_PRINTF(str, first_vararg): enable -Wformat checking of printf-style
 * wrappers; arguments are 1-based parameter indices. */
#define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
#elif defined(_MSC_VER)
#define UPB_NOINLINE
#define UPB_FORCEINLINE
#define UPB_NORETURN __declspec(noreturn)
#define UPB_PRINTF(str, first_vararg)
#else  /* !defined(__GNUC__) */
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#define UPB_PRINTF(str, first_vararg)
#endif
| |
/* Classic min/max; arguments may be evaluated twice, so avoid side effects. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* Suppress unused-variable/parameter warnings.  Fully parenthesized so the
 * expansion is a single primary expression no matter what `var` is or where
 * the macro appears (the previous `(void)var` form could bind surprisingly,
 * e.g. inside a comma expression). */
#define UPB_UNUSED(var) ((void)(var))
| |
/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
 * Violating an assumption in release mode is undefined behavior.
 */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable()
#elif defined _MSC_VER
#define UPB_ASSUME(expr) if (!(expr)) __assume(0)
#else
/* `false && expr` type-checks the expression without evaluating it.
 * NOTE(review): relies on `false` being in scope (stdbool.h / C++ —
 * presumably pulled in via ruby-upb.h; confirm). */
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif

/* UPB_ASSERT(): in release mode, we use the expression without letting it be
 * evaluated.  This prevents "unused variable" warnings. */
#ifdef NDEBUG
#define UPB_ASSERT(expr) do {} while (false && (expr))
#else
#define UPB_ASSERT(expr) assert(expr)
#endif

/* UPB_UNREACHABLE(): marks code that should never execute; asserts in debug
 * builds, and additionally tells GCC/Clang the path is dead. */
#if defined(__GNUC__) || defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#endif
| |
/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask.
 * On Apple platforms, plain setjmp saves the signal mask (slow); _setjmp
 * does not. */
#ifdef __APPLE__
#define UPB_SETJMP(buf) _setjmp(buf)
#define UPB_LONGJMP(buf, val) _longjmp(buf, val)
#else
#define UPB_SETJMP(buf) setjmp(buf)
#define UPB_LONGJMP(buf, val) longjmp(buf, val)
#endif

/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */
#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr))
| |
/* Configure whether fasttable is switched on or not. *************************/

/* UPB_HAS_ATTRIBUTE(x): portable wrapper around __has_attribute. */
#ifdef __has_attribute
#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
#define UPB_HAS_ATTRIBUTE(x) 0
#endif

/* UPB_MUSTTAIL: guaranteed tail call (Clang); empty where unsupported. */
#if UPB_HAS_ATTRIBUTE(musttail)
#define UPB_MUSTTAIL __attribute__((musttail))
#else
#define UPB_MUSTTAIL
#endif

#undef UPB_HAS_ATTRIBUTE

/* This check is not fully robust: it does not require that we have "musttail"
 * support available. We need tail calls to avoid consuming arbitrary amounts
 * of stack space.
 *
 * GCC/Clang can mostly be trusted to generate tail calls as long as
 * optimization is enabled, but, debug builds will not generate tail calls
 * unless "musttail" is available.
 *
 * We should probably either:
 *   1. require that the compiler supports musttail.
 *   2. add some fallback code for when musttail isn't available (ie. return
 *      instead of tail calling). This is safe and portable, but this comes at
 *      a CPU cost.
 */
#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__)
#define UPB_FASTTABLE_SUPPORTED 1
#else
#define UPB_FASTTABLE_SUPPORTED 0
#endif

/* define UPB_ENABLE_FASTTABLE to force fast table support.
 * This is useful when we want to ensure we are really getting fasttable,
 * for example for testing or benchmarking. */
#if defined(UPB_ENABLE_FASTTABLE)
#if !UPB_FASTTABLE_SUPPORTED
#error fasttable is x86-64/ARM64 only and requires GCC or Clang.
#endif
#define UPB_FASTTABLE 1
/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
 * This is useful for releasing code that might be used on multiple platforms,
 * for example the PHP or Ruby C extensions. */
#elif defined(UPB_TRY_ENABLE_FASTTABLE)
#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
#else
#define UPB_FASTTABLE 0
#endif

/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
 * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE.
 * When fasttable is off, generated fasttable initializers expand to nothing. */
#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE)
#define UPB_FASTTABLE_INIT(...)
#else
#define UPB_FASTTABLE_INIT(...) __VA_ARGS__
#endif

#undef UPB_FASTTABLE_SUPPORTED
| |
/* ASAN poisoning (for arena) *************************************************/
/* When building under AddressSanitizer, unused arena memory is manually
 * poisoned so stray reads/writes into it are reported. */

#if defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 1
#ifdef __cplusplus
extern "C" {
#endif
/* Declared here to avoid depending on <sanitizer/asan_interface.h>. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#ifdef __cplusplus
}  /* extern "C" */
#endif
#define UPB_POISON_MEMORY_REGION(addr, size) \
  __asan_poison_memory_region((addr), (size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  __asan_unpoison_memory_region((addr), (size))
#else
#define UPB_ASAN 0
/* No-ops that still evaluate their arguments exactly once. */
#define UPB_POISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#endif

/* Disable proto2 arena behavior (TEMPORARY) **********************************/

#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
#else
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
#endif
| |
| /** upb/decode.c ************************************************************/ |
| |
| #include <setjmp.h> |
| #include <string.h> |
| |
| |
| /* Must be last. */ |
| |
| /* Maps descriptor type -> elem_size_lg2. */ |
/* Maps descriptor type -> elem_size_lg2 (log2 of the in-memory array element
 * size).  Indexed by kUpb_FieldType_*; index 0 is invalid and stores -1,
 * which wraps to 0xff in uint8_t and must never be used for a real field.
 * String/bytes elements are upb_StringView (8/16 bytes on 32/64-bit);
 * group/message elements are pointers. */
static const uint8_t desctype_to_elem_size_lg2[] = {
    -1, /* invalid descriptor type */
    3,  /* DOUBLE */
    2,  /* FLOAT */
    3,  /* INT64 */
    3,  /* UINT64 */
    2,  /* INT32 */
    3,  /* FIXED64 */
    2,  /* FIXED32 */
    0,  /* BOOL */
    UPB_SIZE(3, 4), /* STRING */
    UPB_SIZE(2, 3), /* GROUP */
    UPB_SIZE(2, 3), /* MESSAGE */
    UPB_SIZE(3, 4), /* BYTES */
    2,  /* UINT32 */
    2,  /* ENUM */
    2,  /* SFIXED32 */
    3,  /* SFIXED64 */
    2,  /* SINT32 */
    3,  /* SINT64 */
};
| |
| /* Maps descriptor type -> upb map size. */ |
/* Maps descriptor type -> upb map size (bytes of the key/value slot in a map
 * entry).  UPB_MAPTYPE_STRING (0) is a sentinel meaning "upb_StringView",
 * not a literal size of zero. */
static const uint8_t desctype_to_mapsize[] = {
    -1,                 /* invalid descriptor type */
    8,                  /* DOUBLE */
    4,                  /* FLOAT */
    8,                  /* INT64 */
    8,                  /* UINT64 */
    4,                  /* INT32 */
    8,                  /* FIXED64 */
    4,                  /* FIXED32 */
    1,                  /* BOOL */
    UPB_MAPTYPE_STRING, /* STRING */
    sizeof(void*),      /* GROUP */
    sizeof(void*),      /* MESSAGE */
    UPB_MAPTYPE_STRING, /* BYTES */
    4,                  /* UINT32 */
    4,                  /* ENUM */
    4,                  /* SFIXED32 */
    8,                  /* SFIXED64 */
    4,                  /* SINT32 */
    8,                  /* SINT64 */
};
| |
/* Bitmask of field types that may legally appear on a 32-bit fixed wire
 * type (bit N set = field type N is accepted). */
static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) |
                                        (1 << kUpb_FieldType_Fixed32) |
                                        (1 << kUpb_FieldType_SFixed32);

/* Same, for the 64-bit fixed wire type. */
static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) |
                                        (1 << kUpb_FieldType_Fixed64) |
                                        (1 << kUpb_FieldType_SFixed64);
| |
| /* Three fake field types for MessageSet. */ |
| #define TYPE_MSGSET_ITEM 19 |
| #define TYPE_MSGSET_TYPE_ID 20 |
| #define TYPE_COUNT 20 |
| |
| /* Op: an action to be performed for a wire-type/field-type combination. */ |
| #define OP_UNKNOWN -1 /* Unknown field. */ |
| #define OP_MSGSET_ITEM -2 |
| #define OP_MSGSET_TYPEID -3 |
| #define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */ |
| #define OP_ENUM 1 |
| #define OP_STRING 4 |
| #define OP_BYTES 5 |
| #define OP_SUBMSG 6 |
| /* Scalar fields use only ops above. Repeated fields can use any op. */ |
| #define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ |
| #define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ |
| #define OP_PACKED_ENUM 13 |
| |
/* Op to perform when a varint wire type arrives for each field type.
 * Fixed/delimited field types on a varint wire type are unknown fields. */
static const int8_t varint_ops[] = {
    OP_UNKNOWN,       /* field not found */
    OP_UNKNOWN,       /* DOUBLE */
    OP_UNKNOWN,       /* FLOAT */
    OP_SCALAR_LG2(3), /* INT64 */
    OP_SCALAR_LG2(3), /* UINT64 */
    OP_SCALAR_LG2(2), /* INT32 */
    OP_UNKNOWN,       /* FIXED64 */
    OP_UNKNOWN,       /* FIXED32 */
    OP_SCALAR_LG2(0), /* BOOL */
    OP_UNKNOWN,       /* STRING */
    OP_UNKNOWN,       /* GROUP */
    OP_UNKNOWN,       /* MESSAGE */
    OP_UNKNOWN,       /* BYTES */
    OP_SCALAR_LG2(2), /* UINT32 */
    OP_ENUM,          /* ENUM */
    OP_UNKNOWN,       /* SFIXED32 */
    OP_UNKNOWN,       /* SFIXED64 */
    OP_SCALAR_LG2(2), /* SINT32 */
    OP_SCALAR_LG2(3), /* SINT64 */
    OP_UNKNOWN,       /* MSGSET_ITEM */
    OP_MSGSET_TYPEID, /* MSGSET TYPEID */
};
| |
/* Op to perform when a length-delimited wire type arrives.  The table has two
 * halves: the first TYPE_COUNT+1 entries are for non-repeated fields, and the
 * entries after that are for repeated fields (packed encodings). */
static const int8_t delim_ops[] = {
    /* For non-repeated field type. */
    OP_UNKNOWN, /* field not found */
    OP_UNKNOWN, /* DOUBLE */
    OP_UNKNOWN, /* FLOAT */
    OP_UNKNOWN, /* INT64 */
    OP_UNKNOWN, /* UINT64 */
    OP_UNKNOWN, /* INT32 */
    OP_UNKNOWN, /* FIXED64 */
    OP_UNKNOWN, /* FIXED32 */
    OP_UNKNOWN, /* BOOL */
    OP_STRING,  /* STRING */
    OP_UNKNOWN, /* GROUP */
    OP_SUBMSG,  /* MESSAGE */
    OP_BYTES,   /* BYTES */
    OP_UNKNOWN, /* UINT32 */
    OP_UNKNOWN, /* ENUM */
    OP_UNKNOWN, /* SFIXED32 */
    OP_UNKNOWN, /* SFIXED64 */
    OP_UNKNOWN, /* SINT32 */
    OP_UNKNOWN, /* SINT64 */
    OP_UNKNOWN, /* MSGSET_ITEM */
    OP_UNKNOWN, /* MSGSET TYPEID */
    /* For repeated field type. */
    OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
    OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
    OP_VARPCK_LG2(3), /* REPEATED INT64 */
    OP_VARPCK_LG2(3), /* REPEATED UINT64 */
    OP_VARPCK_LG2(2), /* REPEATED INT32 */
    OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
    OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
    OP_VARPCK_LG2(0), /* REPEATED BOOL */
    OP_STRING,        /* REPEATED STRING */
    OP_SUBMSG,        /* REPEATED GROUP */
    OP_SUBMSG,        /* REPEATED MESSAGE */
    OP_BYTES,         /* REPEATED BYTES */
    OP_VARPCK_LG2(2), /* REPEATED UINT32 */
    OP_PACKED_ENUM,   /* REPEATED ENUM */
    OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
    OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
    OP_VARPCK_LG2(2), /* REPEATED SINT32 */
    OP_VARPCK_LG2(3), /* REPEATED SINT64 */
    /* Omitting MSGSET_*, because we never emit a repeated msgset type */
};
| |
/* A decoded wire value, viewed as whichever member the field type needs.
 * `size` is used for length-delimited payload lengths. */
typedef union {
  bool bool_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  uint32_t size;
} wireval;
| |
/* Forward declaration of the main parse loop, defined later in this file. */
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
                              const upb_MiniTable* layout);

/* Aborts the parse with `status` by longjmp'ing back to the decoder's entry
 * point.  Returns void* (never actually returns) so callers can write
 * `return decode_err(...)` from pointer-returning functions. */
UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
}
| |
/* Error path used by the fasttable decoder: same longjmp as decode_err(), but
 * with the const char* signature the fasttable dispatch expects.  The return
 * statement is never reached. */
const char* fastdecode_err(upb_Decoder* d, int status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
  return NULL;
}
| static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) { |
| if (!decode_verifyutf8_inl(buf, len)) |
| decode_err(d, kUpb_DecodeStatus_BadUtf8); |
| } |
| |
| static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { |
| bool need_realloc = arr->size - arr->len < elem; |
| if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) { |
| decode_err(d, kUpb_DecodeStatus_OutOfMemory); |
| } |
| return need_realloc; |
| } |
| |
/* Result of slow-path varint decoding: the position after the varint and the
 * decoded value.  ptr == NULL signals a malformed (overlong) varint. */
typedef struct {
  const char* ptr;
  uint64_t val;
} decode_vret;
| |
| UPB_NOINLINE |
| static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { |
| decode_vret ret = {NULL, 0}; |
| uint64_t byte; |
| int i; |
| for (i = 1; i < 10; i++) { |
| byte = (uint8_t)ptr[i]; |
| val += (byte - 1) << (i * 7); |
| if (!(byte & 0x80)) { |
| ret.ptr = ptr + i + 1; |
| ret.val = val; |
| return ret; |
| } |
| } |
| return ret; |
| } |
| |
UPB_FORCEINLINE
static const char* decode_varint64(upb_Decoder* d, const char* ptr,
                                   uint64_t* val) {
  /* Decode a varint into *val, returning the position after it.  The common
   * single-byte case is inlined; multi-byte varints take the noinline slow
   * path.  Longjmps on a malformed (>10 byte) varint. */
  uint64_t byte = (uint8_t)*ptr;
  if (UPB_LIKELY((byte & 0x80) == 0)) {
    *val = byte;
    return ptr + 1;
  } else {
    decode_vret res = decode_longvarint64(ptr, byte);
    if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
    *val = res.val;
    return res.ptr;
  }
}
| |
UPB_FORCEINLINE
static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) {
  /* Decode a tag (field number << 3 | wire type) into *val.  Tags are
   * 32-bit varints and may occupy at most 5 bytes on the wire; anything
   * longer or overflowing uint32_t is malformed. */
  uint64_t byte = (uint8_t)*ptr;
  if (UPB_LIKELY((byte & 0x80) == 0)) {
    *val = byte;
    return ptr + 1;
  } else {
    const char* start = ptr;
    decode_vret res = decode_longvarint64(ptr, byte);
    if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
      return decode_err(d, kUpb_DecodeStatus_Malformed);
    }
    *val = res.val;
    return res.ptr;
  }
}
| |
static void decode_munge_int32(wireval* val) {
  /* On big-endian hosts the low 32 bits of uint64_val are not the first 4
   * bytes of the union, so move the value into uint32_val before the caller
   * memcpy's 4 bytes out of it.  No-op on little-endian hosts. */
  if (!_upb_IsLittleEndian()) {
    /* The next stage will memcpy(dst, &val, 4) */
    val->uint32_val = val->uint64_val;
  }
}
| |
/* Convert a raw decoded varint into the in-memory representation for the
 * given field type: normalize bools, zigzag-decode sint32/64, and fix up
 * 32-bit values for big-endian hosts.  Other types pass through unchanged. */
static void decode_munge(int type, wireval* val) {
  switch (type) {
    case kUpb_FieldType_Bool:
      val->bool_val = val->uint64_val != 0;
      break;
    case kUpb_FieldType_SInt32: {
      /* Zigzag decode: (n >> 1) with the sign restored from bit 0. */
      uint32_t n = val->uint64_val;
      val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
      break;
    }
    case kUpb_FieldType_SInt64: {
      uint64_t n = val->uint64_val;
      val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
      break;
    }
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Enum:
      decode_munge_int32(val);
      break;
  }
}
| |
/* Allocate a new, empty submessage from the decoder's arena, using the
 * sub-layout referenced by `field`. */
static upb_Message* decode_newsubmsg(upb_Decoder* d,
                                     const upb_MiniTable_Sub* subs,
                                     const upb_MiniTable_Field* field) {
  const upb_MiniTable* subl = subs[field->submsg_index].submsg;
  return _upb_Message_New_inl(subl, &d->arena);
}
| |
UPB_NOINLINE
const char* decode_isdonefallback(upb_Decoder* d, const char* ptr,
                                  int overrun) {
  /* Slow path for end-of-buffer handling when `ptr` has run `overrun` bytes
   * past the current limit; longjmps if the overrun is a real error. */
  int status;
  ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
  if (ptr == NULL) {
    return decode_err(d, status);
  }
  return ptr;
}
| |
| static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size, |
| upb_StringView* str) { |
| if (d->options & kUpb_DecodeOption_AliasString) { |
| str->data = ptr; |
| } else { |
| char* data = upb_Arena_Malloc(&d->arena, size); |
| if (!data) return decode_err(d, kUpb_DecodeStatus_OutOfMemory); |
| memcpy(data, ptr, size); |
| str->data = data; |
| } |
| str->size = size; |
| return ptr + size; |
| } |
| |
UPB_FORCEINLINE
static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr,
                                    upb_Message* submsg,
                                    const upb_MiniTable* subl, int size) {
  /* Parse a length-delimited submessage of `size` bytes into `submsg`.
   * The decoder's input limit is narrowed to the submessage payload for the
   * duration of the nested parse, and the nesting depth is enforced. */
  int saved_delta = decode_pushlimit(d, ptr, size);
  if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
  ptr = decode_msg(d, ptr, submsg, subl);
  /* A group END tag inside a length-delimited submessage is malformed. */
  if (d->end_group != DECODE_NOGROUP)
    return decode_err(d, kUpb_DecodeStatus_Malformed);
  decode_poplimit(d, ptr, saved_delta);
  d->depth++;
  return ptr;
}
| |
UPB_FORCEINLINE
static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr,
                                   upb_Message* submsg,
                                   const upb_MiniTable_Sub* subs,
                                   const upb_MiniTable_Field* field, int size) {
  /* Convenience wrapper: look up the sub-layout for `field`, then parse. */
  return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
                          size);
}
| |
UPB_FORCEINLINE
static const char* decode_group(upb_Decoder* d, const char* ptr,
                                upb_Message* submsg, const upb_MiniTable* subl,
                                uint32_t number) {
  /* Parse a group: a submessage delimited by START_GROUP/END_GROUP tags
   * instead of a length prefix.  `number` is the field number whose
   * END_GROUP tag must terminate the group. */
  if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
  if (decode_isdone(d, &ptr)) {
    return decode_err(d, kUpb_DecodeStatus_Malformed);
  }
  ptr = decode_msg(d, ptr, submsg, subl);
  /* decode_msg() records the END_GROUP field number it saw in d->end_group. */
  if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
  d->end_group = DECODE_NOGROUP;
  d->depth++;
  return ptr;
}
| |
UPB_FORCEINLINE
static const char* decode_togroup(upb_Decoder* d, const char* ptr,
                                  upb_Message* submsg,
                                  const upb_MiniTable_Sub* subs,
                                  const upb_MiniTable_Field* field) {
  /* Convenience wrapper: look up the sub-layout for `field`, then parse the
   * group delimited by this field's END_GROUP tag. */
  const upb_MiniTable* subl = subs[field->submsg_index].submsg;
  return decode_group(d, ptr, submsg, subl, field->number);
}
| |
/* Encode `val` as a varint at `ptr` (little-endian base-128, continuation
 * bit on every byte but the last) and return the position after it.  The
 * caller guarantees at least 5 writable bytes. */
static char* encode_varint32(uint32_t val, char* ptr) {
  while (val >= 0x80) {
    *ptr++ = (char)(val | 0x80);
    val >>= 7;
  }
  *ptr++ = (char)val;
  return ptr;
}
| |
/* Append two varints (typically a re-synthesized tag and value/length) to the
 * message's unknown-field blob.  buf[20] is large enough for two 32-bit
 * varints (max 5 bytes each).  Longjmps on out-of-memory. */
static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                         uint32_t val1, uint32_t val2) {
  char buf[20];
  char* end = buf;
  end = encode_varint32(val1, end);
  end = encode_varint32(val2, end);

  if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
}
| |
UPB_NOINLINE
static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg, const upb_MiniTable_Enum* e,
                                  const upb_MiniTable_Field* field,
                                  uint32_t v) {
  /* Slow path: linearly scan the enum's explicit value list for `v`
   * (values >= 64, which don't fit in the fast-path bitmask). */
  // OPT: binary search long lists?
  int n = e->value_count;
  for (int i = 0; i < n; i++) {
    if ((uint32_t)e->values[i] == v) return true;
  }

  // Unrecognized enum goes into unknown fields.
  // For packed fields the tag could be arbitrarily far in the past, so we
  // just re-encode the tag and value here.
  uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
  upb_Decode_AddUnknownVarints(d, msg, tag, v);
  return false;
}
| |
UPB_FORCEINLINE
static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg,
                             const upb_MiniTable_Enum* e,
                             const upb_MiniTable_Field* field, wireval* val) {
  /* Returns true if `val` is a known value of closed enum `e`; otherwise the
   * value is preserved in the message's unknown fields and false is returned
   * so the caller skips storing it.  Values 0..63 are checked against a
   * precomputed bitmask; everything else takes the slow list scan. */
  uint32_t v = val->uint32_val;

  if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;

  return decode_checkenum_slow(d, ptr, msg, e, field, v);
}
| |
UPB_NOINLINE
static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr,
                                       upb_Message* msg, upb_Array* arr,
                                       const upb_MiniTable_Sub* subs,
                                       const upb_MiniTable_Field* field,
                                       wireval* val) {
  /* Append one (non-packed) enum value to `arr`, unless it is unknown, in
   * which case decode_checkenum() has routed it to unknown fields.  The
   * caller (decode_toarray) has already reserved space for one element. */
  const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
  if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
  void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
  arr->len++;
  memcpy(mem, val, 4);
  return ptr;
}
| |
| UPB_FORCEINLINE |
| static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr, |
| upb_Array* arr, wireval* val, |
| const upb_MiniTable_Field* field, |
| int lg2) { |
| int mask = (1 << lg2) - 1; |
| size_t count = val->size >> lg2; |
| if ((val->size & mask) != 0) { |
| // Length isn't a round multiple of elem size. |
| return decode_err(d, kUpb_DecodeStatus_Malformed); |
| } |
| decode_reserve(d, arr, count); |
| void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); |
| arr->len += count; |
| // Note: if/when the decoder supports multi-buffer input, we will need to |
| // handle buffer seams here. |
| if (_upb_IsLittleEndian()) { |
| memcpy(mem, ptr, val->size); |
| ptr += val->size; |
| } else { |
| const char* end = ptr + val->size; |
| char* dst = mem; |
| while (ptr < end) { |
| if (lg2 == 2) { |
| uint32_t val; |
| memcpy(&val, ptr, sizeof(val)); |
| val = _upb_BigEndian_Swap32(val); |
| memcpy(dst, &val, sizeof(val)); |
| } else { |
| UPB_ASSERT(lg2 == 3); |
| uint64_t val; |
| memcpy(&val, ptr, sizeof(val)); |
| val = _upb_BigEndian_Swap64(val); |
| memcpy(dst, &val, sizeof(val)); |
| } |
| ptr += 1 << lg2; |
| dst += 1 << lg2; |
| } |
| } |
| |
| return ptr; |
| } |
| |
UPB_FORCEINLINE
static const char* decode_varint_packed(upb_Decoder* d, const char* ptr,
                                        upb_Array* arr, wireval* val,
                                        const upb_MiniTable_Field* field,
                                        int lg2) {
  /* Append a packed run of varint-encoded elements (2^lg2 bytes each in
   * memory) to `arr`.  The element count is unknown up front, so the limit
   * is narrowed to the payload and elements are decoded until it is hit,
   * reserving capacity one element at a time. */
  int scale = 1 << lg2;
  int saved_limit = decode_pushlimit(d, ptr, val->size);
  char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
  while (!decode_isdone(d, &ptr)) {
    wireval elem;
    ptr = decode_varint64(d, ptr, &elem.uint64_val);
    decode_munge(field->descriptortype, &elem);
    if (decode_reserve(d, arr, 1)) {
      /* The array was reallocated; recompute the output cursor. */
      out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
    }
    arr->len++;
    memcpy(out, &elem, scale);
    out += scale;
  }
  decode_poplimit(d, ptr, saved_limit);
  return ptr;
}
| |
UPB_NOINLINE
static const char* decode_enum_packed(upb_Decoder* d, const char* ptr,
                                      upb_Message* msg, upb_Array* arr,
                                      const upb_MiniTable_Sub* subs,
                                      const upb_MiniTable_Field* field,
                                      wireval* val) {
  /* Like decode_varint_packed, but for closed enums: each decoded value is
   * validated and, if unknown, diverted to unknown fields instead of being
   * appended (enum elements are always 4 bytes in memory). */
  const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
  int saved_limit = decode_pushlimit(d, ptr, val->size);
  char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
  while (!decode_isdone(d, &ptr)) {
    wireval elem;
    ptr = decode_varint64(d, ptr, &elem.uint64_val);
    decode_munge_int32(&elem);
    if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
      /* Unknown enum value: recorded in unknown fields, not appended. */
      continue;
    }
    if (decode_reserve(d, arr, 1)) {
      /* The array was reallocated; recompute the output cursor. */
      out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
    }
    arr->len++;
    memcpy(out, &elem, 4);
    out += 4;
  }
  decode_poplimit(d, ptr, saved_limit);
  return ptr;
}
| |
/* Store a decoded value for a repeated field: lazily create the array, then
 * dispatch on `op` to append a scalar, string view, submessage pointer, or an
 * entire packed run.  Returns the new parse position. */
static const char* decode_toarray(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg,
                                  const upb_MiniTable_Sub* subs,
                                  const upb_MiniTable_Field* field,
                                  wireval* val, int op) {
  upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void);
  upb_Array* arr = *arrp;
  void* mem;

  if (arr) {
    /* Reserve one slot up front for the single-element ops below. */
    decode_reserve(d, arr, 1);
  } else {
    /* Lazily create the array with an initial capacity of 4. */
    size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
    arr = _upb_Array_New(&d->arena, 4, lg2);
    if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    *arrp = arr;
  }

  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* Append scalar value (op doubles as the element size lg2). */
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
      arr->len++;
      memcpy(mem, val, 1 << op);
      return ptr;
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES: {
      /* Append bytes. */
      upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->len;
      arr->len++;
      return decode_readstr(d, ptr, val->size, str);
    }
    case OP_SUBMSG: {
      /* Append submessage / group. */
      upb_Message* submsg = decode_newsubmsg(d, subs, field);
      *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void*), upb_Message*) =
          submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
        return decode_togroup(d, ptr, submsg, subs, field);
      } else {
        return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
      }
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3):
      /* Packed fixed-width payload; recover the element lg2 from the op. */
      return decode_fixed_packed(d, ptr, arr, val, field,
                                 op - OP_FIXPCK_LG2(0));
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3):
      /* Packed varint payload. */
      return decode_varint_packed(d, ptr, arr, val, field,
                                  op - OP_VARPCK_LG2(0));
    case OP_ENUM:
      return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
    case OP_PACKED_ENUM:
      return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
    default:
      UPB_UNREACHABLE();
  }
}
| |
/* Store a decoded map entry: lazily create the map, parse the entry
 * submessage into a stack-local upb_MapEntry, then either insert it or — if
 * the entry contained unknown fields — preserve the entire entry bytes in
 * the parent's unknown fields instead.  Returns the new parse position. */
static const char* decode_tomap(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field,
                                wireval* val) {
  upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
  upb_Map* map = *map_p;
  upb_MapEntry ent;
  const upb_MiniTable* entry = subs[field->submsg_index].submsg;

  if (!map) {
    /* Lazily create map. */
    const upb_MiniTable_Field* key_field = &entry->fields[0];
    const upb_MiniTable_Field* val_field = &entry->fields[1];
    char key_size = desctype_to_mapsize[key_field->descriptortype];
    char val_size = desctype_to_mapsize[val_field->descriptortype];
    /* Map entry layouts always place key at 0 and value after a StringView
     * slot, regardless of the key's actual type. */
    UPB_ASSERT(key_field->offset == 0);
    UPB_ASSERT(val_field->offset == sizeof(upb_StringView));
    map = _upb_Map_New(&d->arena, key_size, val_size);
    *map_p = map;
  }

  /* Parse map entry. */
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].descriptortype == kUpb_FieldType_Message ||
      entry->fields[1].descriptortype == kUpb_FieldType_Group) {
    /* Create proactively to handle the case where it doesn't appear. */
    ent.v.val =
        upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena));
  }

  const char* start = ptr;
  ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
  // check if ent had any unknown fields
  size_t size;
  upb_Message_GetUnknown(&ent.k, &size);
  if (size != 0) {
    /* Re-synthesize the map field's tag + length, then append the raw entry
     * bytes to the parent's unknown fields so nothing is lost. */
    uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
    upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
    if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
      decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else {
    _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
  }
  return ptr;
}
| |
/* Store a decoded value for a non-repeated field directly into the message:
 * validate closed enums, update hasbit/oneof-case presence, then write the
 * value (or recurse into a submessage/group).  Returns the new position. */
static const char* decode_tomsg(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field, wireval* val,
                                int op) {
  void* mem = UPB_PTR_AT(msg, field->offset, void);
  int type = field->descriptortype;

  /* Unknown closed-enum values are diverted to unknown fields; skip storing. */
  if (UPB_UNLIKELY(op == OP_ENUM) &&
      !decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
                        val)) {
    return ptr;
  }

  /* Set presence if necessary. */
  if (field->presence > 0) {
    /* Explicit presence: set the hasbit. */
    _upb_sethas_field(msg, field);
  } else if (field->presence < 0) {
    /* Oneof case */
    uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
    if (op == OP_SUBMSG && *oneof_case != field->number) {
      /* Switching oneof members: clear the stale submessage pointer so we
       * don't parse into another member's message. */
      memset(mem, 0, sizeof(void*));
    }
    *oneof_case = field->number;
  }

  /* Store into message. */
  switch (op) {
    case OP_SUBMSG: {
      upb_Message** submsgp = mem;
      upb_Message* submsg = *submsgp;
      if (!submsg) {
        submsg = decode_newsubmsg(d, subs, field);
        *submsgp = submsg;
      }
      if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) {
        ptr = decode_togroup(d, ptr, submsg, subs, field);
      } else {
        ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
      }
      break;
    }
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES:
      return decode_readstr(d, ptr, val->size, mem);
    case OP_SCALAR_LG2(3):
      memcpy(mem, val, 8);
      break;
    case OP_ENUM:
    case OP_SCALAR_LG2(2):
      memcpy(mem, val, 4);
      break;
    case OP_SCALAR_LG2(0):
      memcpy(mem, val, 1);
      break;
    default:
      UPB_UNREACHABLE();
  }

  return ptr;
}
| |
UPB_NOINLINE
const char* decode_checkrequired(upb_Decoder* d, const char* ptr,
                                 const upb_Message* msg,
                                 const upb_MiniTable* l) {
  /* If required-field checking was requested, compare the message's hasbits
   * (stored in its first 8 bytes) against the layout's required-field mask
   * and record any omission in d->missing_required.  Parsing continues
   * either way; the status is reported at the end of the parse. */
  assert(l->required_count);
  if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
    return ptr;
  }
  uint64_t msg_head;
  memcpy(&msg_head, msg, 8);
  /* Normalize byte order so the mask comparison works on any host. */
  msg_head = _upb_BigEndian_Swap64(msg_head);
  if (upb_MiniTable_requiredmask(l) & ~msg_head) {
    d->missing_required = true;
  }
  return ptr;
}
| |
UPB_FORCEINLINE
static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr,
                                   upb_Message* msg,
                                   const upb_MiniTable* layout) {
  /* Hand parsing over to the fasttable decoder when this build has it
   * compiled in and the layout provides a dispatch table (table_mask of
   * 0xff marks layouts without one).  Returns true if fasttable consumed
   * the input; false means the caller must use the generic path. */
#if UPB_FASTTABLE
  if (layout && layout->table_mask != (unsigned char)-1) {
    uint16_t tag = fastdecode_loadtag(*ptr);
    intptr_t table = decode_totable(layout);
    *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
    return true;
  }
#endif
  return false;
}
| |
/* Parse one MessageSet item group (field 1 of a MessageSet message). */
static const char* decode_msgset(upb_Decoder* d, const char* ptr,
                                 upb_Message* msg,
                                 const upb_MiniTable* layout) {
  // We create a temporary upb_MiniTable here and abuse its fields as temporary
  // storage, to avoid creating lots of MessageSet-specific parsing code-paths:
  //   1. We store 'layout' in item_layout.subs.  We will need this later as
  //      a key to look up extensions for this MessageSet.
  //   2. We use item_layout.fields as temporary storage to store the extension
  //      we found when parsing the type id.
  upb_MiniTable item_layout = {
      .subs = (const upb_MiniTable_Sub[]){{.submsg = layout}},
      .fields = NULL,
      .size = 0,
      .field_count = 0,
      .ext = kUpb_ExtMode_IsMessageSet_ITEM,
      .dense_below = 0,
      .table_mask = -1};
  /* MessageSet items are groups on field number 1. */
  return decode_group(d, ptr, msg, &item_layout, 1);
}
| |
/* Looks up the upb_MiniTable_Field for `field_number` in layout `l`,
 * consulting the extension registry when the layout is extendable. Returns a
 * zeroed sentinel field ("none") for unknown numbers; downstream code treats
 * that as an unknown field. *last_field_index remembers where the previous
 * linear scan matched so in-order fields are found without rescanning. */
static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d,
                                                   const upb_MiniTable* l,
                                                   uint32_t field_number,
                                                   int* last_field_index) {
  static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0};
  if (l == NULL) return &none;

  size_t idx = ((size_t)field_number) - 1;  // 0 wraps to SIZE_MAX
  if (idx < l->dense_below) {
    /* Fastest case: index into dense fields. */
    goto found;
  }

  if (l->dense_below < l->field_count) {
    /* Linear search non-dense fields. Resume scanning from last_field_index
     * since fields are usually in order. */
    int last = *last_field_index;
    for (idx = last; idx < l->field_count; idx++) {
      if (l->fields[idx].number == field_number) {
        goto found;
      }
    }

    /* Wrap around and scan the portion we skipped above. */
    for (idx = l->dense_below; idx < last; idx++) {
      if (l->fields[idx].number == field_number) {
        goto found;
      }
    }
  }

  /* Not a declared field; try extensions / MessageSet pseudo-fields. */
  if (d->extreg) {
    switch (l->ext) {
      case kUpb_ExtMode_Extendable: {
        const upb_MiniTable_Extension* ext =
            _upb_extreg_get(d->extreg, l, field_number);
        if (ext) return &ext->field;
        break;
      }
      case kUpb_ExtMode_IsMessageSet:
        if (field_number == _UPB_MSGSET_ITEM) {
          static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
          return &item;
        }
        break;
      case kUpb_ExtMode_IsMessageSet_ITEM:
        switch (field_number) {
          case _UPB_MSGSET_TYPEID: {
            static upb_MiniTable_Field type_id = {
                0, 0, 0, 0, TYPE_MSGSET_TYPE_ID, 0};
            return &type_id;
          }
          case _UPB_MSGSET_MESSAGE:
            if (l->fields) {
              // We saw type_id previously and succeeded in looking up msg.
              return l->fields;
            } else {
              // TODO: out of order MessageSet.
              // This is a very rare case: all serializers will emit in-order
              // MessageSets. To hit this case there has to be some kind of
              // re-ordering proxy. We should eventually handle this case, but
              // not today.
            }
            break;
        }
    }
  }

  return &none; /* Unknown field. */

found:
  UPB_ASSERT(l->fields[idx].number == field_number);
  *last_field_index = idx;
  return &l->fields[idx];
}
| |
/* Reads the wire value that follows an already-parsed tag. `wire_type`
 * selects the wire decoding; the field's descriptortype selects *op, which
 * tells the caller how (or whether) the value can be stored. Longjmps on
 * malformed input (bad wire type or oversized delimited length). */
UPB_FORCEINLINE
static const char* decode_wireval(upb_Decoder* d, const char* ptr,
                                  const upb_MiniTable_Field* field,
                                  int wire_type, wireval* val, int* op) {
  switch (wire_type) {
    case kUpb_WireType_Varint:
      ptr = decode_varint64(d, ptr, &val->uint64_val);
      *op = varint_ops[field->descriptortype];
      /* Normalize the raw varint for the declared type (see decode_munge). */
      decode_munge(field->descriptortype, val);
      return ptr;
    case kUpb_WireType_32Bit:
      memcpy(&val->uint32_val, ptr, 4);
      val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val);
      *op = OP_SCALAR_LG2(2);
      /* Only the types in FIXED32_OK_MASK accept fixed32 data; everything
       * else becomes an unknown field. */
      if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
        *op = OP_UNKNOWN;
      }
      return ptr + 4;
    case kUpb_WireType_64Bit:
      memcpy(&val->uint64_val, ptr, 8);
      val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val);
      *op = OP_SCALAR_LG2(3);
      if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
        *op = OP_UNKNOWN;
      }
      return ptr + 8;
    case kUpb_WireType_Delimited: {
      int ndx = field->descriptortype;
      uint64_t size;
      /* Repeated delimited fields index the second half of delim_ops. */
      if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT;
      ptr = decode_varint64(d, ptr, &size);
      /* Reject lengths that overflow int32 or run past the input limit. */
      if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) {
        break; /* Length overflow. */
      }
      *op = delim_ops[ndx];
      val->size = size;
      return ptr;
    }
    case kUpb_WireType_StartGroup:
      /* Groups carry no inline value; stash the field number so the group
       * parser can match the end-group tag. */
      val->uint32_val = field->number;
      if (field->descriptortype == kUpb_FieldType_Group) {
        *op = OP_SUBMSG;
      } else if (field->descriptortype == TYPE_MSGSET_ITEM) {
        *op = OP_MSGSET_ITEM;
      } else {
        *op = OP_UNKNOWN;
      }
      return ptr;
    default:
      break;
  }
  return decode_err(d, kUpb_DecodeStatus_Malformed);
}
| |
/* Stores a successfully-classified field value (op >= 0). For extensions the
 * destination is redirected from the message body to the extension record
 * before dispatching on the field's mode. */
UPB_FORCEINLINE
static const char* decode_known(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable* layout,
                                const upb_MiniTable_Field* field, int op,
                                wireval* val) {
  const upb_MiniTable_Sub* subs = layout->subs;
  uint8_t mode = field->mode;

  if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
    /* For extensions, `field` is embedded inside a upb_MiniTable_Extension;
     * fetch (or create) the extension record and write into it instead. */
    const upb_MiniTable_Extension* ext_layout =
        (const upb_MiniTable_Extension*)field;
    upb_Message_Extension* ext =
        _upb_Message_Getorcreateext(msg, ext_layout, &d->arena);
    if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    msg = &ext->data;
    subs = &ext->ext->sub;
  }

  switch (mode & kUpb_FieldMode_Mask) {
    case kUpb_FieldMode_Array:
      return decode_toarray(d, ptr, msg, subs, field, val, op);
    case kUpb_FieldMode_Map:
      return decode_tomap(d, ptr, msg, subs, field, val);
    case kUpb_FieldMode_Scalar:
      return decode_tomsg(d, ptr, msg, subs, field, val, op);
    default:
      UPB_UNREACHABLE();
  }
}
| |
/* Walks backwards from `ptr` (one past the end of a varint) to find the
 * start of the varint whose decoded value is `val`. Reading in reverse
 * visits the most-significant 7-bit group first, so shifting the accumulator
 * left by 7 each step reconstructs the value once the first byte is
 * reached. */
static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) {
  uint32_t acc = 0;
  for (;;) {
    ptr--;
    acc = (acc << 7) | (uint32_t)(*ptr & 0x7f);
    if (acc == val) return ptr;
  }
}
| |
/* Handles a field that could not be stored: preserves its tag+value bytes in
 * `msg`'s unknown-field blob, or merely skips them when msg is NULL (we are
 * inside a group that is itself unknown). On entry `ptr` points just past
 * the parsed value. */
static const char* decode_unknown(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg, int field_number,
                                  int wire_type, wireval val) {
  if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);

  // Since unknown fields are the uncommon case, we do a little extra work here
  // to walk backwards through the buffer to find the field start. This frees
  // up a register in the fast paths (when the field is known), which leads to
  // significant speedups in benchmarks.
  const char* start = ptr;

  if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
  if (msg) {
    /* Walk back over the value to find where it begins... */
    switch (wire_type) {
      case kUpb_WireType_Varint:
      case kUpb_WireType_Delimited:
        start--;
        while (start[-1] & 0x80) start--;
        break;
      case kUpb_WireType_32Bit:
        start -= 4;
        break;
      case kUpb_WireType_64Bit:
        start -= 8;
        break;
      default:
        break;
    }

    assert(start == d->debug_valstart);
    /* ...then back over the tag varint itself. */
    uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
    start = decode_reverse_skip_varint(start, tag);
    assert(start == d->debug_tagstart);

    if (wire_type == kUpb_WireType_StartGroup) {
      /* Parse the nested group with no destination. decode_group may adjust
       * d->unknown, so re-read the start afterward. */
      d->unknown = start;
      d->unknown_msg = msg;
      ptr = decode_group(d, ptr, NULL, NULL, field_number);
      start = d->unknown;
      d->unknown_msg = NULL;
      d->unknown = NULL;
    }
    if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
      return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else if (wire_type == kUpb_WireType_StartGroup) {
    ptr = decode_group(d, ptr, NULL, NULL, field_number);
  }
  return ptr;
}
| |
/* Slow-path parse loop for one message or group: reads tag/value pairs until
 * input (or the enclosing group) ends, dispatching each field to the proper
 * handler. Returns the position just past the parsed data; for groups, the
 * matching end-group number is recorded in d->end_group. */
UPB_NOINLINE
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
                              const upb_MiniTable* layout) {
  int last_field_index = 0;

#if UPB_FASTTABLE
  // The first time we want to skip fast dispatch, because we may have just been
  // invoked by the fast parser to handle a case that it bailed on.
  if (!decode_isdone(d, &ptr)) goto nofast;
#endif

  while (!decode_isdone(d, &ptr)) {
    uint32_t tag;
    const upb_MiniTable_Field* field;
    int field_number;
    int wire_type;
    wireval val;
    int op;

    /* Hand back to the fasttable parser whenever it can take over. */
    if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;

#if UPB_FASTTABLE
  nofast:
#endif

#ifndef NDEBUG
    d->debug_tagstart = ptr;
#endif

    UPB_ASSERT(ptr < d->limit_ptr);
    ptr = decode_tag(d, ptr, &tag);
    field_number = tag >> 3;
    wire_type = tag & 7;

#ifndef NDEBUG
    d->debug_valstart = ptr;
#endif

    if (wire_type == kUpb_WireType_EndGroup) {
      /* End of the group we are parsing; the caller validates the number. */
      d->end_group = field_number;
      return ptr;
    }

    field = decode_findfield(d, layout, field_number, &last_field_index);
    ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);

    if (op >= 0) {
      ptr = decode_known(d, ptr, msg, layout, field, op, &val);
    } else {
      /* Negative ops mark fields that need special handling. */
      switch (op) {
        case OP_UNKNOWN:
          ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
          break;
        case OP_MSGSET_ITEM:
          ptr = decode_msgset(d, ptr, msg, layout);
          break;
        case OP_MSGSET_TYPEID: {
          /* Look up the extension for this type id and stash it in the
           * synthetic MessageSet item layout (see decode_msgset). */
          const upb_MiniTable_Extension* ext = _upb_extreg_get(
              d->extreg, layout->subs[0].submsg, val.uint64_val);
          if (ext) ((upb_MiniTable*)layout)->fields = &ext->field;
          break;
        }
      }
    }
  }

  return UPB_UNLIKELY(layout && layout->required_count)
             ? decode_checkrequired(d, ptr, msg, layout)
             : ptr;
}
| |
/* Generic fallback entry point invoked from fasttable-generated code. Merges
 * the accumulated hasbits into the message's first 32-bit word, then
 * continues with the slow-path parser. NOTE(review): only the low 32 bits of
 * `hasbits` are applied here — confirm that is intentional for layouts with
 * more hasbits. */
const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr,
                               upb_Message* msg, intptr_t table,
                               uint64_t hasbits, uint64_t data) {
  (void)data;
  *(uint32_t*)msg |= hasbits;
  return decode_msg(d, ptr, msg, decode_totablep(table));
}
| |
| static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf, |
| void* msg, const upb_MiniTable* l) { |
| if (!decode_tryfastdispatch(d, &buf, msg, l)) { |
| decode_msg(d, buf, msg, l); |
| } |
| if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; |
| if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; |
| return kUpb_DecodeStatus_Ok; |
| } |
| |
/* Public entry point: parses `size` bytes of wire data from `buf` into `msg`
 * (layout `l`), allocating from `arena`. Returns a kUpb_DecodeStatus_* code;
 * never longjmps to the caller. */
upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
                            const upb_MiniTable* l,
                            const upb_ExtensionRegistry* extreg, int options,
                            upb_Arena* arena) {
  upb_Decoder state;
  /* The high 16 bits of `options` carry the max recursion depth. */
  unsigned depth = (unsigned)options >> 16;

  if (size <= 16) {
    /* Tiny input: copy it into a zero-padded 32-byte local buffer so the
     * decoder can read ahead without touching caller memory. */
    memset(&state.patch, 0, 32);
    if (size) memcpy(&state.patch, buf, size);
    buf = state.patch;
    state.end = buf + size;
    state.limit = 0;
    options &= ~kUpb_DecodeOption_AliasString;  // Can't alias patch buf.
  } else {
    /* Hold the final 16 bytes in reserve; `end` is where the parser must
     * re-check its limits, with `limit` bytes still remaining after it. */
    state.end = buf + size - 16;
    state.limit = 16;
  }

  state.extreg = extreg;
  state.limit_ptr = state.end;
  state.unknown_msg = NULL;
  state.depth = depth ? depth : 64;  /* default depth limit when unspecified */
  state.end_group = DECODE_NOGROUP;
  state.options = (uint16_t)options;
  state.missing_required = false;
  /* Decode against a local copy of the arena's allocation state; the result
   * is committed back to `arena` below (on success or failure). */
  state.arena.head = arena->head;
  state.arena.last_size = arena->last_size;
  state.arena.cleanup_metadata = arena->cleanup_metadata;
  state.arena.parent = arena;

  /* All parse errors longjmp back to this UPB_SETJMP with a status code. */
  upb_DecodeStatus status = UPB_SETJMP(state.err);
  if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
    status = decode_top(&state, buf, msg, l);
  }

  arena->head.ptr = state.arena.head.ptr;
  arena->head.end = state.arena.head.end;
  arena->cleanup_metadata = state.arena.cleanup_metadata;
  return status;
}
| |
| #undef OP_UNKNOWN |
| #undef OP_SKIP |
| #undef OP_SCALAR_LG2 |
| #undef OP_FIXPCK_LG2 |
| #undef OP_VARPCK_LG2 |
| #undef OP_STRING |
| #undef OP_BYTES |
| #undef OP_SUBMSG |
| |
| /** upb/encode.c ************************************************************/ |
| /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ |
| |
| |
| #include <setjmp.h> |
| #include <string.h> |
| |
| |
| /* Must be last. */ |
| |
| #define UPB_PB_VARINT_MAX_LEN 10 |
| |
| UPB_NOINLINE |
/* Writes `val` as a protobuf varint into `buf`: little-endian base-128
 * groups, high bit set on every byte except the last. Returns the number of
 * bytes written (1..10). `buf` must have room for UPB_PB_VARINT_MAX_LEN
 * bytes. */
static size_t encode_varint64(uint64_t val, char* buf) {
  size_t n = 0;
  for (;;) {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val == 0) {
      buf[n++] = (char)group;  /* final byte: continuation bit clear */
      return n;
    }
    buf[n++] = (char)(group | 0x80U);
  }
}
| |
/* ZigZag-encodes a signed 32-bit value so small magnitudes (positive or
 * negative) become small unsigned varints. */
static uint32_t encode_zz32(int32_t n) {
  uint32_t magnitude = (uint32_t)n << 1;
  uint32_t sign_mask = (uint32_t)(n >> 31); /* arithmetic shift: 0 or ~0 */
  return magnitude ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value (64-bit analogue of encode_zz32). */
static uint64_t encode_zz64(int64_t n) {
  uint64_t magnitude = (uint64_t)n << 1;
  uint64_t sign_mask = (uint64_t)(n >> 63); /* arithmetic shift: 0 or ~0 */
  return magnitude ^ sign_mask;
}
| |
/* Encoder state. The output buffer is filled BACKWARDS: data is written from
 * `limit` down toward `buf`, with `ptr` marking the current write front
 * (buf <= ptr <= limit). */
typedef struct {
  jmp_buf err;           /* longjmp target for encode_err() */
  upb_alloc* alloc;      /* allocator used for the output buffer */
  char *buf, *ptr, *limit;  /* buffer start, write front, buffer end */
  int options;           /* kUpb_Encode_* flag bits */
  int depth;             /* remaining submessage recursion budget */
  _upb_mapsorter sorter; /* scratch state for deterministic map encoding */
} upb_encstate;
| |
/* Returns the allocation size to use for `bytes`: the smallest power of two
 * >= bytes, with a floor of 128. If the next power of two would overflow
 * size_t, returns SIZE_MAX instead (the subsequent allocation then fails
 * cleanly rather than this loop spinning forever on `ret *= 2` wrapping
 * to 0). */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) return SIZE_MAX; /* would overflow: saturate */
    ret *= 2;
  }
  return ret;
}
| |
| UPB_NORETURN static void encode_err(upb_encstate* e) { UPB_LONGJMP(e->err, 1); } |
| |
| UPB_NOINLINE |
| static void encode_growbuffer(upb_encstate* e, size_t bytes) { |
| size_t old_size = e->limit - e->buf; |
| size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); |
| char* new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size); |
| |
| if (!new_buf) encode_err(e); |
| |
| /* We want previous data at the end, realloc() put it at the beginning. */ |
| if (old_size > 0) { |
| memmove(new_buf + new_size - old_size, e->buf, old_size); |
| } |
| |
| e->ptr = new_buf + new_size - (e->limit - e->ptr); |
| e->limit = new_buf + new_size; |
| e->buf = new_buf; |
| |
| e->ptr -= bytes; |
| } |
| |
| /* Call to ensure that at least "bytes" bytes are available for writing at |
| * e->ptr. Returns false if the bytes could not be allocated. */ |
| UPB_FORCEINLINE |
| static void encode_reserve(upb_encstate* e, size_t bytes) { |
| if ((size_t)(e->ptr - e->buf) < bytes) { |
| encode_growbuffer(e, bytes); |
| return; |
| } |
| |
| e->ptr -= bytes; |
| } |
| |
| /* Writes the given bytes to the buffer, handling reserve/advance. */ |
| static void encode_bytes(upb_encstate* e, const void* data, size_t len) { |
| if (len == 0) return; /* memcpy() with zero size is UB */ |
| encode_reserve(e, len); |
| memcpy(e->ptr, data, len); |
| } |
| |
| static void encode_fixed64(upb_encstate* e, uint64_t val) { |
| val = _upb_BigEndian_Swap64(val); |
| encode_bytes(e, &val, sizeof(uint64_t)); |
| } |
| |
| static void encode_fixed32(upb_encstate* e, uint32_t val) { |
| val = _upb_BigEndian_Swap32(val); |
| encode_bytes(e, &val, sizeof(uint32_t)); |
| } |
| |
| UPB_NOINLINE |
| static void encode_longvarint(upb_encstate* e, uint64_t val) { |
| size_t len; |
| char* start; |
| |
| encode_reserve(e, UPB_PB_VARINT_MAX_LEN); |
| len = encode_varint64(val, e->ptr); |
| start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; |
| memmove(start, e->ptr, len); |
| e->ptr = start; |
| } |
| |
| UPB_FORCEINLINE |
| static void encode_varint(upb_encstate* e, uint64_t val) { |
| if (val < 128 && e->ptr != e->buf) { |
| --e->ptr; |
| *e->ptr = val; |
| } else { |
| encode_longvarint(e, val); |
| } |
| } |
| |
| static void encode_double(upb_encstate* e, double d) { |
| uint64_t u64; |
| UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); |
| memcpy(&u64, &d, sizeof(uint64_t)); |
| encode_fixed64(e, u64); |
| } |
| |
| static void encode_float(upb_encstate* e, float d) { |
| uint32_t u32; |
| UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); |
| memcpy(&u32, &d, sizeof(uint32_t)); |
| encode_fixed32(e, u32); |
| } |
| |
| static void encode_tag(upb_encstate* e, uint32_t field_number, |
| uint8_t wire_type) { |
| encode_varint(e, (field_number << 3) | wire_type); |
| } |
| |
/* Encodes a repeated fixed32/fixed64 array, iterating backwards so the
 * output reads forwards. `tag` is 0 for packed encoding (elements only);
 * otherwise the tag precedes every element in the final output. On
 * little-endian targets, unpacked==false data is already in wire format and
 * is copied wholesale. */
static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
                              size_t elem_size, uint32_t tag) {
  size_t bytes = arr->len * elem_size;
  const char* data = _upb_array_constptr(arr);
  const char* ptr = data + bytes - elem_size;  /* last element */

  if (tag || !_upb_IsLittleEndian()) {
    /* Element-at-a-time path: byte-swap and/or interleave tags. */
    while (true) {
      if (elem_size == 4) {
        uint32_t val;
        memcpy(&val, ptr, sizeof(val));
        val = _upb_BigEndian_Swap32(val);
        encode_bytes(e, &val, elem_size);
      } else {
        UPB_ASSERT(elem_size == 8);
        uint64_t val;
        memcpy(&val, ptr, sizeof(val));
        val = _upb_BigEndian_Swap64(val);
        encode_bytes(e, &val, elem_size);
      }

      if (tag) encode_varint(e, tag);
      if (ptr == data) break;
      ptr -= elem_size;
    }
  } else {
    /* Packed little-endian: memory layout == wire layout. */
    encode_bytes(e, data, bytes);
  }
}
| |
| static void encode_message(upb_encstate* e, const upb_Message* msg, |
| const upb_MiniTable* m, size_t* size); |
| |
/* Encodes one non-repeated field value located at `_field_mem`. Because the
 * encoder writes backwards, the value is emitted first and the tag last (so
 * the tag precedes the value in the final output). NULL submessages encode
 * nothing (early return before the tag is written). */
static void encode_scalar(upb_encstate* e, const void* _field_mem,
                          const upb_MiniTable_Sub* subs,
                          const upb_MiniTable_Field* f) {
  const char* field_mem = _field_mem;
  int wire_type;

/* Loads the in-memory value, encodes it with encode_<type>, and records the
 * wire type for the trailing encode_tag() call. */
#define CASE(ctype, type, wtype, encodeval) \
  { \
    ctype val = *(ctype*)field_mem; \
    encode_##type(e, encodeval); \
    wire_type = wtype; \
    break; \
  }

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      CASE(double, double, kUpb_WireType_64Bit, val);
    case kUpb_FieldType_Float:
      CASE(float, float, kUpb_WireType_32Bit, val);
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      CASE(uint64_t, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_UInt32:
      CASE(uint32_t, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      /* int32/enum are sign-extended to 64 bits on the wire. */
      CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
    case kUpb_FieldType_Bool:
      CASE(bool, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_SInt32:
      CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
    case kUpb_FieldType_SInt64:
      CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      /* Written backwards: payload, then its length, then (below) the tag. */
      upb_StringView view = *(upb_StringView*)field_mem;
      encode_bytes(e, view.data, view.size);
      encode_varint(e, view.size);
      wire_type = kUpb_WireType_Delimited;
      break;
    }
    case kUpb_FieldType_Group: {
      /* Groups are bracketed by end-group (emitted first) and start-group
       * (the trailing tag below). */
      size_t size;
      void* submsg = *(void**)field_mem;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      encode_tag(e, f->number, kUpb_WireType_EndGroup);
      encode_message(e, submsg, subm, &size);
      wire_type = kUpb_WireType_StartGroup;
      e->depth++;
      break;
    }
    case kUpb_FieldType_Message: {
      /* Submessage, then its length, then (below) the tag. */
      size_t size;
      void* submsg = *(void**)field_mem;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      encode_message(e, submsg, subm, &size);
      encode_varint(e, size);
      wire_type = kUpb_WireType_Delimited;
      e->depth++;
      break;
    }
    default:
      UPB_UNREACHABLE();
  }
#undef CASE

  encode_tag(e, f->number, wire_type);
}
| |
/* Encodes a repeated (non-map) field. Elements are written in reverse order
 * so they read forwards in the final output. For packed fields the elements
 * are written first, then the total byte length, then the tag (i.e. the wire
 * order tag/len/payload, built backwards). */
static void encode_array(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* f) {
  const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
  bool packed = f->mode & kUpb_LabelFlags_IsPacked;
  size_t pre_len = e->limit - e->ptr;  /* output size before this field */

  if (arr == NULL || arr->len == 0) {
    return;
  }

/* Backwards loop over the array, encoding each element as a varint; a tag of
 * 0 (packed) suppresses per-element tags. */
#define VARINT_CASE(ctype, encode) \
  { \
    const ctype* start = _upb_array_constptr(arr); \
    const ctype* ptr = start + arr->len; \
    uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
    do { \
      ptr--; \
      encode_varint(e, encode); \
      if (tag) encode_varint(e, tag); \
    } while (ptr != start); \
  } \
  break;

/* Per-element tag for fixed-width types, or 0 when packed. */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Float:
      encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      VARINT_CASE(uint64_t, *ptr);
    case kUpb_FieldType_UInt32:
      VARINT_CASE(uint32_t, *ptr);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case kUpb_FieldType_Bool:
      VARINT_CASE(bool, *ptr);
    case kUpb_FieldType_SInt32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case kUpb_FieldType_SInt64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      /* Strings are never packed; each element gets payload/len/tag. */
      const upb_StringView* start = _upb_array_constptr(arr);
      const upb_StringView* ptr = start + arr->len;
      do {
        ptr--;
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      return;
    }
    case kUpb_FieldType_Group: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        /* end-group first, then body, then start-group (backwards). */
        encode_tag(e, f->number, kUpb_WireType_EndGroup);
        encode_message(e, *ptr, subm, &size);
        encode_tag(e, f->number, kUpb_WireType_StartGroup);
      } while (ptr != start);
      e->depth++;
      return;
    }
    case kUpb_FieldType_Message: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_message(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      e->depth++;
      return;
    }
  }
/* NOTE(review): TAG is intentionally(?) left defined here while VARINT_CASE
 * is #undef'd — confirm nothing downstream collides with it. */
#undef VARINT_CASE

  if (packed) {
    /* Prepend the packed payload's length and the single field tag. */
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, kUpb_WireType_Delimited);
  }
}
| |
/* Encodes one map entry as a nested submessage holding the key and value
 * fields of `layout`. Written backwards: value field, key field, byte
 * length, then the map field's tag. */
static void encode_mapentry(upb_encstate* e, uint32_t number,
                            const upb_MiniTable* layout,
                            const upb_MapEntry* ent) {
  const upb_MiniTable_Field* key_field = &layout->fields[0];
  const upb_MiniTable_Field* val_field = &layout->fields[1];
  size_t pre_len = e->limit - e->ptr;
  size_t size;
  encode_scalar(e, &ent->v, layout->subs, val_field);
  encode_scalar(e, &ent->k, layout->subs, key_field);
  size = (e->limit - e->ptr) - pre_len;  /* bytes emitted for this entry */
  encode_varint(e, size);
  encode_tag(e, number, kUpb_WireType_Delimited);
}
| |
/* Encodes a map field. In deterministic mode the entries are first sorted by
 * key via the mapsorter; otherwise they are emitted in hash-table iteration
 * order. */
static void encode_map(upb_encstate* e, const upb_Message* msg,
                       const upb_MiniTable_Sub* subs,
                       const upb_MiniTable_Field* f) {
  const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
  const upb_MiniTable* layout = subs[f->submsg_index].submsg;
  UPB_ASSERT(layout->field_count == 2);  /* map entry = key + value */

  if (map == NULL) return;

  if (e->options & kUpb_Encode_Deterministic) {
    _upb_sortedmap sorted;
    _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
                           &sorted);
    upb_MapEntry ent;
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
      encode_mapentry(e, f->number, layout, &ent);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
  } else {
    upb_strtable_iter i;
    upb_strtable_begin(&i, &map->table);
    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
      upb_StringView key = upb_strtable_iter_key(&i);
      const upb_value val = upb_strtable_iter_value(&i);
      upb_MapEntry ent;
      /* Convert table key/value representations back into entry storage. */
      _upb_map_fromkey(key, &ent.k, map->key_size);
      _upb_map_fromvalue(val, &ent.v, map->val_size);
      encode_mapentry(e, f->number, layout, &ent);
    }
  }
}
| |
/* Returns true if field `f` should be emitted: its oneof case matches, its
 * hasbit is set, or — for fields with no explicit presence (f->presence ==
 * 0, i.e. proto3 scalars and arrays/maps) — its value is non-zero/non-NULL/
 * non-empty. */
static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
                                const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* f) {
  if (f->presence == 0) {
    /* Proto3 presence or map/array. */
    const void* mem = UPB_PTR_AT(msg, f->offset, void);
    switch (f->mode >> kUpb_FieldRep_Shift) {
      case kUpb_FieldRep_1Byte: {
        char ch;
        memcpy(&ch, mem, 1);
        return ch != 0;
      }
      /* Pointer-sized reps fold into the 4- or 8-byte case depending on the
       * target word size. */
#if UINTPTR_MAX == 0xffffffff
      case kUpb_FieldRep_Pointer:
#endif
      case kUpb_FieldRep_4Byte: {
        uint32_t u32;
        memcpy(&u32, mem, 4);
        return u32 != 0;
      }
#if UINTPTR_MAX != 0xffffffff
      case kUpb_FieldRep_Pointer:
#endif
      case kUpb_FieldRep_8Byte: {
        uint64_t u64;
        memcpy(&u64, mem, 8);
        return u64 != 0;
      }
      case kUpb_FieldRep_StringView: {
        const upb_StringView* str = (const upb_StringView*)mem;
        return str->size != 0;
      }
      default:
        UPB_UNREACHABLE();
    }
  } else if (f->presence > 0) {
    /* Proto2 presence: hasbit. */
    return _upb_hasbit_field(msg, f);
  } else {
    /* Field is in a oneof. */
    return _upb_getoneofcase_field(msg, f) == f->number;
  }
}
| |
/* Dispatches encoding of one field to the array/map/scalar encoder based on
 * the field's mode. */
static void encode_field(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* field) {
  switch (upb_FieldMode_Get(field)) {
    case kUpb_FieldMode_Array:
      encode_array(e, msg, subs, field);
      break;
    case kUpb_FieldMode_Map:
      encode_map(e, msg, subs, field);
      break;
    case kUpb_FieldMode_Scalar:
      encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
      break;
    default:
      UPB_UNREACHABLE();
  }
}
| |
/* message MessageSet {
 *   repeated group Item = 1 {
 *     required int32 type_id = 2;
 *     required string message = 3;
 *   }
 * } */
/* Encodes one extension as a MessageSet item. Because the encoder writes
 * backwards, the calls below appear in reverse: the final output reads
 * start-group(1), type_id(2), message(3), end-group(1). */
static void encode_msgset_item(upb_encstate* e,
                               const upb_Message_Extension* ext) {
  size_t size;
  encode_tag(e, 1, kUpb_WireType_EndGroup);
  encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
  encode_varint(e, size);
  encode_tag(e, 3, kUpb_WireType_Delimited);
  encode_varint(e, ext->ext->field.number);
  encode_tag(e, 2, kUpb_WireType_Varint);
  encode_tag(e, 1, kUpb_WireType_StartGroup);
}
| |
/* Encodes `msg` (layout `m`): unknown fields, then extensions, then declared
 * fields — all written backwards so the output reads forwards. On return
 * *size holds the number of bytes this message contributed. Longjmps on a
 * missing required field (when checking is enabled) or OOM. */
static void encode_message(upb_encstate* e, const upb_Message* msg,
                           const upb_MiniTable* m, size_t* size) {
  size_t pre_len = e->limit - e->ptr;

  if ((e->options & kUpb_Encode_CheckRequired) && m->required_count) {
    /* Load the first 8 bytes of the message (byte-order normalized) and
     * test them against the table's required-field mask. */
    uint64_t msg_head;
    memcpy(&msg_head, msg, 8);
    msg_head = _upb_BigEndian_Swap64(msg_head);
    if (upb_MiniTable_requiredmask(m) & ~msg_head) {
      encode_err(e);
    }
  }

  if ((e->options & kUpb_Encode_SkipUnknown) == 0) {
    size_t unknown_size;
    const char* unknown = upb_Message_GetUnknown(msg, &unknown_size);

    if (unknown) {
      /* Unknown bytes are already in wire format; copy them verbatim. */
      encode_bytes(e, unknown, unknown_size);
    }
  }

  if (m->ext != kUpb_ExtMode_NonExtendable) {
    /* Encode all extensions together. Unlike C++, we do not attempt to keep
     * these in field number order relative to normal fields or even to each
     * other. */
    size_t ext_count;
    const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count);
    if (ext_count) {
      const upb_Message_Extension* end = ext + ext_count;
      for (; ext != end; ext++) {
        if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) {
          encode_msgset_item(e, ext);
        } else {
          encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
        }
      }
    }
  }

  if (m->field_count) {
    /* Iterate fields in reverse declaration order; since output is written
     * backwards, the result reads in declaration order. */
    const upb_MiniTable_Field* f = &m->fields[m->field_count];
    const upb_MiniTable_Field* first = &m->fields[0];
    while (f != first) {
      f--;
      if (encode_shouldencode(e, msg, m->subs, f)) {
        encode_field(e, msg, m->subs, f);
      }
    }
  }

  *size = (e->limit - e->ptr) - pre_len;
}
| |
/* Public entry point: serializes `msg` (layout `l`) into memory allocated
 * from `arena`, storing the length in *size. Returns NULL (and *size == 0)
 * on failure; a successful empty encoding returns a non-NULL sentinel so
 * callers can distinguish it from failure. */
char* upb_Encode(const void* msg, const upb_MiniTable* l, int options,
                 upb_Arena* arena, size_t* size) {
  upb_encstate e;
  /* The high 16 bits of `options` carry the max recursion depth. */
  unsigned depth = (unsigned)options >> 16;

  e.alloc = upb_Arena_Alloc(arena);
  e.buf = NULL;
  e.limit = NULL;
  e.ptr = NULL;
  e.depth = depth ? depth : 64;  /* default depth limit when unspecified */
  e.options = options;
  _upb_mapsorter_init(&e.sorter);
  char* ret = NULL;

  /* Encoding errors (depth exhaustion, OOM, missing required fields)
   * longjmp back to this UPB_SETJMP. */
  if (UPB_SETJMP(e.err)) {
    *size = 0;
    ret = NULL;
  } else {
    encode_message(&e, msg, l, size);
    *size = e.limit - e.ptr;  /* data sits at the END of the buffer */
    if (*size == 0) {
      /* Empty but successful: return a distinguishable non-NULL pointer. */
      static char ch;
      ret = &ch;
    } else {
      UPB_ASSERT(e.ptr);
      ret = e.ptr;
    }
  }

  _upb_mapsorter_destroy(&e.sorter);
  return ret;
}
| |
| /** upb/msg.c ************************************************************/ |
| |
| |
| /** upb_Message |
| * *******************************************************************/ |
| |
| static const size_t overhead = sizeof(upb_Message_InternalData); |
| |
| static const upb_Message_Internal* upb_Message_Getinternal_const( |
| const upb_Message* msg) { |
| ptrdiff_t size = sizeof(upb_Message_Internal); |
| return (upb_Message_Internal*)((char*)msg - size); |
| } |
| |
/* Out-of-line wrapper around the inline message constructor
 * _upb_Message_New_inl(). */
upb_Message* _upb_Message_New(const upb_MiniTable* l, upb_Arena* a) {
  return _upb_Message_New_inl(l, a);
}
| |
/* Zeroes the message AND its internal header, which sits immediately before
 * `msg` (hence the negative offset); upb_msg_sizeof(l) covers both. */
void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) {
  void* mem = UPB_PTR_AT(msg, -sizeof(upb_Message_Internal), char);
  memset(mem, 0, upb_msg_sizeof(l));
}
| |
/* Ensures `msg`'s internal block has at least `need` free bytes between the
 * unknown-field region (growing up from the front; see
 * _upb_Message_AddUnknown) and the extension array (growing down from the
 * back; see _upb_Message_Getorcreateext). Allocates or grows the block as
 * needed. Returns false on OOM. */
static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  if (!in->internal) {
    /* No internal data, allocate from scratch. */
    size_t size = UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
    upb_Message_InternalData* internal = upb_Arena_Malloc(arena, size);
    if (!internal) return false;
    internal->size = size;
    internal->unknown_end = overhead;  /* unknown bytes start after header */
    internal->ext_begin = size;        /* extension array starts empty */
    in->internal = internal;
  } else if (in->internal->ext_begin - in->internal->unknown_end < need) {
    /* Internal data is too small, reallocate. */
    size_t new_size = _upb_Log2CeilingSize(in->internal->size + need);
    size_t ext_bytes = in->internal->size - in->internal->ext_begin;
    size_t new_ext_begin = new_size - ext_bytes;
    upb_Message_InternalData* internal =
        upb_Arena_Realloc(arena, in->internal, in->internal->size, new_size);
    if (!internal) return false;
    if (ext_bytes) {
      /* Need to move extension data to the end. */
      char* ptr = (char*)internal;
      memmove(ptr + new_ext_begin, ptr + internal->ext_begin, ext_bytes);
    }
    internal->ext_begin = new_ext_begin;
    internal->size = new_size;
    in->internal = internal;
  }
  UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
  return true;
}
| |
| bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, |
| upb_Arena* arena) { |
| if (!realloc_internal(msg, len, arena)) return false; |
| upb_Message_Internal* in = upb_Message_Getinternal(msg); |
| memcpy(UPB_PTR_AT(in->internal, in->internal->unknown_end, char), data, len); |
| in->internal->unknown_end += len; |
| return true; |
| } |
| |
| void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) { |
| upb_Message_Internal* in = upb_Message_Getinternal(msg); |
| if (in->internal) { |
| in->internal->unknown_end = overhead; |
| } |
| } |
| |
| const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) { |
| const upb_Message_Internal* in = upb_Message_Getinternal_const(msg); |
| if (in->internal) { |
| *len = in->internal->unknown_end - overhead; |
| return (char*)(in->internal + 1); |
| } else { |
| *len = 0; |
| return NULL; |
| } |
| } |
| |
| const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg, |
| size_t* count) { |
| const upb_Message_Internal* in = upb_Message_Getinternal_const(msg); |
| if (in->internal) { |
| *count = (in->internal->size - in->internal->ext_begin) / |
| sizeof(upb_Message_Extension); |
| return UPB_PTR_AT(in->internal, in->internal->ext_begin, void); |
| } else { |
| *count = 0; |
| return NULL; |
| } |
| } |
| |
| const upb_Message_Extension* _upb_Message_Getext( |
| const upb_Message* msg, const upb_MiniTable_Extension* e) { |
| size_t n; |
| const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n); |
| |
| /* For now we use linear search exclusively to find extensions. If this |
| * becomes an issue due to messages with lots of extensions, we can introduce |
| * a table of some sort. */ |
| for (size_t i = 0; i < n; i++) { |
| if (ext[i].ext == e) { |
| return &ext[i]; |
| } |
| } |
| |
| return NULL; |
| } |
| |
/* Removes the extension identified by `ext_l`, if present.  Removal copies
 * the FIRST element of the extension array over the removed slot, then
 * shrinks the array from the front by advancing ext_begin one element. */
void _upb_Message_Clearext(upb_Message* msg,
                           const upb_MiniTable_Extension* ext_l) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  if (!in->internal) return;
  /* `base` is the first (lowest-addressed) extension slot. */
  const upb_Message_Extension* base =
      UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
  upb_Message_Extension* ext =
      (upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
  if (ext) {
    *ext = *base; /* Overwrite the removed slot with the front element. */
    in->internal->ext_begin += sizeof(upb_Message_Extension);
  }
}
| |
/* Returns the existing extension for `e`, or allocates a zero-initialized
 * one at the front of the extension region.  Returns NULL on allocation
 * failure. */
upb_Message_Extension* _upb_Message_Getorcreateext(
    upb_Message* msg, const upb_MiniTable_Extension* e, upb_Arena* arena) {
  upb_Message_Extension* ext =
      (upb_Message_Extension*)_upb_Message_Getext(msg, e);
  if (ext) return ext;
  /* realloc_internal may move the internal block, so re-fetch it below. */
  if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  /* The extension array grows downward: new slots come off the front. */
  in->internal->ext_begin -= sizeof(upb_Message_Extension);
  ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
  memset(ext, 0, sizeof(upb_Message_Extension));
  ext->ext = e;
  return ext;
}
| |
| size_t upb_Message_ExtensionCount(const upb_Message* msg) { |
| size_t count; |
| _upb_Message_Getexts(msg, &count); |
| return count; |
| } |
| |
| /** upb_Array *****************************************************************/ |
| |
/* Grows the array's element buffer so it can hold at least `min_size`
 * elements, doubling from a minimum capacity of 4.  Returns false on
 * allocation failure.  Note: the low 3 bits of arr->data carry lg2 of the
 * element size; _upb_array_ptr()/_upb_tag_arrptr() pack and unpack that tag. */
bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena) {
  size_t new_size = UPB_MAX(arr->size, 4);
  int elem_size_lg2 = arr->data & 7;
  size_t old_bytes = arr->size << elem_size_lg2;
  size_t new_bytes;
  void* ptr = _upb_array_ptr(arr);

  /* Log2 ceiling of size. */
  while (new_size < min_size) new_size *= 2;

  new_bytes = new_size << elem_size_lg2;
  ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);

  if (!ptr) {
    return false;
  }

  /* Re-tag the (possibly moved) buffer with the element-size bits. */
  arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
  arr->size = new_size;
  return true;
}
| |
| static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2, |
| upb_Arena* arena) { |
| upb_Array* arr = *arr_ptr; |
| if (!arr) { |
| arr = _upb_Array_New(arena, 4, elem_size_lg2); |
| if (!arr) return NULL; |
| *arr_ptr = arr; |
| } |
| return arr; |
| } |
| |
| void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, |
| int elem_size_lg2, upb_Arena* arena) { |
| upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); |
| return arr && _upb_Array_Resize(arr, size, arena) ? _upb_array_ptr(arr) |
| : NULL; |
| } |
| |
| bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, |
| int elem_size_lg2, upb_Arena* arena) { |
| upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); |
| if (!arr) return false; |
| |
| size_t elems = arr->len; |
| |
| if (!_upb_Array_Resize(arr, elems + 1, arena)) { |
| return false; |
| } |
| |
| char* data = _upb_array_ptr(arr); |
| memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); |
| return true; |
| } |
| |
| /** upb_Map *******************************************************************/ |
| |
| upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { |
| upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map)); |
| |
| if (!map) { |
| return NULL; |
| } |
| |
| upb_strtable_init(&map->table, 4, a); |
| map->key_size = key_size; |
| map->val_size = value_size; |
| |
| return map; |
| } |
| |
| static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key, |
| void* b_key, size_t size) { |
| const upb_tabent* const* a = _a; |
| const upb_tabent* const* b = _b; |
| upb_StringView a_tabkey = upb_tabstrview((*a)->key); |
| upb_StringView b_tabkey = upb_tabstrview((*b)->key); |
| _upb_map_fromkey(a_tabkey, a_key, size); |
| _upb_map_fromkey(b_tabkey, b_key, size); |
| } |
| |
| #define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1)) |
| |
static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
  /* qsort comparator: ascending order over signed 64-bit map keys. */
  int64_t lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 8);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
| |
static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
  /* qsort comparator: ascending order over unsigned 64-bit map keys. */
  uint64_t lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 8);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
| |
static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
  /* qsort comparator: ascending order over signed 32-bit map keys. */
  int32_t lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 4);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
| |
static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
  /* qsort comparator: ascending order over unsigned 32-bit map keys. */
  uint32_t lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 4);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
| |
static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
  /* qsort comparator: false sorts before true. */
  bool lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 1);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
| |
/* qsort comparator for string/bytes map keys. */
static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
  upb_StringView a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
  size_t common_size = UPB_MIN(a.size, b.size);
  int cmp = memcmp(a.data, b.data, common_size);
  /* NOTE(review): returning -cmp orders key CONTENT in descending order,
   * while the length tie-break below (and every integer comparator above)
   * orders ascending.  The mixed directions still form a deterministic total
   * order, but the inconsistency looks unintentional -- confirm against the
   * encoder's expected deterministic map-key order before changing it. */
  if (cmp) return -cmp;
  return UPB_COMPARE_INTEGERS(a.size, b.size);
}
| |
| #undef UPB_COMPARE_INTEGERS |
| |
| bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, |
| const upb_Map* map, _upb_sortedmap* sorted) { |
| int map_size = _upb_Map_Size(map); |
| sorted->start = s->size; |
| sorted->pos = sorted->start; |
| sorted->end = sorted->start + map_size; |
| |
| /* Grow s->entries if necessary. */ |
| if (sorted->end > s->cap) { |
| s->cap = _upb_Log2CeilingSize(sorted->end); |
| s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); |
| if (!s->entries) return false; |
| } |
| |
| s->size = sorted->end; |
| |
| /* Copy non-empty entries from the table to s->entries. */ |
| upb_tabent const** dst = &s->entries[sorted->start]; |
| const upb_tabent* src = map->table.t.entries; |
| const upb_tabent* end = src + upb_table_size(&map->table.t); |
| for (; src < end; src++) { |
| if (!upb_tabent_isempty(src)) { |
| *dst = src; |
| dst++; |
| } |
| } |
| UPB_ASSERT(dst == &s->entries[sorted->end]); |
| |
| /* Sort entries according to the key type. */ |
| |
| int (*compar)(const void*, const void*); |
| |
| switch (key_type) { |
| case kUpb_FieldType_Int64: |
| case kUpb_FieldType_SFixed64: |
| case kUpb_FieldType_SInt64: |
| compar = _upb_mapsorter_cmpi64; |
| break; |
| case kUpb_FieldType_UInt64: |
| case kUpb_FieldType_Fixed64: |
| compar = _upb_mapsorter_cmpu64; |
| break; |
| case kUpb_FieldType_Int32: |
| case kUpb_FieldType_SInt32: |
| case kUpb_FieldType_SFixed32: |
| case kUpb_FieldType_Enum: |
| compar = _upb_mapsorter_cmpi32; |
| break; |
| case kUpb_FieldType_UInt32: |
| case kUpb_FieldType_Fixed32: |
| compar = _upb_mapsorter_cmpu32; |
| break; |
| case kUpb_FieldType_Bool: |
| compar = _upb_mapsorter_cmpbool; |
| break; |
| case kUpb_FieldType_String: |
| case kUpb_FieldType_Bytes: |
| compar = _upb_mapsorter_cmpstr; |
| break; |
| default: |
| UPB_UNREACHABLE(); |
| } |
| |
| qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); |
| return true; |
| } |
| |
| /** upb_ExtensionRegistry |
| * ****************************************************************/ |
| |
/* Maps (extendee mini-table, field number) -> upb_MiniTable_Extension*. */
struct upb_ExtensionRegistry {
  upb_Arena* arena; /* All registry memory is allocated from this arena. */
  upb_strtable exts; /* Key is upb_MiniTable* concatenated with fieldnum. */
};
| |
#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))

/* Serializes (mini-table pointer, field number) into `buf`, which must hold
 * at least EXTREG_KEY_SIZE bytes; this byte string is the strtable key. */
static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
  memcpy(buf, &l, sizeof(l));
  memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum));
}
| |
| upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) { |
| upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r)); |
| if (!r) return NULL; |
| r->arena = arena; |
| if (!upb_strtable_init(&r->exts, 8, arena)) return NULL; |
| return r; |
| } |
| |
/* Registers `count` extensions from the array `e`.  All-or-nothing: on
 * insertion failure, every entry added by THIS call is removed again and
 * false is returned, leaving the registry unchanged. */
bool _upb_extreg_add(upb_ExtensionRegistry* r,
                     const upb_MiniTable_Extension** e, size_t count) {
  char buf[EXTREG_KEY_SIZE];
  const upb_MiniTable_Extension** start = e;
  const upb_MiniTable_Extension** end = UPB_PTRADD(e, count);
  for (; e < end; e++) {
    const upb_MiniTable_Extension* ext = *e;
    extreg_key(buf, ext->extendee, ext->field.number);
    if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,
                             upb_value_constptr(ext), r->arena)) {
      goto failure;
    }
  }
  return true;

failure:
  /* Back out the entries previously added.  `e` marks the first entry that
   * was NOT inserted, so the rollback covers [start, e). */
  for (end = e, e = start; e < end; e++) {
    const upb_MiniTable_Extension* ext = *e;
    extreg_key(buf, ext->extendee, ext->field.number);
    upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL);
  }
  return false;
}
| |
| const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r, |
| const upb_MiniTable* l, |
| uint32_t num) { |
| char buf[EXTREG_KEY_SIZE]; |
| upb_value v; |
| extreg_key(buf, l, num); |
| if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) { |
| return upb_value_getconstptr(v); |
| } else { |
| return NULL; |
| } |
| } |
| |
| /** upb/table.c ************************************************************/ |
| /* |
| * upb_table Implementation |
| * |
| * Implementation is heavily inspired by Lua's ltable.c. |
| */ |
| |
| #include <string.h> |
| |
| |
| /* Must be last. */ |
| |
#define UPB_MAXARRSIZE 16 /* 64k. */

/* From Chromium. */
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))

/* Maximum fraction of the table that may be occupied before it is grown. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table. This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects). The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
| |
static bool is_pow2(uint64_t v) {
  /* True for exact powers of two -- and for 0, since 0 & (0 - 1) == 0;
   * log2ceil() below relies on that behavior for v == 0. */
  return (v & (v - 1)) == 0;
}
| |
/* Wraps a raw uint64_t payload in a upb_value. */
static upb_value _upb_value_val(uint64_t val) {
  upb_value ret;
  _upb_value_setval(&ret, val);
  return ret;
}
| |
| static int log2ceil(uint64_t v) { |
| int ret = 0; |
| bool pow2 = is_pow2(v); |
| while (v >>= 1) ret++; |
| ret = pow2 ? ret : ret + 1; /* Ceiling. */ |
| return UPB_MIN(UPB_MAXARRSIZE, ret); |
| } |
| |
| char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { |
| size_t n; |
| char* p; |
| |
| /* Prevent overflow errors. */ |
| if (len == SIZE_MAX) return NULL; |
| /* Always null-terminate, even if binary data; but don't rely on the input to |
| * have a null-terminating byte since it may be a raw binary buffer. */ |
| n = len + 1; |
| p = upb_Arena_Malloc(a, n); |
| if (p) { |
| memcpy(p, s, len); |
| p[len] = 0; |
| } |
| return p; |
| } |
| |
/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
  uintptr_t num; /* Integer key (inttable). */
  struct {
    const char* str; /* Not necessarily NUL-terminated. */
    size_t len;
  } str; /* String key (strtable). */
} lookupkey_t;
| |
| static lookupkey_t strkey2(const char* str, size_t len) { |
| lookupkey_t k; |
| k.str.str = str; |
| k.str.len = len; |
| return k; |
| } |
| |
| static lookupkey_t intkey(uintptr_t key) { |
| lookupkey_t k; |
| k.num = key; |
| return k; |
| } |
| |
/* Hash of a stored table key. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Equality between a stored key and a lookup key. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
| |
| /* Base table (shared code) ***************************************************/ |
| |
/* Identity hash for integer keys: just the low 32 bits of the key. */
static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }
| |
/* Main-position bucket for `hash`; mask wraps into the table size. */
static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
  return t->entries + (hash & t->mask);
}
| |
/* An array slot is present unless it holds the all-ones sentinel. */
static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }

/* True once the table has hit its MAX_LOAD-derived occupancy limit. */
static bool isfull(upb_table* t) { return t->count == t->max_count; }
| |
/* Initializes `t` to an empty table of upb_table_size(t) zeroed entries
 * allocated from `a` (no allocation when the computed byte size is 0).
 * Returns false on allocation failure. */
static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;
  /* mask is size-1 when there are entries; 0 for the empty table. */
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  t->max_count = upb_table_size(t) * MAX_LOAD;
  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes > 0) {
    t->entries = upb_Arena_Malloc(a, bytes);
    if (!t->entries) return false;
    memset(t->entries, 0, bytes);
  } else {
    t->entries = NULL;
  }
  return true;
}
| |
| static upb_tabent* emptyent(upb_table* t, upb_tabent* e) { |
| upb_tabent* begin = t->entries; |
| upb_tabent* end = begin + upb_table_size(t); |
| for (e = e + 1; e < end; e++) { |
| if (upb_tabent_isempty(e)) return e; |
| } |
| for (e = begin; e < end; e++) { |
| if (upb_tabent_isempty(e)) return e; |
| } |
| UPB_ASSERT(false); |
| return NULL; |
| } |
| |
/* Mutable variant of upb_getentry(). */
static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
  return (upb_tabent*)upb_getentry(t, hash);
}
| |
| static const upb_tabent* findentry(const upb_table* t, lookupkey_t key, |
| uint32_t hash, eqlfunc_t* eql) { |
| const upb_tabent* e; |
| |
| if (t->size_lg2 == 0) return NULL; |
| e = upb_getentry(t, hash); |
| if (upb_tabent_isempty(e)) return NULL; |
| while (1) { |
| if (eql(e->key, key)) return e; |
| if ((e = e->next) == NULL) return NULL; |
| } |
| } |
| |
/* Mutable variant of findentry(). */
static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
                                     uint32_t hash, eqlfunc_t* eql) {
  return (upb_tabent*)findentry(t, key, hash, eql);
}
| |
| static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v, |
| uint32_t hash, eqlfunc_t* eql) { |
| const upb_tabent* e = findentry(t, key, hash, eql); |
| if (e) { |
| if (v) { |
| _upb_value_setval(v, e->val.val); |
| } |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| /* The given key must not already exist in the table. */ |
| static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey, |
| upb_value val, uint32_t hash, hashfunc_t* hashfunc, |
| eqlfunc_t* eql) { |
| upb_tabent* mainpos_e; |
| upb_tabent* our_e; |
| |
| UPB_ASSERT(findentry(t, key, hash, eql) == NULL); |
| |
| t->count++; |
| mainpos_e = getentry_mutable(t, hash); |
| our_e = mainpos_e; |
| |
| if (upb_tabent_isempty(mainpos_e)) { |
| /* Our main position is empty; use it. */ |
| our_e->next = NULL; |
| } else { |
| /* Collision. */ |
| upb_tabent* new_e = emptyent(t, mainpos_e); |
| /* Head of collider's chain. */ |
| upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key)); |
| if (chain == mainpos_e) { |
| /* Existing ent is in its main position (it has the same hash as us, and |
| * is the head of our chain). Insert to new ent and append to this chain. |
| */ |
| new_e->next = mainpos_e->next; |
| mainpos_e->next = new_e; |
| our_e = new_e; |
| } else { |
| /* Existing ent is not in its main position (it is a node in some other |
| * chain). This implies that no existing ent in the table has our hash. |
| * Evict it (updating its chain) and use its ent for head of our chain. */ |
| *new_e = *mainpos_e; /* copies next. */ |
| while (chain->next != mainpos_e) { |
| chain = (upb_tabent*)chain->next; |
| UPB_ASSERT(chain); |
| } |
| chain->next = new_e; |
| our_e = mainpos_e; |
| our_e->next = NULL; |
| } |
| } |
| our_e->key = tabkey; |
| our_e->val.val = val.val; |
| UPB_ASSERT(findentry(t, key, hash, eql) == our_e); |
| } |
| |
/* Removes `key` from the table if present: stores the removed value in *val
 * and the stored key in *removed (both optional) and returns true.  Returns
 * false when the key is not in the table. */
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
               upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
  upb_tabent* chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Promote the next entry into the head slot so the chain stays
       * anchored at its main position; its old slot becomes free. */
      upb_tabent* move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0; /* Make the slot empty. */
    } else {
      chain->key = 0; /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; unlink it from the chain. */
      upb_tabent* rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0; /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
| |
/* Iteration helper: index of the first occupied entry after `i`, or the
 * end sentinel SIZE_MAX - 1 when none remain (distinct from the -1 that
 * begin() passes in). */
static size_t next(const upb_table* t, size_t i) {
  do {
    if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
  } while (upb_tabent_isempty(&t->entries[i]));

  return i;
}
| |
/* Index of the first occupied entry, or the end sentinel when empty. */
static size_t begin(const upb_table* t) { return next(t, -1); }
| |
| /* upb_strtable ***************************************************************/ |
| |
| /* A simple "subclass" of upb_table that only adds a hash function for strings. |
| */ |
| |
| static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) { |
| uint32_t len = (uint32_t)k2.str.len; |
| char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1); |
| if (str == NULL) return 0; |
| memcpy(str, &len, sizeof(uint32_t)); |
| if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); |
| str[sizeof(uint32_t) + k2.str.len] = '\0'; |
| return (uintptr_t)str; |
| } |
| |
| /* Adapted from ABSL's wyhash. */ |
| |
| static uint64_t UnalignedLoad64(const void* p) { |
| uint64_t val; |
| memcpy(&val, p, 8); |
| return val; |
| } |
| |
| static uint32_t UnalignedLoad32(const void* p) { |
| uint32_t val; |
| memcpy(&val, p, 4); |
| return val; |
| } |
| |
| #if defined(_MSC_VER) && defined(_M_X64) |
| #include <intrin.h> |
| #endif |
| |
/* Computes a * b, returning the low 64 bits of the result and storing the high
 * 64 bits in |*high|. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  /* GCC/Clang: use the native 128-bit integer type. */
  __uint128_t p = v0;
  p *= v1;
  *out_high = (uint64_t)(p >> 64);
  return (uint64_t)p;
#elif defined(_MSC_VER) && defined(_M_X64)
  /* MSVC on x64: hardware intrinsic from <intrin.h>. */
  return _umul128(v0, v1, out_high);
#else
  /* Portable fallback: schoolbook multiplication on 32-bit halves. */
  uint64_t a32 = v0 >> 32;
  uint64_t a00 = v0 & 0xffffffff;
  uint64_t b32 = v1 >> 32;
  uint64_t b00 = v1 & 0xffffffff;
  uint64_t high = a32 * b32;
  uint64_t low = a00 * b00;
  uint64_t mid1 = a32 * b00;
  uint64_t mid2 = a00 * b32;
  low += (mid1 << 32) + (mid2 << 32);
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  high += (mid1 >> 32) + (mid2 >> 32);
  *out_high = high;
  return low;
#endif
}
| |
/* Mixes two 64-bit values via a full 64x64 -> 128-bit multiply, folding the
 * two result halves together with xor. */
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t hi;
  uint64_t lo = upb_umul128(v0, v1, &hi);
  return lo ^ hi;
}
| |
/* Portable adaptation of Abseil's wyhash: hashes `len` bytes of `data` with
 * `seed` and a 5-entry `salt` table.  Consumes 64-byte chunks with two
 * parallel states, then 16-byte chunks, then a 0-16 byte tail. */
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* ptr = (const uint8_t*)data;
  uint64_t starting_length = (uint64_t)len;
  uint64_t current_state = seed ^ salt[0];

  if (len > 64) {
    // If we have more than 64 bytes, we're going to handle chunks of 64
    // bytes at a time. We're going to build up two separate hash states
    // which we will then hash together.
    uint64_t duplicated_state = current_state;

    do {
      uint64_t a = UnalignedLoad64(ptr);
      uint64_t b = UnalignedLoad64(ptr + 8);
      uint64_t c = UnalignedLoad64(ptr + 16);
      uint64_t d = UnalignedLoad64(ptr + 24);
      uint64_t e = UnalignedLoad64(ptr + 32);
      uint64_t f = UnalignedLoad64(ptr + 40);
      uint64_t g = UnalignedLoad64(ptr + 48);
      uint64_t h = UnalignedLoad64(ptr + 56);

      uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
      uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
      current_state = (cs0 ^ cs1);

      uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
      uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
      duplicated_state = (ds0 ^ ds1);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    current_state = current_state ^ duplicated_state;
  }

  // We now have a data `ptr` with at most 64 bytes and the current state
  // of the hashing state machine stored in current_state.
  while (len > 16) {
    uint64_t a = UnalignedLoad64(ptr);
    uint64_t b = UnalignedLoad64(ptr + 8);

    current_state = WyhashMix(a ^ salt[1], b ^ current_state);

    ptr += 16;
    len -= 16;
  }

  // We now have a data `ptr` with at most 16 bytes.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // When we have at least 9 and at most 16 bytes, set A to the first 64
    // bits of the input and B to the last 64 bits of the input. Yes, they will
    // overlap in the middle if we are working with less than the full 16
    // bytes.
    a = UnalignedLoad64(ptr);
    b = UnalignedLoad64(ptr + len - 8);
  } else if (len > 3) {
    // If we have at least 4 and at most 8 bytes, set A to the first 32
    // bits and B to the last 32 bits.
    a = UnalignedLoad32(ptr);
    b = UnalignedLoad32(ptr + len - 4);
  } else if (len > 0) {
    // If we have at least 1 and at most 3 bytes, read all of the provided
    // bits into A, with some adjustments.
    a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
    b = 0;
  } else {
    a = 0;
    b = 0;
  }

  /* Final mix folds the tail and the original input length into the state. */
  uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
  uint64_t z = salt[1] ^ starting_length;
  return WyhashMix(w, z);
}
| |
| const uint64_t kWyhashSalt[5] = { |
| 0x243F6A8885A308D3ULL, 0
|