| // Protocol Buffers - Google's data interchange format |
| // Copyright 2025 Google LLC. All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file or at |
| // https://developers.google.com/open-source/licenses/bsd |
| |
| #include "upb/wire/decode_fast/select.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| |
| #include "upb/base/descriptor_constants.h" |
| #include "upb/base/internal/log2.h" |
| #include "upb/mini_table/field.h" |
| #include "upb/mini_table/internal/field.h" |
| #include "upb/mini_table/internal/message.h" |
| #include "upb/mini_table/message.h" |
| #include "upb/wire/decode_fast/combinations.h" |
| #include "upb/wire/decode_fast/data.h" |
| #include "upb/wire/types.h" |
| |
| // Must be last. |
| #include "upb/port/def.inc" |
| |
| uint32_t GetWireTypeForField(const upb_MiniTableField* field) { |
| if (upb_MiniTableField_IsPacked(field)) return kUpb_WireType_Delimited; |
| switch (upb_MiniTableField_Type(field)) { |
| case kUpb_FieldType_Double: |
| case kUpb_FieldType_Fixed64: |
| case kUpb_FieldType_SFixed64: |
| return kUpb_WireType_64Bit; |
| case kUpb_FieldType_Float: |
| case kUpb_FieldType_Fixed32: |
| case kUpb_FieldType_SFixed32: |
| return kUpb_WireType_32Bit; |
| case kUpb_FieldType_Int64: |
| case kUpb_FieldType_UInt64: |
| case kUpb_FieldType_Int32: |
| case kUpb_FieldType_Bool: |
| case kUpb_FieldType_UInt32: |
| case kUpb_FieldType_Enum: |
| case kUpb_FieldType_SInt32: |
| case kUpb_FieldType_SInt64: |
| return kUpb_WireType_Varint; |
| case kUpb_FieldType_Group: |
| return kUpb_WireType_StartGroup; |
| case kUpb_FieldType_Message: |
| case kUpb_FieldType_String: |
| case kUpb_FieldType_Bytes: |
| return kUpb_WireType_Delimited; |
| default: |
| UPB_UNREACHABLE(); |
| } |
| } |
| |
| static bool upb_DecodeFast_GetEncodedTag(const upb_MiniTableField* field, |
| uint16_t* out_tag, |
| upb_DecodeFast_TagSize* out_tag_size) { |
| uint32_t num = upb_MiniTableField_Number(field); |
| uint32_t wire_type = GetWireTypeForField(field); |
| if (num >= 2048) { |
| return false; // Tag >2 bytes, too large for fast decode. |
| } else if (num >= 16) { |
| *out_tag = ((num << 4) & 0x7f00) | 0x80 | ((num << 3) & 0x7f) | wire_type; |
| *out_tag_size = kUpb_DecodeFast_Tag2Byte; |
| return true; |
| } else { |
| *out_tag = (num << 3) | wire_type; |
| *out_tag_size = kUpb_DecodeFast_Tag1Byte; |
| return true; |
| } |
| } |
| |
| static bool upb_DecodeFast_GetFieldCardinality( |
| const upb_MiniTableField* field, |
| upb_DecodeFast_Cardinality* out_cardinality) { |
| switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(field)) { |
| case kUpb_FieldMode_Map: |
| return false; // Can't parse maps with fast decode. |
| case kUpb_FieldMode_Array: |
| *out_cardinality = upb_MiniTableField_IsPacked(field) |
| ? kUpb_DecodeFast_Packed |
| : kUpb_DecodeFast_Repeated; |
| return true; |
| case kUpb_FieldMode_Scalar: |
| *out_cardinality = upb_MiniTableField_IsInOneof(field) |
| ? kUpb_DecodeFast_Oneof |
| : kUpb_DecodeFast_Scalar; |
| return true; |
| } |
| UPB_UNREACHABLE(); |
| } |
| |
| static bool upb_DecodeFast_GetFieldType(const upb_MiniTable* m, |
| const upb_MiniTableField* field, |
| upb_DecodeFast_Type* out_type) { |
| // We use descriptortype directly instead of upb_MiniTableField_Type because |
| // we want the munging of field->descriptortype: |
| // - kUpb_FieldType_String -> kUpb_FieldType_Bytes if no UTF-8 validation is |
| // required. |
| // - kUpb_FieldType_Enum -> kUpb_FieldType_Int32 if the enum is open. |
| upb_FieldType type = field->UPB_PRIVATE(descriptortype); |
| |
| if (type == kUpb_FieldType_Group) { |
| return false; // Currently not supported. |
| } |
| |
| if (upb_MiniTableField_IsClosedEnum(field)) { |
| *out_type = kUpb_DecodeFast_ClosedEnum; |
| return true; |
| } |
| |
| static const int8_t types[] = { |
| [kUpb_FieldType_Bool] = kUpb_DecodeFast_Bool, |
| [kUpb_FieldType_Enum] = kUpb_DecodeFast_Varint32, |
| [kUpb_FieldType_Int32] = kUpb_DecodeFast_Varint32, |
| [kUpb_FieldType_UInt32] = kUpb_DecodeFast_Varint32, |
| [kUpb_FieldType_Int64] = kUpb_DecodeFast_Varint64, |
| [kUpb_FieldType_UInt64] = kUpb_DecodeFast_Varint64, |
| [kUpb_FieldType_Fixed32] = kUpb_DecodeFast_Fixed32, |
| [kUpb_FieldType_SFixed32] = kUpb_DecodeFast_Fixed32, |
| [kUpb_FieldType_Float] = kUpb_DecodeFast_Fixed32, |
| [kUpb_FieldType_Fixed64] = kUpb_DecodeFast_Fixed64, |
| [kUpb_FieldType_SFixed64] = kUpb_DecodeFast_Fixed64, |
| [kUpb_FieldType_Double] = kUpb_DecodeFast_Fixed64, |
| [kUpb_FieldType_SInt32] = kUpb_DecodeFast_ZigZag32, |
| [kUpb_FieldType_SInt64] = kUpb_DecodeFast_ZigZag64, |
| [kUpb_FieldType_String] = kUpb_DecodeFast_String, |
| [kUpb_FieldType_Bytes] = kUpb_DecodeFast_Bytes, |
| [kUpb_FieldType_Message] = kUpb_DecodeFast_Message, |
| }; |
| |
| UPB_ASSERT(type < UPB_ARRAY_SIZE(types)); |
| *out_type = types[type]; |
| return true; |
| } |
| |
| static bool upb_DecodeFast_GetFunctionIndex(const upb_MiniTable* m, |
| const upb_MiniTableField* field, |
| upb_DecodeFast_TagSize tag_size, |
| uint32_t* out_index) { |
| upb_DecodeFast_Cardinality cardinality; |
| upb_DecodeFast_Type type; |
| if (!upb_DecodeFast_GetFieldCardinality(field, &cardinality)) return false; |
| if (!upb_DecodeFast_GetFieldType(m, field, &type)) return false; |
| |
| *out_index = type << 3 | cardinality << 1 | tag_size; |
| return true; |
| } |
| |
| static uint64_t upb_DecodeFast_GetPresence(const upb_MiniTableField* field, |
| uint64_t* out_data) { |
| if (upb_MiniTableField_IsInOneof(field)) { |
| *out_data = upb_MiniTableField_Number(field); |
| return true; |
| } else if (UPB_PRIVATE(_upb_MiniTableField_HasHasbit)(field)) { |
| *out_data = field->presence - 64; |
| return *out_data < 32; // We use uint32_t for hasbits currently. |
| } else { |
| // We only store 32 bits of hasbits back to the message, so for fields that |
| // don't have a hasbit, we just set the high bit which won't be stored. |
| *out_data = 63; |
| return true; |
| } |
| } |
| |
| static bool upb_DecodeFast_GetFunctionData(const upb_MiniTable* m, |
| const upb_MiniTableField* field, |
| uint16_t tag, uint64_t* out_data) { |
| uint64_t offset = UPB_PRIVATE(_upb_MiniTableField_Offset)(field); |
| uint64_t case_offset = |
| upb_MiniTableField_IsInOneof(field) |
| ? UPB_PRIVATE(_upb_MiniTableField_OneofOffset)(field) |
| : 0; |
| uint64_t subofs = 0; |
| if (upb_MiniTableField_IsSubMessage(field) || |
| upb_MiniTableField_IsClosedEnum(field)) { |
| uint64_t idx = field - m->UPB_PRIVATE(fields); |
| // Here we rely on the fact that sizeof(upb_MiniTableField) is the same on |
| // both 32 and 64 bit; if it wasn't, we could generate a bad offset if we |
| // compiled on a 32 bit machine targetting a 64 bit one. |
| UPB_STATIC_ASSERT(sizeof(upb_MiniTableField) % kUpb_SubmsgOffsetBytes == 0, |
| "upb_MiniTableField must be a multiple of the offset"); |
| uint64_t ofs_4byte = |
| idx * (sizeof(upb_MiniTableField) / kUpb_SubmsgOffsetBytes) + |
| field->UPB_PRIVATE(submsg_ofs); |
| // Fasttable is only supported on 64-bit platforms where pointers and the |
| // submessage entries (upb_MiniTableSubInternal) are 8 bytes, requiring |
| // 8-byte alignment. Since the base fields array is aligned to pointer size |
| // (at least 8 bytes), and each submessage entry must be 8-byte aligned, |
| // the total byte offset (4 * ofs_4byte) from the start of the fields array |
| // to the submessage entry is guaranteed to be a multiple of 8. |
| // Consequently, ofs_4byte is guaranteed to be even, and thus this offset |
| // can be scaled by 8 when loading. |
| UPB_ASSERT(ofs_4byte % 2 == 0); |
| subofs = ofs_4byte / 2; |
| } |
| |
| uint64_t presence; |
| |
| return upb_DecodeFast_GetPresence(field, &presence) && |
| upb_DecodeFast_MakeData(offset, case_offset, presence, subofs, tag, |
| out_data); |
| } |
| |
| static bool upb_DecodeFast_TryFillEntry(const upb_MiniTable* m, |
| const upb_MiniTableField* field, |
| bool* out_supported_tag_size, |
| upb_DecodeFast_TableEntry* entry) { |
| UPB_ASSERT(!upb_MiniTableField_IsExtension(field)); |
| uint16_t tag; |
| upb_DecodeFast_TagSize tag_size; |
| *out_supported_tag_size = |
| upb_DecodeFast_GetEncodedTag(field, &tag, &tag_size); |
| return *out_supported_tag_size && |
| upb_DecodeFast_GetFunctionIndex(m, field, tag_size, |
| &entry->function_idx) && |
| UPB_DECODEFAST_ISENABLED( |
| upb_DecodeFast_GetType(entry->function_idx), |
| upb_DecodeFast_GetCardinality(entry->function_idx), |
| upb_DecodeFast_GetTagSize(entry->function_idx)) && |
| upb_DecodeFast_GetFunctionData(m, field, tag, &entry->function_data); |
| } |
| |
| int upb_DecodeFast_BuildTable(const upb_MiniTable* m, |
| upb_DecodeFast_TableEntry table[32]) { |
| if (m->UPB_PRIVATE(ext) & kUpb_ExtMode_IsMapEntry) return 0; |
| |
| for (size_t i = 0; i < 32; i++) { |
| table[i].function_idx = UINT32_MAX; |
| table[i].function_data = 0; |
| } |
| |
| // Fasttable only handles fields with tag size of 1 or 2 bytes. If all known |
| // fields with such tag sizes have supported field types, we can short circuit |
| // slot misses to unknown field handling |
| bool all_supported_tag_size_fields_compatible_with_fast_decode = true; |
| |
| // If, in addition, all handled fields are assigned unique slots, then we can |
| // short circuit slot collision to unknown field handling as well. |
| bool all_fields_assigned_unique_slots = true; |
| |
| int max = 0; |
| for (size_t i = 0, n = upb_MiniTable_FieldCount(m); i < n; i++) { |
| const upb_MiniTableField* field = upb_MiniTable_GetFieldByIndex(m, i); |
| bool supported_tag_size; |
| upb_DecodeFast_TableEntry entry; |
| if (!upb_DecodeFast_TryFillEntry(m, field, &supported_tag_size, &entry)) { |
| if (supported_tag_size) { |
| // Check if this tag collides |
| all_supported_tag_size_fields_compatible_with_fast_decode = false; |
| } |
| continue; |
| } |
| int slot = upb_DecodeFastData_GetTableSlot(entry.function_data); |
| if (table[slot].function_idx == UINT32_MAX) { |
| table[slot] = entry; |
| max = UPB_MAX(max, slot); |
| } else { |
| all_fields_assigned_unique_slots = false; |
| } |
| } |
| |
| int table_size = max == 0 ? 0 : upb_RoundUpToPowerOfTwo(max + 1); |
| |
| // If the message is not extendable, we can swap the generic handler for a |
| // fast unknown field handler in remaining open slots. |
| // The fast unknown handler only covers 1/2 byte tags and falls back for >2 |
| // bytes; thus, we do not need to check for total exhaustiveness in field |
| // coverage, only for 1/2 byte tags. |
| const bool non_extendable = |
| UPB_PRIVATE(_upb_MiniTable_ExtModeBase)(m) == kUpb_ExtMode_NonExtendable; |
| if (all_supported_tag_size_fields_compatible_with_fast_decode && |
| (non_extendable || |
| UPB_PRIVATE(_upb_MiniTable_ExtModeBase)(m) == kUpb_ExtMode_Extendable)) { |
| uint32_t fast_handler_idx = non_extendable |
| ? kUpb_DecodeFast_Unknown |
| : kUpb_DecodeFast_ExtensionOrUnknown; |
| for (int i = 0; i < table_size; i++) { |
| if (table[i].function_idx == UINT32_MAX) { |
| table[i].function_idx = fast_handler_idx; |
| table[i].function_data = 0; |
| } |
| } |
| // Also override generic fallback if all fields are assigned unique slots. |
| if (all_fields_assigned_unique_slots) { |
| ((upb_MiniTable*)m)->UPB_PRIVATE(ext) |= |
| kUpb_ExtMode_AllFastFieldsAssigned; |
| } |
| } |
| |
| return table_size; |
| } |
| |
// Returns the 8-bit mask that corresponds to a fast table of `table_size`
// entries: (table_size - 1) shifted left by 3, truncated to uint8_t.
//
// A table size of zero or less yields 255.
// NOTE(review): the shift by 3 presumably lines the mask up with the tag bits
// used to pick a table slot -- confirm against the decoder's dispatch code.
uint8_t upb_DecodeFast_GetTableMask(int table_size) {
  if (table_size <= 0) {
    return 255;
  }
  return (table_size - 1) << 3;
}
| |
| const char* upb_DecodeFast_GetFunctionName(uint32_t function_idx) { |
| #define STRINGIFY1(x) #x |
| #define STRINGIFY2(x) STRINGIFY1(x) |
| #define FUNCSTR(...) STRINGIFY2(UPB_DECODEFAST_FUNCNAME(__VA_ARGS__)), |
| // Constructing all combinations of strings at compile time wastes ~2k of |
| // binary size and RAM compared with using eg. snprintf() at runtime. But |
| // this function is only used from the compiler, where 2k is inconsequential. |
| static const char* names[] = {UPB_DECODEFAST_FUNCTIONS(FUNCSTR)}; |
| #undef STRINGIFY1 |
| #undef STRINGIFY2 |
| #undef FUNCSTR |
| |
| if (function_idx == UINT32_MAX) return "_upb_FastDecoder_DecodeGeneric"; |
| if (function_idx == kUpb_DecodeFast_Unknown) { |
| return "_upb_FastDecoder_DecodeUnknown"; |
| } |
| if (function_idx == kUpb_DecodeFast_ExtensionOrUnknown) { |
| return "_upb_FastDecoder_DecodeExtensionOrUnknown"; |
| } |
| UPB_ASSERT(function_idx < UPB_ARRAY_SIZE(names)); |
| return names[function_idx]; |
| } |
| |
| #include "upb/port/undef.inc" |