| // Protocol Buffers - Google's data interchange format |
| // Copyright 2023 Google LLC. All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file or at |
| // https://developers.google.com/open-source/licenses/bsd |
| |
| /* |
| ** Our memory representation for parsing tables and messages themselves. |
| ** Functions in this file are used by generated code and possibly reflection. |
| ** |
| ** The definitions in this file are internal to upb. |
| **/ |
| |
| #ifndef UPB_MESSAGE_INTERNAL_MESSAGE_H_ |
| #define UPB_MESSAGE_INTERNAL_MESSAGE_H_ |
| |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "upb/base/string_view.h" |
| #include "upb/mem/arena.h" |
| #include "upb/message/internal/extension.h" |
| #include "upb/message/internal/types.h" |
| #include "upb/message/value.h" |
| #include "upb/mini_table/extension.h" |
| #include "upb/mini_table/internal/message.h" |
| #include "upb/mini_table/message.h" |
| |
| // Must be last. |
| #include "upb/port/def.inc" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
// Floating-point constants that are only declared here; their definitions live
// in another translation unit.
// NOTE(review): judging by the names these hold +infinity (as float and as
// double) and a NaN -- confirm against the definitions.
extern const float kUpb_FltInfinity;
extern const double kUpb_Infinity;
extern const double kUpb_NaN;
| |
| // Internal members of a upb_Message that track unknown fields and/or |
| // extensions. We can change this without breaking binary compatibility. |
| |
// A pointer-sized word that stores either a upb_Extension* or a
// upb_StringView* together with a small tag in its low bits.
typedef struct upb_TaggedAuxPtr {
  // Two lowest bits form a tag (written here as "bit 1, bit 0"):
  //   00 - non-aliased unknown data
  //   10 - aliased unknown data
  //   01 - extension
  // No constructor in this file produces the combination 11, and a word that
  // is entirely zero is the null entry (see upb_TaggedAuxPtr_Null()).
  //
  // The main semantic difference between aliased and non-aliased unknown data
  // is that non-aliased unknown data can be assumed to have the following
  // layout:
  //
  //   [upb_StringView] [data]
  //
  // where the StringView points to the data buffer and the data buffer is
  // immediately following the StringView.
  //
  // The string view does not necessarily point to the start of the data buffer;
  // if the initial part of the buffer is removed from the message, the string
  // view will point to the beginning of the remaining buffer.
  //
  // For aliased unknown data, this layout is _not_ guaranteed, since the
  // pointer to the StringView can be anywhere in the allocation, and the
  // StringView may point to non-data memory.
  uintptr_t ptr;
} upb_TaggedAuxPtr;
| |
| UPB_INLINE bool upb_TaggedAuxPtr_IsExtension(upb_TaggedAuxPtr ptr) { |
| return ptr.ptr & 1; |
| } |
| |
| UPB_INLINE bool upb_TaggedAuxPtr_IsUnknown(upb_TaggedAuxPtr ptr) { |
| return (ptr.ptr != 0) && ((ptr.ptr & 1) == 0); |
| } |
| |
| UPB_INLINE bool upb_TaggedAuxPtr_IsUnknownAliased(upb_TaggedAuxPtr ptr) { |
| return (ptr.ptr != 0) && ((ptr.ptr & 2) == 2); |
| } |
| |
| UPB_INLINE upb_Extension* upb_TaggedAuxPtr_Extension(upb_TaggedAuxPtr ptr) { |
| UPB_ASSERT(upb_TaggedAuxPtr_IsExtension(ptr)); |
| return (upb_Extension*)(ptr.ptr & ~3ULL); |
| } |
| |
| UPB_INLINE upb_StringView* upb_TaggedAuxPtr_UnknownData(upb_TaggedAuxPtr ptr) { |
| UPB_ASSERT(!upb_TaggedAuxPtr_IsExtension(ptr)); |
| return (upb_StringView*)(ptr.ptr & ~3ULL); |
| } |
| |
// Classification of a upb_TaggedAuxPtr, as returned by upb_TaggedAux_Get().
typedef enum {
  // The entry points at a upb_Extension.
  kUpb_TaggedAuxType_Extension,
  // The entry points at non-aliased unknown data.
  kUpb_TaggedAuxType_Unknown,
  // The entry points at aliased unknown data.
  kUpb_TaggedAuxType_AliasedUnknown
} upb_TaggedAuxType;
| |
// Decoded payload of a upb_TaggedAuxPtr, filled in by upb_TaggedAux_Get().
// Which member is valid is indicated by the upb_TaggedAuxType that function
// returns.
typedef union {
  // Valid when the type is kUpb_TaggedAuxType_Extension.
  upb_Extension* extension;
  // Valid for both unknown-data types; this is a copy of the referenced
  // string view, not a pointer to it.
  upb_StringView unknown_data;
} upb_TaggedAux;
| |
| UPB_INLINE upb_TaggedAuxType upb_TaggedAux_Get(upb_TaggedAuxPtr ptr, |
| upb_TaggedAux* data) { |
| if (upb_TaggedAuxPtr_IsExtension(ptr)) { |
| data->extension = upb_TaggedAuxPtr_Extension(ptr); |
| return kUpb_TaggedAuxType_Extension; |
| } else if (upb_TaggedAuxPtr_IsUnknownAliased(ptr)) { |
| data->unknown_data = *upb_TaggedAuxPtr_UnknownData(ptr); |
| return kUpb_TaggedAuxType_AliasedUnknown; |
| } else { |
| UPB_ASSERT(upb_TaggedAuxPtr_IsUnknown(ptr)); |
| data->unknown_data = *upb_TaggedAuxPtr_UnknownData(ptr); |
| return kUpb_TaggedAuxType_Unknown; |
| } |
| } |
| |
| UPB_INLINE upb_TaggedAuxPtr upb_TaggedAuxPtr_Null(void) { |
| upb_TaggedAuxPtr ptr; |
| ptr.ptr = 0; |
| return ptr; |
| } |
| |
| UPB_INLINE upb_TaggedAuxPtr |
| upb_TaggedAuxPtr_MakeExtension(const upb_Extension* e) { |
| upb_TaggedAuxPtr ptr; |
| ptr.ptr = (uintptr_t)e | 1; |
| return ptr; |
| } |
| |
| // This tag means that the original allocation for this field starts with the |
| // string view and ends with the end of the content referenced by the string |
| // view. |
| UPB_INLINE upb_TaggedAuxPtr |
| upb_TaggedAuxPtr_MakeUnknownData(const upb_StringView* sv) { |
| upb_TaggedAuxPtr ptr; |
| ptr.ptr = (uintptr_t)sv; |
| return ptr; |
| } |
| |
| // This tag implies no guarantee between the relationship of the string view and |
| // the data it points to. |
| UPB_INLINE upb_TaggedAuxPtr |
| upb_TaggedAuxPtr_MakeUnknownDataAliased(const upb_StringView* sv) { |
| upb_TaggedAuxPtr ptr; |
| ptr.ptr = (uintptr_t)sv | 2; |
| return ptr; |
| } |
| |
// Out-of-line storage attached to a message: a counted array of tagged
// entries, each referring either to unknown-field data or to an extension.
typedef struct upb_Message_Internal {
  // Total number of entries set in aux_data
  uint32_t size;
  // NOTE(review): presumably the number of aux_data slots allocated; nothing
  // in this header reads it -- confirm against the allocation code.
  uint32_t capacity;
  // Tagged pointers to upb_StringView or upb_Extension
  // (flexible array member; entries [0, size) are iterated by the
  // upb_Message_Next*() helpers in this file).
  upb_TaggedAuxPtr aux_data[];
} upb_Message_Internal;
| |
// Tracing hooks, declared only when UPB_TRACING_ENABLED is defined.
#ifdef UPB_TRACING_ENABLED
// Called by _upb_Message_New() with the mini table and arena of every message
// it is about to create.
UPB_API void upb_Message_LogNewMessage(const upb_MiniTable* m,
                                       const upb_Arena* arena);
// NOTE(review): presumably installs the callback that
// upb_Message_LogNewMessage() forwards to -- confirm in the implementation.
UPB_API void upb_Message_SetNewMessageTraceHandler(
    void (*handler)(const upb_MiniTable*, const upb_Arena*));
#endif  // UPB_TRACING_ENABLED
| |
// We want to avoid the PLT and register spills for the many tiny memsets used
// to initialize messages; the dedicated memset instructions won't do that
//
// UPB_ARM_MOPS is a helper local to this section: it takes the value of
// __ARM_FEAT_MOPS when the compiler defines that macro and 0 otherwise, and it
// is #undef'd again right after the function below.
#ifdef __ARM_FEAT_MOPS
#define UPB_ARM_MOPS __ARM_FEAT_MOPS
#else
#define UPB_ARM_MOPS 0
#endif

// Zero-fills the `size` bytes starting at `dst`.
//
// The caller must guarantee (stated below via UPB_ASSUME) that `size` is a
// non-zero multiple of kUpb_Message_Align and that `dst` is aligned to
// kUpb_Message_Align.
//
// When UPB_ARM64_ASM is true and UPB_ARM_MOPS is 0, a hand-written AArch64
// store loop is used unless `size` is a compile-time constant; in every other
// configuration this is a plain memset().
UPB_FORCEINLINE void _upb_Message_AlignedMemsetZero(void* dst, size_t size) {
  UPB_ASSUME(size % kUpb_Message_Align == 0);
  UPB_ASSUME(size != 0);
  UPB_ASSUME((uintptr_t)dst % kUpb_Message_Align == 0);
#if UPB_ARM64_ASM && !UPB_ARM_MOPS
#if UPB_HAS_BUILTIN(__builtin_constant_p)
  if (__builtin_constant_p(size)) {
    // We assume the compiler will do something intelligent with a known-length
    // memset.
    memset(dst, 0, size);
    return;
  }
#endif
  // `ptr` walks forward from the start of the buffer and `end` walks backward
  // from one past its last byte; both are updated by the asm below.
  char* ptr = (char*)dst;
  char* end = ptr + size;
  __asm__(
      // Unconditionally zero the first 8 byte chunk; if the loop runs this is
      // wasted work, but doing it unconditionally is cheaper than adding
      // another branch.
      "str xzr, [%x[ptr]]\n\t"

      // If size == 8, skip the loop.
      "cmp %x[count], #8\n\t"
      "b.eq 2f\n\t"

      // Loop for size >= 16.
      // In each iteration, we zero 16 bytes from the ptr and 16 bytes from the
      // end. These regions may overlap, which is OK; doing it this way lets us
      // process two chunks per loop iteration.
      "1:\n\t"
      "stp xzr, xzr, [%x[ptr]], #16\n\t"    // Store then increment by 16
      "stp xzr, xzr, [%x[end], #-16]!\n\t"  // Decrement by 16 then store
      // End the loop when pointers cross or meet.
      "cmp %x[ptr], %x[end]\n\t"
      "b.lo 1b\n\t"
      "2:\n\t"
      // Outputs: both cursors are read-write register operands, and the "=m"
      // operand declares the memory at `dst` as written by this asm.
      : [ptr] "+&r"(ptr), [end] "+&r"(end), "=m"(*(char (*)[])dst)
      // Input: the byte count, used only for the size == 8 test.
      : [count] "r"(size)
      // The cmp instructions modify the condition flags.
      : "cc");
  // NOTE(review): presumably tells sanitizers that the bytes written by the
  // asm are initialized -- confirm against the upb_Xsan definition.
  UPB_PRIVATE(upb_Xsan_MarkInitialized)(dst, size);
#else
  memset(dst, 0, size);
#endif
}
#undef UPB_ARM_MOPS
| |
| // Inline version upb_Message_New(), for internal use. |
| UPB_NODISCARD UPB_INLINE struct upb_Message* _upb_Message_New( |
| const upb_MiniTable* m, upb_Arena* a) { |
| UPB_PRIVATE(upb_MiniTable_CheckInvariants)(m); |
| #ifdef UPB_TRACING_ENABLED |
| upb_Message_LogNewMessage(m, a); |
| #endif // UPB_TRACING_ENABLED |
| |
| const size_t size = m->UPB_PRIVATE(size); |
| // Message sizes are aligned up when constructing minitables; telling the |
| // compiler this avoids redoing alignment on the malloc fast path |
| UPB_ASSUME(size % kUpb_Message_Align == 0); |
| struct upb_Message* msg = (struct upb_Message*)upb_Arena_Malloc(a, size); |
| if (UPB_UNLIKELY(!msg)) return NULL; |
| _upb_Message_AlignedMemsetZero(msg, size); |
| return msg; |
| } |
| |
// Discards the unknown fields for this message only.
// (Declaration only; the definition is not part of this header.)
void _upb_Message_DiscardUnknown_shallow(struct upb_Message* msg);
| |
// Out-of-line path used by _upb_Message_AddUnknown() whenever the inline merge
// fast path does not apply. `alias` is true when the caller asked for the
// message to alias `data` rather than copy it.
// NOTE(review): a false return presumably signals allocation failure --
// confirm in the implementation.
UPB_NODISCARD UPB_NOINLINE bool UPB_PRIVATE(_upb_Message_AddUnknownSlowPath)(
    struct upb_Message* msg, const char* data, size_t len, upb_Arena* arena,
    bool alias);
| |
// Selects how _upb_Message_AddUnknown() stores the bytes it is given. Every
// mode other than kUpb_AddUnknown_Copy is forwarded to the slow path as
// "alias".
typedef enum {
  // Provided buffer is copied into the message.
  kUpb_AddUnknown_Copy = 0,

  // The message will alias the provided buffer.
  kUpb_AddUnknown_Alias = 1,

  // The message will alias the provided buffer, and we may merge the data with
  // the immediately preceding unknown field if possible.
  kUpb_AddUnknown_AliasAllowMerge = 2,
} upb_AddUnknownMode;
| |
| UPB_NODISCARD UPB_INLINE bool UPB_PRIVATE( |
| _upb_Message_TryAddUnknownAliasAllowMerge)(struct upb_Message* msg, |
| const char* data, size_t len, |
| upb_Arena* arena, |
| upb_AddUnknownMode mode) { |
| UPB_ASSERT(!upb_Message_IsFrozen(msg)); |
| UPB_ASSERT(mode == kUpb_AddUnknown_AliasAllowMerge); |
| // Aliasing parse of a message with sequential unknown fields is a simple |
| // pointer bump, so inline it. |
| upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); |
| if (in && in->size) { |
| upb_TaggedAuxPtr ptr = in->aux_data[in->size - 1]; |
| if (upb_TaggedAuxPtr_IsUnknown(ptr)) { |
| upb_StringView* existing = upb_TaggedAuxPtr_UnknownData(ptr); |
| // Fast path if the field we're adding is immediately after the last |
| // added unknown field. |
| // |
| // The caller has guaranteed to us, by passing |
| // kUpb_AddUnknown_AliasAllowMerge, that there is no risk that these two |
| // regions of memory are from different objects that are contiguous in |
| // memory by coincidence. |
| if (existing->data + existing->size == data) { |
| existing->size += len; |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| // Adds unknown data (serialized protobuf data) to the given message. The data |
| // must represent one or more complete and well formed proto fields. |
| // |
| // If `alias_base` is NULL, the bytes from `data` will be copied into the |
| // destination arena. Otherwise it must be a pointer to the beginning of the |
| // buffer that `data` points into, which signals that the message must alias |
| // the bytes instead of copying them. The value of `alias_base` is also used |
| // to mark the boundary of the buffer, so that we do not inappropriately |
| // coalesce two buffers that are separate objects but happen to be contiguous |
| // in memory. |
| UPB_NODISCARD UPB_INLINE bool UPB_PRIVATE(_upb_Message_AddUnknown)( |
| struct upb_Message* msg, const char* data, size_t len, upb_Arena* arena, |
| upb_AddUnknownMode mode) { |
| UPB_ASSERT(!upb_Message_IsFrozen(msg)); |
| if (mode == kUpb_AddUnknown_AliasAllowMerge && |
| UPB_PRIVATE(_upb_Message_TryAddUnknownAliasAllowMerge)(msg, data, len, |
| arena, mode)) { |
| return true; |
| } |
| return UPB_PRIVATE(_upb_Message_AddUnknownSlowPath)( |
| msg, data, len, arena, mode != kUpb_AddUnknown_Copy); |
| } |
| |
// Adds unknown data (serialized protobuf data) to the given message.
// The data is copied into the message instance. Data when concatenated together
// must represent one or more complete and well formed proto fields, but the
// individual spans may point only to partial fields.
//
// `data` is an array of `count` string views that are treated as one logical
// buffer in array order.
UPB_NODISCARD bool UPB_PRIVATE(_upb_Message_AddUnknownV)(
    struct upb_Message* msg, upb_Arena* arena, upb_StringView data[],
    size_t count);
| |
// Ensures at least one slot is available in the aux_data of this message.
// Returns false only if a reallocation is needed to satisfy the request and
// that reallocation fails.
UPB_NODISCARD bool UPB_PRIVATE(_upb_Message_ReserveSlot)(
    struct upb_Message* msg, upb_Arena* arena);
| |
// Initial iterator values. upb_Message_HasUnknown() seeds
// upb_Message_NextUnknown() with kUpb_Message_UnknownBegin.
// NOTE(review): kUpb_Message_ExtensionBegin is presumably the matching seed
// for upb_Message_NextExtension() -- confirm against callers.
#define kUpb_Message_UnknownBegin 0
#define kUpb_Message_ExtensionBegin 0
| |
| UPB_INLINE bool upb_Message_NextUnknown(const struct upb_Message* msg, |
| upb_StringView* data, uintptr_t* iter) { |
| const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); |
| size_t i = *iter; |
| if (in) { |
| while (i < in->size) { |
| upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; |
| if (upb_TaggedAuxPtr_IsUnknown(tagged_ptr)) { |
| *data = *upb_TaggedAuxPtr_UnknownData(tagged_ptr); |
| *iter = i; |
| return true; |
| } |
| } |
| } |
| data->size = 0; |
| data->data = NULL; |
| *iter = i; |
| return false; |
| } |
| |
| UPB_INLINE bool upb_Message_HasUnknown(const struct upb_Message* msg) { |
| upb_StringView data; |
| uintptr_t iter = kUpb_Message_UnknownBegin; |
| return upb_Message_NextUnknown(msg, &data, &iter); |
| } |
| |
| UPB_INLINE bool upb_Message_NextExtension(const struct upb_Message* msg, |
| const upb_MiniTableExtension** out_e, |
| upb_MessageValue* out_v, |
| uintptr_t* iter) { |
| const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); |
| uintptr_t i = *iter; |
| if (in) { |
| while (i < in->size) { |
| upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; |
| if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { |
| const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); |
| |
| // Empty repeated fields or maps semantically don't exist. |
| if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; |
| |
| *out_e = ext->ext; |
| *out_v = ext->data; |
| *iter = i; |
| return true; |
| } |
| } |
| } |
| *iter = i; |
| |
| return false; |
| } |
| |
| UPB_INLINE bool UPB_PRIVATE(_upb_Message_NextExtensionReverse)( |
| const struct upb_Message* msg, const upb_MiniTableExtension** out_e, |
| upb_MessageValue* out_v, uintptr_t* iter) { |
| upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); |
| if (!in) return false; |
| uintptr_t i = *iter; |
| uint32_t size = in->size; |
| while (i < size) { |
| upb_TaggedAuxPtr tagged_ptr = in->aux_data[size - 1 - i]; |
| i++; |
| if (!upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { |
| continue; |
| } |
| const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); |
| |
| // Empty repeated fields or maps semantically don't exist. |
| if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; |
| |
| *out_e = ext->ext; |
| *out_v = ext->data; |
| *iter = i; |
| return true; |
| } |
| *iter = i; |
| return false; |
| } |
| |
| #ifdef __cplusplus |
| } /* extern "C" */ |
| #endif |
| |
| #include "upb/port/undef.inc" |
| |
| #endif /* UPB_MESSAGE_INTERNAL_MESSAGE_H_ */ |