Support deleting regions of unknown fields via noncontiguous storage API
PiperOrigin-RevId: 695864676
diff --git a/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs b/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs
deleted file mode 100644
index 208ce1f..0000000
--- a/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs
+++ /dev/null
@@ -1,17 +0,0 @@
-#region Copyright notice and license
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc. All rights reserved.
-//
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file or at
-// https://developers.google.com/open-source/licenses/bsd
-#endregion
-
-namespace Google.Protobuf.Reflection;
-
-internal sealed partial class FeatureSetDescriptor
-{
- // Canonical serialized form of the edition defaults, generated by embed_edition_defaults.
- private const string DefaultsBase64 =
- "ChMYhAciACoMCAEQAhgCIAMoATACChMY5wciACoMCAIQARgBIAIoATABChMY6AciDAgBEAEYASACKAEwASoAIOYHKOgH";
-}
diff --git a/upb/message/message.c b/upb/message/message.c
index 8503c35..8596d29 100644
--- a/upb/message/message.c
+++ b/upb/message/message.c
@@ -81,24 +81,31 @@
}
}
-void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len) {
+bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data,
+ uintptr_t* iter) {
UPB_ASSERT(!upb_Message_IsFrozen(msg));
+ UPB_ASSERT(*iter == kUpb_Message_UnknownBegin + 1);
upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg);
const char* internal_unknown_end = UPB_PTR_AT(in, in->unknown_end, char);
#ifndef NDEBUG
size_t full_unknown_size;
const char* full_unknown = upb_Message_GetUnknown(msg, &full_unknown_size);
- UPB_ASSERT((uintptr_t)data >= (uintptr_t)full_unknown);
- UPB_ASSERT((uintptr_t)data < (uintptr_t)(full_unknown + full_unknown_size));
- UPB_ASSERT((uintptr_t)(data + len) > (uintptr_t)data);
- UPB_ASSERT((uintptr_t)(data + len) <= (uintptr_t)internal_unknown_end);
+ UPB_ASSERT((uintptr_t)data->data >= (uintptr_t)full_unknown);
+ UPB_ASSERT((uintptr_t)data->data <
+ (uintptr_t)(full_unknown + full_unknown_size));
+ UPB_ASSERT((uintptr_t)(data->data + data->size) > (uintptr_t)data->data);
+ UPB_ASSERT((uintptr_t)(data->data + data->size) <=
+ (uintptr_t)internal_unknown_end);
#endif
-
- if ((data + len) != internal_unknown_end) {
- memmove((char*)data, data + len, internal_unknown_end - data - len);
+ const char* end = data->data + data->size;
+ size_t offset = data->data - (const char*)in;
+ if (end != internal_unknown_end) {
+ memmove(UPB_PTR_AT(in, offset, char), end, internal_unknown_end - end);
}
- in->unknown_end -= len;
+ in->unknown_end -= data->size;
+ data->size = in->unknown_end - offset;
+ return data->size != 0;
}
size_t upb_Message_ExtensionCount(const upb_Message* msg) {
diff --git a/upb/message/message.h b/upb/message/message.h
index 46f7d73..7e4b65a 100644
--- a/upb/message/message.h
+++ b/upb/message/message.h
@@ -51,8 +51,29 @@
// Returns a reference to the message's unknown data.
const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len);
-// Removes partial unknown data from message.
-void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len);
+// Removes a segment of unknown data from the message, advancing to the next
+// segment. Returns false if the removed segment was at the end of the last
+// chunk.
+//
+// This must be done while iterating:
+//
+// uintptr_t iter = kUpb_Message_UnknownBegin;
+// upb_StringView data;
+// // Iterate chunks
+// while (upb_Message_NextUnknown(msg, &data, &iter)) {
+// // Iterate within a chunk, deleting ranges
+// while (ShouldDeleteSubSegment(&data)) {
+// // `data` now points to the region to be deleted
+// if (!upb_Message_DeleteUnknown(msg, &data, &iter)) return;
+// // If DeleteUnknown returned true, then data now points to the
+// // remaining unknown fields after the region that was just deleted.
+// }
+// }
+//
+// The range given in `data` must be contained inside the most recently
+// returned region.
+bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data,
+ uintptr_t* iter);
// Returns the number of extensions present in this message.
size_t upb_Message_ExtensionCount(const upb_Message* msg);
diff --git a/upb/message/promote.c b/upb/message/promote.c
index a278944..757f8fa 100644
--- a/upb/message/promote.c
+++ b/upb/message/promote.c
@@ -110,8 +110,21 @@
}
ext->data.msg_val = extension_msg;
value->msg_val = extension_msg;
- const char* delete_ptr = upb_Message_GetUnknown(msg, &len) + ofs;
- upb_Message_DeleteUnknown(msg, delete_ptr, result.len);
+ // Adding to extensions may have invalidated our prior pointers to unknowns.
+ // This second search won't be necessary once unknown iteration is pointer
+ // stable, because it'll be separate storage from extensions and won't realloc.
+ uintptr_t iter = kUpb_Message_UnknownBegin;
+ upb_StringView data;
+ while (upb_Message_NextUnknown(msg, &data, &iter)) {
+ if (data.size < ofs) {
+ ofs -= data.size;
+ } else {
+ data.data += ofs;
+ data.size = result.len;
+ break;
+ }
+ }
+ upb_Message_DeleteUnknown(msg, &data, &iter);
return kUpb_GetExtension_Ok;
}
@@ -123,11 +136,10 @@
uint32_t field_number,
int depth_limit) {
depth_limit = depth_limit ? depth_limit : 100;
- uintptr_t iter = kUpb_Message_UnknownBegin;
-
upb_FindUnknownRet ret;
+ ret.iter = kUpb_Message_UnknownBegin;
upb_StringView data;
- while (upb_Message_NextUnknown(msg, &data, &iter)) {
+ while (upb_Message_NextUnknown(msg, &data, &ret.iter)) {
upb_EpsCopyInputStream stream;
const char* ptr = data.data;
upb_EpsCopyInputStream_Init(&stream, &ptr, data.size, true);
@@ -154,6 +166,7 @@
ret.status = kUpb_FindUnknown_NotPresent;
ret.ptr = NULL;
ret.len = 0;
+ ret.iter = kUpb_Message_UnknownBegin;
return ret;
}
@@ -266,7 +279,9 @@
decode_options, arena);
if (ret.status == kUpb_UnknownToMessage_Ok) {
message = ret.message;
- upb_Message_DeleteUnknown(msg, unknown_data, unknown_size);
+ upb_StringView del =
+ upb_StringView_FromDataAndSize(unknown_data, unknown_size);
+ upb_Message_DeleteUnknown(msg, &del, &(unknown.iter));
}
} break;
case kUpb_FindUnknown_ParseError:
@@ -322,7 +337,9 @@
if (!upb_Array_Append(repeated_messages, value, arena)) {
return kUpb_UnknownToMessage_OutOfMemory;
}
- upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+ upb_StringView del =
+ upb_StringView_FromDataAndSize(unknown.ptr, unknown.len);
+ upb_Message_DeleteUnknown(msg, &del, &unknown.iter);
} else {
return ret.status;
}
@@ -358,7 +375,9 @@
bool insert_success = upb_Message_SetMapEntry(map, mini_table, field,
map_entry_message, arena);
if (!insert_success) return kUpb_UnknownToMessage_OutOfMemory;
- upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+ upb_StringView del =
+ upb_StringView_FromDataAndSize(unknown.ptr, unknown.len);
+ upb_Message_DeleteUnknown(msg, &del, &unknown.iter);
}
return kUpb_UnknownToMessage_Ok;
}
diff --git a/upb/message/promote.h b/upb/message/promote.h
index 6c780c9..9e8fbe2 100644
--- a/upb/message/promote.h
+++ b/upb/message/promote.h
@@ -53,6 +53,7 @@
const char* ptr;
// Size of unknown field data.
size_t len;
+ uintptr_t iter;
} upb_FindUnknownRet;
// Finds first occurrence of unknown data by tag id in message.