Support deleting regions of unknown fields via noncontiguous storage API

PiperOrigin-RevId: 695864676
diff --git a/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs b/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs
deleted file mode 100644
index 208ce1f..0000000
--- a/csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs
+++ /dev/null
@@ -1,17 +0,0 @@
-#region Copyright notice and license
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc.  All rights reserved.
-//
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file or at
-// https://developers.google.com/open-source/licenses/bsd
-#endregion
-
-namespace Google.Protobuf.Reflection;
-
-internal sealed partial class FeatureSetDescriptor
-{
-    // Canonical serialized form of the edition defaults, generated by embed_edition_defaults.
-    private const string DefaultsBase64 =
-        "ChMYhAciACoMCAEQAhgCIAMoATACChMY5wciACoMCAIQARgBIAIoATABChMY6AciDAgBEAEYASACKAEwASoAIOYHKOgH";
-}
diff --git a/upb/message/message.c b/upb/message/message.c
index 8503c35..8596d29 100644
--- a/upb/message/message.c
+++ b/upb/message/message.c
@@ -81,24 +81,31 @@
   }
 }
 
-void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len) {
+bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data,
+                               uintptr_t* iter) {
   UPB_ASSERT(!upb_Message_IsFrozen(msg));
+  UPB_ASSERT(*iter == kUpb_Message_UnknownBegin + 1);
   upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg);
   const char* internal_unknown_end = UPB_PTR_AT(in, in->unknown_end, char);
 
 #ifndef NDEBUG
   size_t full_unknown_size;
   const char* full_unknown = upb_Message_GetUnknown(msg, &full_unknown_size);
-  UPB_ASSERT((uintptr_t)data >= (uintptr_t)full_unknown);
-  UPB_ASSERT((uintptr_t)data < (uintptr_t)(full_unknown + full_unknown_size));
-  UPB_ASSERT((uintptr_t)(data + len) > (uintptr_t)data);
-  UPB_ASSERT((uintptr_t)(data + len) <= (uintptr_t)internal_unknown_end);
+  UPB_ASSERT((uintptr_t)data->data >= (uintptr_t)full_unknown);
+  UPB_ASSERT((uintptr_t)data->data <
+             (uintptr_t)(full_unknown + full_unknown_size));
+  UPB_ASSERT((uintptr_t)(data->data + data->size) > (uintptr_t)data->data);
+  UPB_ASSERT((uintptr_t)(data->data + data->size) <=
+             (uintptr_t)internal_unknown_end);
 #endif
-
-  if ((data + len) != internal_unknown_end) {
-    memmove((char*)data, data + len, internal_unknown_end - data - len);
+  const char* end = data->data + data->size;
+  size_t offset = data->data - (const char*)in;
+  if (end != internal_unknown_end) {
+    memmove(UPB_PTR_AT(in, offset, char), end, internal_unknown_end - end);
   }
-  in->unknown_end -= len;
+  in->unknown_end -= data->size;
+  data->size = in->unknown_end - offset;
+  return data->size != 0;
 }
 
 size_t upb_Message_ExtensionCount(const upb_Message* msg) {
diff --git a/upb/message/message.h b/upb/message/message.h
index 46f7d73..7e4b65a 100644
--- a/upb/message/message.h
+++ b/upb/message/message.h
@@ -51,8 +51,29 @@
 // Returns a reference to the message's unknown data.
 const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len);
 
-// Removes partial unknown data from message.
-void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len);
+// Removes a segment of unknown data from the message, advancing to the next
+// segment.  Returns false if the removed segment was at the end of the last
+// chunk.
+//
+// This must be done while iterating:
+//
+//   uintptr_t iter = kUpb_Message_UnknownBegin;
+//   upb_StringView data;
+//   // Iterate chunks
+//   while (upb_Message_NextUnknown(msg, &data, &iter)) {
+//     // Iterate within a chunk, deleting ranges
+//     while (ShouldDeleteSubSegment(&data)) {
+//       // `data` now points to the region to be deleted
+//       if (!upb_Message_DeleteUnknown(msg, &data, &iter)) return;
+//       // If DeleteUnknown returned true, then data now points to the
+//       // remaining unknown fields after the region that was just deleted.
+//     }
+//   }
+//
+// The range given in `data` must be contained inside the most recently
+// returned region.
+bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data,
+                               uintptr_t* iter);
 
 // Returns the number of extensions present in this message.
 size_t upb_Message_ExtensionCount(const upb_Message* msg);
diff --git a/upb/message/promote.c b/upb/message/promote.c
index a278944..757f8fa 100644
--- a/upb/message/promote.c
+++ b/upb/message/promote.c
@@ -110,8 +110,21 @@
   }
   ext->data.msg_val = extension_msg;
   value->msg_val = extension_msg;
-  const char* delete_ptr = upb_Message_GetUnknown(msg, &len) + ofs;
-  upb_Message_DeleteUnknown(msg, delete_ptr, result.len);
+  // Adding to extensions may have invalidated our previous pointers to
+  // unknowns. This second search won't be necessary once unknown iteration is
+  // pointer stable: storage separate from extensions that won't realloc.
+  uintptr_t iter = kUpb_Message_UnknownBegin;
+  upb_StringView data;
+  while (upb_Message_NextUnknown(msg, &data, &iter)) {
+    if (data.size < ofs) {
+      ofs -= data.size;
+    } else {
+      data.data += ofs;
+      data.size = result.len;
+      break;
+    }
+  }
+  upb_Message_DeleteUnknown(msg, &data, &iter);
   return kUpb_GetExtension_Ok;
 }
 
@@ -123,11 +136,10 @@
                                            uint32_t field_number,
                                            int depth_limit) {
   depth_limit = depth_limit ? depth_limit : 100;
-  uintptr_t iter = kUpb_Message_UnknownBegin;
-
   upb_FindUnknownRet ret;
+  ret.iter = kUpb_Message_UnknownBegin;
   upb_StringView data;
-  while (upb_Message_NextUnknown(msg, &data, &iter)) {
+  while (upb_Message_NextUnknown(msg, &data, &ret.iter)) {
     upb_EpsCopyInputStream stream;
     const char* ptr = data.data;
     upb_EpsCopyInputStream_Init(&stream, &ptr, data.size, true);
@@ -154,6 +166,7 @@
   ret.status = kUpb_FindUnknown_NotPresent;
   ret.ptr = NULL;
   ret.len = 0;
+  ret.iter = kUpb_Message_UnknownBegin;
   return ret;
 }
 
@@ -266,7 +279,9 @@
                                                 decode_options, arena);
         if (ret.status == kUpb_UnknownToMessage_Ok) {
           message = ret.message;
-          upb_Message_DeleteUnknown(msg, unknown_data, unknown_size);
+          upb_StringView del =
+              upb_StringView_FromDataAndSize(unknown_data, unknown_size);
+          upb_Message_DeleteUnknown(msg, &del, &(unknown.iter));
         }
       } break;
       case kUpb_FindUnknown_ParseError:
@@ -322,7 +337,9 @@
         if (!upb_Array_Append(repeated_messages, value, arena)) {
           return kUpb_UnknownToMessage_OutOfMemory;
         }
-        upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+        upb_StringView del =
+            upb_StringView_FromDataAndSize(unknown.ptr, unknown.len);
+        upb_Message_DeleteUnknown(msg, &del, &unknown.iter);
       } else {
         return ret.status;
       }
@@ -358,7 +375,9 @@
     bool insert_success = upb_Message_SetMapEntry(map, mini_table, field,
                                                   map_entry_message, arena);
     if (!insert_success) return kUpb_UnknownToMessage_OutOfMemory;
-    upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len);
+    upb_StringView del =
+        upb_StringView_FromDataAndSize(unknown.ptr, unknown.len);
+    upb_Message_DeleteUnknown(msg, &del, &unknown.iter);
   }
   return kUpb_UnknownToMessage_Ok;
 }
diff --git a/upb/message/promote.h b/upb/message/promote.h
index 6c780c9..9e8fbe2 100644
--- a/upb/message/promote.h
+++ b/upb/message/promote.h
@@ -53,6 +53,7 @@
   const char* ptr;
   // Size of unknown field data.
   size_t len;
+  uintptr_t iter;
 } upb_FindUnknownRet;
 
 // Finds first occurrence of unknown data by tag id in message.