Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 1 | // Protocol Buffers - Google's data interchange format |
| 2 | // Copyright 2023 Google LLC. All rights reserved. |
Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 3 | // |
Protobuf Team Bot | 0fab773 | 2023-11-20 13:38:15 -0800 | [diff] [blame] | 4 | // Use of this source code is governed by a BSD-style |
| 5 | // license that can be found in the LICENSE file or at |
| 6 | // https://developers.google.com/open-source/licenses/bsd |
Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 7 | |
| 8 | #include "python/unknown_fields.h" |
| 9 | |
| 10 | #include "python/message.h" |
| 11 | #include "python/protobuf.h" |
| 12 | #include "upb/wire/eps_copy_input_stream.h" |
| 13 | #include "upb/wire/reader.h" |
| 14 | #include "upb/wire/types.h" |
| 15 | |
| 16 | // ----------------------------------------------------------------------------- |
| 17 | // UnknownFieldSet |
| 18 | // ----------------------------------------------------------------------------- |
| 19 | |
| 20 | typedef struct { |
| 21 | PyObject_HEAD; |
| 22 | PyObject* fields; |
| 23 | } PyUpb_UnknownFieldSet; |
| 24 | |
| 25 | static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) { |
| 26 | PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self; |
| 27 | Py_XDECREF(self->fields); |
| 28 | PyUpb_Dealloc(self); |
| 29 | } |
| 30 | |
| 31 | PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) { |
| 32 | PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); |
| 33 | PyUpb_UnknownFieldSet* self = |
| 34 | (void*)PyType_GenericAlloc(s->unknown_fields_type, 0); |
| 35 | return self; |
| 36 | } |
| 37 | |
| 38 | // For MessageSet the established behavior is for UnknownFieldSet to interpret |
| 39 | // the MessageSet wire format: |
| 40 | // message MessageSet { |
| 41 | // repeated group Item = 1 { |
| 42 | // required int32 type_id = 2; |
| 43 | // required bytes message = 3; |
| 44 | // } |
| 45 | // } |
| 46 | // |
| 47 | // And create unknown fields like: |
| 48 | // UnknownField(type_id, WIRE_TYPE_DELIMITED, message) |
| 49 | // |
| 50 | // For any unknown fields that are unexpected per the wire format defined above, |
| 51 | // we drop them on the floor. |
| 52 | |
| 53 | enum { |
| 54 | kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup, |
| 55 | kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup, |
| 56 | kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint, |
| 57 | kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited, |
| 58 | }; |
| 59 | |
| 60 | static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem( |
| 61 | PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, |
| 62 | const char* ptr) { |
| 63 | PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); |
| 64 | int type_id = 0; |
| 65 | PyObject* msg = NULL; |
| 66 | while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { |
| 67 | uint32_t tag; |
| 68 | ptr = upb_WireReader_ReadTag(ptr, &tag); |
| 69 | if (!ptr) goto err; |
| 70 | switch (tag) { |
| 71 | case kUpb_MessageSet_EndItemTag: |
| 72 | goto done; |
| 73 | case kUpb_MessageSet_TypeIdTag: { |
| 74 | uint64_t tmp; |
| 75 | ptr = upb_WireReader_ReadVarint(ptr, &tmp); |
| 76 | if (!ptr) goto err; |
| 77 | if (!type_id) type_id = tmp; |
| 78 | break; |
| 79 | } |
| 80 | case kUpb_MessageSet_MessageTag: { |
| 81 | int size; |
| 82 | ptr = upb_WireReader_ReadSize(ptr, &size); |
| 83 | if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { |
| 84 | goto err; |
| 85 | } |
| 86 | const char* str = ptr; |
| 87 | ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); |
| 88 | if (!msg) { |
| 89 | msg = PyBytes_FromStringAndSize(str, size); |
| 90 | if (!msg) goto err; |
| 91 | } else { |
| 92 | // already saw a message here so deliberately skipping the duplicate |
| 93 | } |
| 94 | break; |
| 95 | } |
| 96 | default: |
| 97 | ptr = upb_WireReader_SkipValue(ptr, tag, stream); |
| 98 | if (!ptr) goto err; |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | done: |
| 103 | if (type_id && msg) { |
| 104 | PyObject* field = PyObject_CallFunction( |
| 105 | s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg); |
| 106 | if (!field) goto err; |
| 107 | PyList_Append(self->fields, field); |
| 108 | Py_DECREF(field); |
| 109 | } |
| 110 | Py_XDECREF(msg); |
| 111 | return ptr; |
| 112 | |
| 113 | err: |
| 114 | Py_XDECREF(msg); |
| 115 | return NULL; |
| 116 | } |
| 117 | |
| 118 | static const char* PyUpb_UnknownFieldSet_BuildMessageSet( |
| 119 | PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, |
| 120 | const char* ptr) { |
| 121 | self->fields = PyList_New(0); |
| 122 | while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { |
| 123 | uint32_t tag; |
| 124 | ptr = upb_WireReader_ReadTag(ptr, &tag); |
| 125 | if (!ptr) goto err; |
| 126 | if (tag == kUpb_MessageSet_StartItemTag) { |
| 127 | ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr); |
| 128 | } else { |
| 129 | ptr = upb_WireReader_SkipValue(ptr, tag, stream); |
| 130 | } |
| 131 | if (!ptr) goto err; |
| 132 | } |
| 133 | if (upb_EpsCopyInputStream_IsError(stream)) goto err; |
| 134 | return ptr; |
| 135 | |
| 136 | err: |
| 137 | Py_DECREF(self->fields); |
| 138 | self->fields = NULL; |
| 139 | return NULL; |
| 140 | } |
| 141 | |
| 142 | static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, |
| 143 | upb_EpsCopyInputStream* stream, |
| 144 | const char* ptr, |
| 145 | int group_number); |
| 146 | |
| 147 | static const char* PyUpb_UnknownFieldSet_BuildValue( |
| 148 | PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, |
| 149 | const char* ptr, int field_number, int wire_type, int group_number, |
| 150 | PyObject** data) { |
| 151 | switch (wire_type) { |
| 152 | case kUpb_WireType_Varint: { |
| 153 | uint64_t val; |
| 154 | ptr = upb_WireReader_ReadVarint(ptr, &val); |
| 155 | if (!ptr) return NULL; |
| 156 | *data = PyLong_FromUnsignedLongLong(val); |
| 157 | return ptr; |
| 158 | } |
| 159 | case kUpb_WireType_64Bit: { |
| 160 | uint64_t val; |
| 161 | ptr = upb_WireReader_ReadFixed64(ptr, &val); |
| 162 | *data = PyLong_FromUnsignedLongLong(val); |
| 163 | return ptr; |
| 164 | } |
| 165 | case kUpb_WireType_32Bit: { |
| 166 | uint32_t val; |
| 167 | ptr = upb_WireReader_ReadFixed32(ptr, &val); |
| 168 | *data = PyLong_FromUnsignedLongLong(val); |
| 169 | return ptr; |
| 170 | } |
| 171 | case kUpb_WireType_Delimited: { |
| 172 | int size; |
| 173 | ptr = upb_WireReader_ReadSize(ptr, &size); |
| 174 | if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { |
| 175 | return NULL; |
| 176 | } |
| 177 | const char* str = ptr; |
| 178 | ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); |
| 179 | *data = PyBytes_FromStringAndSize(str, size); |
| 180 | return ptr; |
| 181 | } |
| 182 | case kUpb_WireType_StartGroup: { |
| 183 | PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare(); |
| 184 | if (!sub) return NULL; |
| 185 | *data = &sub->ob_base; |
| 186 | return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number); |
| 187 | } |
| 188 | default: |
| 189 | assert(0); |
| 190 | *data = NULL; |
| 191 | return NULL; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | // For non-MessageSet we just build the unknown fields exactly as they exist on |
| 196 | // the wire. |
| 197 | static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, |
| 198 | upb_EpsCopyInputStream* stream, |
| 199 | const char* ptr, |
| 200 | int group_number) { |
| 201 | PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); |
| 202 | self->fields = PyList_New(0); |
| 203 | while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { |
| 204 | uint32_t tag; |
| 205 | ptr = upb_WireReader_ReadTag(ptr, &tag); |
| 206 | if (!ptr) goto err; |
| 207 | PyObject* data = NULL; |
| 208 | int field_number = upb_WireReader_GetFieldNumber(tag); |
| 209 | int wire_type = upb_WireReader_GetWireType(tag); |
| 210 | if (wire_type == kUpb_WireType_EndGroup) { |
| 211 | if (field_number != group_number) return NULL; |
| 212 | return ptr; |
| 213 | } |
| 214 | ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number, |
| 215 | wire_type, group_number, &data); |
| 216 | if (!ptr) { |
| 217 | Py_XDECREF(data); |
| 218 | goto err; |
| 219 | } |
| 220 | assert(data); |
| 221 | PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN", |
| 222 | field_number, wire_type, data); |
| 223 | PyList_Append(self->fields, field); |
| 224 | Py_DECREF(field); |
| 225 | } |
| 226 | if (upb_EpsCopyInputStream_IsError(stream)) goto err; |
| 227 | return ptr; |
| 228 | |
| 229 | err: |
| 230 | Py_DECREF(self->fields); |
| 231 | self->fields = NULL; |
| 232 | return NULL; |
| 233 | } |
| 234 | |
| 235 | static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args, |
| 236 | PyObject* kwargs) { |
| 237 | char* kwlist[] = {"message", 0}; |
| 238 | PyObject* py_msg = NULL; |
| 239 | |
| 240 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) { |
| 241 | return NULL; |
| 242 | } |
| 243 | |
| 244 | if (!PyUpb_Message_Verify(py_msg)) return NULL; |
| 245 | PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare(); |
| 246 | upb_Message* msg = PyUpb_Message_GetIfReified(py_msg); |
| 247 | if (!msg) return &self->ob_base; |
| 248 | |
| 249 | size_t size; |
| 250 | const char* ptr = upb_Message_GetUnknown(msg, &size); |
| 251 | if (size == 0) return &self->ob_base; |
| 252 | |
| 253 | upb_EpsCopyInputStream stream; |
| 254 | upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); |
| 255 | const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg); |
| 256 | |
| 257 | bool ok; |
| 258 | if (upb_MessageDef_IsMessageSet(msgdef)) { |
| 259 | ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL; |
| 260 | } else { |
| 261 | ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL; |
| 262 | } |
| 263 | |
| 264 | if (!ok) { |
| 265 | Py_DECREF(&self->ob_base); |
| 266 | return NULL; |
| 267 | } |
| 268 | |
| 269 | return &self->ob_base; |
| 270 | } |
| 271 | |
| 272 | static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) { |
| 273 | PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self; |
| 274 | return self->fields ? PyObject_Length(self->fields) : 0; |
| 275 | } |
| 276 | |
| 277 | static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self, |
| 278 | Py_ssize_t index) { |
| 279 | PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self; |
| 280 | if (!self->fields) { |
| 281 | PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index); |
| 282 | return NULL; |
| 283 | } |
| 284 | PyObject* ret = PyList_GetItem(self->fields, index); |
| 285 | if (ret) Py_INCREF(ret); |
| 286 | return ret; |
| 287 | } |
| 288 | |
| 289 | static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = { |
| 290 | {Py_tp_new, &PyUpb_UnknownFieldSet_New}, |
| 291 | {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc}, |
| 292 | {Py_sq_length, PyUpb_UnknownFieldSet_Length}, |
| 293 | {Py_sq_item, PyUpb_UnknownFieldSet_GetItem}, |
| 294 | {Py_tp_hash, PyObject_HashNotImplemented}, |
| 295 | {0, NULL}, |
| 296 | }; |
| 297 | |
| 298 | static PyType_Spec PyUpb_UnknownFieldSet_Spec = { |
| 299 | PYUPB_MODULE_NAME ".UnknownFieldSet", // tp_name |
| 300 | sizeof(PyUpb_UnknownFieldSet), // tp_basicsize |
| 301 | 0, // tp_itemsize |
| 302 | Py_TPFLAGS_DEFAULT, // tp_flags |
| 303 | PyUpb_UnknownFieldSet_Slots, |
| 304 | }; |
| 305 | |
| 306 | // ----------------------------------------------------------------------------- |
| 307 | // Top Level |
| 308 | // ----------------------------------------------------------------------------- |
| 309 | |
| 310 | PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) { |
| 311 | PyObject* mod = NULL; |
| 312 | PyObject* namedtuple = NULL; |
| 313 | PyObject* ret = NULL; |
| 314 | |
| 315 | mod = PyImport_ImportModule("collections"); |
| 316 | if (!mod) goto done; |
| 317 | namedtuple = PyObject_GetAttrString(mod, "namedtuple"); |
| 318 | if (!namedtuple) goto done; |
| 319 | ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField", |
| 320 | "field_number", "wire_type", "data"); |
| 321 | |
| 322 | done: |
| 323 | Py_XDECREF(mod); |
| 324 | Py_XDECREF(namedtuple); |
| 325 | return ret; |
| 326 | } |
| 327 | |
| 328 | bool PyUpb_UnknownFields_Init(PyObject* m) { |
| 329 | PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m); |
| 330 | |
| 331 | s->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec); |
| 332 | s->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple(); |
| 333 | |
| 334 | return s->unknown_fields_type && s->unknown_field_type; |
| 335 | } |