blob: 5ad1b06ebbd92f8778b0c049fa9c682abad89d79 [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

8#include "python/message.h"
9
10#include "python/convert.h"
11#include "python/descriptor.h"
12#include "python/extension_dict.h"
13#include "python/map.h"
14#include "python/repeated.h"
Eric Salo3d21bc22024-04-05 18:38:21 -070015#include "upb/message/compare.h"
Adam Cozzette501ecec2023-09-26 14:36:20 -070016#include "upb/message/copy.h"
17#include "upb/reflection/def.h"
18#include "upb/reflection/message.h"
19#include "upb/text/encode.h"
20#include "upb/util/required_fields.h"
21
22static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls);
23static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name);
24
25// -----------------------------------------------------------------------------
26// CPythonBits
27// -----------------------------------------------------------------------------
28
// This struct contains a few things that are not exposed directly through the
// limited API, but that we can get at in somewhat more roundabout ways.  The
// roundabout ways are slower, so we cache the values here.
//
// These values are valid to cache in a global, even across sub-interpreters,
// because they are not pointers to interpreter state.  They are process
// globals that will be the same for any interpreter in this process.
//
// The members are filled in by PyUpb_CPythonBits_Init().
typedef struct {
  // For each member, we note the equivalent expression that we could use in the
  // full (non-limited) API.
  newfunc type_new;            // PyTypeObject.tp_new
  destructor type_dealloc;     // PyTypeObject.tp_dealloc
  getattrofunc type_getattro;  // PyTypeObject.tp_getattro
  setattrofunc type_setattro;  // PyTypeObject.tp_setattro
  size_t type_basicsize;       // sizeof(PyHeapTypeObject)
  traverseproc type_traverse;  // PyTypeObject.tp_traverse
  inquiry type_clear;          // PyTypeObject.tp_clear

  // While we can refer to PY_VERSION_HEX in the limited API, this will give us
  // the version of Python we were compiled against, which may be different
  // than the version we are dynamically linked against.  Here we want the
  // version that is actually running in this process.
  //
  // Read from sys.hexversion at init time; PyUpb_Message_Dealloc() consults it
  // to decide whether the type object must be decref'd manually.
  long python_version_hex;  // PY_VERSION_HEX
} PyUpb_CPythonBits;

// A global containing the values for this process.
// NOTE(review): presumably initialized once via PyUpb_CPythonBits_Init() at
// module setup; the call site is not visible in this part of the file.
PyUpb_CPythonBits cpython_bits;
56
57destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) {
58 // This is a bit desperate. We need type_dealloc(), but PyType_GetSlot(type,
59 // Py_tp_dealloc) will return subtype_dealloc(). There appears to be no way
60 // whatsoever to fetch type_dealloc() through the limited API until Python
61 // 3.10.
62 //
63 // To work around this so we attempt to find it by looking for the offset of
64 // tp_dealloc in PyTypeObject, then memcpy() it directly. This should always
65 // work in practice.
66 //
67 // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap
68 // types. We will be able to replace all this hack with just:
69 //
70 // PyType_GetSlot(&PyType_Type, Py_tp_dealloc)
71 //
72 destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc);
73 for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) {
74 destructor maybe_subtype_dealloc;
75 memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i,
76 sizeof(destructor));
77 if (maybe_subtype_dealloc == subtype_dealloc) {
78 destructor type_dealloc;
79 memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor));
80 return type_dealloc;
81 }
82 }
83 assert(false);
84 return NULL;
85}
86
87static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) {
88 PyObject* bases = NULL;
89 PyTypeObject* type = NULL;
90 PyObject* size = NULL;
91 PyObject* sys = NULL;
92 PyObject* hex_version = NULL;
93 bool ret = false;
94
95 // PyType_GetSlot() only works on heap types, so we cannot use it on
96 // &PyType_Type directly. Instead we create our own (temporary) type derived
97 // from PyType_Type: this will inherit all of the slots from PyType_Type, but
98 // as a heap type it can be queried with PyType_GetSlot().
99 static PyType_Slot dummy_slots[] = {{0, NULL}};
100
101 static PyType_Spec dummy_spec = {
102 "module.DummyClass", // tp_name
103 0, // To be filled in by size of base // tp_basicsize
104 0, // tp_itemsize
105 Py_TPFLAGS_DEFAULT, // tp_flags
106 dummy_slots,
107 };
108
109 bases = Py_BuildValue("(O)", &PyType_Type);
110 if (!bases) goto err;
111 type = (PyTypeObject*)PyType_FromSpecWithBases(&dummy_spec, bases);
112 if (!type) goto err;
113
114 bits->type_new = PyType_GetSlot(type, Py_tp_new);
115 bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type);
116 bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro);
117 bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro);
118 bits->type_traverse = PyType_GetSlot(type, Py_tp_traverse);
119 bits->type_clear = PyType_GetSlot(type, Py_tp_clear);
120
121 size = PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__");
122 if (!size) goto err;
123 bits->type_basicsize = PyLong_AsLong(size);
124 if (bits->type_basicsize == -1) goto err;
125
126 assert(bits->type_new);
127 assert(bits->type_dealloc);
128 assert(bits->type_getattro);
129 assert(bits->type_setattro);
130 assert(bits->type_traverse);
131 assert(bits->type_clear);
132
133#ifndef Py_LIMITED_API
134 assert(bits->type_new == PyType_Type.tp_new);
135 assert(bits->type_dealloc == PyType_Type.tp_dealloc);
136 assert(bits->type_getattro == PyType_Type.tp_getattro);
137 assert(bits->type_setattro == PyType_Type.tp_setattro);
138 assert(bits->type_basicsize == sizeof(PyHeapTypeObject));
139 assert(bits->type_traverse == PyType_Type.tp_traverse);
140 assert(bits->type_clear == PyType_Type.tp_clear);
141#endif
142
143 sys = PyImport_ImportModule("sys");
144 hex_version = PyObject_GetAttrString(sys, "hexversion");
145 bits->python_version_hex = PyLong_AsLong(hex_version);
146 ret = true;
147
148err:
149 Py_XDECREF(bases);
150 Py_XDECREF(type);
151 Py_XDECREF(size);
152 Py_XDECREF(sys);
153 Py_XDECREF(hex_version);
154 return ret;
155}
156
157// -----------------------------------------------------------------------------
158// Message
159// -----------------------------------------------------------------------------
160
161// The main message object. The type of the object (PyUpb_Message.ob_type)
162// will be an instance of the PyUpb_MessageMeta type (defined below). So the
163// chain is:
164// FooMessage = MessageMeta(...)
165// foo = FooMessage()
166//
167// Which becomes:
168// Object C Struct Type Python type (ob_type)
169// ----------------- ----------------- ---------------------
170// foo PyUpb_Message FooMessage
171// FooMessage PyUpb_MessageMeta message_meta_type
172// message_meta_type PyTypeObject 'type' in Python
173//
174// A message object can be in one of two states: present or non-present. When
175// a message is non-present, it stores a reference to its parent, and a write
176// to any attribute will trigger the message to become present in its parent.
177// The parent may also be non-present, in which case a mutation will trigger a
178// chain reaction.
179typedef struct PyUpb_Message {
180 PyObject_HEAD;
181 PyObject* arena;
182 uintptr_t def; // Tagged, low bit 1 == upb_FieldDef*, else upb_MessageDef*
183 union {
184 // when def is msgdef, the data for this msg.
185 upb_Message* msg;
186 // when def is fielddef, owning pointer to parent
187 struct PyUpb_Message* parent;
188 } ptr;
189 PyObject* ext_dict; // Weak pointer to extension dict, if any.
190 // name->obj dict for non-present msg/map/repeated, NULL if none.
191 PyUpb_WeakMap* unset_subobj_map;
192 int version;
193} PyUpb_Message;
194
195static PyObject* PyUpb_Message_GetAttr(PyObject* _self, PyObject* attr);
196
197bool PyUpb_Message_IsStub(PyUpb_Message* msg) { return msg->def & 1; }
198
199const upb_FieldDef* PyUpb_Message_GetFieldDef(PyUpb_Message* msg) {
200 assert(PyUpb_Message_IsStub(msg));
201 return (void*)(msg->def & ~(uintptr_t)1);
202}
203
204static const upb_MessageDef* _PyUpb_Message_GetMsgdef(PyUpb_Message* msg) {
205 return PyUpb_Message_IsStub(msg)
206 ? upb_FieldDef_MessageSubDef(PyUpb_Message_GetFieldDef(msg))
207 : (void*)msg->def;
208}
209
210const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self) {
211 return _PyUpb_Message_GetMsgdef((PyUpb_Message*)self);
212}
213
214static upb_Message* PyUpb_Message_GetMsg(PyUpb_Message* self) {
215 assert(!PyUpb_Message_IsStub(self));
216 return self->ptr.msg;
217}
218
219bool PyUpb_Message_TryCheck(PyObject* self) {
220 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
221 PyObject* type = (PyObject*)Py_TYPE(self);
222 return Py_TYPE(type) == state->message_meta_type;
223}
224
225bool PyUpb_Message_Verify(PyObject* self) {
226 if (!PyUpb_Message_TryCheck(self)) {
227 PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
228 self);
229 return false;
230 }
231 return true;
232}
233
234// If the message is reified, returns it. Otherwise, returns NULL.
235// If NULL is returned, the object is empty and has no underlying data.
236upb_Message* PyUpb_Message_GetIfReified(PyObject* _self) {
237 PyUpb_Message* self = (void*)_self;
238 return PyUpb_Message_IsStub(self) ? NULL : self->ptr.msg;
239}
240
241static PyObject* PyUpb_Message_New(PyObject* cls, PyObject* unused_args,
242 PyObject* unused_kwargs) {
243 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
244 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
245 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
246 msg->def = (uintptr_t)msgdef;
247 msg->arena = PyUpb_Arena_New();
248 msg->ptr.msg = upb_Message_New(layout, PyUpb_Arena_Get(msg->arena));
249 msg->unset_subobj_map = NULL;
250 msg->ext_dict = NULL;
251 msg->version = 0;
252
253 PyObject* ret = &msg->ob_base;
254 PyUpb_ObjCache_Add(msg->ptr.msg, ret);
255 return ret;
256}
257
258/*
259 * PyUpb_Message_LookupName()
260 *
261 * Tries to find a field or oneof named `py_name` in the message object `self`.
262 * The user must pass `f` and/or `o` to indicate whether a field or a oneof name
263 * is expected. If the name is found and it has an expected type, the function
264 * sets `*f` or `*o` respectively and returns true. Otherwise returns false
265 * and sets an exception of type `exc_type` if provided.
266 */
267static bool PyUpb_Message_LookupName(PyUpb_Message* self, PyObject* py_name,
268 const upb_FieldDef** f,
269 const upb_OneofDef** o,
270 PyObject* exc_type) {
271 assert(f || o);
272 Py_ssize_t size;
273 const char* name = NULL;
274 if (PyUnicode_Check(py_name)) {
275 name = PyUnicode_AsUTF8AndSize(py_name, &size);
276 } else if (PyBytes_Check(py_name)) {
277 PyBytes_AsStringAndSize(py_name, (char**)&name, &size);
278 }
279 if (!name) {
280 PyErr_Format(exc_type,
281 "Expected a field name, but got non-string argument %S.",
282 py_name);
283 return false;
284 }
285 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
286
287 if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) {
288 if (exc_type) {
289 PyErr_Format(exc_type, "Protocol message %s has no \"%s\" field.",
290 upb_MessageDef_Name(msgdef), name);
291 }
292 return false;
293 }
294
295 if (!o && !*f) {
296 if (exc_type) {
297 PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
298 name);
299 }
300 return false;
301 }
302
303 if (!f && !*o) {
304 if (exc_type) {
305 PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
306 name);
307 }
308 return false;
309 }
310
311 return true;
312}
313
314static bool PyUpb_Message_InitMessageMapEntry(PyObject* dst, PyObject* src) {
315 if (!src || !dst) return false;
316
317 PyObject* ok = PyObject_CallMethod(dst, "CopyFrom", "O", src);
318 if (!ok) return false;
319 Py_DECREF(ok);
320
321 return true;
322}
323
324int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
325 const upb_FieldDef* f) {
326 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
327 const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
328 PyObject* it = NULL;
329 PyObject* tmp = NULL;
330 int ret = -1;
331 if (upb_FieldDef_IsSubMessage(val_f)) {
332 it = PyObject_GetIter(value);
333 if (it == NULL) {
334 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
335 upb_FieldDef_FullName(f));
336 goto err;
337 }
338 PyObject* e;
339 while ((e = PyIter_Next(it)) != NULL) {
340 PyObject* src = PyObject_GetItem(value, e);
341 PyObject* dst = PyObject_GetItem(map, e);
342 Py_DECREF(e);
343 bool ok = PyUpb_Message_InitMessageMapEntry(dst, src);
344 Py_XDECREF(src);
345 Py_XDECREF(dst);
346 if (!ok) goto err;
347 }
348 } else {
349 tmp = PyObject_CallMethod(map, "update", "O", value);
350 if (!tmp) goto err;
351 }
352 ret = 0;
353
354err:
355 Py_XDECREF(it);
356 Py_XDECREF(tmp);
357 return ret;
358}
359
360void PyUpb_Message_EnsureReified(PyUpb_Message* self);
361
362static bool PyUpb_Message_InitMapAttribute(PyObject* _self, PyObject* name,
363 const upb_FieldDef* f,
364 PyObject* value) {
365 PyObject* map = PyUpb_Message_GetAttr(_self, name);
366 int ok = PyUpb_Message_InitMapAttributes(map, value, f);
367 Py_DECREF(map);
368 return ok >= 0;
369}
370
371static bool PyUpb_Message_InitRepeatedMessageAttribute(PyObject* _self,
372 PyObject* repeated,
373 PyObject* value,
374 const upb_FieldDef* f) {
375 PyObject* it = PyObject_GetIter(value);
376 if (!it) {
377 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
378 upb_FieldDef_FullName(f));
379 return false;
380 }
381 PyObject* e = NULL;
382 PyObject* m = NULL;
383 while ((e = PyIter_Next(it)) != NULL) {
384 if (PyDict_Check(e)) {
385 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, e);
386 if (!m) goto err;
387 } else {
388 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, NULL);
389 if (!m) goto err;
390 PyObject* merged = PyUpb_Message_MergeFrom(m, e);
391 if (!merged) goto err;
392 Py_DECREF(merged);
393 }
394 Py_DECREF(e);
395 Py_DECREF(m);
396 m = NULL;
397 }
398
399err:
400 Py_XDECREF(it);
401 Py_XDECREF(e);
402 Py_XDECREF(m);
403 return !PyErr_Occurred(); // Check PyIter_Next() exit.
404}
405
406static bool PyUpb_Message_InitRepeatedAttribute(PyObject* _self, PyObject* name,
407 PyObject* value) {
408 PyUpb_Message* self = (void*)_self;
409 const upb_FieldDef* field;
410 if (!PyUpb_Message_LookupName(self, name, &field, NULL,
411 PyExc_AttributeError)) {
412 return false;
413 }
414 bool ok = false;
415 PyObject* repeated = PyUpb_Message_GetFieldValue(_self, field);
416 PyObject* tmp = NULL;
417 if (!repeated) goto err;
418 if (upb_FieldDef_IsSubMessage(field)) {
419 if (!PyUpb_Message_InitRepeatedMessageAttribute(_self, repeated, value,
420 field)) {
421 goto err;
422 }
423 } else {
424 tmp = PyUpb_RepeatedContainer_Extend(repeated, value);
425 if (!tmp) goto err;
426 }
427 ok = true;
428
429err:
430 Py_XDECREF(repeated);
431 Py_XDECREF(tmp);
432 return ok;
433}
434
Jie Luoe17821c2024-06-24 08:16:00 -0700435static PyObject* PyUpb_Message_Clear(PyUpb_Message* self);
436
Adam Cozzette501ecec2023-09-26 14:36:20 -0700437static bool PyUpb_Message_InitMessageAttribute(PyObject* _self, PyObject* name,
Jie Luob690e722024-06-05 13:52:52 -0700438 const upb_FieldDef* field,
Adam Cozzette501ecec2023-09-26 14:36:20 -0700439 PyObject* value) {
440 PyObject* submsg = PyUpb_Message_GetAttr(_self, name);
441 if (!submsg) return -1;
442 assert(!PyErr_Occurred());
443 bool ok;
444 if (PyUpb_Message_TryCheck(value)) {
445 PyObject* tmp = PyUpb_Message_MergeFrom(submsg, value);
446 ok = tmp != NULL;
447 Py_XDECREF(tmp);
448 } else if (PyDict_Check(value)) {
449 assert(!PyErr_Occurred());
Jie Luoe17821c2024-06-24 08:16:00 -0700450 const upb_MessageDef* msgdef = upb_FieldDef_MessageSubDef(field);
451 if (upb_MessageDef_WellKnownType(msgdef) == kUpb_WellKnown_Struct) {
452 ok = PyObject_CallMethod(submsg, "_internal_assign", "O", value);
453 if (!ok && PyDict_Size(value) == 1 &&
454 PyDict_Contains(value, PyUnicode_FromString("fields"))) {
455 // Fall back to init as normal message field.
456 PyErr_Clear();
457 PyObject* tmp = PyUpb_Message_Clear((PyUpb_Message*)submsg);
458 Py_DECREF(tmp);
459 ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
460 }
461 } else {
462 ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
463 }
Adam Cozzette501ecec2023-09-26 14:36:20 -0700464 } else {
Jie Luob690e722024-06-05 13:52:52 -0700465 const upb_MessageDef* msgdef = upb_FieldDef_MessageSubDef(field);
Jie Luoe17821c2024-06-24 08:16:00 -0700466 if (upb_MessageDef_WellKnownType(msgdef) != kUpb_WellKnown_Unspecified &&
467 PyObject_HasAttrString(submsg, "_internal_assign")) {
468 ok = PyObject_CallMethod(submsg, "_internal_assign", "O", value);
469 } else {
470 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
471 PyErr_Format(PyExc_TypeError,
472 "Message must be initialized with a dict: %s",
473 upb_MessageDef_FullName(m));
474 ok = false;
Jie Luob690e722024-06-05 13:52:52 -0700475 }
Adam Cozzette501ecec2023-09-26 14:36:20 -0700476 }
477 Py_DECREF(submsg);
478 return ok;
479}
480
481static bool PyUpb_Message_InitScalarAttribute(upb_Message* msg,
482 const upb_FieldDef* f,
483 PyObject* value,
484 upb_Arena* arena) {
485 upb_MessageValue msgval;
486 assert(!PyErr_Occurred());
487 if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return false;
488 upb_Message_SetFieldByDef(msg, f, msgval, arena);
489 return true;
490}
491
492int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
493 PyObject* kwargs) {
494 assert(!PyErr_Occurred());
495
496 if (args != NULL && PyTuple_Size(args) != 0) {
497 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
498 return -1;
499 }
500
501 if (kwargs == NULL) return 0;
502
503 PyUpb_Message* self = (void*)_self;
504 Py_ssize_t pos = 0;
505 PyObject* name;
506 PyObject* value;
507 PyUpb_Message_EnsureReified(self);
508 upb_Message* msg = PyUpb_Message_GetMsg(self);
509 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
510
511 while (PyDict_Next(kwargs, &pos, &name, &value)) {
512 assert(!PyErr_Occurred());
513 const upb_FieldDef* f;
514 assert(!PyErr_Occurred());
515 if (!PyUpb_Message_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
516 return -1;
517 }
518
519 if (value == Py_None) continue; // Ignored.
520
521 assert(!PyErr_Occurred());
522
523 if (upb_FieldDef_IsMap(f)) {
524 if (!PyUpb_Message_InitMapAttribute(_self, name, f, value)) return -1;
525 } else if (upb_FieldDef_IsRepeated(f)) {
526 if (!PyUpb_Message_InitRepeatedAttribute(_self, name, value)) return -1;
527 } else if (upb_FieldDef_IsSubMessage(f)) {
Jie Luob690e722024-06-05 13:52:52 -0700528 if (!PyUpb_Message_InitMessageAttribute(_self, name, f, value)) return -1;
Adam Cozzette501ecec2023-09-26 14:36:20 -0700529 } else {
530 if (!PyUpb_Message_InitScalarAttribute(msg, f, value, arena)) return -1;
531 }
532 if (PyErr_Occurred()) return -1;
533 }
534
535 if (PyErr_Occurred()) return -1;
536 return 0;
537}
538
539static int PyUpb_Message_Init(PyObject* _self, PyObject* args,
540 PyObject* kwargs) {
541 if (args != NULL && PyTuple_Size(args) != 0) {
542 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
543 return -1;
544 }
545
546 return PyUpb_Message_InitAttributes(_self, args, kwargs);
547}
548
549static PyObject* PyUpb_Message_NewStub(PyObject* parent, const upb_FieldDef* f,
550 PyObject* arena) {
551 const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
552 PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
553
554 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
555 msg->def = (uintptr_t)f | 1;
556 msg->arena = arena;
557 msg->ptr.parent = (PyUpb_Message*)parent;
558 msg->unset_subobj_map = NULL;
559 msg->ext_dict = NULL;
560 msg->version = 0;
561
562 Py_DECREF(cls);
563 Py_INCREF(parent);
564 Py_INCREF(arena);
565 return &msg->ob_base;
566}
567
568static bool PyUpb_Message_IsEmpty(const upb_Message* msg,
569 const upb_MessageDef* m,
570 const upb_DefPool* ext_pool) {
571 if (!msg) return true;
572
573 size_t iter = kUpb_Message_Begin;
574 const upb_FieldDef* f;
575 upb_MessageValue val;
576 if (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) return false;
577
578 size_t len;
579 (void)upb_Message_GetUnknown(msg, &len);
580 return len == 0;
581}
582
583static bool PyUpb_Message_IsEqual(PyUpb_Message* m1, PyObject* _m2) {
584 PyUpb_Message* m2 = (void*)_m2;
585 if (m1 == m2) return true;
586 if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) {
587 return false;
588 }
589 const upb_MessageDef* m1_msgdef = _PyUpb_Message_GetMsgdef(m1);
590#ifndef NDEBUG
591 const upb_MessageDef* m2_msgdef = _PyUpb_Message_GetMsgdef(m2);
592 assert(m1_msgdef == m2_msgdef);
593#endif
594 const upb_Message* m1_msg = PyUpb_Message_GetIfReified((PyObject*)m1);
595 const upb_Message* m2_msg = PyUpb_Message_GetIfReified(_m2);
596 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m1_msgdef));
597
598 const bool e1 = PyUpb_Message_IsEmpty(m1_msg, m1_msgdef, symtab);
599 const bool e2 = PyUpb_Message_IsEmpty(m2_msg, m1_msgdef, symtab);
600 if (e1 || e2) return e1 && e2;
601
Eric Salo3d21bc22024-04-05 18:38:21 -0700602 const int options = kUpb_CompareOption_IncludeUnknownFields;
603 return upb_Message_IsEqualByDef(m1_msg, m2_msg, m1_msgdef, options);
Adam Cozzette501ecec2023-09-26 14:36:20 -0700604}
605
606static const upb_FieldDef* PyUpb_Message_InitAsMsg(PyUpb_Message* m,
607 upb_Arena* arena) {
608 const upb_FieldDef* f = PyUpb_Message_GetFieldDef(m);
609 const upb_MessageDef* m2 = upb_FieldDef_MessageSubDef(f);
610 m->ptr.msg = upb_Message_New(upb_MessageDef_MiniTable(m2), arena);
611 m->def = (uintptr_t)m2;
612 PyUpb_ObjCache_Add(m->ptr.msg, &m->ob_base);
613 return f;
614}
615
616static void PyUpb_Message_SetField(PyUpb_Message* parent, const upb_FieldDef* f,
617 PyUpb_Message* child, upb_Arena* arena) {
618 upb_MessageValue msgval = {.msg_val = PyUpb_Message_GetMsg(child)};
619 upb_Message_SetFieldByDef(PyUpb_Message_GetMsg(parent), f, msgval, arena);
620 PyUpb_WeakMap_Delete(parent->unset_subobj_map, f);
621 // Releases a ref previously owned by child->ptr.parent of our child.
622 Py_DECREF(child);
623}
624
625/*
626 * PyUpb_Message_EnsureReified()
627 *
628 * This implements the "expando" behavior of Python protos:
629 * foo = FooProto()
630 *
631 * # The intermediate messages don't really exist, and won't be serialized.
632 * x = foo.bar.bar.bar.bar.bar.baz
633 *
634 * # Now all the intermediate objects are created.
635 * foo.bar.bar.bar.bar.bar.baz = 5
636 *
637 * This function should be called before performing any mutation of a protobuf
638 * object.
639 *
640 * Post-condition:
641 * PyUpb_Message_IsStub(self) is false
642 */
643void PyUpb_Message_EnsureReified(PyUpb_Message* self) {
644 if (!PyUpb_Message_IsStub(self)) return;
645 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
646
647 // This is a non-present message. We need to create a real upb_Message for
648 // this object and every parent until we reach a present message.
649 PyUpb_Message* child = self;
650 PyUpb_Message* parent = self->ptr.parent;
651 const upb_FieldDef* child_f = PyUpb_Message_InitAsMsg(child, arena);
652 Py_INCREF(child); // To avoid a special-case in PyUpb_Message_SetField().
653
654 do {
655 PyUpb_Message* next_parent = parent->ptr.parent;
656 const upb_FieldDef* parent_f = NULL;
657 if (PyUpb_Message_IsStub(parent)) {
658 parent_f = PyUpb_Message_InitAsMsg(parent, arena);
659 }
660 PyUpb_Message_SetField(parent, child_f, child, arena);
661 child = parent;
662 child_f = parent_f;
663 parent = next_parent;
664 } while (child_f);
665
666 // Releases ref previously owned by child->ptr.parent of our child.
667 Py_DECREF(child);
668 self->version++;
669}
670
671static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self);
672
673/*
674 * PyUpb_Message_Reify()
675 *
676 * The message equivalent of PyUpb_*Container_Reify(), this transitions
677 * the wrapper from the unset state (owning a reference on self->ptr.parent) to
678 * the set state (having a non-owning pointer to self->ptr.msg).
679 */
680static void PyUpb_Message_Reify(PyUpb_Message* self, const upb_FieldDef* f,
681 upb_Message* msg) {
682 assert(f == PyUpb_Message_GetFieldDef(self));
683 if (!msg) {
684 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef((PyObject*)self);
685 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
686 msg = upb_Message_New(layout, PyUpb_Arena_Get(self->arena));
687 }
688 PyUpb_ObjCache_Add(msg, &self->ob_base);
689 Py_DECREF(&self->ptr.parent->ob_base);
690 self->ptr.msg = msg; // Overwrites self->ptr.parent
691 self->def = (uintptr_t)upb_FieldDef_MessageSubDef(f);
692 PyUpb_Message_SyncSubobjs(self);
693}
694
695/*
696 * PyUpb_Message_SyncSubobjs()
697 *
698 * This operation must be invoked whenever the underlying upb_Message has been
699 * mutated directly in C. This will attach any newly-present field data
700 * to previously returned stub wrapper objects.
701 *
702 * For example:
703 * foo = FooMessage()
704 * sub = foo.submsg # Empty, unset sub-message
705 *
706 * # SyncSubobjs() is required to connect our existing 'sub' wrapper to the
707 * # newly created foo.submsg data in C.
708 * foo.MergeFrom(FooMessage(submsg={}))
709 *
710 * This requires that all of the new sub-objects that have appeared are owned
711 * by `self`'s arena.
712 */
713static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self) {
714 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
715 if (!subobj_map) return;
716
717 upb_Message* msg = PyUpb_Message_GetMsg(self);
718 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
719 const void* key;
720 PyObject* obj;
721
722 // The last ref to this message could disappear during iteration.
723 // When we call PyUpb_*Container_Reify() below, the container will drop
724 // its ref on `self`. If that was the last ref on self, the object will be
725 // deleted, and `subobj_map` along with it. We need it to live until we are
726 // done iterating.
727 Py_INCREF(&self->ob_base);
728
729 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
730 const upb_FieldDef* f = key;
731 if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f))
732 continue;
733 upb_MessageValue msgval = upb_Message_GetFieldByDef(msg, f);
734 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
735 if (upb_FieldDef_IsMap(f)) {
736 if (!msgval.map_val) continue;
737 PyUpb_MapContainer_Reify(obj, (upb_Map*)msgval.map_val);
738 } else if (upb_FieldDef_IsRepeated(f)) {
739 if (!msgval.array_val) continue;
740 PyUpb_RepeatedContainer_Reify(obj, (upb_Array*)msgval.array_val);
741 } else {
742 PyUpb_Message* sub = (void*)obj;
743 assert(self == sub->ptr.parent);
744 PyUpb_Message_Reify(sub, f, (upb_Message*)msgval.msg_val);
745 }
746 }
747
748 Py_DECREF(&self->ob_base);
749
750 // TODO: present fields need to be iterated too if they can reach
751 // a WeakMap.
752}
753
754static PyObject* PyUpb_Message_ToString(PyUpb_Message* self) {
755 if (PyUpb_Message_IsStub(self)) {
756 return PyUnicode_FromStringAndSize(NULL, 0);
757 }
758 upb_Message* msg = PyUpb_Message_GetMsg(self);
759 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
760 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
761 char buf[1024];
762 int options = UPB_TXTENC_SKIPUNKNOWN;
763 size_t size = upb_TextEncode(msg, msgdef, symtab, options, buf, sizeof(buf));
764 if (size < sizeof(buf)) {
765 return PyUnicode_FromStringAndSize(buf, size);
766 } else {
767 char* buf2 = malloc(size + 1);
768 size_t size2 = upb_TextEncode(msg, msgdef, symtab, options, buf2, size + 1);
769 assert(size == size2);
770 PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
771 free(buf2);
772 return ret;
773 }
774}
775
776static PyObject* PyUpb_Message_RichCompare(PyObject* _self, PyObject* other,
777 int opid) {
778 PyUpb_Message* self = (void*)_self;
779 if (opid != Py_EQ && opid != Py_NE) {
780 Py_INCREF(Py_NotImplemented);
781 return Py_NotImplemented;
782 }
Jie Luoe17821c2024-06-24 08:16:00 -0700783 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
784 upb_WellKnown wkt = upb_MessageDef_WellKnownType(msgdef);
785 if ((wkt == kUpb_WellKnown_ListValue && PyList_Check(other)) ||
786 (wkt == kUpb_WellKnown_Struct && PyDict_Check(other))) {
787 return PyObject_CallMethod(_self, "_internal_compare", "O", other);
788 }
789
Adam Cozzette501ecec2023-09-26 14:36:20 -0700790 if (!PyObject_TypeCheck(other, Py_TYPE(self))) {
791 Py_INCREF(Py_NotImplemented);
792 return Py_NotImplemented;
793 }
794 bool ret = PyUpb_Message_IsEqual(self, other);
795 if (opid == Py_NE) ret = !ret;
796 return PyBool_FromLong(ret);
797}
798
799void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f) {
800 PyUpb_Message* self = (void*)_self;
801 PyUpb_WeakMap_Delete(self->unset_subobj_map, f);
802}
803
804void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
805 upb_MessageValue subobj) {
806 PyUpb_Message* self = (void*)_self;
807 PyUpb_Message_EnsureReified(self);
808 PyUpb_Message_CacheDelete(_self, f);
809 upb_Message_SetFieldByDef(self->ptr.msg, f, subobj,
810 PyUpb_Arena_Get(self->arena));
811}
812
813static void PyUpb_Message_Dealloc(PyObject* _self) {
814 PyUpb_Message* self = (void*)_self;
815
816 if (PyUpb_Message_IsStub(self)) {
817 PyUpb_Message_CacheDelete((PyObject*)self->ptr.parent,
818 PyUpb_Message_GetFieldDef(self));
819 Py_DECREF(self->ptr.parent);
820 } else {
821 PyUpb_ObjCache_Delete(self->ptr.msg);
822 }
823
824 if (self->unset_subobj_map) {
825 PyUpb_WeakMap_Free(self->unset_subobj_map);
826 }
827
828 Py_DECREF(self->arena);
829
830 // We do not use PyUpb_Dealloc() here because Message is a base type and for
831 // base types there is a bug we have to work around in this case (see below).
832 PyTypeObject* tp = Py_TYPE(self);
833 freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
834 tp_free(self);
835
836 if (cpython_bits.python_version_hex >= 0x03080000) {
837 // Prior to Python 3.8 there is a bug where deallocating the type here would
838 // lead to a double-decref: https://bugs.python.org/issue37879
839 Py_DECREF(tp);
840 }
841}
842
843PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
844 PyObject* arena) {
845 PyObject* ret = PyUpb_ObjCache_Get(u_msg);
846 if (ret) return ret;
847
848 PyObject* cls = PyUpb_Descriptor_GetClass(m);
849 // It is not safe to use PyObject_{,GC}_New() due to:
850 // https://bugs.python.org/issue35810
851 PyUpb_Message* py_msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
852 py_msg->arena = arena;
853 py_msg->def = (uintptr_t)m;
854 py_msg->ptr.msg = u_msg;
855 py_msg->unset_subobj_map = NULL;
856 py_msg->ext_dict = NULL;
857 py_msg->version = 0;
858 ret = &py_msg->ob_base;
859 Py_DECREF(cls);
860 Py_INCREF(arena);
861 PyUpb_ObjCache_Add(u_msg, ret);
862 return ret;
863}
864
865/* PyUpb_Message_GetStub()
866 *
867 * Non-present messages return "stub" objects that point to their parent, but
868 * will materialize into real upb objects if they are mutated.
869 *
870 * Note: we do *not* create stubs for repeated/map fields unless the parent
871 * is a stub:
872 *
873 * msg = TestMessage()
874 * msg.submessage # (A) Creates a stub
875 * msg.repeated_foo # (B) Does *not* create a stub
876 * msg.submessage.repeated_bar # (C) Creates a stub
877 *
878 * In case (B) we have some freedom: we could either create a stub, or create
879 * a reified object with underlying data. It appears that either could work
880 * equally well, with no observable change to users. There isn't a clear
881 * advantage to either choice. We choose to follow the behavior of the
882 * pre-existing C++ behavior for consistency, but if it becomes apparent that
883 * there would be some benefit to reversing this decision, it should be totally
884 * within the realm of possibility.
885 */
886PyObject* PyUpb_Message_GetStub(PyUpb_Message* self,
887 const upb_FieldDef* field) {
888 PyObject* _self = (void*)self;
889 if (!self->unset_subobj_map) {
890 self->unset_subobj_map = PyUpb_WeakMap_New();
891 }
892 PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field);
893
894 if (subobj) return subobj;
895
896 if (upb_FieldDef_IsMap(field)) {
897 subobj = PyUpb_MapContainer_NewStub(_self, field, self->arena);
898 } else if (upb_FieldDef_IsRepeated(field)) {
899 subobj = PyUpb_RepeatedContainer_NewStub(_self, field, self->arena);
900 } else {
901 subobj = PyUpb_Message_NewStub(&self->ob_base, field, self->arena);
902 }
903 PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj);
904
905 assert(!PyErr_Occurred());
906 return subobj;
907}
908
909PyObject* PyUpb_Message_GetPresentWrapper(PyUpb_Message* self,
910 const upb_FieldDef* field) {
911 assert(!PyUpb_Message_IsStub(self));
912 upb_MutableMessageValue mutval =
913 upb_Message_Mutable(self->ptr.msg, field, PyUpb_Arena_Get(self->arena));
914 if (upb_FieldDef_IsMap(field)) {
915 return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field,
916 self->arena);
917 } else {
918 return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, field,
919 self->arena);
920 }
921}
922
923PyObject* PyUpb_Message_GetScalarValue(PyUpb_Message* self,
924 const upb_FieldDef* field) {
925 upb_MessageValue val;
926 if (PyUpb_Message_IsStub(self)) {
927 // Unset message always returns default values.
928 val = upb_FieldDef_Default(field);
929 } else {
930 val = upb_Message_GetFieldByDef(self->ptr.msg, field);
931 }
932 return PyUpb_UpbToPy(val, field, self->arena);
933}
934
935/*
936 * PyUpb_Message_GetFieldValue()
937 *
938 * Implements the equivalent of getattr(msg, field), once `field` has
939 * already been resolved to a `upb_FieldDef*`.
940 *
941 * This may involve constructing a wrapper object for the given field, or
942 * returning one that was previously constructed. If the field is not actually
943 * set, the wrapper object will be an "unset" object that is not actually
944 * connected to any C data.
945 */
946PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
947 const upb_FieldDef* field) {
948 PyUpb_Message* self = (void*)_self;
949 assert(upb_FieldDef_ContainingType(field) == PyUpb_Message_GetMsgdef(_self));
950 bool submsg = upb_FieldDef_IsSubMessage(field);
951 bool seq = upb_FieldDef_IsRepeated(field);
952
953 if ((PyUpb_Message_IsStub(self) && (submsg || seq)) ||
954 (submsg && !seq && !upb_Message_HasFieldByDef(self->ptr.msg, field))) {
955 return PyUpb_Message_GetStub(self, field);
956 } else if (seq) {
957 return PyUpb_Message_GetPresentWrapper(self, field);
958 } else {
959 return PyUpb_Message_GetScalarValue(self, field);
960 }
961}
962
963int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
964 PyObject* value, PyObject* exc) {
965 PyUpb_Message* self = (void*)_self;
966 assert(value);
967
Jie Luob690e722024-06-05 13:52:52 -0700968 if (upb_FieldDef_IsRepeated(field)) {
Adam Cozzette501ecec2023-09-26 14:36:20 -0700969 PyErr_Format(exc,
Jie Luob690e722024-06-05 13:52:52 -0700970 "Assignment not allowed to map, or repeated "
Adam Cozzette501ecec2023-09-26 14:36:20 -0700971 "field \"%s\" in protocol message object.",
972 upb_FieldDef_Name(field));
973 return -1;
974 }
975
976 PyUpb_Message_EnsureReified(self);
977
Jie Luob690e722024-06-05 13:52:52 -0700978 if (upb_FieldDef_IsSubMessage(field)) {
979 const upb_MessageDef* msgdef = upb_FieldDef_MessageSubDef(field);
Jie Luoe17821c2024-06-24 08:16:00 -0700980 if (upb_MessageDef_WellKnownType(msgdef) != kUpb_WellKnown_Unspecified) {
981 PyObject* sub_message = PyUpb_Message_GetFieldValue(_self, field);
982 if (PyObject_HasAttrString(sub_message, "_internal_assign")) {
Jie Luob690e722024-06-05 13:52:52 -0700983 PyObject* ok =
Jie Luoe17821c2024-06-24 08:16:00 -0700984 PyObject_CallMethod(sub_message, "_internal_assign", "O", value);
Jie Luob690e722024-06-05 13:52:52 -0700985 if (!ok) return -1;
986 Py_DECREF(ok);
987 return 0;
988 }
Jie Luob690e722024-06-05 13:52:52 -0700989 }
Jie Luoe17821c2024-06-24 08:16:00 -0700990 PyErr_Format(exc,
991 "Assignment not allowed to message "
992 "field \"%s\" in protocol message object.",
993 upb_FieldDef_Name(field));
994 return -1;
Jie Luob690e722024-06-05 13:52:52 -0700995 }
996
Adam Cozzette501ecec2023-09-26 14:36:20 -0700997 upb_MessageValue val;
998 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
999 if (!PyUpb_PyToUpb(value, field, &val, arena)) {
1000 return -1;
1001 }
1002
1003 upb_Message_SetFieldByDef(self->ptr.msg, field, val, arena);
1004 return 0;
1005}
1006
1007int PyUpb_Message_GetVersion(PyObject* _self) {
1008 PyUpb_Message* self = (void*)_self;
1009 return self->version;
1010}
1011
1012/*
1013 * PyUpb_Message_GetAttr()
1014 *
1015 * Implements:
1016 * foo = msg.foo
1017 *
1018 * Attribute lookup must find both message fields and base class methods like
1019 * msg.SerializeToString().
1020 */
1021__attribute__((flatten)) static PyObject* PyUpb_Message_GetAttr(
1022 PyObject* _self, PyObject* attr) {
1023 PyUpb_Message* self = (void*)_self;
1024
1025 // Lookup field by name.
1026 const upb_FieldDef* field;
1027 if (PyUpb_Message_LookupName(self, attr, &field, NULL, NULL)) {
1028 return PyUpb_Message_GetFieldValue(_self, field);
1029 }
1030
1031 // Check base class attributes.
1032 assert(!PyErr_Occurred());
1033 PyObject* ret = PyObject_GenericGetAttr(_self, attr);
1034 if (ret) return ret;
1035
1036 // Swallow AttributeError if it occurred and try again on the metaclass
1037 // to pick up class attributes. But we have to special-case "Extensions"
1038 // which affirmatively returns AttributeError when a message is not
1039 // extendable.
1040 const char* name;
1041 if (PyErr_ExceptionMatches(PyExc_AttributeError) &&
1042 (name = PyUpb_GetStrData(attr)) && strcmp(name, "Extensions") != 0) {
1043 PyErr_Clear();
1044 return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr);
1045 }
1046
1047 return NULL;
1048}
1049
1050/*
1051 * PyUpb_Message_SetAttr()
1052 *
1053 * Implements:
1054 * msg.foo = foo
1055 */
1056static int PyUpb_Message_SetAttr(PyObject* _self, PyObject* attr,
1057 PyObject* value) {
1058 PyUpb_Message* self = (void*)_self;
Joshua Habermande529442023-12-11 16:55:43 -08001059
1060 if (value == NULL) {
1061 PyErr_SetString(PyExc_AttributeError, "Cannot delete field attribute");
1062 return -1;
1063 }
1064
Adam Cozzette501ecec2023-09-26 14:36:20 -07001065 const upb_FieldDef* field;
1066 if (!PyUpb_Message_LookupName(self, attr, &field, NULL,
1067 PyExc_AttributeError)) {
1068 return -1;
1069 }
1070
1071 return PyUpb_Message_SetFieldValue(_self, field, value, PyExc_AttributeError);
1072}
1073
1074static PyObject* PyUpb_Message_HasField(PyObject* _self, PyObject* arg) {
1075 PyUpb_Message* self = (void*)_self;
1076 const upb_FieldDef* field;
1077 const upb_OneofDef* oneof;
1078
1079 if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
1080 return NULL;
1081 }
1082
1083 if (field && !upb_FieldDef_HasPresence(field)) {
1084 PyErr_Format(PyExc_ValueError, "Field %s does not have presence.",
1085 upb_FieldDef_FullName(field));
1086 return NULL;
1087 }
1088
1089 if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
1090
Hong Shin0730adb2024-05-28 09:48:40 -07001091 return PyBool_FromLong(
1092 field ? upb_Message_HasFieldByDef(self->ptr.msg, field)
1093 : upb_Message_WhichOneofByDef(self->ptr.msg, oneof) != NULL);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001094}
1095
Jie Luo24f27c32024-05-06 12:10:59 -07001096static PyObject* PyUpb_Message_Contains(PyObject* _self, PyObject* arg) {
1097 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
1098 switch (upb_MessageDef_WellKnownType(msgdef)) {
1099 case kUpb_WellKnown_Struct: {
1100 // For WKT Struct, check if the key is in the fields.
1101 PyUpb_Message* self = (void*)_self;
1102 if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
1103 upb_Message* msg = PyUpb_Message_GetMsg(self);
1104 const upb_FieldDef* f = upb_MessageDef_FindFieldByName(msgdef, "fields");
1105 const upb_Map* map = upb_Message_GetFieldByDef(msg, f).map_val;
1106 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
1107 const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
1108 upb_MessageValue u_key;
1109 if (!PyUpb_PyToUpb(arg, key_f, &u_key, NULL)) return NULL;
1110 return PyBool_FromLong(upb_Map_Get(map, u_key, NULL));
1111 }
1112 case kUpb_WellKnown_ListValue: {
1113 // For WKT ListValue, check if the key is in the items.
1114 PyUpb_Message* self = (void*)_self;
1115 if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
1116 PyObject* items = PyObject_CallMethod(_self, "items", NULL);
1117 return PyBool_FromLong(PySequence_Contains(items, arg));
1118 }
1119 default:
1120 // For other messages, check with HasField.
1121 return PyUpb_Message_HasField(_self, arg);
1122 }
1123}
1124
Adam Cozzette501ecec2023-09-26 14:36:20 -07001125static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1126 PyObject* arg);
1127
1128static PyObject* PyUpb_Message_IsInitializedAppendErrors(PyObject* _self,
1129 PyObject* errors) {
1130 PyObject* list = PyUpb_Message_FindInitializationErrors(_self, NULL);
1131 if (!list) return NULL;
1132 bool ok = PyList_Size(list) == 0;
1133 PyObject* ret = NULL;
1134 PyObject* extend_result = NULL;
1135 if (!ok) {
1136 extend_result = PyObject_CallMethod(errors, "extend", "O", list);
1137 if (!extend_result) goto done;
1138 }
1139 ret = PyBool_FromLong(ok);
1140
1141done:
1142 Py_XDECREF(list);
1143 Py_XDECREF(extend_result);
1144 return ret;
1145}
1146
1147static PyObject* PyUpb_Message_IsInitialized(PyObject* _self, PyObject* args) {
1148 PyObject* errors = NULL;
1149 if (!PyArg_ParseTuple(args, "|O", &errors)) {
1150 return NULL;
1151 }
1152 if (errors) {
1153 // We need to collect a list of unset required fields and append it to
1154 // `errors`.
1155 return PyUpb_Message_IsInitializedAppendErrors(_self, errors);
1156 } else {
1157 // We just need to return a boolean "true" or "false" for whether all
1158 // required fields are set.
1159 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1160 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1161 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1162 bool initialized = !upb_util_HasUnsetRequired(msg, m, symtab, NULL);
1163 return PyBool_FromLong(initialized);
1164 }
1165}
1166
1167static PyObject* PyUpb_Message_ListFieldsItemKey(PyObject* self,
1168 PyObject* val) {
1169 assert(PyTuple_Check(val));
1170 PyObject* field = PyTuple_GetItem(val, 0);
1171 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(field);
1172 return PyLong_FromLong(upb_FieldDef_Number(f));
1173}
1174
1175static PyObject* PyUpb_Message_CheckCalledFromGeneratedFile(
1176 PyObject* unused, PyObject* unused_arg) {
1177 PyErr_SetString(
1178 PyExc_TypeError,
1179 "Descriptors cannot be created directly.\n"
1180 "If this call came from a _pb2.py file, your generated code is out of "
1181 "date and must be regenerated with protoc >= 3.19.0.\n"
1182 "If you cannot immediately regenerate your protos, some other possible "
1183 "workarounds are:\n"
1184 " 1. Downgrade the protobuf package to 3.20.x or lower.\n"
1185 " 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will "
1186 "use pure-Python parsing and will be much slower).\n"
1187 "\n"
1188 "More information: "
1189 "https://developers.google.com/protocol-buffers/docs/news/"
1190 "2022-05-06#python-updates");
1191 return NULL;
1192}
1193
1194static bool PyUpb_Message_SortFieldList(PyObject* list) {
1195 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1196 bool ok = false;
1197 PyObject* args = PyTuple_New(0);
1198 PyObject* kwargs = PyDict_New();
1199 PyObject* method = PyObject_GetAttrString(list, "sort");
1200 PyObject* call_result = NULL;
1201 if (!args || !kwargs || !method) goto err;
1202 if (PyDict_SetItemString(kwargs, "key", state->listfields_item_key) < 0) {
1203 goto err;
1204 }
1205 call_result = PyObject_Call(method, args, kwargs);
1206 if (!call_result) goto err;
1207 ok = true;
1208
1209err:
1210 Py_XDECREF(method);
1211 Py_XDECREF(args);
1212 Py_XDECREF(kwargs);
1213 Py_XDECREF(call_result);
1214 return ok;
1215}
1216
1217static PyObject* PyUpb_Message_ListFields(PyObject* _self, PyObject* arg) {
1218 PyObject* list = PyList_New(0);
1219 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1220 if (!msg) return list;
1221
1222 size_t iter1 = kUpb_Message_Begin;
1223 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1224 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1225 const upb_FieldDef* f;
1226 PyObject* field_desc = NULL;
1227 PyObject* py_val = NULL;
1228 PyObject* tuple = NULL;
1229 upb_MessageValue val;
1230 uint32_t last_field = 0;
1231 bool in_order = true;
1232 while (upb_Message_Next(msg, m, symtab, &f, &val, &iter1)) {
1233 const uint32_t field_number = upb_FieldDef_Number(f);
1234 if (field_number < last_field) in_order = false;
1235 last_field = field_number;
1236 PyObject* field_desc = PyUpb_FieldDescriptor_Get(f);
1237 PyObject* py_val = PyUpb_Message_GetFieldValue(_self, f);
1238 if (!field_desc || !py_val) goto err;
1239 PyObject* tuple = Py_BuildValue("(NN)", field_desc, py_val);
1240 field_desc = NULL;
1241 py_val = NULL;
1242 if (!tuple) goto err;
1243 if (PyList_Append(list, tuple)) goto err;
1244 Py_DECREF(tuple);
1245 tuple = NULL;
1246 }
1247
1248 // Users rely on fields being returned in field number order.
1249 if (!in_order && !PyUpb_Message_SortFieldList(list)) goto err;
1250
1251 return list;
1252
1253err:
1254 Py_XDECREF(field_desc);
1255 Py_XDECREF(py_val);
1256 Py_XDECREF(tuple);
1257 Py_DECREF(list);
1258 return NULL;
1259}
1260
1261PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg) {
1262 if (self->ob_type != arg->ob_type) {
1263 PyErr_Format(PyExc_TypeError,
1264 "Parameter to MergeFrom() must be instance of same class: "
1265 "expected %S got %S.",
1266 Py_TYPE(self), Py_TYPE(arg));
1267 return NULL;
1268 }
1269 // OPT: exit if src is empty.
1270 PyObject* subargs = PyTuple_New(0);
1271 PyObject* serialized =
1272 PyUpb_Message_SerializePartialToString(arg, subargs, NULL);
1273 Py_DECREF(subargs);
1274 if (!serialized) return NULL;
1275 PyObject* ret = PyUpb_Message_MergeFromString(self, serialized);
1276 Py_DECREF(serialized);
1277 Py_XDECREF(ret);
1278 Py_RETURN_NONE;
1279}
1280
Adam Cozzette501ecec2023-09-26 14:36:20 -07001281static PyObject* PyUpb_Message_CopyFrom(PyObject* _self, PyObject* arg) {
1282 if (_self->ob_type != arg->ob_type) {
1283 PyErr_Format(PyExc_TypeError,
1284 "Parameter to CopyFrom() must be instance of same class: "
1285 "expected %S got %S.",
1286 Py_TYPE(_self), Py_TYPE(arg));
1287 return NULL;
1288 }
1289 if (_self == arg) {
1290 Py_RETURN_NONE;
1291 }
1292 PyUpb_Message* self = (void*)_self;
1293 PyUpb_Message* other = (void*)arg;
1294 PyUpb_Message_EnsureReified(self);
1295
1296 const upb_Message* other_msg = PyUpb_Message_GetIfReified((PyObject*)other);
1297 if (other_msg) {
1298 upb_Message_DeepCopy(
1299 self->ptr.msg, other_msg,
1300 upb_MessageDef_MiniTable((const upb_MessageDef*)other->def),
1301 PyUpb_Arena_Get(self->arena));
1302 } else {
1303 PyObject* tmp = PyUpb_Message_Clear(self);
1304 Py_DECREF(tmp);
1305 }
1306 PyUpb_Message_SyncSubobjs(self);
1307
1308 Py_RETURN_NONE;
1309}
1310
1311static PyObject* PyUpb_Message_SetInParent(PyObject* _self, PyObject* arg) {
1312 PyUpb_Message* self = (void*)_self;
1313 PyUpb_Message_EnsureReified(self);
1314 Py_RETURN_NONE;
1315}
1316
1317static PyObject* PyUpb_Message_UnknownFields(PyObject* _self, PyObject* arg) {
1318 // TODO: re-enable when unknown fields are added.
1319 // return PyUpb_UnknownFields_New(_self);
1320 PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor");
1321 return NULL;
1322}
1323
1324PyObject* PyUpb_Message_MergeFromString(PyObject* _self, PyObject* arg) {
1325 PyUpb_Message* self = (void*)_self;
1326 char* buf;
1327 Py_ssize_t size;
1328 PyObject* bytes = NULL;
1329
1330 if (PyMemoryView_Check(arg)) {
1331 bytes = PyBytes_FromObject(arg);
1332 // Cannot fail when passed something of the correct type.
1333 int err = PyBytes_AsStringAndSize(bytes, &buf, &size);
1334 (void)err;
1335 assert(err >= 0);
jensbjorgenseneb67a912024-06-12 08:31:37 -07001336 } else if (PyByteArray_Check(arg)) {
Joshua Haberman1f984452024-06-12 11:29:45 -07001337 buf = PyByteArray_AsString(arg);
1338 size = PyByteArray_Size(arg);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001339 } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) {
1340 return NULL;
1341 }
1342
1343 PyUpb_Message_EnsureReified(self);
1344 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1345 const upb_FileDef* file = upb_MessageDef_File(msgdef);
1346 const upb_ExtensionRegistry* extreg =
1347 upb_DefPool_ExtensionRegistry(upb_FileDef_Pool(file));
1348 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1349 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
1350 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1351 int options =
1352 upb_DecodeOptions_MaxDepth(state->allow_oversize_protos ? UINT16_MAX : 0);
1353 upb_DecodeStatus status =
1354 upb_Decode(buf, size, self->ptr.msg, layout, extreg, options, arena);
1355 Py_XDECREF(bytes);
1356 if (status != kUpb_DecodeStatus_Ok) {
Jie Luod8793112024-06-13 15:21:50 -07001357 PyErr_Format(state->decode_error_class,
1358 "Error parsing message with type '%s'",
1359 upb_MessageDef_FullName(msgdef));
Adam Cozzette501ecec2023-09-26 14:36:20 -07001360 return NULL;
1361 }
1362 PyUpb_Message_SyncSubobjs(self);
1363 return PyLong_FromSsize_t(size);
1364}
1365
1366static PyObject* PyUpb_Message_ParseFromString(PyObject* self, PyObject* arg) {
1367 PyObject* tmp = PyUpb_Message_Clear((PyUpb_Message*)self);
1368 Py_DECREF(tmp);
1369 return PyUpb_Message_MergeFromString(self, arg);
1370}
1371
1372static PyObject* PyUpb_Message_ByteSize(PyObject* self, PyObject* args) {
1373 // TODO: At the
1374 // moment upb does not have a "byte size" function, so we just serialize to
1375 // string and get the size of the string.
1376 PyObject* subargs = PyTuple_New(0);
1377 PyObject* serialized = PyUpb_Message_SerializeToString(self, subargs, NULL);
1378 Py_DECREF(subargs);
1379 if (!serialized) return NULL;
1380 size_t size = PyBytes_Size(serialized);
1381 Py_DECREF(serialized);
1382 return PyLong_FromSize_t(size);
1383}
1384
1385static PyObject* PyUpb_Message_Clear(PyUpb_Message* self) {
1386 PyUpb_Message_EnsureReified(self);
1387 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1388 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
1389
1390 if (subobj_map) {
1391 upb_Message* msg = PyUpb_Message_GetMsg(self);
1392 (void)msg; // Suppress unused warning when asserts are disabled.
1393 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
1394 const void* key;
1395 PyObject* obj;
1396
1397 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
1398 const upb_FieldDef* f = key;
1399 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
1400 if (upb_FieldDef_IsMap(f)) {
1401 assert(upb_Message_GetFieldByDef(msg, f).map_val == NULL);
1402 PyUpb_MapContainer_Reify(obj, NULL);
1403 } else if (upb_FieldDef_IsRepeated(f)) {
1404 assert(upb_Message_GetFieldByDef(msg, f).array_val == NULL);
1405 PyUpb_RepeatedContainer_Reify(obj, NULL);
1406 } else {
1407 assert(!upb_Message_HasFieldByDef(msg, f));
1408 PyUpb_Message* sub = (void*)obj;
1409 assert(self == sub->ptr.parent);
1410 PyUpb_Message_Reify(sub, f, NULL);
1411 }
1412 }
1413 }
1414
1415 upb_Message_ClearByDef(self->ptr.msg, msgdef);
1416 Py_RETURN_NONE;
1417}
1418
1419void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f) {
1420 PyUpb_Message* self = (void*)_self;
1421 PyUpb_Message_EnsureReified((PyUpb_Message*)self);
1422
1423 // We must ensure that any stub object is reified so its parent no longer
1424 // points to us.
1425 PyObject* sub = self->unset_subobj_map
1426 ? PyUpb_WeakMap_Get(self->unset_subobj_map, f)
1427 : NULL;
1428
1429 if (upb_FieldDef_IsMap(f)) {
1430 // For maps we additionally have to invalidate any iterators. So we need
1431 // to get an object even if it's reified.
1432 if (!sub) {
1433 sub = PyUpb_Message_GetFieldValue(_self, f);
1434 }
1435 PyUpb_MapContainer_EnsureReified(sub);
1436 PyUpb_MapContainer_Invalidate(sub);
1437 } else if (upb_FieldDef_IsRepeated(f)) {
1438 if (sub) {
1439 PyUpb_RepeatedContainer_EnsureReified(sub);
1440 }
1441 } else if (upb_FieldDef_IsSubMessage(f)) {
1442 if (sub) {
1443 PyUpb_Message_EnsureReified((PyUpb_Message*)sub);
1444 }
1445 }
1446
1447 Py_XDECREF(sub);
1448 upb_Message_ClearFieldByDef(self->ptr.msg, f);
1449}
1450
1451static PyObject* PyUpb_Message_ClearExtension(PyObject* _self, PyObject* arg) {
1452 PyUpb_Message* self = (void*)_self;
1453 PyUpb_Message_EnsureReified(self);
1454 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, arg);
1455 if (!f) return NULL;
1456 PyUpb_Message_DoClearField(_self, f);
1457 Py_RETURN_NONE;
1458}
1459
1460static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) {
1461 PyUpb_Message* self = (void*)_self;
1462
1463 // We always need EnsureReified() here (even for an unset message) to
1464 // preserve behavior like:
1465 // msg = FooMessage()
1466 // msg.foo.Clear()
1467 // assert msg.HasField("foo")
1468 PyUpb_Message_EnsureReified(self);
1469
1470 const upb_FieldDef* f;
1471 const upb_OneofDef* o;
1472 if (!PyUpb_Message_LookupName(self, arg, &f, &o, PyExc_ValueError)) {
1473 return NULL;
1474 }
1475
Hong Shin0730adb2024-05-28 09:48:40 -07001476 if (o) f = upb_Message_WhichOneofByDef(self->ptr.msg, o);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001477 if (f) PyUpb_Message_DoClearField(_self, f);
1478 Py_RETURN_NONE;
1479}
1480
1481static PyObject* PyUpb_Message_DiscardUnknownFields(PyUpb_Message* self,
1482 PyObject* arg) {
1483 PyUpb_Message_EnsureReified(self);
1484 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1485 upb_Message_DiscardUnknown(self->ptr.msg, msgdef, 64);
1486 Py_RETURN_NONE;
1487}
1488
1489static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1490 PyObject* arg) {
1491 PyUpb_Message* self = (void*)_self;
1492 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1493 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1494 const upb_DefPool* ext_pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
1495 upb_FieldPathEntry* fields_base;
1496 PyObject* ret = PyList_New(0);
1497 if (upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields_base)) {
1498 upb_FieldPathEntry* fields = fields_base;
1499 char* buf = NULL;
1500 size_t size = 0;
1501 assert(fields->field);
1502 while (fields->field) {
1503 upb_FieldPathEntry* field = fields;
1504 size_t need = upb_FieldPath_ToText(&fields, buf, size);
1505 if (need >= size) {
1506 fields = field;
1507 size = size ? size * 2 : 16;
1508 while (size <= need) size *= 2;
1509 buf = realloc(buf, size);
1510 need = upb_FieldPath_ToText(&fields, buf, size);
1511 assert(size > need);
1512 }
1513 PyObject* str = PyUnicode_FromString(buf);
1514 PyList_Append(ret, str);
1515 Py_DECREF(str);
1516 }
1517 free(buf);
1518 free(fields_base);
1519 }
1520 return ret;
1521}
1522
1523static PyObject* PyUpb_Message_FromString(PyObject* cls, PyObject* serialized) {
1524 PyObject* ret = NULL;
1525 PyObject* length = NULL;
1526
1527 ret = PyObject_CallObject(cls, NULL);
1528 if (ret == NULL) goto err;
1529 length = PyUpb_Message_MergeFromString(ret, serialized);
1530 if (length == NULL) goto err;
1531
1532done:
1533 Py_XDECREF(length);
1534 return ret;
1535
1536err:
1537 Py_XDECREF(ret);
1538 ret = NULL;
1539 goto done;
1540}
1541
1542const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
1543 PyObject* key) {
1544 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(key);
1545 if (!f) {
1546 PyErr_Clear();
1547 PyErr_Format(PyExc_KeyError, "Object %R is not a field descriptor\n", key);
1548 return NULL;
1549 }
1550 if (!upb_FieldDef_IsExtension(f)) {
1551 PyErr_Format(PyExc_KeyError, "Field %s is not an extension\n",
1552 upb_FieldDef_FullName(f));
1553 return NULL;
1554 }
1555 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
1556 if (upb_FieldDef_ContainingType(f) != msgdef) {
1557 PyErr_Format(PyExc_KeyError, "Extension doesn't match (%s vs %s)",
1558 upb_MessageDef_FullName(msgdef), upb_FieldDef_FullName(f));
1559 return NULL;
1560 }
1561 return f;
1562}
1563
1564static PyObject* PyUpb_Message_HasExtension(PyObject* _self,
1565 PyObject* ext_desc) {
1566 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1567 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, ext_desc);
1568 if (!f) return NULL;
1569 if (upb_FieldDef_IsRepeated(f)) {
1570 PyErr_SetString(PyExc_KeyError,
1571 "Field is repeated. A singular method is required.");
1572 return NULL;
1573 }
1574 if (!msg) Py_RETURN_FALSE;
1575 return PyBool_FromLong(upb_Message_HasFieldByDef(msg, f));
1576}
1577
1578void PyUpb_Message_ReportInitializationErrors(const upb_MessageDef* msgdef,
1579 PyObject* errors, PyObject* exc) {
1580 PyObject* comma = PyUnicode_FromString(",");
1581 PyObject* missing_fields = NULL;
1582 if (!comma) goto done;
1583 missing_fields = PyUnicode_Join(comma, errors);
1584 if (!missing_fields) goto done;
1585 PyErr_Format(exc, "Message %s is missing required fields: %U",
1586 upb_MessageDef_FullName(msgdef), missing_fields);
1587done:
1588 Py_XDECREF(comma);
1589 Py_XDECREF(missing_fields);
1590 Py_DECREF(errors);
1591}
1592
1593PyObject* PyUpb_Message_SerializeInternal(PyObject* _self, PyObject* args,
1594 PyObject* kwargs,
1595 bool check_required) {
1596 PyUpb_Message* self = (void*)_self;
1597 if (!PyUpb_Message_Verify((PyObject*)self)) return NULL;
1598 static const char* kwlist[] = {"deterministic", NULL};
1599 int deterministic = 0;
1600 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
1601 &deterministic)) {
1602 return NULL;
1603 }
1604
1605 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1606 if (PyUpb_Message_IsStub(self)) {
1607 // Nothing to serialize, but we do have to check whether the message is
1608 // initialized.
1609 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1610 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1611 if (!errors) return NULL;
1612 if (PyList_Size(errors) == 0) {
1613 Py_DECREF(errors);
1614 return PyBytes_FromStringAndSize(NULL, 0);
1615 }
1616 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1617 state->encode_error_class);
1618 return NULL;
1619 }
1620
1621 upb_Arena* arena = upb_Arena_New();
1622 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1623 size_t size = 0;
1624 // Python does not currently have any effective limit on serialization depth.
1625 int options = upb_EncodeOptions_MaxDepth(UINT16_MAX);
1626 if (check_required) options |= kUpb_EncodeOption_CheckRequired;
1627 if (deterministic) options |= kUpb_EncodeOption_Deterministic;
1628 char* pb;
1629 upb_EncodeStatus status =
1630 upb_Encode(self->ptr.msg, layout, options, arena, &pb, &size);
1631 PyObject* ret = NULL;
1632
1633 if (status != kUpb_EncodeStatus_Ok) {
1634 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1635 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1636 if (PyList_Size(errors) != 0) {
1637 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1638 state->encode_error_class);
1639 } else {
1640 PyErr_Format(state->encode_error_class, "Failed to serialize proto");
1641 }
1642 goto done;
1643 }
1644
1645 ret = PyBytes_FromStringAndSize(pb, size);
1646
1647done:
1648 upb_Arena_Free(arena);
1649 return ret;
1650}
1651
1652PyObject* PyUpb_Message_SerializeToString(PyObject* _self, PyObject* args,
1653 PyObject* kwargs) {
1654 return PyUpb_Message_SerializeInternal(_self, args, kwargs, true);
1655}
1656
1657PyObject* PyUpb_Message_SerializePartialToString(PyObject* _self,
1658 PyObject* args,
1659 PyObject* kwargs) {
1660 return PyUpb_Message_SerializeInternal(_self, args, kwargs, false);
1661}
1662
1663static PyObject* PyUpb_Message_WhichOneof(PyObject* _self, PyObject* name) {
1664 PyUpb_Message* self = (void*)_self;
1665 const upb_OneofDef* o;
1666 if (!PyUpb_Message_LookupName(self, name, NULL, &o, PyExc_ValueError)) {
1667 return NULL;
1668 }
1669 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1670 if (!msg) Py_RETURN_NONE;
Hong Shin0730adb2024-05-28 09:48:40 -07001671 const upb_FieldDef* f = upb_Message_WhichOneofByDef(msg, o);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001672 if (!f) Py_RETURN_NONE;
1673 return PyUnicode_FromString(upb_FieldDef_Name(f));
1674}
1675
1676PyObject* DeepCopy(PyObject* _self, PyObject* arg) {
Adam Cozzette501ecec2023-09-26 14:36:20 -07001677 const upb_MessageDef* def = PyUpb_Message_GetMsgdef(_self);
Joshua Habermanb9e48942024-01-23 17:10:27 -08001678 const upb_MiniTable* mini_table = upb_MessageDef_MiniTable(def);
1679 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001680 PyObject* arena = PyUpb_Arena_New();
Joshua Habermanb9e48942024-01-23 17:10:27 -08001681 upb_Arena* upb_arena = PyUpb_Arena_Get(arena);
1682
1683 upb_Message* clone = msg ? upb_Message_DeepClone(msg, mini_table, upb_arena)
1684 : upb_Message_New(mini_table, upb_arena);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001685 PyObject* ret = PyUpb_Message_Get(clone, def, arena);
1686 Py_DECREF(arena);
1687
1688 return ret;
1689}
1690
1691void PyUpb_Message_ClearExtensionDict(PyObject* _self) {
1692 PyUpb_Message* self = (void*)_self;
1693 assert(self->ext_dict);
1694 self->ext_dict = NULL;
1695}
1696
1697static PyObject* PyUpb_Message_GetExtensionDict(PyObject* _self,
1698 void* closure) {
1699 PyUpb_Message* self = (void*)_self;
1700 if (self->ext_dict) {
1701 Py_INCREF(self->ext_dict);
1702 return self->ext_dict;
1703 }
1704
1705 const upb_MessageDef* m = _PyUpb_Message_GetMsgdef(self);
1706 if (upb_MessageDef_ExtensionRangeCount(m) == 0) {
1707 PyErr_SetNone(PyExc_AttributeError);
1708 return NULL;
1709 }
1710
1711 self->ext_dict = PyUpb_ExtensionDict_New(_self);
1712 return self->ext_dict;
1713}
1714
1715static PyGetSetDef PyUpb_Message_Getters[] = {
1716 {"Extensions", PyUpb_Message_GetExtensionDict, NULL, "Extension dict"},
1717 {NULL}};
1718
1719static PyMethodDef PyUpb_Message_Methods[] = {
1720 {"__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
1721 "Makes a deep copy of the class."},
1722 // TODO
1723 //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
1724 // "Outputs a unicode representation of the message." },
Jie Luo24f27c32024-05-06 12:10:59 -07001725 {"__contains__", PyUpb_Message_Contains, METH_O,
1726 "Checks if a message field is set."},
Adam Cozzette501ecec2023-09-26 14:36:20 -07001727 {"ByteSize", (PyCFunction)PyUpb_Message_ByteSize, METH_NOARGS,
1728 "Returns the size of the message in bytes."},
1729 {"Clear", (PyCFunction)PyUpb_Message_Clear, METH_NOARGS,
1730 "Clears the message."},
1731 {"ClearExtension", PyUpb_Message_ClearExtension, METH_O,
1732 "Clears a message field."},
1733 {"ClearField", PyUpb_Message_ClearField, METH_O, "Clears a message field."},
1734 {"CopyFrom", PyUpb_Message_CopyFrom, METH_O,
1735 "Copies a protocol message into the current message."},
1736 {"DiscardUnknownFields", (PyCFunction)PyUpb_Message_DiscardUnknownFields,
1737 METH_NOARGS, "Discards the unknown fields."},
1738 {"FindInitializationErrors", PyUpb_Message_FindInitializationErrors,
1739 METH_NOARGS, "Finds unset required fields."},
1740 {"FromString", PyUpb_Message_FromString, METH_O | METH_CLASS,
1741 "Creates new method instance from given serialized data."},
1742 {"HasExtension", PyUpb_Message_HasExtension, METH_O,
1743 "Checks if a message field is set."},
1744 {"HasField", PyUpb_Message_HasField, METH_O,
1745 "Checks if a message field is set."},
1746 {"IsInitialized", PyUpb_Message_IsInitialized, METH_VARARGS,
1747 "Checks if all required fields of a protocol message are set."},
1748 {"ListFields", PyUpb_Message_ListFields, METH_NOARGS,
1749 "Lists all set fields of a message."},
1750 {"MergeFrom", PyUpb_Message_MergeFrom, METH_O,
1751 "Merges a protocol message into the current message."},
1752 {"MergeFromString", PyUpb_Message_MergeFromString, METH_O,
1753 "Merges a serialized message into the current message."},
1754 {"ParseFromString", PyUpb_Message_ParseFromString, METH_O,
1755 "Parses a serialized message into the current message."},
1756 {"SerializePartialToString",
1757 (PyCFunction)PyUpb_Message_SerializePartialToString,
1758 METH_VARARGS | METH_KEYWORDS,
1759 "Serializes the message to a string, even if it isn't initialized."},
1760 {"SerializeToString", (PyCFunction)PyUpb_Message_SerializeToString,
1761 METH_VARARGS | METH_KEYWORDS,
1762 "Serializes the message to a string, only for initialized messages."},
1763 {"SetInParent", (PyCFunction)PyUpb_Message_SetInParent, METH_NOARGS,
1764 "Sets the has bit of the given field in its parent message."},
1765 {"UnknownFields", (PyCFunction)PyUpb_Message_UnknownFields, METH_NOARGS,
1766 "Parse unknown field set"},
1767 {"WhichOneof", PyUpb_Message_WhichOneof, METH_O,
1768 "Returns the name of the field set inside a oneof, "
1769 "or None if no field is set."},
1770 {"_ListFieldsItemKey", PyUpb_Message_ListFieldsItemKey,
1771 METH_O | METH_STATIC,
1772 "Compares ListFields() list entries by field number"},
1773 {"_CheckCalledFromGeneratedFile",
1774 PyUpb_Message_CheckCalledFromGeneratedFile, METH_NOARGS | METH_STATIC,
1775 "Raises TypeError if the caller is not in a _pb2.py file."},
1776 {NULL, NULL}};
1777
1778static PyType_Slot PyUpb_Message_Slots[] = {
1779 {Py_tp_dealloc, PyUpb_Message_Dealloc},
1780 {Py_tp_doc, "A ProtocolMessage"},
1781 {Py_tp_getattro, PyUpb_Message_GetAttr},
1782 {Py_tp_getset, PyUpb_Message_Getters},
1783 {Py_tp_hash, PyObject_HashNotImplemented},
1784 {Py_tp_methods, PyUpb_Message_Methods},
1785 {Py_tp_new, PyUpb_Message_New},
1786 {Py_tp_str, PyUpb_Message_ToString},
1787 {Py_tp_repr, PyUpb_Message_ToString},
1788 {Py_tp_richcompare, PyUpb_Message_RichCompare},
1789 {Py_tp_setattro, PyUpb_Message_SetAttr},
1790 {Py_tp_init, PyUpb_Message_Init},
1791 {0, NULL}};
1792
1793PyType_Spec PyUpb_Message_Spec = {
1794 PYUPB_MODULE_NAME ".Message", // tp_name
1795 sizeof(PyUpb_Message), // tp_basicsize
1796 0, // tp_itemsize
1797 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
1798 PyUpb_Message_Slots,
1799};
1800
1801// -----------------------------------------------------------------------------
1802// MessageMeta
1803// -----------------------------------------------------------------------------
1804
// MessageMeta is the metaclass for message objects. The generated code uses it
// to construct message classes, i.e.
//
// FooMessage = _message.MessageMeta('FooMessage', (_message.Message,), {...})
//
// (This is not quite true: at the moment the Python library subclasses
// MessageMeta and uses that subclass as the metaclass. There is a TODO below
// to simplify this, so that the illustration above becomes accurate.)
1813
1814typedef struct {
1815 const upb_MiniTable* layout;
1816 PyObject* py_message_descriptor;
1817} PyUpb_MessageMeta;
1818
1819// The PyUpb_MessageMeta struct is trailing data tacked onto the end of
1820// MessageMeta instances. This means that we get our instances of this struct
1821// by adding the appropriate number of bytes.
1822static PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
1823#ifndef NDEBUG
1824 PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
1825 assert(!state || cls->ob_type == state->message_meta_type);
1826#endif
1827 return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
1828}
1829
1830static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls) {
1831 PyUpb_MessageMeta* self = PyUpb_GetMessageMeta(cls);
1832 return PyUpb_Descriptor_GetDef(self->py_message_descriptor);
1833}
1834
1835PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
1836 const char* name, PyObject* dict) {
1837 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1838 PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
1839 if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
1840 return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
1841 }
1842
1843 const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
1844 assert(msgdef);
1845 assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef)));
1846
1847 PyObject* slots = PyTuple_New(0);
1848 if (!slots) return NULL;
1849 int status = PyDict_SetItemString(dict, "__slots__", slots);
1850 Py_DECREF(slots);
1851 if (status < 0) return NULL;
1852
1853 // Bases are either:
1854 // (Message, Message) # for regular messages
1855 // (Message, Message, WktBase) # For well-known types
1856 PyObject* wkt_bases = PyUpb_GetWktBases(state);
1857 PyObject* wkt_base =
1858 PyDict_GetItemString(wkt_bases, upb_MessageDef_FullName(msgdef));
1859 PyObject* args;
1860 if (wkt_base == NULL) {
1861 args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
1862 state->message_class, dict);
1863 } else {
1864 args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
1865 state->message_class, wkt_base, dict);
1866 }
1867
1868 PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
1869 Py_DECREF(args);
1870 if (!ret) return NULL;
1871
1872 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
1873 meta->py_message_descriptor = py_descriptor;
1874 meta->layout = upb_MessageDef_MiniTable(msgdef);
1875 Py_INCREF(meta->py_message_descriptor);
1876 PyUpb_Descriptor_SetClass(py_descriptor, ret);
1877
1878 PyUpb_ObjCache_Add(meta->layout, ret);
1879
1880 return ret;
1881}
1882
1883static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
1884 PyObject* kwargs) {
1885 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1886 static const char* kwlist[] = {"name", "bases", "dict", 0};
1887 PyObject *bases, *dict;
1888 const char* name;
1889
1890 // Check arguments: (name, bases, dict)
1891 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
1892 &name, &PyTuple_Type, &bases, &PyDict_Type,
1893 &dict)) {
1894 return NULL;
1895 }
1896
1897 // Check bases: only (), or (message.Message,) are allowed
1898 Py_ssize_t size = PyTuple_Size(bases);
1899 if (!(size == 0 ||
1900 (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
1901 PyErr_Format(PyExc_TypeError,
1902 "A Message class can only inherit from Message, not %S",
1903 bases);
1904 return NULL;
1905 }
1906
1907 // Check dict['DESCRIPTOR']
1908 PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
1909 if (py_descriptor == NULL) {
1910 PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
1911 return NULL;
1912 }
1913
1914 const upb_MessageDef* m = PyUpb_Descriptor_GetDef(py_descriptor);
1915 PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
1916 if (ret) return ret;
1917 return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
1918}
1919
1920static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
1921 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1922 PyUpb_ObjCache_Delete(meta->layout);
Protobuf Team Bote32d0942023-11-06 06:43:06 -08001923 // The MessageMeta type is a GC type, which means we should untrack the
1924 // object before invalidating internal state (so that code executed by the
1925 // GC doesn't see the invalid state). Unfortunately since we're calling
1926 // cpython_bits.type_dealloc, which also untracks the object, we can't.
1927 // Instead just make sure the internal state remains reasonable by using
1928 // Py_CLEAR(), which sets the struct member to NULL. The tp_traverse and
1929 // tp_clear methods, which are called by Python's GC, already allow for it
1930 // to be NULL.
1931 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001932 PyTypeObject* tp = Py_TYPE(self);
1933 cpython_bits.type_dealloc(self);
1934 Py_DECREF(tp);
1935}
1936
1937void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) {
1938 PyObject* name =
1939 PyUnicode_FromFormat("%s_FIELD_NUMBER", upb_FieldDef_Name(f));
1940 PyObject* upper = PyObject_CallMethod(name, "upper", "");
1941 PyObject_SetAttr(self, upper, PyLong_FromLong(upb_FieldDef_Number(f)));
1942 Py_DECREF(name);
1943 Py_DECREF(upper);
1944}
1945
1946static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
1947 PyObject* name) {
1948 const char* name_buf = PyUpb_GetStrData(name);
1949 if (!name_buf) return NULL;
1950 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
1951 const upb_FileDef* filedef = upb_MessageDef_File(msgdef);
1952 const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
1953
1954 PyObject* py_key =
1955 PyBytes_FromFormat("%s.%s", upb_MessageDef_FullName(msgdef), name_buf);
1956 const char* key = PyUpb_GetStrData(py_key);
1957 PyObject* ret = NULL;
1958 const upb_MessageDef* nested = upb_DefPool_FindMessageByName(symtab, key);
1959 const upb_EnumDef* enumdef;
1960 const upb_EnumValueDef* enumval;
1961 const upb_FieldDef* ext;
1962
1963 if (nested) {
1964 ret = PyUpb_Descriptor_GetClass(nested);
1965 } else if ((enumdef = upb_DefPool_FindEnumByName(symtab, key))) {
1966 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1967 PyObject* klass = state->enum_type_wrapper_class;
1968 ret = PyUpb_EnumDescriptor_Get(enumdef);
1969 ret = PyObject_CallFunctionObjArgs(klass, ret, NULL);
1970 } else if ((enumval = upb_DefPool_FindEnumByNameval(symtab, key))) {
1971 ret = PyLong_FromLong(upb_EnumValueDef_Number(enumval));
1972 } else if ((ext = upb_DefPool_FindExtensionByName(symtab, key))) {
1973 ret = PyUpb_FieldDescriptor_Get(ext);
1974 }
1975
1976 Py_DECREF(py_key);
1977
1978 const char* suffix = "_FIELD_NUMBER";
1979 size_t n = strlen(name_buf);
1980 size_t suffix_n = strlen(suffix);
1981 if (n > suffix_n && memcmp(suffix, name_buf + n - suffix_n, suffix_n) == 0) {
1982 // We can't look up field names dynamically, because the <NAME>_FIELD_NUMBER
1983 // naming scheme upper-cases the field name and is therefore non-reversible.
1984 // So we just add all field numbers.
1985 int n = upb_MessageDef_FieldCount(msgdef);
1986 for (int i = 0; i < n; i++) {
1987 PyUpb_MessageMeta_AddFieldNumber(self, upb_MessageDef_Field(msgdef, i));
1988 }
1989 n = upb_MessageDef_NestedExtensionCount(msgdef);
1990 for (int i = 0; i < n; i++) {
1991 PyUpb_MessageMeta_AddFieldNumber(
1992 self, upb_MessageDef_NestedExtension(msgdef, i));
1993 }
1994 ret = PyObject_GenericGetAttr(self, name);
1995 }
1996
1997 return ret;
1998}
1999
2000static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
2001 // We want to first delegate to the type's tp_dict to retrieve any attributes
2002 // that were previously calculated and cached in the type's dict.
2003 PyObject* ret = cpython_bits.type_getattro(self, name);
2004 if (ret) return ret;
2005
2006 // We did not find a cached attribute. Try to calculate the attribute
2007 // dynamically, using the descriptor as an argument.
2008 PyErr_Clear();
2009 ret = PyUpb_MessageMeta_GetDynamicAttr(self, name);
2010
2011 if (ret) {
2012 PyObject_SetAttr(self, name, ret);
2013 PyErr_Clear();
2014 return ret;
2015 }
2016
2017 PyErr_SetObject(PyExc_AttributeError, name);
2018 return NULL;
2019}
2020
2021static int PyUpb_MessageMeta_Traverse(PyObject* self, visitproc visit,
2022 void* arg) {
2023 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
2024 Py_VISIT(meta->py_message_descriptor);
2025 return cpython_bits.type_traverse(self, visit, arg);
2026}
2027
Hood Chathamb915e9f2024-08-31 05:23:22 -07002028static int PyUpb_MessageMeta_Clear(PyObject* self) {
Protobuf Team Bote32d0942023-11-06 06:43:06 -08002029 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
2030 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07002031 return cpython_bits.type_clear(self);
2032}
2033
2034static PyType_Slot PyUpb_MessageMeta_Slots[] = {
2035 {Py_tp_new, PyUpb_MessageMeta_New},
2036 {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
2037 {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
2038 {Py_tp_traverse, PyUpb_MessageMeta_Traverse},
2039 {Py_tp_clear, PyUpb_MessageMeta_Clear},
2040 {0, NULL}};
2041
2042static PyType_Spec PyUpb_MessageMeta_Spec = {
2043 PYUPB_MODULE_NAME ".MessageMeta", // tp_name
2044 0, // To be filled in by size of base // tp_basicsize
2045 0, // tp_itemsize
2046 // TODO: remove BASETYPE, Python should just use MessageMeta
2047 // directly instead of subclassing it.
2048 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags
2049 PyUpb_MessageMeta_Slots,
2050};
2051
2052static PyObject* PyUpb_MessageMeta_CreateType(void) {
2053 PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
2054 if (!bases) return NULL;
2055 PyUpb_MessageMeta_Spec.basicsize =
2056 cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
2057 PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
2058 Py_DECREF(bases);
2059 return type;
2060}
2061
2062bool PyUpb_InitMessage(PyObject* m) {
2063 if (!PyUpb_CPythonBits_Init(&cpython_bits)) return false;
2064 PyObject* message_meta_type = PyUpb_MessageMeta_CreateType();
2065
2066 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
2067 state->cmessage_type = PyUpb_AddClass(m, &PyUpb_Message_Spec);
2068 state->message_meta_type = (PyTypeObject*)message_meta_type;
2069
2070 if (!state->cmessage_type || !state->message_meta_type) return false;
2071 if (PyModule_AddObject(m, "MessageMeta", message_meta_type)) return false;
2072 state->listfields_item_key = PyObject_GetAttrString(
2073 (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
2074
2075 PyObject* mod =
2076 PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
2077 if (mod == NULL) return false;
2078
2079 state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError");
2080 state->decode_error_class = PyObject_GetAttrString(mod, "DecodeError");
2081 state->message_class = PyObject_GetAttrString(mod, "Message");
2082 Py_DECREF(mod);
2083
2084 PyObject* enum_type_wrapper = PyImport_ImportModule(
2085 PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
2086 if (enum_type_wrapper == NULL) return false;
2087
2088 state->enum_type_wrapper_class =
2089 PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
2090 Py_DECREF(enum_type_wrapper);
2091
2092 if (!state->encode_error_class || !state->decode_error_class ||
2093 !state->message_class || !state->listfields_item_key ||
2094 !state->enum_type_wrapper_class) {
2095 return false;
2096 }
2097
2098 return true;
2099}