blob: 2aea2c5a9ff990e01d0402c84524c6f15601c71a [file] [log] [blame]
Adam Cozzette501ecec2023-09-26 14:36:20 -07001// Protocol Buffers - Google's data interchange format
2// Copyright 2023 Google LLC. All rights reserved.
Adam Cozzette501ecec2023-09-26 14:36:20 -07003//
Protobuf Team Bot0fab7732023-11-20 13:38:15 -08004// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file or at
6// https://developers.google.com/open-source/licenses/bsd
Adam Cozzette501ecec2023-09-26 14:36:20 -07007
8#include "python/message.h"
9
10#include "python/convert.h"
11#include "python/descriptor.h"
12#include "python/extension_dict.h"
13#include "python/map.h"
14#include "python/repeated.h"
15#include "upb/message/copy.h"
16#include "upb/reflection/def.h"
17#include "upb/reflection/message.h"
18#include "upb/text/encode.h"
19#include "upb/util/required_fields.h"
20
21static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls);
22static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name);
23
24// -----------------------------------------------------------------------------
25// CPythonBits
26// -----------------------------------------------------------------------------
27
28// This struct contains a few things that are not exposed directly through the
29// limited API, but that we can get at in somewhat more roundabout ways. The
30// roundabout ways are slower, so we cache the values here.
31//
32// These values are valid to cache in a global, even across sub-interpreters,
33// because they are not pointers to interpreter state. They are process
34// globals that will be the same for any interpreter in this process.
35typedef struct {
36 // For each member, we note the equivalent expression that we could use in the
37 // full (non-limited) API.
38 newfunc type_new; // PyTypeObject.tp_new
39 destructor type_dealloc; // PyTypeObject.tp_dealloc
40 getattrofunc type_getattro; // PyTypeObject.tp_getattro
41 setattrofunc type_setattro; // PyTypeObject.tp_setattro
42 size_t type_basicsize; // sizeof(PyHeapTypeObject)
43 traverseproc type_traverse; // PyTypeObject.tp_traverse
44 inquiry type_clear; // PyTypeObject.tp_clear
45
46 // While we can refer to PY_VERSION_HEX in the limited API, this will give us
47 // the version of Python we were compiled against, which may be different
48 // than the version we are dynamically linked against. Here we want the
49 // version that is actually running in this process.
50 long python_version_hex; // PY_VERSION_HEX
51} PyUpb_CPythonBits;
52
53// A global containing the values for this process.
54PyUpb_CPythonBits cpython_bits;
55
56destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) {
57 // This is a bit desperate. We need type_dealloc(), but PyType_GetSlot(type,
58 // Py_tp_dealloc) will return subtype_dealloc(). There appears to be no way
59 // whatsoever to fetch type_dealloc() through the limited API until Python
60 // 3.10.
61 //
62 // To work around this so we attempt to find it by looking for the offset of
63 // tp_dealloc in PyTypeObject, then memcpy() it directly. This should always
64 // work in practice.
65 //
66 // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap
67 // types. We will be able to replace all this hack with just:
68 //
69 // PyType_GetSlot(&PyType_Type, Py_tp_dealloc)
70 //
71 destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc);
72 for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) {
73 destructor maybe_subtype_dealloc;
74 memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i,
75 sizeof(destructor));
76 if (maybe_subtype_dealloc == subtype_dealloc) {
77 destructor type_dealloc;
78 memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor));
79 return type_dealloc;
80 }
81 }
82 assert(false);
83 return NULL;
84}
85
86static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) {
87 PyObject* bases = NULL;
88 PyTypeObject* type = NULL;
89 PyObject* size = NULL;
90 PyObject* sys = NULL;
91 PyObject* hex_version = NULL;
92 bool ret = false;
93
94 // PyType_GetSlot() only works on heap types, so we cannot use it on
95 // &PyType_Type directly. Instead we create our own (temporary) type derived
96 // from PyType_Type: this will inherit all of the slots from PyType_Type, but
97 // as a heap type it can be queried with PyType_GetSlot().
98 static PyType_Slot dummy_slots[] = {{0, NULL}};
99
100 static PyType_Spec dummy_spec = {
101 "module.DummyClass", // tp_name
102 0, // To be filled in by size of base // tp_basicsize
103 0, // tp_itemsize
104 Py_TPFLAGS_DEFAULT, // tp_flags
105 dummy_slots,
106 };
107
108 bases = Py_BuildValue("(O)", &PyType_Type);
109 if (!bases) goto err;
110 type = (PyTypeObject*)PyType_FromSpecWithBases(&dummy_spec, bases);
111 if (!type) goto err;
112
113 bits->type_new = PyType_GetSlot(type, Py_tp_new);
114 bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type);
115 bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro);
116 bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro);
117 bits->type_traverse = PyType_GetSlot(type, Py_tp_traverse);
118 bits->type_clear = PyType_GetSlot(type, Py_tp_clear);
119
120 size = PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__");
121 if (!size) goto err;
122 bits->type_basicsize = PyLong_AsLong(size);
123 if (bits->type_basicsize == -1) goto err;
124
125 assert(bits->type_new);
126 assert(bits->type_dealloc);
127 assert(bits->type_getattro);
128 assert(bits->type_setattro);
129 assert(bits->type_traverse);
130 assert(bits->type_clear);
131
132#ifndef Py_LIMITED_API
133 assert(bits->type_new == PyType_Type.tp_new);
134 assert(bits->type_dealloc == PyType_Type.tp_dealloc);
135 assert(bits->type_getattro == PyType_Type.tp_getattro);
136 assert(bits->type_setattro == PyType_Type.tp_setattro);
137 assert(bits->type_basicsize == sizeof(PyHeapTypeObject));
138 assert(bits->type_traverse == PyType_Type.tp_traverse);
139 assert(bits->type_clear == PyType_Type.tp_clear);
140#endif
141
142 sys = PyImport_ImportModule("sys");
143 hex_version = PyObject_GetAttrString(sys, "hexversion");
144 bits->python_version_hex = PyLong_AsLong(hex_version);
145 ret = true;
146
147err:
148 Py_XDECREF(bases);
149 Py_XDECREF(type);
150 Py_XDECREF(size);
151 Py_XDECREF(sys);
152 Py_XDECREF(hex_version);
153 return ret;
154}
155
156// -----------------------------------------------------------------------------
157// Message
158// -----------------------------------------------------------------------------
159
160// The main message object. The type of the object (PyUpb_Message.ob_type)
161// will be an instance of the PyUpb_MessageMeta type (defined below). So the
162// chain is:
163// FooMessage = MessageMeta(...)
164// foo = FooMessage()
165//
166// Which becomes:
167// Object C Struct Type Python type (ob_type)
168// ----------------- ----------------- ---------------------
169// foo PyUpb_Message FooMessage
170// FooMessage PyUpb_MessageMeta message_meta_type
171// message_meta_type PyTypeObject 'type' in Python
172//
173// A message object can be in one of two states: present or non-present. When
174// a message is non-present, it stores a reference to its parent, and a write
175// to any attribute will trigger the message to become present in its parent.
176// The parent may also be non-present, in which case a mutation will trigger a
177// chain reaction.
178typedef struct PyUpb_Message {
179 PyObject_HEAD;
180 PyObject* arena;
181 uintptr_t def; // Tagged, low bit 1 == upb_FieldDef*, else upb_MessageDef*
182 union {
183 // when def is msgdef, the data for this msg.
184 upb_Message* msg;
185 // when def is fielddef, owning pointer to parent
186 struct PyUpb_Message* parent;
187 } ptr;
188 PyObject* ext_dict; // Weak pointer to extension dict, if any.
189 // name->obj dict for non-present msg/map/repeated, NULL if none.
190 PyUpb_WeakMap* unset_subobj_map;
191 int version;
192} PyUpb_Message;
193
194static PyObject* PyUpb_Message_GetAttr(PyObject* _self, PyObject* attr);
195
196bool PyUpb_Message_IsStub(PyUpb_Message* msg) { return msg->def & 1; }
197
198const upb_FieldDef* PyUpb_Message_GetFieldDef(PyUpb_Message* msg) {
199 assert(PyUpb_Message_IsStub(msg));
200 return (void*)(msg->def & ~(uintptr_t)1);
201}
202
203static const upb_MessageDef* _PyUpb_Message_GetMsgdef(PyUpb_Message* msg) {
204 return PyUpb_Message_IsStub(msg)
205 ? upb_FieldDef_MessageSubDef(PyUpb_Message_GetFieldDef(msg))
206 : (void*)msg->def;
207}
208
209const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self) {
210 return _PyUpb_Message_GetMsgdef((PyUpb_Message*)self);
211}
212
213static upb_Message* PyUpb_Message_GetMsg(PyUpb_Message* self) {
214 assert(!PyUpb_Message_IsStub(self));
215 return self->ptr.msg;
216}
217
218bool PyUpb_Message_TryCheck(PyObject* self) {
219 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
220 PyObject* type = (PyObject*)Py_TYPE(self);
221 return Py_TYPE(type) == state->message_meta_type;
222}
223
224bool PyUpb_Message_Verify(PyObject* self) {
225 if (!PyUpb_Message_TryCheck(self)) {
226 PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
227 self);
228 return false;
229 }
230 return true;
231}
232
233// If the message is reified, returns it. Otherwise, returns NULL.
234// If NULL is returned, the object is empty and has no underlying data.
235upb_Message* PyUpb_Message_GetIfReified(PyObject* _self) {
236 PyUpb_Message* self = (void*)_self;
237 return PyUpb_Message_IsStub(self) ? NULL : self->ptr.msg;
238}
239
240static PyObject* PyUpb_Message_New(PyObject* cls, PyObject* unused_args,
241 PyObject* unused_kwargs) {
242 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
243 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
244 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
245 msg->def = (uintptr_t)msgdef;
246 msg->arena = PyUpb_Arena_New();
247 msg->ptr.msg = upb_Message_New(layout, PyUpb_Arena_Get(msg->arena));
248 msg->unset_subobj_map = NULL;
249 msg->ext_dict = NULL;
250 msg->version = 0;
251
252 PyObject* ret = &msg->ob_base;
253 PyUpb_ObjCache_Add(msg->ptr.msg, ret);
254 return ret;
255}
256
257/*
258 * PyUpb_Message_LookupName()
259 *
260 * Tries to find a field or oneof named `py_name` in the message object `self`.
261 * The user must pass `f` and/or `o` to indicate whether a field or a oneof name
262 * is expected. If the name is found and it has an expected type, the function
263 * sets `*f` or `*o` respectively and returns true. Otherwise returns false
264 * and sets an exception of type `exc_type` if provided.
265 */
266static bool PyUpb_Message_LookupName(PyUpb_Message* self, PyObject* py_name,
267 const upb_FieldDef** f,
268 const upb_OneofDef** o,
269 PyObject* exc_type) {
270 assert(f || o);
271 Py_ssize_t size;
272 const char* name = NULL;
273 if (PyUnicode_Check(py_name)) {
274 name = PyUnicode_AsUTF8AndSize(py_name, &size);
275 } else if (PyBytes_Check(py_name)) {
276 PyBytes_AsStringAndSize(py_name, (char**)&name, &size);
277 }
278 if (!name) {
279 PyErr_Format(exc_type,
280 "Expected a field name, but got non-string argument %S.",
281 py_name);
282 return false;
283 }
284 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
285
286 if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) {
287 if (exc_type) {
288 PyErr_Format(exc_type, "Protocol message %s has no \"%s\" field.",
289 upb_MessageDef_Name(msgdef), name);
290 }
291 return false;
292 }
293
294 if (!o && !*f) {
295 if (exc_type) {
296 PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
297 name);
298 }
299 return false;
300 }
301
302 if (!f && !*o) {
303 if (exc_type) {
304 PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
305 name);
306 }
307 return false;
308 }
309
310 return true;
311}
312
313static bool PyUpb_Message_InitMessageMapEntry(PyObject* dst, PyObject* src) {
314 if (!src || !dst) return false;
315
316 PyObject* ok = PyObject_CallMethod(dst, "CopyFrom", "O", src);
317 if (!ok) return false;
318 Py_DECREF(ok);
319
320 return true;
321}
322
323int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
324 const upb_FieldDef* f) {
325 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
326 const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
327 PyObject* it = NULL;
328 PyObject* tmp = NULL;
329 int ret = -1;
330 if (upb_FieldDef_IsSubMessage(val_f)) {
331 it = PyObject_GetIter(value);
332 if (it == NULL) {
333 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
334 upb_FieldDef_FullName(f));
335 goto err;
336 }
337 PyObject* e;
338 while ((e = PyIter_Next(it)) != NULL) {
339 PyObject* src = PyObject_GetItem(value, e);
340 PyObject* dst = PyObject_GetItem(map, e);
341 Py_DECREF(e);
342 bool ok = PyUpb_Message_InitMessageMapEntry(dst, src);
343 Py_XDECREF(src);
344 Py_XDECREF(dst);
345 if (!ok) goto err;
346 }
347 } else {
348 tmp = PyObject_CallMethod(map, "update", "O", value);
349 if (!tmp) goto err;
350 }
351 ret = 0;
352
353err:
354 Py_XDECREF(it);
355 Py_XDECREF(tmp);
356 return ret;
357}
358
359void PyUpb_Message_EnsureReified(PyUpb_Message* self);
360
361static bool PyUpb_Message_InitMapAttribute(PyObject* _self, PyObject* name,
362 const upb_FieldDef* f,
363 PyObject* value) {
364 PyObject* map = PyUpb_Message_GetAttr(_self, name);
365 int ok = PyUpb_Message_InitMapAttributes(map, value, f);
366 Py_DECREF(map);
367 return ok >= 0;
368}
369
370static bool PyUpb_Message_InitRepeatedMessageAttribute(PyObject* _self,
371 PyObject* repeated,
372 PyObject* value,
373 const upb_FieldDef* f) {
374 PyObject* it = PyObject_GetIter(value);
375 if (!it) {
376 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
377 upb_FieldDef_FullName(f));
378 return false;
379 }
380 PyObject* e = NULL;
381 PyObject* m = NULL;
382 while ((e = PyIter_Next(it)) != NULL) {
383 if (PyDict_Check(e)) {
384 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, e);
385 if (!m) goto err;
386 } else {
387 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, NULL);
388 if (!m) goto err;
389 PyObject* merged = PyUpb_Message_MergeFrom(m, e);
390 if (!merged) goto err;
391 Py_DECREF(merged);
392 }
393 Py_DECREF(e);
394 Py_DECREF(m);
395 m = NULL;
396 }
397
398err:
399 Py_XDECREF(it);
400 Py_XDECREF(e);
401 Py_XDECREF(m);
402 return !PyErr_Occurred(); // Check PyIter_Next() exit.
403}
404
405static bool PyUpb_Message_InitRepeatedAttribute(PyObject* _self, PyObject* name,
406 PyObject* value) {
407 PyUpb_Message* self = (void*)_self;
408 const upb_FieldDef* field;
409 if (!PyUpb_Message_LookupName(self, name, &field, NULL,
410 PyExc_AttributeError)) {
411 return false;
412 }
413 bool ok = false;
414 PyObject* repeated = PyUpb_Message_GetFieldValue(_self, field);
415 PyObject* tmp = NULL;
416 if (!repeated) goto err;
417 if (upb_FieldDef_IsSubMessage(field)) {
418 if (!PyUpb_Message_InitRepeatedMessageAttribute(_self, repeated, value,
419 field)) {
420 goto err;
421 }
422 } else {
423 tmp = PyUpb_RepeatedContainer_Extend(repeated, value);
424 if (!tmp) goto err;
425 }
426 ok = true;
427
428err:
429 Py_XDECREF(repeated);
430 Py_XDECREF(tmp);
431 return ok;
432}
433
434static bool PyUpb_Message_InitMessageAttribute(PyObject* _self, PyObject* name,
435 PyObject* value) {
436 PyObject* submsg = PyUpb_Message_GetAttr(_self, name);
437 if (!submsg) return -1;
438 assert(!PyErr_Occurred());
439 bool ok;
440 if (PyUpb_Message_TryCheck(value)) {
441 PyObject* tmp = PyUpb_Message_MergeFrom(submsg, value);
442 ok = tmp != NULL;
443 Py_XDECREF(tmp);
444 } else if (PyDict_Check(value)) {
445 assert(!PyErr_Occurred());
446 ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
447 } else {
448 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
449 PyErr_Format(PyExc_TypeError, "Message must be initialized with a dict: %s",
450 upb_MessageDef_FullName(m));
451 ok = false;
452 }
453 Py_DECREF(submsg);
454 return ok;
455}
456
457static bool PyUpb_Message_InitScalarAttribute(upb_Message* msg,
458 const upb_FieldDef* f,
459 PyObject* value,
460 upb_Arena* arena) {
461 upb_MessageValue msgval;
462 assert(!PyErr_Occurred());
463 if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return false;
464 upb_Message_SetFieldByDef(msg, f, msgval, arena);
465 return true;
466}
467
468int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
469 PyObject* kwargs) {
470 assert(!PyErr_Occurred());
471
472 if (args != NULL && PyTuple_Size(args) != 0) {
473 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
474 return -1;
475 }
476
477 if (kwargs == NULL) return 0;
478
479 PyUpb_Message* self = (void*)_self;
480 Py_ssize_t pos = 0;
481 PyObject* name;
482 PyObject* value;
483 PyUpb_Message_EnsureReified(self);
484 upb_Message* msg = PyUpb_Message_GetMsg(self);
485 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
486
487 while (PyDict_Next(kwargs, &pos, &name, &value)) {
488 assert(!PyErr_Occurred());
489 const upb_FieldDef* f;
490 assert(!PyErr_Occurred());
491 if (!PyUpb_Message_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
492 return -1;
493 }
494
495 if (value == Py_None) continue; // Ignored.
496
497 assert(!PyErr_Occurred());
498
499 if (upb_FieldDef_IsMap(f)) {
500 if (!PyUpb_Message_InitMapAttribute(_self, name, f, value)) return -1;
501 } else if (upb_FieldDef_IsRepeated(f)) {
502 if (!PyUpb_Message_InitRepeatedAttribute(_self, name, value)) return -1;
503 } else if (upb_FieldDef_IsSubMessage(f)) {
504 if (!PyUpb_Message_InitMessageAttribute(_self, name, value)) return -1;
505 } else {
506 if (!PyUpb_Message_InitScalarAttribute(msg, f, value, arena)) return -1;
507 }
508 if (PyErr_Occurred()) return -1;
509 }
510
511 if (PyErr_Occurred()) return -1;
512 return 0;
513}
514
515static int PyUpb_Message_Init(PyObject* _self, PyObject* args,
516 PyObject* kwargs) {
517 if (args != NULL && PyTuple_Size(args) != 0) {
518 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
519 return -1;
520 }
521
522 return PyUpb_Message_InitAttributes(_self, args, kwargs);
523}
524
525static PyObject* PyUpb_Message_NewStub(PyObject* parent, const upb_FieldDef* f,
526 PyObject* arena) {
527 const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
528 PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
529
530 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
531 msg->def = (uintptr_t)f | 1;
532 msg->arena = arena;
533 msg->ptr.parent = (PyUpb_Message*)parent;
534 msg->unset_subobj_map = NULL;
535 msg->ext_dict = NULL;
536 msg->version = 0;
537
538 Py_DECREF(cls);
539 Py_INCREF(parent);
540 Py_INCREF(arena);
541 return &msg->ob_base;
542}
543
544static bool PyUpb_Message_IsEmpty(const upb_Message* msg,
545 const upb_MessageDef* m,
546 const upb_DefPool* ext_pool) {
547 if (!msg) return true;
548
549 size_t iter = kUpb_Message_Begin;
550 const upb_FieldDef* f;
551 upb_MessageValue val;
552 if (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) return false;
553
554 size_t len;
555 (void)upb_Message_GetUnknown(msg, &len);
556 return len == 0;
557}
558
559static bool PyUpb_Message_IsEqual(PyUpb_Message* m1, PyObject* _m2) {
560 PyUpb_Message* m2 = (void*)_m2;
561 if (m1 == m2) return true;
562 if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) {
563 return false;
564 }
565 const upb_MessageDef* m1_msgdef = _PyUpb_Message_GetMsgdef(m1);
566#ifndef NDEBUG
567 const upb_MessageDef* m2_msgdef = _PyUpb_Message_GetMsgdef(m2);
568 assert(m1_msgdef == m2_msgdef);
569#endif
570 const upb_Message* m1_msg = PyUpb_Message_GetIfReified((PyObject*)m1);
571 const upb_Message* m2_msg = PyUpb_Message_GetIfReified(_m2);
572 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m1_msgdef));
573
574 const bool e1 = PyUpb_Message_IsEmpty(m1_msg, m1_msgdef, symtab);
575 const bool e2 = PyUpb_Message_IsEmpty(m2_msg, m1_msgdef, symtab);
576 if (e1 || e2) return e1 && e2;
577
578 return upb_Message_IsEqual(m1_msg, m2_msg, m1_msgdef);
579}
580
581static const upb_FieldDef* PyUpb_Message_InitAsMsg(PyUpb_Message* m,
582 upb_Arena* arena) {
583 const upb_FieldDef* f = PyUpb_Message_GetFieldDef(m);
584 const upb_MessageDef* m2 = upb_FieldDef_MessageSubDef(f);
585 m->ptr.msg = upb_Message_New(upb_MessageDef_MiniTable(m2), arena);
586 m->def = (uintptr_t)m2;
587 PyUpb_ObjCache_Add(m->ptr.msg, &m->ob_base);
588 return f;
589}
590
591static void PyUpb_Message_SetField(PyUpb_Message* parent, const upb_FieldDef* f,
592 PyUpb_Message* child, upb_Arena* arena) {
593 upb_MessageValue msgval = {.msg_val = PyUpb_Message_GetMsg(child)};
594 upb_Message_SetFieldByDef(PyUpb_Message_GetMsg(parent), f, msgval, arena);
595 PyUpb_WeakMap_Delete(parent->unset_subobj_map, f);
596 // Releases a ref previously owned by child->ptr.parent of our child.
597 Py_DECREF(child);
598}
599
600/*
601 * PyUpb_Message_EnsureReified()
602 *
603 * This implements the "expando" behavior of Python protos:
604 * foo = FooProto()
605 *
606 * # The intermediate messages don't really exist, and won't be serialized.
607 * x = foo.bar.bar.bar.bar.bar.baz
608 *
609 * # Now all the intermediate objects are created.
610 * foo.bar.bar.bar.bar.bar.baz = 5
611 *
612 * This function should be called before performing any mutation of a protobuf
613 * object.
614 *
615 * Post-condition:
616 * PyUpb_Message_IsStub(self) is false
617 */
618void PyUpb_Message_EnsureReified(PyUpb_Message* self) {
619 if (!PyUpb_Message_IsStub(self)) return;
620 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
621
622 // This is a non-present message. We need to create a real upb_Message for
623 // this object and every parent until we reach a present message.
624 PyUpb_Message* child = self;
625 PyUpb_Message* parent = self->ptr.parent;
626 const upb_FieldDef* child_f = PyUpb_Message_InitAsMsg(child, arena);
627 Py_INCREF(child); // To avoid a special-case in PyUpb_Message_SetField().
628
629 do {
630 PyUpb_Message* next_parent = parent->ptr.parent;
631 const upb_FieldDef* parent_f = NULL;
632 if (PyUpb_Message_IsStub(parent)) {
633 parent_f = PyUpb_Message_InitAsMsg(parent, arena);
634 }
635 PyUpb_Message_SetField(parent, child_f, child, arena);
636 child = parent;
637 child_f = parent_f;
638 parent = next_parent;
639 } while (child_f);
640
641 // Releases ref previously owned by child->ptr.parent of our child.
642 Py_DECREF(child);
643 self->version++;
644}
645
646static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self);
647
648/*
649 * PyUpb_Message_Reify()
650 *
651 * The message equivalent of PyUpb_*Container_Reify(), this transitions
652 * the wrapper from the unset state (owning a reference on self->ptr.parent) to
653 * the set state (having a non-owning pointer to self->ptr.msg).
654 */
655static void PyUpb_Message_Reify(PyUpb_Message* self, const upb_FieldDef* f,
656 upb_Message* msg) {
657 assert(f == PyUpb_Message_GetFieldDef(self));
658 if (!msg) {
659 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef((PyObject*)self);
660 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
661 msg = upb_Message_New(layout, PyUpb_Arena_Get(self->arena));
662 }
663 PyUpb_ObjCache_Add(msg, &self->ob_base);
664 Py_DECREF(&self->ptr.parent->ob_base);
665 self->ptr.msg = msg; // Overwrites self->ptr.parent
666 self->def = (uintptr_t)upb_FieldDef_MessageSubDef(f);
667 PyUpb_Message_SyncSubobjs(self);
668}
669
670/*
671 * PyUpb_Message_SyncSubobjs()
672 *
673 * This operation must be invoked whenever the underlying upb_Message has been
674 * mutated directly in C. This will attach any newly-present field data
675 * to previously returned stub wrapper objects.
676 *
677 * For example:
678 * foo = FooMessage()
679 * sub = foo.submsg # Empty, unset sub-message
680 *
681 * # SyncSubobjs() is required to connect our existing 'sub' wrapper to the
682 * # newly created foo.submsg data in C.
683 * foo.MergeFrom(FooMessage(submsg={}))
684 *
685 * This requires that all of the new sub-objects that have appeared are owned
686 * by `self`'s arena.
687 */
688static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self) {
689 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
690 if (!subobj_map) return;
691
692 upb_Message* msg = PyUpb_Message_GetMsg(self);
693 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
694 const void* key;
695 PyObject* obj;
696
697 // The last ref to this message could disappear during iteration.
698 // When we call PyUpb_*Container_Reify() below, the container will drop
699 // its ref on `self`. If that was the last ref on self, the object will be
700 // deleted, and `subobj_map` along with it. We need it to live until we are
701 // done iterating.
702 Py_INCREF(&self->ob_base);
703
704 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
705 const upb_FieldDef* f = key;
706 if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f))
707 continue;
708 upb_MessageValue msgval = upb_Message_GetFieldByDef(msg, f);
709 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
710 if (upb_FieldDef_IsMap(f)) {
711 if (!msgval.map_val) continue;
712 PyUpb_MapContainer_Reify(obj, (upb_Map*)msgval.map_val);
713 } else if (upb_FieldDef_IsRepeated(f)) {
714 if (!msgval.array_val) continue;
715 PyUpb_RepeatedContainer_Reify(obj, (upb_Array*)msgval.array_val);
716 } else {
717 PyUpb_Message* sub = (void*)obj;
718 assert(self == sub->ptr.parent);
719 PyUpb_Message_Reify(sub, f, (upb_Message*)msgval.msg_val);
720 }
721 }
722
723 Py_DECREF(&self->ob_base);
724
725 // TODO: present fields need to be iterated too if they can reach
726 // a WeakMap.
727}
728
729static PyObject* PyUpb_Message_ToString(PyUpb_Message* self) {
730 if (PyUpb_Message_IsStub(self)) {
731 return PyUnicode_FromStringAndSize(NULL, 0);
732 }
733 upb_Message* msg = PyUpb_Message_GetMsg(self);
734 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
735 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
736 char buf[1024];
737 int options = UPB_TXTENC_SKIPUNKNOWN;
738 size_t size = upb_TextEncode(msg, msgdef, symtab, options, buf, sizeof(buf));
739 if (size < sizeof(buf)) {
740 return PyUnicode_FromStringAndSize(buf, size);
741 } else {
742 char* buf2 = malloc(size + 1);
743 size_t size2 = upb_TextEncode(msg, msgdef, symtab, options, buf2, size + 1);
744 assert(size == size2);
745 PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
746 free(buf2);
747 return ret;
748 }
749}
750
751static PyObject* PyUpb_Message_RichCompare(PyObject* _self, PyObject* other,
752 int opid) {
753 PyUpb_Message* self = (void*)_self;
754 if (opid != Py_EQ && opid != Py_NE) {
755 Py_INCREF(Py_NotImplemented);
756 return Py_NotImplemented;
757 }
758 if (!PyObject_TypeCheck(other, Py_TYPE(self))) {
759 Py_INCREF(Py_NotImplemented);
760 return Py_NotImplemented;
761 }
762 bool ret = PyUpb_Message_IsEqual(self, other);
763 if (opid == Py_NE) ret = !ret;
764 return PyBool_FromLong(ret);
765}
766
767void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f) {
768 PyUpb_Message* self = (void*)_self;
769 PyUpb_WeakMap_Delete(self->unset_subobj_map, f);
770}
771
772void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
773 upb_MessageValue subobj) {
774 PyUpb_Message* self = (void*)_self;
775 PyUpb_Message_EnsureReified(self);
776 PyUpb_Message_CacheDelete(_self, f);
777 upb_Message_SetFieldByDef(self->ptr.msg, f, subobj,
778 PyUpb_Arena_Get(self->arena));
779}
780
781static void PyUpb_Message_Dealloc(PyObject* _self) {
782 PyUpb_Message* self = (void*)_self;
783
784 if (PyUpb_Message_IsStub(self)) {
785 PyUpb_Message_CacheDelete((PyObject*)self->ptr.parent,
786 PyUpb_Message_GetFieldDef(self));
787 Py_DECREF(self->ptr.parent);
788 } else {
789 PyUpb_ObjCache_Delete(self->ptr.msg);
790 }
791
792 if (self->unset_subobj_map) {
793 PyUpb_WeakMap_Free(self->unset_subobj_map);
794 }
795
796 Py_DECREF(self->arena);
797
798 // We do not use PyUpb_Dealloc() here because Message is a base type and for
799 // base types there is a bug we have to work around in this case (see below).
800 PyTypeObject* tp = Py_TYPE(self);
801 freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
802 tp_free(self);
803
804 if (cpython_bits.python_version_hex >= 0x03080000) {
805 // Prior to Python 3.8 there is a bug where deallocating the type here would
806 // lead to a double-decref: https://bugs.python.org/issue37879
807 Py_DECREF(tp);
808 }
809}
810
811PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
812 PyObject* arena) {
813 PyObject* ret = PyUpb_ObjCache_Get(u_msg);
814 if (ret) return ret;
815
816 PyObject* cls = PyUpb_Descriptor_GetClass(m);
817 // It is not safe to use PyObject_{,GC}_New() due to:
818 // https://bugs.python.org/issue35810
819 PyUpb_Message* py_msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
820 py_msg->arena = arena;
821 py_msg->def = (uintptr_t)m;
822 py_msg->ptr.msg = u_msg;
823 py_msg->unset_subobj_map = NULL;
824 py_msg->ext_dict = NULL;
825 py_msg->version = 0;
826 ret = &py_msg->ob_base;
827 Py_DECREF(cls);
828 Py_INCREF(arena);
829 PyUpb_ObjCache_Add(u_msg, ret);
830 return ret;
831}
832
833/* PyUpb_Message_GetStub()
834 *
835 * Non-present messages return "stub" objects that point to their parent, but
836 * will materialize into real upb objects if they are mutated.
837 *
838 * Note: we do *not* create stubs for repeated/map fields unless the parent
839 * is a stub:
840 *
841 * msg = TestMessage()
842 * msg.submessage # (A) Creates a stub
843 * msg.repeated_foo # (B) Does *not* create a stub
844 * msg.submessage.repeated_bar # (C) Creates a stub
845 *
846 * In case (B) we have some freedom: we could either create a stub, or create
847 * a reified object with underlying data. It appears that either could work
848 * equally well, with no observable change to users. There isn't a clear
849 * advantage to either choice. We choose to follow the behavior of the
850 * pre-existing C++ behavior for consistency, but if it becomes apparent that
851 * there would be some benefit to reversing this decision, it should be totally
852 * within the realm of possibility.
853 */
854PyObject* PyUpb_Message_GetStub(PyUpb_Message* self,
855 const upb_FieldDef* field) {
856 PyObject* _self = (void*)self;
857 if (!self->unset_subobj_map) {
858 self->unset_subobj_map = PyUpb_WeakMap_New();
859 }
860 PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field);
861
862 if (subobj) return subobj;
863
864 if (upb_FieldDef_IsMap(field)) {
865 subobj = PyUpb_MapContainer_NewStub(_self, field, self->arena);
866 } else if (upb_FieldDef_IsRepeated(field)) {
867 subobj = PyUpb_RepeatedContainer_NewStub(_self, field, self->arena);
868 } else {
869 subobj = PyUpb_Message_NewStub(&self->ob_base, field, self->arena);
870 }
871 PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj);
872
873 assert(!PyErr_Occurred());
874 return subobj;
875}
876
877PyObject* PyUpb_Message_GetPresentWrapper(PyUpb_Message* self,
878 const upb_FieldDef* field) {
879 assert(!PyUpb_Message_IsStub(self));
880 upb_MutableMessageValue mutval =
881 upb_Message_Mutable(self->ptr.msg, field, PyUpb_Arena_Get(self->arena));
882 if (upb_FieldDef_IsMap(field)) {
883 return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field,
884 self->arena);
885 } else {
886 return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, field,
887 self->arena);
888 }
889}
890
891PyObject* PyUpb_Message_GetScalarValue(PyUpb_Message* self,
892 const upb_FieldDef* field) {
893 upb_MessageValue val;
894 if (PyUpb_Message_IsStub(self)) {
895 // Unset message always returns default values.
896 val = upb_FieldDef_Default(field);
897 } else {
898 val = upb_Message_GetFieldByDef(self->ptr.msg, field);
899 }
900 return PyUpb_UpbToPy(val, field, self->arena);
901}
902
903/*
904 * PyUpb_Message_GetFieldValue()
905 *
906 * Implements the equivalent of getattr(msg, field), once `field` has
907 * already been resolved to a `upb_FieldDef*`.
908 *
909 * This may involve constructing a wrapper object for the given field, or
910 * returning one that was previously constructed. If the field is not actually
911 * set, the wrapper object will be an "unset" object that is not actually
912 * connected to any C data.
913 */
914PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
915 const upb_FieldDef* field) {
916 PyUpb_Message* self = (void*)_self;
917 assert(upb_FieldDef_ContainingType(field) == PyUpb_Message_GetMsgdef(_self));
918 bool submsg = upb_FieldDef_IsSubMessage(field);
919 bool seq = upb_FieldDef_IsRepeated(field);
920
921 if ((PyUpb_Message_IsStub(self) && (submsg || seq)) ||
922 (submsg && !seq && !upb_Message_HasFieldByDef(self->ptr.msg, field))) {
923 return PyUpb_Message_GetStub(self, field);
924 } else if (seq) {
925 return PyUpb_Message_GetPresentWrapper(self, field);
926 } else {
927 return PyUpb_Message_GetScalarValue(self, field);
928 }
929}
930
931int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
932 PyObject* value, PyObject* exc) {
933 PyUpb_Message* self = (void*)_self;
934 assert(value);
935
936 if (upb_FieldDef_IsSubMessage(field) || upb_FieldDef_IsRepeated(field)) {
937 PyErr_Format(exc,
938 "Assignment not allowed to message, map, or repeated "
939 "field \"%s\" in protocol message object.",
940 upb_FieldDef_Name(field));
941 return -1;
942 }
943
944 PyUpb_Message_EnsureReified(self);
945
946 upb_MessageValue val;
947 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
948 if (!PyUpb_PyToUpb(value, field, &val, arena)) {
949 return -1;
950 }
951
952 upb_Message_SetFieldByDef(self->ptr.msg, field, val, arena);
953 return 0;
954}
955
956int PyUpb_Message_GetVersion(PyObject* _self) {
957 PyUpb_Message* self = (void*)_self;
958 return self->version;
959}
960
961/*
962 * PyUpb_Message_GetAttr()
963 *
964 * Implements:
965 * foo = msg.foo
966 *
967 * Attribute lookup must find both message fields and base class methods like
968 * msg.SerializeToString().
969 */
970__attribute__((flatten)) static PyObject* PyUpb_Message_GetAttr(
971 PyObject* _self, PyObject* attr) {
972 PyUpb_Message* self = (void*)_self;
973
974 // Lookup field by name.
975 const upb_FieldDef* field;
976 if (PyUpb_Message_LookupName(self, attr, &field, NULL, NULL)) {
977 return PyUpb_Message_GetFieldValue(_self, field);
978 }
979
980 // Check base class attributes.
981 assert(!PyErr_Occurred());
982 PyObject* ret = PyObject_GenericGetAttr(_self, attr);
983 if (ret) return ret;
984
985 // Swallow AttributeError if it occurred and try again on the metaclass
986 // to pick up class attributes. But we have to special-case "Extensions"
987 // which affirmatively returns AttributeError when a message is not
988 // extendable.
989 const char* name;
990 if (PyErr_ExceptionMatches(PyExc_AttributeError) &&
991 (name = PyUpb_GetStrData(attr)) && strcmp(name, "Extensions") != 0) {
992 PyErr_Clear();
993 return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr);
994 }
995
996 return NULL;
997}
998
999/*
1000 * PyUpb_Message_SetAttr()
1001 *
1002 * Implements:
1003 * msg.foo = foo
1004 */
1005static int PyUpb_Message_SetAttr(PyObject* _self, PyObject* attr,
1006 PyObject* value) {
1007 PyUpb_Message* self = (void*)_self;
1008 const upb_FieldDef* field;
1009 if (!PyUpb_Message_LookupName(self, attr, &field, NULL,
1010 PyExc_AttributeError)) {
1011 return -1;
1012 }
1013
1014 return PyUpb_Message_SetFieldValue(_self, field, value, PyExc_AttributeError);
1015}
1016
1017static PyObject* PyUpb_Message_HasField(PyObject* _self, PyObject* arg) {
1018 PyUpb_Message* self = (void*)_self;
1019 const upb_FieldDef* field;
1020 const upb_OneofDef* oneof;
1021
1022 if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
1023 return NULL;
1024 }
1025
1026 if (field && !upb_FieldDef_HasPresence(field)) {
1027 PyErr_Format(PyExc_ValueError, "Field %s does not have presence.",
1028 upb_FieldDef_FullName(field));
1029 return NULL;
1030 }
1031
1032 if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
1033
1034 return PyBool_FromLong(field ? upb_Message_HasFieldByDef(self->ptr.msg, field)
1035 : upb_Message_WhichOneof(self->ptr.msg, oneof) !=
1036 NULL);
1037}
1038
1039static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1040 PyObject* arg);
1041
1042static PyObject* PyUpb_Message_IsInitializedAppendErrors(PyObject* _self,
1043 PyObject* errors) {
1044 PyObject* list = PyUpb_Message_FindInitializationErrors(_self, NULL);
1045 if (!list) return NULL;
1046 bool ok = PyList_Size(list) == 0;
1047 PyObject* ret = NULL;
1048 PyObject* extend_result = NULL;
1049 if (!ok) {
1050 extend_result = PyObject_CallMethod(errors, "extend", "O", list);
1051 if (!extend_result) goto done;
1052 }
1053 ret = PyBool_FromLong(ok);
1054
1055done:
1056 Py_XDECREF(list);
1057 Py_XDECREF(extend_result);
1058 return ret;
1059}
1060
1061static PyObject* PyUpb_Message_IsInitialized(PyObject* _self, PyObject* args) {
1062 PyObject* errors = NULL;
1063 if (!PyArg_ParseTuple(args, "|O", &errors)) {
1064 return NULL;
1065 }
1066 if (errors) {
1067 // We need to collect a list of unset required fields and append it to
1068 // `errors`.
1069 return PyUpb_Message_IsInitializedAppendErrors(_self, errors);
1070 } else {
1071 // We just need to return a boolean "true" or "false" for whether all
1072 // required fields are set.
1073 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1074 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1075 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1076 bool initialized = !upb_util_HasUnsetRequired(msg, m, symtab, NULL);
1077 return PyBool_FromLong(initialized);
1078 }
1079}
1080
1081static PyObject* PyUpb_Message_ListFieldsItemKey(PyObject* self,
1082 PyObject* val) {
1083 assert(PyTuple_Check(val));
1084 PyObject* field = PyTuple_GetItem(val, 0);
1085 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(field);
1086 return PyLong_FromLong(upb_FieldDef_Number(f));
1087}
1088
1089static PyObject* PyUpb_Message_CheckCalledFromGeneratedFile(
1090 PyObject* unused, PyObject* unused_arg) {
1091 PyErr_SetString(
1092 PyExc_TypeError,
1093 "Descriptors cannot be created directly.\n"
1094 "If this call came from a _pb2.py file, your generated code is out of "
1095 "date and must be regenerated with protoc >= 3.19.0.\n"
1096 "If you cannot immediately regenerate your protos, some other possible "
1097 "workarounds are:\n"
1098 " 1. Downgrade the protobuf package to 3.20.x or lower.\n"
1099 " 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will "
1100 "use pure-Python parsing and will be much slower).\n"
1101 "\n"
1102 "More information: "
1103 "https://developers.google.com/protocol-buffers/docs/news/"
1104 "2022-05-06#python-updates");
1105 return NULL;
1106}
1107
1108static bool PyUpb_Message_SortFieldList(PyObject* list) {
1109 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1110 bool ok = false;
1111 PyObject* args = PyTuple_New(0);
1112 PyObject* kwargs = PyDict_New();
1113 PyObject* method = PyObject_GetAttrString(list, "sort");
1114 PyObject* call_result = NULL;
1115 if (!args || !kwargs || !method) goto err;
1116 if (PyDict_SetItemString(kwargs, "key", state->listfields_item_key) < 0) {
1117 goto err;
1118 }
1119 call_result = PyObject_Call(method, args, kwargs);
1120 if (!call_result) goto err;
1121 ok = true;
1122
1123err:
1124 Py_XDECREF(method);
1125 Py_XDECREF(args);
1126 Py_XDECREF(kwargs);
1127 Py_XDECREF(call_result);
1128 return ok;
1129}
1130
1131static PyObject* PyUpb_Message_ListFields(PyObject* _self, PyObject* arg) {
1132 PyObject* list = PyList_New(0);
1133 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1134 if (!msg) return list;
1135
1136 size_t iter1 = kUpb_Message_Begin;
1137 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1138 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1139 const upb_FieldDef* f;
1140 PyObject* field_desc = NULL;
1141 PyObject* py_val = NULL;
1142 PyObject* tuple = NULL;
1143 upb_MessageValue val;
1144 uint32_t last_field = 0;
1145 bool in_order = true;
1146 while (upb_Message_Next(msg, m, symtab, &f, &val, &iter1)) {
1147 const uint32_t field_number = upb_FieldDef_Number(f);
1148 if (field_number < last_field) in_order = false;
1149 last_field = field_number;
1150 PyObject* field_desc = PyUpb_FieldDescriptor_Get(f);
1151 PyObject* py_val = PyUpb_Message_GetFieldValue(_self, f);
1152 if (!field_desc || !py_val) goto err;
1153 PyObject* tuple = Py_BuildValue("(NN)", field_desc, py_val);
1154 field_desc = NULL;
1155 py_val = NULL;
1156 if (!tuple) goto err;
1157 if (PyList_Append(list, tuple)) goto err;
1158 Py_DECREF(tuple);
1159 tuple = NULL;
1160 }
1161
1162 // Users rely on fields being returned in field number order.
1163 if (!in_order && !PyUpb_Message_SortFieldList(list)) goto err;
1164
1165 return list;
1166
1167err:
1168 Py_XDECREF(field_desc);
1169 Py_XDECREF(py_val);
1170 Py_XDECREF(tuple);
1171 Py_DECREF(list);
1172 return NULL;
1173}
1174
1175PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg) {
1176 if (self->ob_type != arg->ob_type) {
1177 PyErr_Format(PyExc_TypeError,
1178 "Parameter to MergeFrom() must be instance of same class: "
1179 "expected %S got %S.",
1180 Py_TYPE(self), Py_TYPE(arg));
1181 return NULL;
1182 }
1183 // OPT: exit if src is empty.
1184 PyObject* subargs = PyTuple_New(0);
1185 PyObject* serialized =
1186 PyUpb_Message_SerializePartialToString(arg, subargs, NULL);
1187 Py_DECREF(subargs);
1188 if (!serialized) return NULL;
1189 PyObject* ret = PyUpb_Message_MergeFromString(self, serialized);
1190 Py_DECREF(serialized);
1191 Py_XDECREF(ret);
1192 Py_RETURN_NONE;
1193}
1194
1195static PyObject* PyUpb_Message_Clear(PyUpb_Message* self);
1196
1197static PyObject* PyUpb_Message_CopyFrom(PyObject* _self, PyObject* arg) {
1198 if (_self->ob_type != arg->ob_type) {
1199 PyErr_Format(PyExc_TypeError,
1200 "Parameter to CopyFrom() must be instance of same class: "
1201 "expected %S got %S.",
1202 Py_TYPE(_self), Py_TYPE(arg));
1203 return NULL;
1204 }
1205 if (_self == arg) {
1206 Py_RETURN_NONE;
1207 }
1208 PyUpb_Message* self = (void*)_self;
1209 PyUpb_Message* other = (void*)arg;
1210 PyUpb_Message_EnsureReified(self);
1211
1212 const upb_Message* other_msg = PyUpb_Message_GetIfReified((PyObject*)other);
1213 if (other_msg) {
1214 upb_Message_DeepCopy(
1215 self->ptr.msg, other_msg,
1216 upb_MessageDef_MiniTable((const upb_MessageDef*)other->def),
1217 PyUpb_Arena_Get(self->arena));
1218 } else {
1219 PyObject* tmp = PyUpb_Message_Clear(self);
1220 Py_DECREF(tmp);
1221 }
1222 PyUpb_Message_SyncSubobjs(self);
1223
1224 Py_RETURN_NONE;
1225}
1226
1227static PyObject* PyUpb_Message_SetInParent(PyObject* _self, PyObject* arg) {
1228 PyUpb_Message* self = (void*)_self;
1229 PyUpb_Message_EnsureReified(self);
1230 Py_RETURN_NONE;
1231}
1232
1233static PyObject* PyUpb_Message_UnknownFields(PyObject* _self, PyObject* arg) {
1234 // TODO: re-enable when unknown fields are added.
1235 // return PyUpb_UnknownFields_New(_self);
1236 PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor");
1237 return NULL;
1238}
1239
1240PyObject* PyUpb_Message_MergeFromString(PyObject* _self, PyObject* arg) {
1241 PyUpb_Message* self = (void*)_self;
1242 char* buf;
1243 Py_ssize_t size;
1244 PyObject* bytes = NULL;
1245
1246 if (PyMemoryView_Check(arg)) {
1247 bytes = PyBytes_FromObject(arg);
1248 // Cannot fail when passed something of the correct type.
1249 int err = PyBytes_AsStringAndSize(bytes, &buf, &size);
1250 (void)err;
1251 assert(err >= 0);
1252 } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) {
1253 return NULL;
1254 }
1255
1256 PyUpb_Message_EnsureReified(self);
1257 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1258 const upb_FileDef* file = upb_MessageDef_File(msgdef);
1259 const upb_ExtensionRegistry* extreg =
1260 upb_DefPool_ExtensionRegistry(upb_FileDef_Pool(file));
1261 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1262 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
1263 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1264 int options =
1265 upb_DecodeOptions_MaxDepth(state->allow_oversize_protos ? UINT16_MAX : 0);
1266 upb_DecodeStatus status =
1267 upb_Decode(buf, size, self->ptr.msg, layout, extreg, options, arena);
1268 Py_XDECREF(bytes);
1269 if (status != kUpb_DecodeStatus_Ok) {
1270 PyErr_Format(state->decode_error_class, "Error parsing message");
1271 return NULL;
1272 }
1273 PyUpb_Message_SyncSubobjs(self);
1274 return PyLong_FromSsize_t(size);
1275}
1276
1277static PyObject* PyUpb_Message_ParseFromString(PyObject* self, PyObject* arg) {
1278 PyObject* tmp = PyUpb_Message_Clear((PyUpb_Message*)self);
1279 Py_DECREF(tmp);
1280 return PyUpb_Message_MergeFromString(self, arg);
1281}
1282
1283static PyObject* PyUpb_Message_ByteSize(PyObject* self, PyObject* args) {
1284 // TODO: At the
1285 // moment upb does not have a "byte size" function, so we just serialize to
1286 // string and get the size of the string.
1287 PyObject* subargs = PyTuple_New(0);
1288 PyObject* serialized = PyUpb_Message_SerializeToString(self, subargs, NULL);
1289 Py_DECREF(subargs);
1290 if (!serialized) return NULL;
1291 size_t size = PyBytes_Size(serialized);
1292 Py_DECREF(serialized);
1293 return PyLong_FromSize_t(size);
1294}
1295
1296static PyObject* PyUpb_Message_Clear(PyUpb_Message* self) {
1297 PyUpb_Message_EnsureReified(self);
1298 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1299 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
1300
1301 if (subobj_map) {
1302 upb_Message* msg = PyUpb_Message_GetMsg(self);
1303 (void)msg; // Suppress unused warning when asserts are disabled.
1304 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
1305 const void* key;
1306 PyObject* obj;
1307
1308 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
1309 const upb_FieldDef* f = key;
1310 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
1311 if (upb_FieldDef_IsMap(f)) {
1312 assert(upb_Message_GetFieldByDef(msg, f).map_val == NULL);
1313 PyUpb_MapContainer_Reify(obj, NULL);
1314 } else if (upb_FieldDef_IsRepeated(f)) {
1315 assert(upb_Message_GetFieldByDef(msg, f).array_val == NULL);
1316 PyUpb_RepeatedContainer_Reify(obj, NULL);
1317 } else {
1318 assert(!upb_Message_HasFieldByDef(msg, f));
1319 PyUpb_Message* sub = (void*)obj;
1320 assert(self == sub->ptr.parent);
1321 PyUpb_Message_Reify(sub, f, NULL);
1322 }
1323 }
1324 }
1325
1326 upb_Message_ClearByDef(self->ptr.msg, msgdef);
1327 Py_RETURN_NONE;
1328}
1329
1330void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f) {
1331 PyUpb_Message* self = (void*)_self;
1332 PyUpb_Message_EnsureReified((PyUpb_Message*)self);
1333
1334 // We must ensure that any stub object is reified so its parent no longer
1335 // points to us.
1336 PyObject* sub = self->unset_subobj_map
1337 ? PyUpb_WeakMap_Get(self->unset_subobj_map, f)
1338 : NULL;
1339
1340 if (upb_FieldDef_IsMap(f)) {
1341 // For maps we additionally have to invalidate any iterators. So we need
1342 // to get an object even if it's reified.
1343 if (!sub) {
1344 sub = PyUpb_Message_GetFieldValue(_self, f);
1345 }
1346 PyUpb_MapContainer_EnsureReified(sub);
1347 PyUpb_MapContainer_Invalidate(sub);
1348 } else if (upb_FieldDef_IsRepeated(f)) {
1349 if (sub) {
1350 PyUpb_RepeatedContainer_EnsureReified(sub);
1351 }
1352 } else if (upb_FieldDef_IsSubMessage(f)) {
1353 if (sub) {
1354 PyUpb_Message_EnsureReified((PyUpb_Message*)sub);
1355 }
1356 }
1357
1358 Py_XDECREF(sub);
1359 upb_Message_ClearFieldByDef(self->ptr.msg, f);
1360}
1361
1362static PyObject* PyUpb_Message_ClearExtension(PyObject* _self, PyObject* arg) {
1363 PyUpb_Message* self = (void*)_self;
1364 PyUpb_Message_EnsureReified(self);
1365 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, arg);
1366 if (!f) return NULL;
1367 PyUpb_Message_DoClearField(_self, f);
1368 Py_RETURN_NONE;
1369}
1370
1371static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) {
1372 PyUpb_Message* self = (void*)_self;
1373
1374 // We always need EnsureReified() here (even for an unset message) to
1375 // preserve behavior like:
1376 // msg = FooMessage()
1377 // msg.foo.Clear()
1378 // assert msg.HasField("foo")
1379 PyUpb_Message_EnsureReified(self);
1380
1381 const upb_FieldDef* f;
1382 const upb_OneofDef* o;
1383 if (!PyUpb_Message_LookupName(self, arg, &f, &o, PyExc_ValueError)) {
1384 return NULL;
1385 }
1386
1387 if (o) f = upb_Message_WhichOneof(self->ptr.msg, o);
1388 if (f) PyUpb_Message_DoClearField(_self, f);
1389 Py_RETURN_NONE;
1390}
1391
1392static PyObject* PyUpb_Message_DiscardUnknownFields(PyUpb_Message* self,
1393 PyObject* arg) {
1394 PyUpb_Message_EnsureReified(self);
1395 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1396 upb_Message_DiscardUnknown(self->ptr.msg, msgdef, 64);
1397 Py_RETURN_NONE;
1398}
1399
1400static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1401 PyObject* arg) {
1402 PyUpb_Message* self = (void*)_self;
1403 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1404 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1405 const upb_DefPool* ext_pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
1406 upb_FieldPathEntry* fields_base;
1407 PyObject* ret = PyList_New(0);
1408 if (upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields_base)) {
1409 upb_FieldPathEntry* fields = fields_base;
1410 char* buf = NULL;
1411 size_t size = 0;
1412 assert(fields->field);
1413 while (fields->field) {
1414 upb_FieldPathEntry* field = fields;
1415 size_t need = upb_FieldPath_ToText(&fields, buf, size);
1416 if (need >= size) {
1417 fields = field;
1418 size = size ? size * 2 : 16;
1419 while (size <= need) size *= 2;
1420 buf = realloc(buf, size);
1421 need = upb_FieldPath_ToText(&fields, buf, size);
1422 assert(size > need);
1423 }
1424 PyObject* str = PyUnicode_FromString(buf);
1425 PyList_Append(ret, str);
1426 Py_DECREF(str);
1427 }
1428 free(buf);
1429 free(fields_base);
1430 }
1431 return ret;
1432}
1433
1434static PyObject* PyUpb_Message_FromString(PyObject* cls, PyObject* serialized) {
1435 PyObject* ret = NULL;
1436 PyObject* length = NULL;
1437
1438 ret = PyObject_CallObject(cls, NULL);
1439 if (ret == NULL) goto err;
1440 length = PyUpb_Message_MergeFromString(ret, serialized);
1441 if (length == NULL) goto err;
1442
1443done:
1444 Py_XDECREF(length);
1445 return ret;
1446
1447err:
1448 Py_XDECREF(ret);
1449 ret = NULL;
1450 goto done;
1451}
1452
1453const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
1454 PyObject* key) {
1455 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(key);
1456 if (!f) {
1457 PyErr_Clear();
1458 PyErr_Format(PyExc_KeyError, "Object %R is not a field descriptor\n", key);
1459 return NULL;
1460 }
1461 if (!upb_FieldDef_IsExtension(f)) {
1462 PyErr_Format(PyExc_KeyError, "Field %s is not an extension\n",
1463 upb_FieldDef_FullName(f));
1464 return NULL;
1465 }
1466 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
1467 if (upb_FieldDef_ContainingType(f) != msgdef) {
1468 PyErr_Format(PyExc_KeyError, "Extension doesn't match (%s vs %s)",
1469 upb_MessageDef_FullName(msgdef), upb_FieldDef_FullName(f));
1470 return NULL;
1471 }
1472 return f;
1473}
1474
1475static PyObject* PyUpb_Message_HasExtension(PyObject* _self,
1476 PyObject* ext_desc) {
1477 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1478 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, ext_desc);
1479 if (!f) return NULL;
1480 if (upb_FieldDef_IsRepeated(f)) {
1481 PyErr_SetString(PyExc_KeyError,
1482 "Field is repeated. A singular method is required.");
1483 return NULL;
1484 }
1485 if (!msg) Py_RETURN_FALSE;
1486 return PyBool_FromLong(upb_Message_HasFieldByDef(msg, f));
1487}
1488
1489void PyUpb_Message_ReportInitializationErrors(const upb_MessageDef* msgdef,
1490 PyObject* errors, PyObject* exc) {
1491 PyObject* comma = PyUnicode_FromString(",");
1492 PyObject* missing_fields = NULL;
1493 if (!comma) goto done;
1494 missing_fields = PyUnicode_Join(comma, errors);
1495 if (!missing_fields) goto done;
1496 PyErr_Format(exc, "Message %s is missing required fields: %U",
1497 upb_MessageDef_FullName(msgdef), missing_fields);
1498done:
1499 Py_XDECREF(comma);
1500 Py_XDECREF(missing_fields);
1501 Py_DECREF(errors);
1502}
1503
1504PyObject* PyUpb_Message_SerializeInternal(PyObject* _self, PyObject* args,
1505 PyObject* kwargs,
1506 bool check_required) {
1507 PyUpb_Message* self = (void*)_self;
1508 if (!PyUpb_Message_Verify((PyObject*)self)) return NULL;
1509 static const char* kwlist[] = {"deterministic", NULL};
1510 int deterministic = 0;
1511 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
1512 &deterministic)) {
1513 return NULL;
1514 }
1515
1516 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1517 if (PyUpb_Message_IsStub(self)) {
1518 // Nothing to serialize, but we do have to check whether the message is
1519 // initialized.
1520 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1521 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1522 if (!errors) return NULL;
1523 if (PyList_Size(errors) == 0) {
1524 Py_DECREF(errors);
1525 return PyBytes_FromStringAndSize(NULL, 0);
1526 }
1527 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1528 state->encode_error_class);
1529 return NULL;
1530 }
1531
1532 upb_Arena* arena = upb_Arena_New();
1533 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1534 size_t size = 0;
1535 // Python does not currently have any effective limit on serialization depth.
1536 int options = upb_EncodeOptions_MaxDepth(UINT16_MAX);
1537 if (check_required) options |= kUpb_EncodeOption_CheckRequired;
1538 if (deterministic) options |= kUpb_EncodeOption_Deterministic;
1539 char* pb;
1540 upb_EncodeStatus status =
1541 upb_Encode(self->ptr.msg, layout, options, arena, &pb, &size);
1542 PyObject* ret = NULL;
1543
1544 if (status != kUpb_EncodeStatus_Ok) {
1545 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1546 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1547 if (PyList_Size(errors) != 0) {
1548 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1549 state->encode_error_class);
1550 } else {
1551 PyErr_Format(state->encode_error_class, "Failed to serialize proto");
1552 }
1553 goto done;
1554 }
1555
1556 ret = PyBytes_FromStringAndSize(pb, size);
1557
1558done:
1559 upb_Arena_Free(arena);
1560 return ret;
1561}
1562
1563PyObject* PyUpb_Message_SerializeToString(PyObject* _self, PyObject* args,
1564 PyObject* kwargs) {
1565 return PyUpb_Message_SerializeInternal(_self, args, kwargs, true);
1566}
1567
1568PyObject* PyUpb_Message_SerializePartialToString(PyObject* _self,
1569 PyObject* args,
1570 PyObject* kwargs) {
1571 return PyUpb_Message_SerializeInternal(_self, args, kwargs, false);
1572}
1573
1574static PyObject* PyUpb_Message_WhichOneof(PyObject* _self, PyObject* name) {
1575 PyUpb_Message* self = (void*)_self;
1576 const upb_OneofDef* o;
1577 if (!PyUpb_Message_LookupName(self, name, NULL, &o, PyExc_ValueError)) {
1578 return NULL;
1579 }
1580 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1581 if (!msg) Py_RETURN_NONE;
1582 const upb_FieldDef* f = upb_Message_WhichOneof(msg, o);
1583 if (!f) Py_RETURN_NONE;
1584 return PyUnicode_FromString(upb_FieldDef_Name(f));
1585}
1586
1587PyObject* DeepCopy(PyObject* _self, PyObject* arg) {
1588 PyUpb_Message* self = (void*)_self;
1589 const upb_MessageDef* def = PyUpb_Message_GetMsgdef(_self);
1590
1591 PyObject* arena = PyUpb_Arena_New();
1592 upb_Message* clone = upb_Message_DeepClone(
1593 self->ptr.msg, upb_MessageDef_MiniTable(def), PyUpb_Arena_Get(arena));
1594 PyObject* ret = PyUpb_Message_Get(clone, def, arena);
1595 Py_DECREF(arena);
1596
1597 return ret;
1598}
1599
1600void PyUpb_Message_ClearExtensionDict(PyObject* _self) {
1601 PyUpb_Message* self = (void*)_self;
1602 assert(self->ext_dict);
1603 self->ext_dict = NULL;
1604}
1605
1606static PyObject* PyUpb_Message_GetExtensionDict(PyObject* _self,
1607 void* closure) {
1608 PyUpb_Message* self = (void*)_self;
1609 if (self->ext_dict) {
1610 Py_INCREF(self->ext_dict);
1611 return self->ext_dict;
1612 }
1613
1614 const upb_MessageDef* m = _PyUpb_Message_GetMsgdef(self);
1615 if (upb_MessageDef_ExtensionRangeCount(m) == 0) {
1616 PyErr_SetNone(PyExc_AttributeError);
1617 return NULL;
1618 }
1619
1620 self->ext_dict = PyUpb_ExtensionDict_New(_self);
1621 return self->ext_dict;
1622}
1623
1624static PyGetSetDef PyUpb_Message_Getters[] = {
1625 {"Extensions", PyUpb_Message_GetExtensionDict, NULL, "Extension dict"},
1626 {NULL}};
1627
1628static PyMethodDef PyUpb_Message_Methods[] = {
1629 {"__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
1630 "Makes a deep copy of the class."},
1631 // TODO
1632 //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
1633 // "Outputs a unicode representation of the message." },
1634 {"ByteSize", (PyCFunction)PyUpb_Message_ByteSize, METH_NOARGS,
1635 "Returns the size of the message in bytes."},
1636 {"Clear", (PyCFunction)PyUpb_Message_Clear, METH_NOARGS,
1637 "Clears the message."},
1638 {"ClearExtension", PyUpb_Message_ClearExtension, METH_O,
1639 "Clears a message field."},
1640 {"ClearField", PyUpb_Message_ClearField, METH_O, "Clears a message field."},
1641 {"CopyFrom", PyUpb_Message_CopyFrom, METH_O,
1642 "Copies a protocol message into the current message."},
1643 {"DiscardUnknownFields", (PyCFunction)PyUpb_Message_DiscardUnknownFields,
1644 METH_NOARGS, "Discards the unknown fields."},
1645 {"FindInitializationErrors", PyUpb_Message_FindInitializationErrors,
1646 METH_NOARGS, "Finds unset required fields."},
1647 {"FromString", PyUpb_Message_FromString, METH_O | METH_CLASS,
1648 "Creates new method instance from given serialized data."},
1649 {"HasExtension", PyUpb_Message_HasExtension, METH_O,
1650 "Checks if a message field is set."},
1651 {"HasField", PyUpb_Message_HasField, METH_O,
1652 "Checks if a message field is set."},
1653 {"IsInitialized", PyUpb_Message_IsInitialized, METH_VARARGS,
1654 "Checks if all required fields of a protocol message are set."},
1655 {"ListFields", PyUpb_Message_ListFields, METH_NOARGS,
1656 "Lists all set fields of a message."},
1657 {"MergeFrom", PyUpb_Message_MergeFrom, METH_O,
1658 "Merges a protocol message into the current message."},
1659 {"MergeFromString", PyUpb_Message_MergeFromString, METH_O,
1660 "Merges a serialized message into the current message."},
1661 {"ParseFromString", PyUpb_Message_ParseFromString, METH_O,
1662 "Parses a serialized message into the current message."},
1663 {"SerializePartialToString",
1664 (PyCFunction)PyUpb_Message_SerializePartialToString,
1665 METH_VARARGS | METH_KEYWORDS,
1666 "Serializes the message to a string, even if it isn't initialized."},
1667 {"SerializeToString", (PyCFunction)PyUpb_Message_SerializeToString,
1668 METH_VARARGS | METH_KEYWORDS,
1669 "Serializes the message to a string, only for initialized messages."},
1670 {"SetInParent", (PyCFunction)PyUpb_Message_SetInParent, METH_NOARGS,
1671 "Sets the has bit of the given field in its parent message."},
1672 {"UnknownFields", (PyCFunction)PyUpb_Message_UnknownFields, METH_NOARGS,
1673 "Parse unknown field set"},
1674 {"WhichOneof", PyUpb_Message_WhichOneof, METH_O,
1675 "Returns the name of the field set inside a oneof, "
1676 "or None if no field is set."},
1677 {"_ListFieldsItemKey", PyUpb_Message_ListFieldsItemKey,
1678 METH_O | METH_STATIC,
1679 "Compares ListFields() list entries by field number"},
1680 {"_CheckCalledFromGeneratedFile",
1681 PyUpb_Message_CheckCalledFromGeneratedFile, METH_NOARGS | METH_STATIC,
1682 "Raises TypeError if the caller is not in a _pb2.py file."},
1683 {NULL, NULL}};
1684
1685static PyType_Slot PyUpb_Message_Slots[] = {
1686 {Py_tp_dealloc, PyUpb_Message_Dealloc},
1687 {Py_tp_doc, "A ProtocolMessage"},
1688 {Py_tp_getattro, PyUpb_Message_GetAttr},
1689 {Py_tp_getset, PyUpb_Message_Getters},
1690 {Py_tp_hash, PyObject_HashNotImplemented},
1691 {Py_tp_methods, PyUpb_Message_Methods},
1692 {Py_tp_new, PyUpb_Message_New},
1693 {Py_tp_str, PyUpb_Message_ToString},
1694 {Py_tp_repr, PyUpb_Message_ToString},
1695 {Py_tp_richcompare, PyUpb_Message_RichCompare},
1696 {Py_tp_setattro, PyUpb_Message_SetAttr},
1697 {Py_tp_init, PyUpb_Message_Init},
1698 {0, NULL}};
1699
1700PyType_Spec PyUpb_Message_Spec = {
1701 PYUPB_MODULE_NAME ".Message", // tp_name
1702 sizeof(PyUpb_Message), // tp_basicsize
1703 0, // tp_itemsize
1704 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
1705 PyUpb_Message_Slots,
1706};
1707
1708// -----------------------------------------------------------------------------
1709// MessageMeta
1710// -----------------------------------------------------------------------------
1711
// MessageMeta is the metaclass for message objects.  The generated code uses it
// to construct message classes, i.e.
//
//   FooMessage = _message.MessageMeta('FooMessage', (_message.Message,), {...})
//
// (This is not quite true: at the moment the Python library subclasses
// MessageMeta, and uses that subclass as the metaclass.  There is a TODO below
// to simplify this, so that the illustration above is indeed accurate).
1720
1721typedef struct {
1722 const upb_MiniTable* layout;
1723 PyObject* py_message_descriptor;
1724} PyUpb_MessageMeta;
1725
1726// The PyUpb_MessageMeta struct is trailing data tacked onto the end of
1727// MessageMeta instances. This means that we get our instances of this struct
1728// by adding the appropriate number of bytes.
1729static PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
1730#ifndef NDEBUG
1731 PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
1732 assert(!state || cls->ob_type == state->message_meta_type);
1733#endif
1734 return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
1735}
1736
1737static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls) {
1738 PyUpb_MessageMeta* self = PyUpb_GetMessageMeta(cls);
1739 return PyUpb_Descriptor_GetDef(self->py_message_descriptor);
1740}
1741
1742PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
1743 const char* name, PyObject* dict) {
1744 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1745 PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
1746 if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
1747 return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
1748 }
1749
1750 const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
1751 assert(msgdef);
1752 assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef)));
1753
1754 PyObject* slots = PyTuple_New(0);
1755 if (!slots) return NULL;
1756 int status = PyDict_SetItemString(dict, "__slots__", slots);
1757 Py_DECREF(slots);
1758 if (status < 0) return NULL;
1759
1760 // Bases are either:
1761 // (Message, Message) # for regular messages
1762 // (Message, Message, WktBase) # For well-known types
1763 PyObject* wkt_bases = PyUpb_GetWktBases(state);
1764 PyObject* wkt_base =
1765 PyDict_GetItemString(wkt_bases, upb_MessageDef_FullName(msgdef));
1766 PyObject* args;
1767 if (wkt_base == NULL) {
1768 args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
1769 state->message_class, dict);
1770 } else {
1771 args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
1772 state->message_class, wkt_base, dict);
1773 }
1774
1775 PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
1776 Py_DECREF(args);
1777 if (!ret) return NULL;
1778
1779 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
1780 meta->py_message_descriptor = py_descriptor;
1781 meta->layout = upb_MessageDef_MiniTable(msgdef);
1782 Py_INCREF(meta->py_message_descriptor);
1783 PyUpb_Descriptor_SetClass(py_descriptor, ret);
1784
1785 PyUpb_ObjCache_Add(meta->layout, ret);
1786
1787 return ret;
1788}
1789
1790static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
1791 PyObject* kwargs) {
1792 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1793 static const char* kwlist[] = {"name", "bases", "dict", 0};
1794 PyObject *bases, *dict;
1795 const char* name;
1796
1797 // Check arguments: (name, bases, dict)
1798 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
1799 &name, &PyTuple_Type, &bases, &PyDict_Type,
1800 &dict)) {
1801 return NULL;
1802 }
1803
1804 // Check bases: only (), or (message.Message,) are allowed
1805 Py_ssize_t size = PyTuple_Size(bases);
1806 if (!(size == 0 ||
1807 (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
1808 PyErr_Format(PyExc_TypeError,
1809 "A Message class can only inherit from Message, not %S",
1810 bases);
1811 return NULL;
1812 }
1813
1814 // Check dict['DESCRIPTOR']
1815 PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
1816 if (py_descriptor == NULL) {
1817 PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
1818 return NULL;
1819 }
1820
1821 const upb_MessageDef* m = PyUpb_Descriptor_GetDef(py_descriptor);
1822 PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
1823 if (ret) return ret;
1824 return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
1825}
1826
1827static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
1828 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1829 PyUpb_ObjCache_Delete(meta->layout);
Protobuf Team Bote32d0942023-11-06 06:43:06 -08001830 // The MessageMeta type is a GC type, which means we should untrack the
1831 // object before invalidating internal state (so that code executed by the
1832 // GC doesn't see the invalid state). Unfortunately since we're calling
1833 // cpython_bits.type_dealloc, which also untracks the object, we can't.
1834 // Instead just make sure the internal state remains reasonable by using
1835 // Py_CLEAR(), which sets the struct member to NULL. The tp_traverse and
1836 // tp_clear methods, which are called by Python's GC, already allow for it
1837 // to be NULL.
1838 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001839 PyTypeObject* tp = Py_TYPE(self);
1840 cpython_bits.type_dealloc(self);
1841 Py_DECREF(tp);
1842}
1843
1844void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) {
1845 PyObject* name =
1846 PyUnicode_FromFormat("%s_FIELD_NUMBER", upb_FieldDef_Name(f));
1847 PyObject* upper = PyObject_CallMethod(name, "upper", "");
1848 PyObject_SetAttr(self, upper, PyLong_FromLong(upb_FieldDef_Number(f)));
1849 Py_DECREF(name);
1850 Py_DECREF(upper);
1851}
1852
1853static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
1854 PyObject* name) {
1855 const char* name_buf = PyUpb_GetStrData(name);
1856 if (!name_buf) return NULL;
1857 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
1858 const upb_FileDef* filedef = upb_MessageDef_File(msgdef);
1859 const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
1860
1861 PyObject* py_key =
1862 PyBytes_FromFormat("%s.%s", upb_MessageDef_FullName(msgdef), name_buf);
1863 const char* key = PyUpb_GetStrData(py_key);
1864 PyObject* ret = NULL;
1865 const upb_MessageDef* nested = upb_DefPool_FindMessageByName(symtab, key);
1866 const upb_EnumDef* enumdef;
1867 const upb_EnumValueDef* enumval;
1868 const upb_FieldDef* ext;
1869
1870 if (nested) {
1871 ret = PyUpb_Descriptor_GetClass(nested);
1872 } else if ((enumdef = upb_DefPool_FindEnumByName(symtab, key))) {
1873 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1874 PyObject* klass = state->enum_type_wrapper_class;
1875 ret = PyUpb_EnumDescriptor_Get(enumdef);
1876 ret = PyObject_CallFunctionObjArgs(klass, ret, NULL);
1877 } else if ((enumval = upb_DefPool_FindEnumByNameval(symtab, key))) {
1878 ret = PyLong_FromLong(upb_EnumValueDef_Number(enumval));
1879 } else if ((ext = upb_DefPool_FindExtensionByName(symtab, key))) {
1880 ret = PyUpb_FieldDescriptor_Get(ext);
1881 }
1882
1883 Py_DECREF(py_key);
1884
1885 const char* suffix = "_FIELD_NUMBER";
1886 size_t n = strlen(name_buf);
1887 size_t suffix_n = strlen(suffix);
1888 if (n > suffix_n && memcmp(suffix, name_buf + n - suffix_n, suffix_n) == 0) {
1889 // We can't look up field names dynamically, because the <NAME>_FIELD_NUMBER
1890 // naming scheme upper-cases the field name and is therefore non-reversible.
1891 // So we just add all field numbers.
1892 int n = upb_MessageDef_FieldCount(msgdef);
1893 for (int i = 0; i < n; i++) {
1894 PyUpb_MessageMeta_AddFieldNumber(self, upb_MessageDef_Field(msgdef, i));
1895 }
1896 n = upb_MessageDef_NestedExtensionCount(msgdef);
1897 for (int i = 0; i < n; i++) {
1898 PyUpb_MessageMeta_AddFieldNumber(
1899 self, upb_MessageDef_NestedExtension(msgdef, i));
1900 }
1901 ret = PyObject_GenericGetAttr(self, name);
1902 }
1903
1904 return ret;
1905}
1906
1907static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
1908 // We want to first delegate to the type's tp_dict to retrieve any attributes
1909 // that were previously calculated and cached in the type's dict.
1910 PyObject* ret = cpython_bits.type_getattro(self, name);
1911 if (ret) return ret;
1912
1913 // We did not find a cached attribute. Try to calculate the attribute
1914 // dynamically, using the descriptor as an argument.
1915 PyErr_Clear();
1916 ret = PyUpb_MessageMeta_GetDynamicAttr(self, name);
1917
1918 if (ret) {
1919 PyObject_SetAttr(self, name, ret);
1920 PyErr_Clear();
1921 return ret;
1922 }
1923
1924 PyErr_SetObject(PyExc_AttributeError, name);
1925 return NULL;
1926}
1927
1928static int PyUpb_MessageMeta_Traverse(PyObject* self, visitproc visit,
1929 void* arg) {
1930 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1931 Py_VISIT(meta->py_message_descriptor);
1932 return cpython_bits.type_traverse(self, visit, arg);
1933}
1934
1935static int PyUpb_MessageMeta_Clear(PyObject* self, visitproc visit, void* arg) {
Protobuf Team Bote32d0942023-11-06 06:43:06 -08001936 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1937 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001938 return cpython_bits.type_clear(self);
1939}
1940
1941static PyType_Slot PyUpb_MessageMeta_Slots[] = {
1942 {Py_tp_new, PyUpb_MessageMeta_New},
1943 {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
1944 {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
1945 {Py_tp_traverse, PyUpb_MessageMeta_Traverse},
1946 {Py_tp_clear, PyUpb_MessageMeta_Clear},
1947 {0, NULL}};
1948
1949static PyType_Spec PyUpb_MessageMeta_Spec = {
1950 PYUPB_MODULE_NAME ".MessageMeta", // tp_name
1951 0, // To be filled in by size of base // tp_basicsize
1952 0, // tp_itemsize
1953 // TODO: remove BASETYPE, Python should just use MessageMeta
1954 // directly instead of subclassing it.
1955 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags
1956 PyUpb_MessageMeta_Slots,
1957};
1958
1959static PyObject* PyUpb_MessageMeta_CreateType(void) {
1960 PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
1961 if (!bases) return NULL;
1962 PyUpb_MessageMeta_Spec.basicsize =
1963 cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
1964 PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
1965 Py_DECREF(bases);
1966 return type;
1967}
1968
1969bool PyUpb_InitMessage(PyObject* m) {
1970 if (!PyUpb_CPythonBits_Init(&cpython_bits)) return false;
1971 PyObject* message_meta_type = PyUpb_MessageMeta_CreateType();
1972
1973 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
1974 state->cmessage_type = PyUpb_AddClass(m, &PyUpb_Message_Spec);
1975 state->message_meta_type = (PyTypeObject*)message_meta_type;
1976
1977 if (!state->cmessage_type || !state->message_meta_type) return false;
1978 if (PyModule_AddObject(m, "MessageMeta", message_meta_type)) return false;
1979 state->listfields_item_key = PyObject_GetAttrString(
1980 (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
1981
1982 PyObject* mod =
1983 PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
1984 if (mod == NULL) return false;
1985
1986 state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError");
1987 state->decode_error_class = PyObject_GetAttrString(mod, "DecodeError");
1988 state->message_class = PyObject_GetAttrString(mod, "Message");
1989 Py_DECREF(mod);
1990
1991 PyObject* enum_type_wrapper = PyImport_ImportModule(
1992 PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
1993 if (enum_type_wrapper == NULL) return false;
1994
1995 state->enum_type_wrapper_class =
1996 PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
1997 Py_DECREF(enum_type_wrapper);
1998
1999 if (!state->encode_error_class || !state->decode_error_class ||
2000 !state->message_class || !state->listfields_item_key ||
2001 !state->enum_type_wrapper_class) {
2002 return false;
2003 }
2004
2005 return true;
2006}