blob: d394e6f58c3f9f5cf3208876468e64baca0eae82 [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

8#include "python/message.h"
9
10#include "python/convert.h"
11#include "python/descriptor.h"
12#include "python/extension_dict.h"
13#include "python/map.h"
14#include "python/repeated.h"
15#include "upb/message/copy.h"
16#include "upb/reflection/def.h"
17#include "upb/reflection/message.h"
18#include "upb/text/encode.h"
19#include "upb/util/required_fields.h"
20
21static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls);
22static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name);
23
24// -----------------------------------------------------------------------------
25// CPythonBits
26// -----------------------------------------------------------------------------
27
28// This struct contains a few things that are not exposed directly through the
29// limited API, but that we can get at in somewhat more roundabout ways. The
30// roundabout ways are slower, so we cache the values here.
31//
32// These values are valid to cache in a global, even across sub-interpreters,
33// because they are not pointers to interpreter state. They are process
34// globals that will be the same for any interpreter in this process.
35typedef struct {
36 // For each member, we note the equivalent expression that we could use in the
37 // full (non-limited) API.
38 newfunc type_new; // PyTypeObject.tp_new
39 destructor type_dealloc; // PyTypeObject.tp_dealloc
40 getattrofunc type_getattro; // PyTypeObject.tp_getattro
41 setattrofunc type_setattro; // PyTypeObject.tp_setattro
42 size_t type_basicsize; // sizeof(PyHeapTypeObject)
43 traverseproc type_traverse; // PyTypeObject.tp_traverse
44 inquiry type_clear; // PyTypeObject.tp_clear
45
46 // While we can refer to PY_VERSION_HEX in the limited API, this will give us
47 // the version of Python we were compiled against, which may be different
48 // than the version we are dynamically linked against. Here we want the
49 // version that is actually running in this process.
50 long python_version_hex; // PY_VERSION_HEX
51} PyUpb_CPythonBits;
52
53// A global containing the values for this process.
54PyUpb_CPythonBits cpython_bits;
55
56destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) {
57 // This is a bit desperate. We need type_dealloc(), but PyType_GetSlot(type,
58 // Py_tp_dealloc) will return subtype_dealloc(). There appears to be no way
59 // whatsoever to fetch type_dealloc() through the limited API until Python
60 // 3.10.
61 //
62 // To work around this so we attempt to find it by looking for the offset of
63 // tp_dealloc in PyTypeObject, then memcpy() it directly. This should always
64 // work in practice.
65 //
66 // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap
67 // types. We will be able to replace all this hack with just:
68 //
69 // PyType_GetSlot(&PyType_Type, Py_tp_dealloc)
70 //
71 destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc);
72 for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) {
73 destructor maybe_subtype_dealloc;
74 memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i,
75 sizeof(destructor));
76 if (maybe_subtype_dealloc == subtype_dealloc) {
77 destructor type_dealloc;
78 memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor));
79 return type_dealloc;
80 }
81 }
82 assert(false);
83 return NULL;
84}
85
86static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) {
87 PyObject* bases = NULL;
88 PyTypeObject* type = NULL;
89 PyObject* size = NULL;
90 PyObject* sys = NULL;
91 PyObject* hex_version = NULL;
92 bool ret = false;
93
94 // PyType_GetSlot() only works on heap types, so we cannot use it on
95 // &PyType_Type directly. Instead we create our own (temporary) type derived
96 // from PyType_Type: this will inherit all of the slots from PyType_Type, but
97 // as a heap type it can be queried with PyType_GetSlot().
98 static PyType_Slot dummy_slots[] = {{0, NULL}};
99
100 static PyType_Spec dummy_spec = {
101 "module.DummyClass", // tp_name
102 0, // To be filled in by size of base // tp_basicsize
103 0, // tp_itemsize
104 Py_TPFLAGS_DEFAULT, // tp_flags
105 dummy_slots,
106 };
107
108 bases = Py_BuildValue("(O)", &PyType_Type);
109 if (!bases) goto err;
110 type = (PyTypeObject*)PyType_FromSpecWithBases(&dummy_spec, bases);
111 if (!type) goto err;
112
113 bits->type_new = PyType_GetSlot(type, Py_tp_new);
114 bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type);
115 bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro);
116 bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro);
117 bits->type_traverse = PyType_GetSlot(type, Py_tp_traverse);
118 bits->type_clear = PyType_GetSlot(type, Py_tp_clear);
119
120 size = PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__");
121 if (!size) goto err;
122 bits->type_basicsize = PyLong_AsLong(size);
123 if (bits->type_basicsize == -1) goto err;
124
125 assert(bits->type_new);
126 assert(bits->type_dealloc);
127 assert(bits->type_getattro);
128 assert(bits->type_setattro);
129 assert(bits->type_traverse);
130 assert(bits->type_clear);
131
132#ifndef Py_LIMITED_API
133 assert(bits->type_new == PyType_Type.tp_new);
134 assert(bits->type_dealloc == PyType_Type.tp_dealloc);
135 assert(bits->type_getattro == PyType_Type.tp_getattro);
136 assert(bits->type_setattro == PyType_Type.tp_setattro);
137 assert(bits->type_basicsize == sizeof(PyHeapTypeObject));
138 assert(bits->type_traverse == PyType_Type.tp_traverse);
139 assert(bits->type_clear == PyType_Type.tp_clear);
140#endif
141
142 sys = PyImport_ImportModule("sys");
143 hex_version = PyObject_GetAttrString(sys, "hexversion");
144 bits->python_version_hex = PyLong_AsLong(hex_version);
145 ret = true;
146
147err:
148 Py_XDECREF(bases);
149 Py_XDECREF(type);
150 Py_XDECREF(size);
151 Py_XDECREF(sys);
152 Py_XDECREF(hex_version);
153 return ret;
154}
155
156// -----------------------------------------------------------------------------
157// Message
158// -----------------------------------------------------------------------------
159
160// The main message object. The type of the object (PyUpb_Message.ob_type)
161// will be an instance of the PyUpb_MessageMeta type (defined below). So the
162// chain is:
163// FooMessage = MessageMeta(...)
164// foo = FooMessage()
165//
166// Which becomes:
167// Object C Struct Type Python type (ob_type)
168// ----------------- ----------------- ---------------------
169// foo PyUpb_Message FooMessage
170// FooMessage PyUpb_MessageMeta message_meta_type
171// message_meta_type PyTypeObject 'type' in Python
172//
173// A message object can be in one of two states: present or non-present. When
174// a message is non-present, it stores a reference to its parent, and a write
175// to any attribute will trigger the message to become present in its parent.
176// The parent may also be non-present, in which case a mutation will trigger a
177// chain reaction.
178typedef struct PyUpb_Message {
179 PyObject_HEAD;
180 PyObject* arena;
181 uintptr_t def; // Tagged, low bit 1 == upb_FieldDef*, else upb_MessageDef*
182 union {
183 // when def is msgdef, the data for this msg.
184 upb_Message* msg;
185 // when def is fielddef, owning pointer to parent
186 struct PyUpb_Message* parent;
187 } ptr;
188 PyObject* ext_dict; // Weak pointer to extension dict, if any.
189 // name->obj dict for non-present msg/map/repeated, NULL if none.
190 PyUpb_WeakMap* unset_subobj_map;
191 int version;
192} PyUpb_Message;
193
194static PyObject* PyUpb_Message_GetAttr(PyObject* _self, PyObject* attr);
195
196bool PyUpb_Message_IsStub(PyUpb_Message* msg) { return msg->def & 1; }
197
198const upb_FieldDef* PyUpb_Message_GetFieldDef(PyUpb_Message* msg) {
199 assert(PyUpb_Message_IsStub(msg));
200 return (void*)(msg->def & ~(uintptr_t)1);
201}
202
203static const upb_MessageDef* _PyUpb_Message_GetMsgdef(PyUpb_Message* msg) {
204 return PyUpb_Message_IsStub(msg)
205 ? upb_FieldDef_MessageSubDef(PyUpb_Message_GetFieldDef(msg))
206 : (void*)msg->def;
207}
208
209const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self) {
210 return _PyUpb_Message_GetMsgdef((PyUpb_Message*)self);
211}
212
213static upb_Message* PyUpb_Message_GetMsg(PyUpb_Message* self) {
214 assert(!PyUpb_Message_IsStub(self));
215 return self->ptr.msg;
216}
217
218bool PyUpb_Message_TryCheck(PyObject* self) {
219 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
220 PyObject* type = (PyObject*)Py_TYPE(self);
221 return Py_TYPE(type) == state->message_meta_type;
222}
223
224bool PyUpb_Message_Verify(PyObject* self) {
225 if (!PyUpb_Message_TryCheck(self)) {
226 PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
227 self);
228 return false;
229 }
230 return true;
231}
232
233// If the message is reified, returns it. Otherwise, returns NULL.
234// If NULL is returned, the object is empty and has no underlying data.
235upb_Message* PyUpb_Message_GetIfReified(PyObject* _self) {
236 PyUpb_Message* self = (void*)_self;
237 return PyUpb_Message_IsStub(self) ? NULL : self->ptr.msg;
238}
239
240static PyObject* PyUpb_Message_New(PyObject* cls, PyObject* unused_args,
241 PyObject* unused_kwargs) {
242 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
243 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
244 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
245 msg->def = (uintptr_t)msgdef;
246 msg->arena = PyUpb_Arena_New();
247 msg->ptr.msg = upb_Message_New(layout, PyUpb_Arena_Get(msg->arena));
248 msg->unset_subobj_map = NULL;
249 msg->ext_dict = NULL;
250 msg->version = 0;
251
252 PyObject* ret = &msg->ob_base;
253 PyUpb_ObjCache_Add(msg->ptr.msg, ret);
254 return ret;
255}
256
257/*
258 * PyUpb_Message_LookupName()
259 *
260 * Tries to find a field or oneof named `py_name` in the message object `self`.
261 * The user must pass `f` and/or `o` to indicate whether a field or a oneof name
262 * is expected. If the name is found and it has an expected type, the function
263 * sets `*f` or `*o` respectively and returns true. Otherwise returns false
264 * and sets an exception of type `exc_type` if provided.
265 */
266static bool PyUpb_Message_LookupName(PyUpb_Message* self, PyObject* py_name,
267 const upb_FieldDef** f,
268 const upb_OneofDef** o,
269 PyObject* exc_type) {
270 assert(f || o);
271 Py_ssize_t size;
272 const char* name = NULL;
273 if (PyUnicode_Check(py_name)) {
274 name = PyUnicode_AsUTF8AndSize(py_name, &size);
275 } else if (PyBytes_Check(py_name)) {
276 PyBytes_AsStringAndSize(py_name, (char**)&name, &size);
277 }
278 if (!name) {
279 PyErr_Format(exc_type,
280 "Expected a field name, but got non-string argument %S.",
281 py_name);
282 return false;
283 }
284 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
285
286 if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) {
287 if (exc_type) {
288 PyErr_Format(exc_type, "Protocol message %s has no \"%s\" field.",
289 upb_MessageDef_Name(msgdef), name);
290 }
291 return false;
292 }
293
294 if (!o && !*f) {
295 if (exc_type) {
296 PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
297 name);
298 }
299 return false;
300 }
301
302 if (!f && !*o) {
303 if (exc_type) {
304 PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
305 name);
306 }
307 return false;
308 }
309
310 return true;
311}
312
313static bool PyUpb_Message_InitMessageMapEntry(PyObject* dst, PyObject* src) {
314 if (!src || !dst) return false;
315
316 PyObject* ok = PyObject_CallMethod(dst, "CopyFrom", "O", src);
317 if (!ok) return false;
318 Py_DECREF(ok);
319
320 return true;
321}
322
323int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
324 const upb_FieldDef* f) {
325 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
326 const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
327 PyObject* it = NULL;
328 PyObject* tmp = NULL;
329 int ret = -1;
330 if (upb_FieldDef_IsSubMessage(val_f)) {
331 it = PyObject_GetIter(value);
332 if (it == NULL) {
333 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
334 upb_FieldDef_FullName(f));
335 goto err;
336 }
337 PyObject* e;
338 while ((e = PyIter_Next(it)) != NULL) {
339 PyObject* src = PyObject_GetItem(value, e);
340 PyObject* dst = PyObject_GetItem(map, e);
341 Py_DECREF(e);
342 bool ok = PyUpb_Message_InitMessageMapEntry(dst, src);
343 Py_XDECREF(src);
344 Py_XDECREF(dst);
345 if (!ok) goto err;
346 }
347 } else {
348 tmp = PyObject_CallMethod(map, "update", "O", value);
349 if (!tmp) goto err;
350 }
351 ret = 0;
352
353err:
354 Py_XDECREF(it);
355 Py_XDECREF(tmp);
356 return ret;
357}
358
359void PyUpb_Message_EnsureReified(PyUpb_Message* self);
360
361static bool PyUpb_Message_InitMapAttribute(PyObject* _self, PyObject* name,
362 const upb_FieldDef* f,
363 PyObject* value) {
364 PyObject* map = PyUpb_Message_GetAttr(_self, name);
365 int ok = PyUpb_Message_InitMapAttributes(map, value, f);
366 Py_DECREF(map);
367 return ok >= 0;
368}
369
370static bool PyUpb_Message_InitRepeatedMessageAttribute(PyObject* _self,
371 PyObject* repeated,
372 PyObject* value,
373 const upb_FieldDef* f) {
374 PyObject* it = PyObject_GetIter(value);
375 if (!it) {
376 PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
377 upb_FieldDef_FullName(f));
378 return false;
379 }
380 PyObject* e = NULL;
381 PyObject* m = NULL;
382 while ((e = PyIter_Next(it)) != NULL) {
383 if (PyDict_Check(e)) {
384 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, e);
385 if (!m) goto err;
386 } else {
387 m = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, NULL);
388 if (!m) goto err;
389 PyObject* merged = PyUpb_Message_MergeFrom(m, e);
390 if (!merged) goto err;
391 Py_DECREF(merged);
392 }
393 Py_DECREF(e);
394 Py_DECREF(m);
395 m = NULL;
396 }
397
398err:
399 Py_XDECREF(it);
400 Py_XDECREF(e);
401 Py_XDECREF(m);
402 return !PyErr_Occurred(); // Check PyIter_Next() exit.
403}
404
405static bool PyUpb_Message_InitRepeatedAttribute(PyObject* _self, PyObject* name,
406 PyObject* value) {
407 PyUpb_Message* self = (void*)_self;
408 const upb_FieldDef* field;
409 if (!PyUpb_Message_LookupName(self, name, &field, NULL,
410 PyExc_AttributeError)) {
411 return false;
412 }
413 bool ok = false;
414 PyObject* repeated = PyUpb_Message_GetFieldValue(_self, field);
415 PyObject* tmp = NULL;
416 if (!repeated) goto err;
417 if (upb_FieldDef_IsSubMessage(field)) {
418 if (!PyUpb_Message_InitRepeatedMessageAttribute(_self, repeated, value,
419 field)) {
420 goto err;
421 }
422 } else {
423 tmp = PyUpb_RepeatedContainer_Extend(repeated, value);
424 if (!tmp) goto err;
425 }
426 ok = true;
427
428err:
429 Py_XDECREF(repeated);
430 Py_XDECREF(tmp);
431 return ok;
432}
433
434static bool PyUpb_Message_InitMessageAttribute(PyObject* _self, PyObject* name,
435 PyObject* value) {
436 PyObject* submsg = PyUpb_Message_GetAttr(_self, name);
437 if (!submsg) return -1;
438 assert(!PyErr_Occurred());
439 bool ok;
440 if (PyUpb_Message_TryCheck(value)) {
441 PyObject* tmp = PyUpb_Message_MergeFrom(submsg, value);
442 ok = tmp != NULL;
443 Py_XDECREF(tmp);
444 } else if (PyDict_Check(value)) {
445 assert(!PyErr_Occurred());
446 ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
447 } else {
448 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
449 PyErr_Format(PyExc_TypeError, "Message must be initialized with a dict: %s",
450 upb_MessageDef_FullName(m));
451 ok = false;
452 }
453 Py_DECREF(submsg);
454 return ok;
455}
456
457static bool PyUpb_Message_InitScalarAttribute(upb_Message* msg,
458 const upb_FieldDef* f,
459 PyObject* value,
460 upb_Arena* arena) {
461 upb_MessageValue msgval;
462 assert(!PyErr_Occurred());
463 if (!PyUpb_PyToUpb(value, f, &msgval, arena)) return false;
464 upb_Message_SetFieldByDef(msg, f, msgval, arena);
465 return true;
466}
467
468int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
469 PyObject* kwargs) {
470 assert(!PyErr_Occurred());
471
472 if (args != NULL && PyTuple_Size(args) != 0) {
473 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
474 return -1;
475 }
476
477 if (kwargs == NULL) return 0;
478
479 PyUpb_Message* self = (void*)_self;
480 Py_ssize_t pos = 0;
481 PyObject* name;
482 PyObject* value;
483 PyUpb_Message_EnsureReified(self);
484 upb_Message* msg = PyUpb_Message_GetMsg(self);
485 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
486
487 while (PyDict_Next(kwargs, &pos, &name, &value)) {
488 assert(!PyErr_Occurred());
489 const upb_FieldDef* f;
490 assert(!PyErr_Occurred());
491 if (!PyUpb_Message_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
492 return -1;
493 }
494
495 if (value == Py_None) continue; // Ignored.
496
497 assert(!PyErr_Occurred());
498
499 if (upb_FieldDef_IsMap(f)) {
500 if (!PyUpb_Message_InitMapAttribute(_self, name, f, value)) return -1;
501 } else if (upb_FieldDef_IsRepeated(f)) {
502 if (!PyUpb_Message_InitRepeatedAttribute(_self, name, value)) return -1;
503 } else if (upb_FieldDef_IsSubMessage(f)) {
504 if (!PyUpb_Message_InitMessageAttribute(_self, name, value)) return -1;
505 } else {
506 if (!PyUpb_Message_InitScalarAttribute(msg, f, value, arena)) return -1;
507 }
508 if (PyErr_Occurred()) return -1;
509 }
510
511 if (PyErr_Occurred()) return -1;
512 return 0;
513}
514
515static int PyUpb_Message_Init(PyObject* _self, PyObject* args,
516 PyObject* kwargs) {
517 if (args != NULL && PyTuple_Size(args) != 0) {
518 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
519 return -1;
520 }
521
522 return PyUpb_Message_InitAttributes(_self, args, kwargs);
523}
524
525static PyObject* PyUpb_Message_NewStub(PyObject* parent, const upb_FieldDef* f,
526 PyObject* arena) {
527 const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
528 PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
529
530 PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
531 msg->def = (uintptr_t)f | 1;
532 msg->arena = arena;
533 msg->ptr.parent = (PyUpb_Message*)parent;
534 msg->unset_subobj_map = NULL;
535 msg->ext_dict = NULL;
536 msg->version = 0;
537
538 Py_DECREF(cls);
539 Py_INCREF(parent);
540 Py_INCREF(arena);
541 return &msg->ob_base;
542}
543
544static bool PyUpb_Message_IsEmpty(const upb_Message* msg,
545 const upb_MessageDef* m,
546 const upb_DefPool* ext_pool) {
547 if (!msg) return true;
548
549 size_t iter = kUpb_Message_Begin;
550 const upb_FieldDef* f;
551 upb_MessageValue val;
552 if (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) return false;
553
554 size_t len;
555 (void)upb_Message_GetUnknown(msg, &len);
556 return len == 0;
557}
558
559static bool PyUpb_Message_IsEqual(PyUpb_Message* m1, PyObject* _m2) {
560 PyUpb_Message* m2 = (void*)_m2;
561 if (m1 == m2) return true;
562 if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) {
563 return false;
564 }
565 const upb_MessageDef* m1_msgdef = _PyUpb_Message_GetMsgdef(m1);
566#ifndef NDEBUG
567 const upb_MessageDef* m2_msgdef = _PyUpb_Message_GetMsgdef(m2);
568 assert(m1_msgdef == m2_msgdef);
569#endif
570 const upb_Message* m1_msg = PyUpb_Message_GetIfReified((PyObject*)m1);
571 const upb_Message* m2_msg = PyUpb_Message_GetIfReified(_m2);
572 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m1_msgdef));
573
574 const bool e1 = PyUpb_Message_IsEmpty(m1_msg, m1_msgdef, symtab);
575 const bool e2 = PyUpb_Message_IsEmpty(m2_msg, m1_msgdef, symtab);
576 if (e1 || e2) return e1 && e2;
577
578 return upb_Message_IsEqual(m1_msg, m2_msg, m1_msgdef);
579}
580
581static const upb_FieldDef* PyUpb_Message_InitAsMsg(PyUpb_Message* m,
582 upb_Arena* arena) {
583 const upb_FieldDef* f = PyUpb_Message_GetFieldDef(m);
584 const upb_MessageDef* m2 = upb_FieldDef_MessageSubDef(f);
585 m->ptr.msg = upb_Message_New(upb_MessageDef_MiniTable(m2), arena);
586 m->def = (uintptr_t)m2;
587 PyUpb_ObjCache_Add(m->ptr.msg, &m->ob_base);
588 return f;
589}
590
591static void PyUpb_Message_SetField(PyUpb_Message* parent, const upb_FieldDef* f,
592 PyUpb_Message* child, upb_Arena* arena) {
593 upb_MessageValue msgval = {.msg_val = PyUpb_Message_GetMsg(child)};
594 upb_Message_SetFieldByDef(PyUpb_Message_GetMsg(parent), f, msgval, arena);
595 PyUpb_WeakMap_Delete(parent->unset_subobj_map, f);
596 // Releases a ref previously owned by child->ptr.parent of our child.
597 Py_DECREF(child);
598}
599
600/*
601 * PyUpb_Message_EnsureReified()
602 *
603 * This implements the "expando" behavior of Python protos:
604 * foo = FooProto()
605 *
606 * # The intermediate messages don't really exist, and won't be serialized.
607 * x = foo.bar.bar.bar.bar.bar.baz
608 *
609 * # Now all the intermediate objects are created.
610 * foo.bar.bar.bar.bar.bar.baz = 5
611 *
612 * This function should be called before performing any mutation of a protobuf
613 * object.
614 *
615 * Post-condition:
616 * PyUpb_Message_IsStub(self) is false
617 */
618void PyUpb_Message_EnsureReified(PyUpb_Message* self) {
619 if (!PyUpb_Message_IsStub(self)) return;
620 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
621
622 // This is a non-present message. We need to create a real upb_Message for
623 // this object and every parent until we reach a present message.
624 PyUpb_Message* child = self;
625 PyUpb_Message* parent = self->ptr.parent;
626 const upb_FieldDef* child_f = PyUpb_Message_InitAsMsg(child, arena);
627 Py_INCREF(child); // To avoid a special-case in PyUpb_Message_SetField().
628
629 do {
630 PyUpb_Message* next_parent = parent->ptr.parent;
631 const upb_FieldDef* parent_f = NULL;
632 if (PyUpb_Message_IsStub(parent)) {
633 parent_f = PyUpb_Message_InitAsMsg(parent, arena);
634 }
635 PyUpb_Message_SetField(parent, child_f, child, arena);
636 child = parent;
637 child_f = parent_f;
638 parent = next_parent;
639 } while (child_f);
640
641 // Releases ref previously owned by child->ptr.parent of our child.
642 Py_DECREF(child);
643 self->version++;
644}
645
646static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self);
647
648/*
649 * PyUpb_Message_Reify()
650 *
651 * The message equivalent of PyUpb_*Container_Reify(), this transitions
652 * the wrapper from the unset state (owning a reference on self->ptr.parent) to
653 * the set state (having a non-owning pointer to self->ptr.msg).
654 */
655static void PyUpb_Message_Reify(PyUpb_Message* self, const upb_FieldDef* f,
656 upb_Message* msg) {
657 assert(f == PyUpb_Message_GetFieldDef(self));
658 if (!msg) {
659 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef((PyObject*)self);
660 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
661 msg = upb_Message_New(layout, PyUpb_Arena_Get(self->arena));
662 }
663 PyUpb_ObjCache_Add(msg, &self->ob_base);
664 Py_DECREF(&self->ptr.parent->ob_base);
665 self->ptr.msg = msg; // Overwrites self->ptr.parent
666 self->def = (uintptr_t)upb_FieldDef_MessageSubDef(f);
667 PyUpb_Message_SyncSubobjs(self);
668}
669
670/*
671 * PyUpb_Message_SyncSubobjs()
672 *
673 * This operation must be invoked whenever the underlying upb_Message has been
674 * mutated directly in C. This will attach any newly-present field data
675 * to previously returned stub wrapper objects.
676 *
677 * For example:
678 * foo = FooMessage()
679 * sub = foo.submsg # Empty, unset sub-message
680 *
681 * # SyncSubobjs() is required to connect our existing 'sub' wrapper to the
682 * # newly created foo.submsg data in C.
683 * foo.MergeFrom(FooMessage(submsg={}))
684 *
685 * This requires that all of the new sub-objects that have appeared are owned
686 * by `self`'s arena.
687 */
688static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self) {
689 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
690 if (!subobj_map) return;
691
692 upb_Message* msg = PyUpb_Message_GetMsg(self);
693 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
694 const void* key;
695 PyObject* obj;
696
697 // The last ref to this message could disappear during iteration.
698 // When we call PyUpb_*Container_Reify() below, the container will drop
699 // its ref on `self`. If that was the last ref on self, the object will be
700 // deleted, and `subobj_map` along with it. We need it to live until we are
701 // done iterating.
702 Py_INCREF(&self->ob_base);
703
704 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
705 const upb_FieldDef* f = key;
706 if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f))
707 continue;
708 upb_MessageValue msgval = upb_Message_GetFieldByDef(msg, f);
709 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
710 if (upb_FieldDef_IsMap(f)) {
711 if (!msgval.map_val) continue;
712 PyUpb_MapContainer_Reify(obj, (upb_Map*)msgval.map_val);
713 } else if (upb_FieldDef_IsRepeated(f)) {
714 if (!msgval.array_val) continue;
715 PyUpb_RepeatedContainer_Reify(obj, (upb_Array*)msgval.array_val);
716 } else {
717 PyUpb_Message* sub = (void*)obj;
718 assert(self == sub->ptr.parent);
719 PyUpb_Message_Reify(sub, f, (upb_Message*)msgval.msg_val);
720 }
721 }
722
723 Py_DECREF(&self->ob_base);
724
725 // TODO: present fields need to be iterated too if they can reach
726 // a WeakMap.
727}
728
729static PyObject* PyUpb_Message_ToString(PyUpb_Message* self) {
730 if (PyUpb_Message_IsStub(self)) {
731 return PyUnicode_FromStringAndSize(NULL, 0);
732 }
733 upb_Message* msg = PyUpb_Message_GetMsg(self);
734 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
735 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
736 char buf[1024];
737 int options = UPB_TXTENC_SKIPUNKNOWN;
738 size_t size = upb_TextEncode(msg, msgdef, symtab, options, buf, sizeof(buf));
739 if (size < sizeof(buf)) {
740 return PyUnicode_FromStringAndSize(buf, size);
741 } else {
742 char* buf2 = malloc(size + 1);
743 size_t size2 = upb_TextEncode(msg, msgdef, symtab, options, buf2, size + 1);
744 assert(size == size2);
745 PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
746 free(buf2);
747 return ret;
748 }
749}
750
751static PyObject* PyUpb_Message_RichCompare(PyObject* _self, PyObject* other,
752 int opid) {
753 PyUpb_Message* self = (void*)_self;
754 if (opid != Py_EQ && opid != Py_NE) {
755 Py_INCREF(Py_NotImplemented);
756 return Py_NotImplemented;
757 }
758 if (!PyObject_TypeCheck(other, Py_TYPE(self))) {
759 Py_INCREF(Py_NotImplemented);
760 return Py_NotImplemented;
761 }
762 bool ret = PyUpb_Message_IsEqual(self, other);
763 if (opid == Py_NE) ret = !ret;
764 return PyBool_FromLong(ret);
765}
766
767void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f) {
768 PyUpb_Message* self = (void*)_self;
769 PyUpb_WeakMap_Delete(self->unset_subobj_map, f);
770}
771
772void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
773 upb_MessageValue subobj) {
774 PyUpb_Message* self = (void*)_self;
775 PyUpb_Message_EnsureReified(self);
776 PyUpb_Message_CacheDelete(_self, f);
777 upb_Message_SetFieldByDef(self->ptr.msg, f, subobj,
778 PyUpb_Arena_Get(self->arena));
779}
780
781static void PyUpb_Message_Dealloc(PyObject* _self) {
782 PyUpb_Message* self = (void*)_self;
783
784 if (PyUpb_Message_IsStub(self)) {
785 PyUpb_Message_CacheDelete((PyObject*)self->ptr.parent,
786 PyUpb_Message_GetFieldDef(self));
787 Py_DECREF(self->ptr.parent);
788 } else {
789 PyUpb_ObjCache_Delete(self->ptr.msg);
790 }
791
792 if (self->unset_subobj_map) {
793 PyUpb_WeakMap_Free(self->unset_subobj_map);
794 }
795
796 Py_DECREF(self->arena);
797
798 // We do not use PyUpb_Dealloc() here because Message is a base type and for
799 // base types there is a bug we have to work around in this case (see below).
800 PyTypeObject* tp = Py_TYPE(self);
801 freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
802 tp_free(self);
803
804 if (cpython_bits.python_version_hex >= 0x03080000) {
805 // Prior to Python 3.8 there is a bug where deallocating the type here would
806 // lead to a double-decref: https://bugs.python.org/issue37879
807 Py_DECREF(tp);
808 }
809}
810
811PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
812 PyObject* arena) {
813 PyObject* ret = PyUpb_ObjCache_Get(u_msg);
814 if (ret) return ret;
815
816 PyObject* cls = PyUpb_Descriptor_GetClass(m);
817 // It is not safe to use PyObject_{,GC}_New() due to:
818 // https://bugs.python.org/issue35810
819 PyUpb_Message* py_msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
820 py_msg->arena = arena;
821 py_msg->def = (uintptr_t)m;
822 py_msg->ptr.msg = u_msg;
823 py_msg->unset_subobj_map = NULL;
824 py_msg->ext_dict = NULL;
825 py_msg->version = 0;
826 ret = &py_msg->ob_base;
827 Py_DECREF(cls);
828 Py_INCREF(arena);
829 PyUpb_ObjCache_Add(u_msg, ret);
830 return ret;
831}
832
833/* PyUpb_Message_GetStub()
834 *
835 * Non-present messages return "stub" objects that point to their parent, but
836 * will materialize into real upb objects if they are mutated.
837 *
838 * Note: we do *not* create stubs for repeated/map fields unless the parent
839 * is a stub:
840 *
841 * msg = TestMessage()
842 * msg.submessage # (A) Creates a stub
843 * msg.repeated_foo # (B) Does *not* create a stub
844 * msg.submessage.repeated_bar # (C) Creates a stub
845 *
846 * In case (B) we have some freedom: we could either create a stub, or create
847 * a reified object with underlying data. It appears that either could work
848 * equally well, with no observable change to users. There isn't a clear
849 * advantage to either choice. We choose to follow the behavior of the
850 * pre-existing C++ behavior for consistency, but if it becomes apparent that
851 * there would be some benefit to reversing this decision, it should be totally
852 * within the realm of possibility.
853 */
854PyObject* PyUpb_Message_GetStub(PyUpb_Message* self,
855 const upb_FieldDef* field) {
856 PyObject* _self = (void*)self;
857 if (!self->unset_subobj_map) {
858 self->unset_subobj_map = PyUpb_WeakMap_New();
859 }
860 PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field);
861
862 if (subobj) return subobj;
863
864 if (upb_FieldDef_IsMap(field)) {
865 subobj = PyUpb_MapContainer_NewStub(_self, field, self->arena);
866 } else if (upb_FieldDef_IsRepeated(field)) {
867 subobj = PyUpb_RepeatedContainer_NewStub(_self, field, self->arena);
868 } else {
869 subobj = PyUpb_Message_NewStub(&self->ob_base, field, self->arena);
870 }
871 PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj);
872
873 assert(!PyErr_Occurred());
874 return subobj;
875}
876
877PyObject* PyUpb_Message_GetPresentWrapper(PyUpb_Message* self,
878 const upb_FieldDef* field) {
879 assert(!PyUpb_Message_IsStub(self));
880 upb_MutableMessageValue mutval =
881 upb_Message_Mutable(self->ptr.msg, field, PyUpb_Arena_Get(self->arena));
882 if (upb_FieldDef_IsMap(field)) {
883 return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field,
884 self->arena);
885 } else {
886 return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, field,
887 self->arena);
888 }
889}
890
891PyObject* PyUpb_Message_GetScalarValue(PyUpb_Message* self,
892 const upb_FieldDef* field) {
893 upb_MessageValue val;
894 if (PyUpb_Message_IsStub(self)) {
895 // Unset message always returns default values.
896 val = upb_FieldDef_Default(field);
897 } else {
898 val = upb_Message_GetFieldByDef(self->ptr.msg, field);
899 }
900 return PyUpb_UpbToPy(val, field, self->arena);
901}
902
903/*
904 * PyUpb_Message_GetFieldValue()
905 *
906 * Implements the equivalent of getattr(msg, field), once `field` has
907 * already been resolved to a `upb_FieldDef*`.
908 *
909 * This may involve constructing a wrapper object for the given field, or
910 * returning one that was previously constructed. If the field is not actually
911 * set, the wrapper object will be an "unset" object that is not actually
912 * connected to any C data.
913 */
914PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
915 const upb_FieldDef* field) {
916 PyUpb_Message* self = (void*)_self;
917 assert(upb_FieldDef_ContainingType(field) == PyUpb_Message_GetMsgdef(_self));
918 bool submsg = upb_FieldDef_IsSubMessage(field);
919 bool seq = upb_FieldDef_IsRepeated(field);
920
921 if ((PyUpb_Message_IsStub(self) && (submsg || seq)) ||
922 (submsg && !seq && !upb_Message_HasFieldByDef(self->ptr.msg, field))) {
923 return PyUpb_Message_GetStub(self, field);
924 } else if (seq) {
925 return PyUpb_Message_GetPresentWrapper(self, field);
926 } else {
927 return PyUpb_Message_GetScalarValue(self, field);
928 }
929}
930
931int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
932 PyObject* value, PyObject* exc) {
933 PyUpb_Message* self = (void*)_self;
934 assert(value);
935
936 if (upb_FieldDef_IsSubMessage(field) || upb_FieldDef_IsRepeated(field)) {
937 PyErr_Format(exc,
938 "Assignment not allowed to message, map, or repeated "
939 "field \"%s\" in protocol message object.",
940 upb_FieldDef_Name(field));
941 return -1;
942 }
943
944 PyUpb_Message_EnsureReified(self);
945
946 upb_MessageValue val;
947 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
948 if (!PyUpb_PyToUpb(value, field, &val, arena)) {
949 return -1;
950 }
951
952 upb_Message_SetFieldByDef(self->ptr.msg, field, val, arena);
953 return 0;
954}
955
956int PyUpb_Message_GetVersion(PyObject* _self) {
957 PyUpb_Message* self = (void*)_self;
958 return self->version;
959}
960
961/*
962 * PyUpb_Message_GetAttr()
963 *
964 * Implements:
965 * foo = msg.foo
966 *
967 * Attribute lookup must find both message fields and base class methods like
968 * msg.SerializeToString().
969 */
970__attribute__((flatten)) static PyObject* PyUpb_Message_GetAttr(
971 PyObject* _self, PyObject* attr) {
972 PyUpb_Message* self = (void*)_self;
973
974 // Lookup field by name.
975 const upb_FieldDef* field;
976 if (PyUpb_Message_LookupName(self, attr, &field, NULL, NULL)) {
977 return PyUpb_Message_GetFieldValue(_self, field);
978 }
979
980 // Check base class attributes.
981 assert(!PyErr_Occurred());
982 PyObject* ret = PyObject_GenericGetAttr(_self, attr);
983 if (ret) return ret;
984
985 // Swallow AttributeError if it occurred and try again on the metaclass
986 // to pick up class attributes. But we have to special-case "Extensions"
987 // which affirmatively returns AttributeError when a message is not
988 // extendable.
989 const char* name;
990 if (PyErr_ExceptionMatches(PyExc_AttributeError) &&
991 (name = PyUpb_GetStrData(attr)) && strcmp(name, "Extensions") != 0) {
992 PyErr_Clear();
993 return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr);
994 }
995
996 return NULL;
997}
998
999/*
1000 * PyUpb_Message_SetAttr()
1001 *
1002 * Implements:
1003 * msg.foo = foo
1004 */
1005static int PyUpb_Message_SetAttr(PyObject* _self, PyObject* attr,
1006 PyObject* value) {
1007 PyUpb_Message* self = (void*)_self;
Joshua Habermande529442023-12-11 16:55:43 -08001008
1009 if (value == NULL) {
1010 PyErr_SetString(PyExc_AttributeError, "Cannot delete field attribute");
1011 return -1;
1012 }
1013
Adam Cozzette501ecec2023-09-26 14:36:20 -07001014 const upb_FieldDef* field;
1015 if (!PyUpb_Message_LookupName(self, attr, &field, NULL,
1016 PyExc_AttributeError)) {
1017 return -1;
1018 }
1019
1020 return PyUpb_Message_SetFieldValue(_self, field, value, PyExc_AttributeError);
1021}
1022
1023static PyObject* PyUpb_Message_HasField(PyObject* _self, PyObject* arg) {
1024 PyUpb_Message* self = (void*)_self;
1025 const upb_FieldDef* field;
1026 const upb_OneofDef* oneof;
1027
1028 if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
1029 return NULL;
1030 }
1031
1032 if (field && !upb_FieldDef_HasPresence(field)) {
1033 PyErr_Format(PyExc_ValueError, "Field %s does not have presence.",
1034 upb_FieldDef_FullName(field));
1035 return NULL;
1036 }
1037
1038 if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;
1039
1040 return PyBool_FromLong(field ? upb_Message_HasFieldByDef(self->ptr.msg, field)
1041 : upb_Message_WhichOneof(self->ptr.msg, oneof) !=
1042 NULL);
1043}
1044
1045static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1046 PyObject* arg);
1047
1048static PyObject* PyUpb_Message_IsInitializedAppendErrors(PyObject* _self,
1049 PyObject* errors) {
1050 PyObject* list = PyUpb_Message_FindInitializationErrors(_self, NULL);
1051 if (!list) return NULL;
1052 bool ok = PyList_Size(list) == 0;
1053 PyObject* ret = NULL;
1054 PyObject* extend_result = NULL;
1055 if (!ok) {
1056 extend_result = PyObject_CallMethod(errors, "extend", "O", list);
1057 if (!extend_result) goto done;
1058 }
1059 ret = PyBool_FromLong(ok);
1060
1061done:
1062 Py_XDECREF(list);
1063 Py_XDECREF(extend_result);
1064 return ret;
1065}
1066
1067static PyObject* PyUpb_Message_IsInitialized(PyObject* _self, PyObject* args) {
1068 PyObject* errors = NULL;
1069 if (!PyArg_ParseTuple(args, "|O", &errors)) {
1070 return NULL;
1071 }
1072 if (errors) {
1073 // We need to collect a list of unset required fields and append it to
1074 // `errors`.
1075 return PyUpb_Message_IsInitializedAppendErrors(_self, errors);
1076 } else {
1077 // We just need to return a boolean "true" or "false" for whether all
1078 // required fields are set.
1079 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1080 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1081 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1082 bool initialized = !upb_util_HasUnsetRequired(msg, m, symtab, NULL);
1083 return PyBool_FromLong(initialized);
1084 }
1085}
1086
1087static PyObject* PyUpb_Message_ListFieldsItemKey(PyObject* self,
1088 PyObject* val) {
1089 assert(PyTuple_Check(val));
1090 PyObject* field = PyTuple_GetItem(val, 0);
1091 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(field);
1092 return PyLong_FromLong(upb_FieldDef_Number(f));
1093}
1094
1095static PyObject* PyUpb_Message_CheckCalledFromGeneratedFile(
1096 PyObject* unused, PyObject* unused_arg) {
1097 PyErr_SetString(
1098 PyExc_TypeError,
1099 "Descriptors cannot be created directly.\n"
1100 "If this call came from a _pb2.py file, your generated code is out of "
1101 "date and must be regenerated with protoc >= 3.19.0.\n"
1102 "If you cannot immediately regenerate your protos, some other possible "
1103 "workarounds are:\n"
1104 " 1. Downgrade the protobuf package to 3.20.x or lower.\n"
1105 " 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will "
1106 "use pure-Python parsing and will be much slower).\n"
1107 "\n"
1108 "More information: "
1109 "https://developers.google.com/protocol-buffers/docs/news/"
1110 "2022-05-06#python-updates");
1111 return NULL;
1112}
1113
1114static bool PyUpb_Message_SortFieldList(PyObject* list) {
1115 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1116 bool ok = false;
1117 PyObject* args = PyTuple_New(0);
1118 PyObject* kwargs = PyDict_New();
1119 PyObject* method = PyObject_GetAttrString(list, "sort");
1120 PyObject* call_result = NULL;
1121 if (!args || !kwargs || !method) goto err;
1122 if (PyDict_SetItemString(kwargs, "key", state->listfields_item_key) < 0) {
1123 goto err;
1124 }
1125 call_result = PyObject_Call(method, args, kwargs);
1126 if (!call_result) goto err;
1127 ok = true;
1128
1129err:
1130 Py_XDECREF(method);
1131 Py_XDECREF(args);
1132 Py_XDECREF(kwargs);
1133 Py_XDECREF(call_result);
1134 return ok;
1135}
1136
1137static PyObject* PyUpb_Message_ListFields(PyObject* _self, PyObject* arg) {
1138 PyObject* list = PyList_New(0);
1139 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1140 if (!msg) return list;
1141
1142 size_t iter1 = kUpb_Message_Begin;
1143 const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
1144 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
1145 const upb_FieldDef* f;
1146 PyObject* field_desc = NULL;
1147 PyObject* py_val = NULL;
1148 PyObject* tuple = NULL;
1149 upb_MessageValue val;
1150 uint32_t last_field = 0;
1151 bool in_order = true;
1152 while (upb_Message_Next(msg, m, symtab, &f, &val, &iter1)) {
1153 const uint32_t field_number = upb_FieldDef_Number(f);
1154 if (field_number < last_field) in_order = false;
1155 last_field = field_number;
1156 PyObject* field_desc = PyUpb_FieldDescriptor_Get(f);
1157 PyObject* py_val = PyUpb_Message_GetFieldValue(_self, f);
1158 if (!field_desc || !py_val) goto err;
1159 PyObject* tuple = Py_BuildValue("(NN)", field_desc, py_val);
1160 field_desc = NULL;
1161 py_val = NULL;
1162 if (!tuple) goto err;
1163 if (PyList_Append(list, tuple)) goto err;
1164 Py_DECREF(tuple);
1165 tuple = NULL;
1166 }
1167
1168 // Users rely on fields being returned in field number order.
1169 if (!in_order && !PyUpb_Message_SortFieldList(list)) goto err;
1170
1171 return list;
1172
1173err:
1174 Py_XDECREF(field_desc);
1175 Py_XDECREF(py_val);
1176 Py_XDECREF(tuple);
1177 Py_DECREF(list);
1178 return NULL;
1179}
1180
1181PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg) {
1182 if (self->ob_type != arg->ob_type) {
1183 PyErr_Format(PyExc_TypeError,
1184 "Parameter to MergeFrom() must be instance of same class: "
1185 "expected %S got %S.",
1186 Py_TYPE(self), Py_TYPE(arg));
1187 return NULL;
1188 }
1189 // OPT: exit if src is empty.
1190 PyObject* subargs = PyTuple_New(0);
1191 PyObject* serialized =
1192 PyUpb_Message_SerializePartialToString(arg, subargs, NULL);
1193 Py_DECREF(subargs);
1194 if (!serialized) return NULL;
1195 PyObject* ret = PyUpb_Message_MergeFromString(self, serialized);
1196 Py_DECREF(serialized);
1197 Py_XDECREF(ret);
1198 Py_RETURN_NONE;
1199}
1200
1201static PyObject* PyUpb_Message_Clear(PyUpb_Message* self);
1202
1203static PyObject* PyUpb_Message_CopyFrom(PyObject* _self, PyObject* arg) {
1204 if (_self->ob_type != arg->ob_type) {
1205 PyErr_Format(PyExc_TypeError,
1206 "Parameter to CopyFrom() must be instance of same class: "
1207 "expected %S got %S.",
1208 Py_TYPE(_self), Py_TYPE(arg));
1209 return NULL;
1210 }
1211 if (_self == arg) {
1212 Py_RETURN_NONE;
1213 }
1214 PyUpb_Message* self = (void*)_self;
1215 PyUpb_Message* other = (void*)arg;
1216 PyUpb_Message_EnsureReified(self);
1217
1218 const upb_Message* other_msg = PyUpb_Message_GetIfReified((PyObject*)other);
1219 if (other_msg) {
1220 upb_Message_DeepCopy(
1221 self->ptr.msg, other_msg,
1222 upb_MessageDef_MiniTable((const upb_MessageDef*)other->def),
1223 PyUpb_Arena_Get(self->arena));
1224 } else {
1225 PyObject* tmp = PyUpb_Message_Clear(self);
1226 Py_DECREF(tmp);
1227 }
1228 PyUpb_Message_SyncSubobjs(self);
1229
1230 Py_RETURN_NONE;
1231}
1232
1233static PyObject* PyUpb_Message_SetInParent(PyObject* _self, PyObject* arg) {
1234 PyUpb_Message* self = (void*)_self;
1235 PyUpb_Message_EnsureReified(self);
1236 Py_RETURN_NONE;
1237}
1238
1239static PyObject* PyUpb_Message_UnknownFields(PyObject* _self, PyObject* arg) {
1240 // TODO: re-enable when unknown fields are added.
1241 // return PyUpb_UnknownFields_New(_self);
1242 PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor");
1243 return NULL;
1244}
1245
1246PyObject* PyUpb_Message_MergeFromString(PyObject* _self, PyObject* arg) {
1247 PyUpb_Message* self = (void*)_self;
1248 char* buf;
1249 Py_ssize_t size;
1250 PyObject* bytes = NULL;
1251
1252 if (PyMemoryView_Check(arg)) {
1253 bytes = PyBytes_FromObject(arg);
1254 // Cannot fail when passed something of the correct type.
1255 int err = PyBytes_AsStringAndSize(bytes, &buf, &size);
1256 (void)err;
1257 assert(err >= 0);
1258 } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) {
1259 return NULL;
1260 }
1261
1262 PyUpb_Message_EnsureReified(self);
1263 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1264 const upb_FileDef* file = upb_MessageDef_File(msgdef);
1265 const upb_ExtensionRegistry* extreg =
1266 upb_DefPool_ExtensionRegistry(upb_FileDef_Pool(file));
1267 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1268 upb_Arena* arena = PyUpb_Arena_Get(self->arena);
1269 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1270 int options =
1271 upb_DecodeOptions_MaxDepth(state->allow_oversize_protos ? UINT16_MAX : 0);
1272 upb_DecodeStatus status =
1273 upb_Decode(buf, size, self->ptr.msg, layout, extreg, options, arena);
1274 Py_XDECREF(bytes);
1275 if (status != kUpb_DecodeStatus_Ok) {
1276 PyErr_Format(state->decode_error_class, "Error parsing message");
1277 return NULL;
1278 }
1279 PyUpb_Message_SyncSubobjs(self);
1280 return PyLong_FromSsize_t(size);
1281}
1282
1283static PyObject* PyUpb_Message_ParseFromString(PyObject* self, PyObject* arg) {
1284 PyObject* tmp = PyUpb_Message_Clear((PyUpb_Message*)self);
1285 Py_DECREF(tmp);
1286 return PyUpb_Message_MergeFromString(self, arg);
1287}
1288
1289static PyObject* PyUpb_Message_ByteSize(PyObject* self, PyObject* args) {
1290 // TODO: At the
1291 // moment upb does not have a "byte size" function, so we just serialize to
1292 // string and get the size of the string.
1293 PyObject* subargs = PyTuple_New(0);
1294 PyObject* serialized = PyUpb_Message_SerializeToString(self, subargs, NULL);
1295 Py_DECREF(subargs);
1296 if (!serialized) return NULL;
1297 size_t size = PyBytes_Size(serialized);
1298 Py_DECREF(serialized);
1299 return PyLong_FromSize_t(size);
1300}
1301
1302static PyObject* PyUpb_Message_Clear(PyUpb_Message* self) {
1303 PyUpb_Message_EnsureReified(self);
1304 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1305 PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
1306
1307 if (subobj_map) {
1308 upb_Message* msg = PyUpb_Message_GetMsg(self);
1309 (void)msg; // Suppress unused warning when asserts are disabled.
1310 intptr_t iter = PYUPB_WEAKMAP_BEGIN;
1311 const void* key;
1312 PyObject* obj;
1313
1314 while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
1315 const upb_FieldDef* f = key;
1316 PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
1317 if (upb_FieldDef_IsMap(f)) {
1318 assert(upb_Message_GetFieldByDef(msg, f).map_val == NULL);
1319 PyUpb_MapContainer_Reify(obj, NULL);
1320 } else if (upb_FieldDef_IsRepeated(f)) {
1321 assert(upb_Message_GetFieldByDef(msg, f).array_val == NULL);
1322 PyUpb_RepeatedContainer_Reify(obj, NULL);
1323 } else {
1324 assert(!upb_Message_HasFieldByDef(msg, f));
1325 PyUpb_Message* sub = (void*)obj;
1326 assert(self == sub->ptr.parent);
1327 PyUpb_Message_Reify(sub, f, NULL);
1328 }
1329 }
1330 }
1331
1332 upb_Message_ClearByDef(self->ptr.msg, msgdef);
1333 Py_RETURN_NONE;
1334}
1335
1336void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f) {
1337 PyUpb_Message* self = (void*)_self;
1338 PyUpb_Message_EnsureReified((PyUpb_Message*)self);
1339
1340 // We must ensure that any stub object is reified so its parent no longer
1341 // points to us.
1342 PyObject* sub = self->unset_subobj_map
1343 ? PyUpb_WeakMap_Get(self->unset_subobj_map, f)
1344 : NULL;
1345
1346 if (upb_FieldDef_IsMap(f)) {
1347 // For maps we additionally have to invalidate any iterators. So we need
1348 // to get an object even if it's reified.
1349 if (!sub) {
1350 sub = PyUpb_Message_GetFieldValue(_self, f);
1351 }
1352 PyUpb_MapContainer_EnsureReified(sub);
1353 PyUpb_MapContainer_Invalidate(sub);
1354 } else if (upb_FieldDef_IsRepeated(f)) {
1355 if (sub) {
1356 PyUpb_RepeatedContainer_EnsureReified(sub);
1357 }
1358 } else if (upb_FieldDef_IsSubMessage(f)) {
1359 if (sub) {
1360 PyUpb_Message_EnsureReified((PyUpb_Message*)sub);
1361 }
1362 }
1363
1364 Py_XDECREF(sub);
1365 upb_Message_ClearFieldByDef(self->ptr.msg, f);
1366}
1367
1368static PyObject* PyUpb_Message_ClearExtension(PyObject* _self, PyObject* arg) {
1369 PyUpb_Message* self = (void*)_self;
1370 PyUpb_Message_EnsureReified(self);
1371 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, arg);
1372 if (!f) return NULL;
1373 PyUpb_Message_DoClearField(_self, f);
1374 Py_RETURN_NONE;
1375}
1376
1377static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) {
1378 PyUpb_Message* self = (void*)_self;
1379
1380 // We always need EnsureReified() here (even for an unset message) to
1381 // preserve behavior like:
1382 // msg = FooMessage()
1383 // msg.foo.Clear()
1384 // assert msg.HasField("foo")
1385 PyUpb_Message_EnsureReified(self);
1386
1387 const upb_FieldDef* f;
1388 const upb_OneofDef* o;
1389 if (!PyUpb_Message_LookupName(self, arg, &f, &o, PyExc_ValueError)) {
1390 return NULL;
1391 }
1392
1393 if (o) f = upb_Message_WhichOneof(self->ptr.msg, o);
1394 if (f) PyUpb_Message_DoClearField(_self, f);
1395 Py_RETURN_NONE;
1396}
1397
1398static PyObject* PyUpb_Message_DiscardUnknownFields(PyUpb_Message* self,
1399 PyObject* arg) {
1400 PyUpb_Message_EnsureReified(self);
1401 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1402 upb_Message_DiscardUnknown(self->ptr.msg, msgdef, 64);
1403 Py_RETURN_NONE;
1404}
1405
1406static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1407 PyObject* arg) {
1408 PyUpb_Message* self = (void*)_self;
1409 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1410 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1411 const upb_DefPool* ext_pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
1412 upb_FieldPathEntry* fields_base;
1413 PyObject* ret = PyList_New(0);
1414 if (upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields_base)) {
1415 upb_FieldPathEntry* fields = fields_base;
1416 char* buf = NULL;
1417 size_t size = 0;
1418 assert(fields->field);
1419 while (fields->field) {
1420 upb_FieldPathEntry* field = fields;
1421 size_t need = upb_FieldPath_ToText(&fields, buf, size);
1422 if (need >= size) {
1423 fields = field;
1424 size = size ? size * 2 : 16;
1425 while (size <= need) size *= 2;
1426 buf = realloc(buf, size);
1427 need = upb_FieldPath_ToText(&fields, buf, size);
1428 assert(size > need);
1429 }
1430 PyObject* str = PyUnicode_FromString(buf);
1431 PyList_Append(ret, str);
1432 Py_DECREF(str);
1433 }
1434 free(buf);
1435 free(fields_base);
1436 }
1437 return ret;
1438}
1439
1440static PyObject* PyUpb_Message_FromString(PyObject* cls, PyObject* serialized) {
1441 PyObject* ret = NULL;
1442 PyObject* length = NULL;
1443
1444 ret = PyObject_CallObject(cls, NULL);
1445 if (ret == NULL) goto err;
1446 length = PyUpb_Message_MergeFromString(ret, serialized);
1447 if (length == NULL) goto err;
1448
1449done:
1450 Py_XDECREF(length);
1451 return ret;
1452
1453err:
1454 Py_XDECREF(ret);
1455 ret = NULL;
1456 goto done;
1457}
1458
1459const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
1460 PyObject* key) {
1461 const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(key);
1462 if (!f) {
1463 PyErr_Clear();
1464 PyErr_Format(PyExc_KeyError, "Object %R is not a field descriptor\n", key);
1465 return NULL;
1466 }
1467 if (!upb_FieldDef_IsExtension(f)) {
1468 PyErr_Format(PyExc_KeyError, "Field %s is not an extension\n",
1469 upb_FieldDef_FullName(f));
1470 return NULL;
1471 }
1472 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
1473 if (upb_FieldDef_ContainingType(f) != msgdef) {
1474 PyErr_Format(PyExc_KeyError, "Extension doesn't match (%s vs %s)",
1475 upb_MessageDef_FullName(msgdef), upb_FieldDef_FullName(f));
1476 return NULL;
1477 }
1478 return f;
1479}
1480
1481static PyObject* PyUpb_Message_HasExtension(PyObject* _self,
1482 PyObject* ext_desc) {
1483 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1484 const upb_FieldDef* f = PyUpb_Message_GetExtensionDef(_self, ext_desc);
1485 if (!f) return NULL;
1486 if (upb_FieldDef_IsRepeated(f)) {
1487 PyErr_SetString(PyExc_KeyError,
1488 "Field is repeated. A singular method is required.");
1489 return NULL;
1490 }
1491 if (!msg) Py_RETURN_FALSE;
1492 return PyBool_FromLong(upb_Message_HasFieldByDef(msg, f));
1493}
1494
1495void PyUpb_Message_ReportInitializationErrors(const upb_MessageDef* msgdef,
1496 PyObject* errors, PyObject* exc) {
1497 PyObject* comma = PyUnicode_FromString(",");
1498 PyObject* missing_fields = NULL;
1499 if (!comma) goto done;
1500 missing_fields = PyUnicode_Join(comma, errors);
1501 if (!missing_fields) goto done;
1502 PyErr_Format(exc, "Message %s is missing required fields: %U",
1503 upb_MessageDef_FullName(msgdef), missing_fields);
1504done:
1505 Py_XDECREF(comma);
1506 Py_XDECREF(missing_fields);
1507 Py_DECREF(errors);
1508}
1509
1510PyObject* PyUpb_Message_SerializeInternal(PyObject* _self, PyObject* args,
1511 PyObject* kwargs,
1512 bool check_required) {
1513 PyUpb_Message* self = (void*)_self;
1514 if (!PyUpb_Message_Verify((PyObject*)self)) return NULL;
1515 static const char* kwlist[] = {"deterministic", NULL};
1516 int deterministic = 0;
1517 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
1518 &deterministic)) {
1519 return NULL;
1520 }
1521
1522 const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
1523 if (PyUpb_Message_IsStub(self)) {
1524 // Nothing to serialize, but we do have to check whether the message is
1525 // initialized.
1526 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1527 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1528 if (!errors) return NULL;
1529 if (PyList_Size(errors) == 0) {
1530 Py_DECREF(errors);
1531 return PyBytes_FromStringAndSize(NULL, 0);
1532 }
1533 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1534 state->encode_error_class);
1535 return NULL;
1536 }
1537
1538 upb_Arena* arena = upb_Arena_New();
1539 const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
1540 size_t size = 0;
1541 // Python does not currently have any effective limit on serialization depth.
1542 int options = upb_EncodeOptions_MaxDepth(UINT16_MAX);
1543 if (check_required) options |= kUpb_EncodeOption_CheckRequired;
1544 if (deterministic) options |= kUpb_EncodeOption_Deterministic;
1545 char* pb;
1546 upb_EncodeStatus status =
1547 upb_Encode(self->ptr.msg, layout, options, arena, &pb, &size);
1548 PyObject* ret = NULL;
1549
1550 if (status != kUpb_EncodeStatus_Ok) {
1551 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1552 PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
1553 if (PyList_Size(errors) != 0) {
1554 PyUpb_Message_ReportInitializationErrors(msgdef, errors,
1555 state->encode_error_class);
1556 } else {
1557 PyErr_Format(state->encode_error_class, "Failed to serialize proto");
1558 }
1559 goto done;
1560 }
1561
1562 ret = PyBytes_FromStringAndSize(pb, size);
1563
1564done:
1565 upb_Arena_Free(arena);
1566 return ret;
1567}
1568
1569PyObject* PyUpb_Message_SerializeToString(PyObject* _self, PyObject* args,
1570 PyObject* kwargs) {
1571 return PyUpb_Message_SerializeInternal(_self, args, kwargs, true);
1572}
1573
1574PyObject* PyUpb_Message_SerializePartialToString(PyObject* _self,
1575 PyObject* args,
1576 PyObject* kwargs) {
1577 return PyUpb_Message_SerializeInternal(_self, args, kwargs, false);
1578}
1579
1580static PyObject* PyUpb_Message_WhichOneof(PyObject* _self, PyObject* name) {
1581 PyUpb_Message* self = (void*)_self;
1582 const upb_OneofDef* o;
1583 if (!PyUpb_Message_LookupName(self, name, NULL, &o, PyExc_ValueError)) {
1584 return NULL;
1585 }
1586 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
1587 if (!msg) Py_RETURN_NONE;
1588 const upb_FieldDef* f = upb_Message_WhichOneof(msg, o);
1589 if (!f) Py_RETURN_NONE;
1590 return PyUnicode_FromString(upb_FieldDef_Name(f));
1591}
1592
1593PyObject* DeepCopy(PyObject* _self, PyObject* arg) {
1594 PyUpb_Message* self = (void*)_self;
1595 const upb_MessageDef* def = PyUpb_Message_GetMsgdef(_self);
Joshua Habermanb9e48942024-01-23 17:10:27 -08001596 const upb_MiniTable* mini_table = upb_MessageDef_MiniTable(def);
1597 upb_Message* msg = PyUpb_Message_GetIfReified(_self);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001598 PyObject* arena = PyUpb_Arena_New();
Joshua Habermanb9e48942024-01-23 17:10:27 -08001599 upb_Arena* upb_arena = PyUpb_Arena_Get(arena);
1600
1601 upb_Message* clone = msg ? upb_Message_DeepClone(msg, mini_table, upb_arena)
1602 : upb_Message_New(mini_table, upb_arena);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001603 PyObject* ret = PyUpb_Message_Get(clone, def, arena);
1604 Py_DECREF(arena);
1605
1606 return ret;
1607}
1608
1609void PyUpb_Message_ClearExtensionDict(PyObject* _self) {
1610 PyUpb_Message* self = (void*)_self;
1611 assert(self->ext_dict);
1612 self->ext_dict = NULL;
1613}
1614
1615static PyObject* PyUpb_Message_GetExtensionDict(PyObject* _self,
1616 void* closure) {
1617 PyUpb_Message* self = (void*)_self;
1618 if (self->ext_dict) {
1619 Py_INCREF(self->ext_dict);
1620 return self->ext_dict;
1621 }
1622
1623 const upb_MessageDef* m = _PyUpb_Message_GetMsgdef(self);
1624 if (upb_MessageDef_ExtensionRangeCount(m) == 0) {
1625 PyErr_SetNone(PyExc_AttributeError);
1626 return NULL;
1627 }
1628
1629 self->ext_dict = PyUpb_ExtensionDict_New(_self);
1630 return self->ext_dict;
1631}
1632
1633static PyGetSetDef PyUpb_Message_Getters[] = {
1634 {"Extensions", PyUpb_Message_GetExtensionDict, NULL, "Extension dict"},
1635 {NULL}};
1636
1637static PyMethodDef PyUpb_Message_Methods[] = {
1638 {"__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
1639 "Makes a deep copy of the class."},
1640 // TODO
1641 //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
1642 // "Outputs a unicode representation of the message." },
1643 {"ByteSize", (PyCFunction)PyUpb_Message_ByteSize, METH_NOARGS,
1644 "Returns the size of the message in bytes."},
1645 {"Clear", (PyCFunction)PyUpb_Message_Clear, METH_NOARGS,
1646 "Clears the message."},
1647 {"ClearExtension", PyUpb_Message_ClearExtension, METH_O,
1648 "Clears a message field."},
1649 {"ClearField", PyUpb_Message_ClearField, METH_O, "Clears a message field."},
1650 {"CopyFrom", PyUpb_Message_CopyFrom, METH_O,
1651 "Copies a protocol message into the current message."},
1652 {"DiscardUnknownFields", (PyCFunction)PyUpb_Message_DiscardUnknownFields,
1653 METH_NOARGS, "Discards the unknown fields."},
1654 {"FindInitializationErrors", PyUpb_Message_FindInitializationErrors,
1655 METH_NOARGS, "Finds unset required fields."},
1656 {"FromString", PyUpb_Message_FromString, METH_O | METH_CLASS,
1657 "Creates new method instance from given serialized data."},
1658 {"HasExtension", PyUpb_Message_HasExtension, METH_O,
1659 "Checks if a message field is set."},
1660 {"HasField", PyUpb_Message_HasField, METH_O,
1661 "Checks if a message field is set."},
1662 {"IsInitialized", PyUpb_Message_IsInitialized, METH_VARARGS,
1663 "Checks if all required fields of a protocol message are set."},
1664 {"ListFields", PyUpb_Message_ListFields, METH_NOARGS,
1665 "Lists all set fields of a message."},
1666 {"MergeFrom", PyUpb_Message_MergeFrom, METH_O,
1667 "Merges a protocol message into the current message."},
1668 {"MergeFromString", PyUpb_Message_MergeFromString, METH_O,
1669 "Merges a serialized message into the current message."},
1670 {"ParseFromString", PyUpb_Message_ParseFromString, METH_O,
1671 "Parses a serialized message into the current message."},
1672 {"SerializePartialToString",
1673 (PyCFunction)PyUpb_Message_SerializePartialToString,
1674 METH_VARARGS | METH_KEYWORDS,
1675 "Serializes the message to a string, even if it isn't initialized."},
1676 {"SerializeToString", (PyCFunction)PyUpb_Message_SerializeToString,
1677 METH_VARARGS | METH_KEYWORDS,
1678 "Serializes the message to a string, only for initialized messages."},
1679 {"SetInParent", (PyCFunction)PyUpb_Message_SetInParent, METH_NOARGS,
1680 "Sets the has bit of the given field in its parent message."},
1681 {"UnknownFields", (PyCFunction)PyUpb_Message_UnknownFields, METH_NOARGS,
1682 "Parse unknown field set"},
1683 {"WhichOneof", PyUpb_Message_WhichOneof, METH_O,
1684 "Returns the name of the field set inside a oneof, "
1685 "or None if no field is set."},
1686 {"_ListFieldsItemKey", PyUpb_Message_ListFieldsItemKey,
1687 METH_O | METH_STATIC,
1688 "Compares ListFields() list entries by field number"},
1689 {"_CheckCalledFromGeneratedFile",
1690 PyUpb_Message_CheckCalledFromGeneratedFile, METH_NOARGS | METH_STATIC,
1691 "Raises TypeError if the caller is not in a _pb2.py file."},
1692 {NULL, NULL}};
1693
1694static PyType_Slot PyUpb_Message_Slots[] = {
1695 {Py_tp_dealloc, PyUpb_Message_Dealloc},
1696 {Py_tp_doc, "A ProtocolMessage"},
1697 {Py_tp_getattro, PyUpb_Message_GetAttr},
1698 {Py_tp_getset, PyUpb_Message_Getters},
1699 {Py_tp_hash, PyObject_HashNotImplemented},
1700 {Py_tp_methods, PyUpb_Message_Methods},
1701 {Py_tp_new, PyUpb_Message_New},
1702 {Py_tp_str, PyUpb_Message_ToString},
1703 {Py_tp_repr, PyUpb_Message_ToString},
1704 {Py_tp_richcompare, PyUpb_Message_RichCompare},
1705 {Py_tp_setattro, PyUpb_Message_SetAttr},
1706 {Py_tp_init, PyUpb_Message_Init},
1707 {0, NULL}};
1708
1709PyType_Spec PyUpb_Message_Spec = {
1710 PYUPB_MODULE_NAME ".Message", // tp_name
1711 sizeof(PyUpb_Message), // tp_basicsize
1712 0, // tp_itemsize
1713 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
1714 PyUpb_Message_Slots,
1715};
1716
1717// -----------------------------------------------------------------------------
1718// MessageMeta
1719// -----------------------------------------------------------------------------
1720
// MessageMeta is the metaclass for message objects.  The generated code uses it
// to construct message classes, i.e.
//
//   FooMessage = _message.MessageMeta('FooMessage', (_message.Message,), {...})
//
// (This is not quite true: at the moment the Python library subclasses
// MessageMeta, and uses that subclass as the metaclass.  There is a TODO below
// to simplify this, so that the illustration above is indeed accurate).
1729
// Per-class data attached to every message class.
typedef struct {
  // MiniTable of the message type. Set from upb_MessageDef_MiniTable() when
  // the class is created and used as the class's key in the object cache.
  const upb_MiniTable* layout;
  // Python descriptor object for the message type. The class holds a
  // reference to it (taken with Py_INCREF when the class is created).
  PyObject* py_message_descriptor;
} PyUpb_MessageMeta;
1734
1735// The PyUpb_MessageMeta struct is trailing data tacked onto the end of
1736// MessageMeta instances. This means that we get our instances of this struct
1737// by adding the appropriate number of bytes.
1738static PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
1739#ifndef NDEBUG
1740 PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
1741 assert(!state || cls->ob_type == state->message_meta_type);
1742#endif
1743 return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
1744}
1745
1746static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls) {
1747 PyUpb_MessageMeta* self = PyUpb_GetMessageMeta(cls);
1748 return PyUpb_Descriptor_GetDef(self->py_message_descriptor);
1749}
1750
1751PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
1752 const char* name, PyObject* dict) {
1753 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1754 PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
1755 if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
1756 return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
1757 }
1758
1759 const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
1760 assert(msgdef);
1761 assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef)));
1762
1763 PyObject* slots = PyTuple_New(0);
1764 if (!slots) return NULL;
1765 int status = PyDict_SetItemString(dict, "__slots__", slots);
1766 Py_DECREF(slots);
1767 if (status < 0) return NULL;
1768
1769 // Bases are either:
1770 // (Message, Message) # for regular messages
1771 // (Message, Message, WktBase) # For well-known types
1772 PyObject* wkt_bases = PyUpb_GetWktBases(state);
1773 PyObject* wkt_base =
1774 PyDict_GetItemString(wkt_bases, upb_MessageDef_FullName(msgdef));
1775 PyObject* args;
1776 if (wkt_base == NULL) {
1777 args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
1778 state->message_class, dict);
1779 } else {
1780 args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
1781 state->message_class, wkt_base, dict);
1782 }
1783
1784 PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
1785 Py_DECREF(args);
1786 if (!ret) return NULL;
1787
1788 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
1789 meta->py_message_descriptor = py_descriptor;
1790 meta->layout = upb_MessageDef_MiniTable(msgdef);
1791 Py_INCREF(meta->py_message_descriptor);
1792 PyUpb_Descriptor_SetClass(py_descriptor, ret);
1793
1794 PyUpb_ObjCache_Add(meta->layout, ret);
1795
1796 return ret;
1797}
1798
1799static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
1800 PyObject* kwargs) {
1801 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1802 static const char* kwlist[] = {"name", "bases", "dict", 0};
1803 PyObject *bases, *dict;
1804 const char* name;
1805
1806 // Check arguments: (name, bases, dict)
1807 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
1808 &name, &PyTuple_Type, &bases, &PyDict_Type,
1809 &dict)) {
1810 return NULL;
1811 }
1812
1813 // Check bases: only (), or (message.Message,) are allowed
1814 Py_ssize_t size = PyTuple_Size(bases);
1815 if (!(size == 0 ||
1816 (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
1817 PyErr_Format(PyExc_TypeError,
1818 "A Message class can only inherit from Message, not %S",
1819 bases);
1820 return NULL;
1821 }
1822
1823 // Check dict['DESCRIPTOR']
1824 PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
1825 if (py_descriptor == NULL) {
1826 PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
1827 return NULL;
1828 }
1829
1830 const upb_MessageDef* m = PyUpb_Descriptor_GetDef(py_descriptor);
1831 PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
1832 if (ret) return ret;
1833 return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
1834}
1835
1836static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
1837 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1838 PyUpb_ObjCache_Delete(meta->layout);
Protobuf Team Bote32d0942023-11-06 06:43:06 -08001839 // The MessageMeta type is a GC type, which means we should untrack the
1840 // object before invalidating internal state (so that code executed by the
1841 // GC doesn't see the invalid state). Unfortunately since we're calling
1842 // cpython_bits.type_dealloc, which also untracks the object, we can't.
1843 // Instead just make sure the internal state remains reasonable by using
1844 // Py_CLEAR(), which sets the struct member to NULL. The tp_traverse and
1845 // tp_clear methods, which are called by Python's GC, already allow for it
1846 // to be NULL.
1847 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001848 PyTypeObject* tp = Py_TYPE(self);
1849 cpython_bits.type_dealloc(self);
1850 Py_DECREF(tp);
1851}
1852
1853void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) {
1854 PyObject* name =
1855 PyUnicode_FromFormat("%s_FIELD_NUMBER", upb_FieldDef_Name(f));
1856 PyObject* upper = PyObject_CallMethod(name, "upper", "");
1857 PyObject_SetAttr(self, upper, PyLong_FromLong(upb_FieldDef_Number(f)));
1858 Py_DECREF(name);
1859 Py_DECREF(upper);
1860}
1861
1862static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
1863 PyObject* name) {
1864 const char* name_buf = PyUpb_GetStrData(name);
1865 if (!name_buf) return NULL;
1866 const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
1867 const upb_FileDef* filedef = upb_MessageDef_File(msgdef);
1868 const upb_DefPool* symtab = upb_FileDef_Pool(filedef);
1869
1870 PyObject* py_key =
1871 PyBytes_FromFormat("%s.%s", upb_MessageDef_FullName(msgdef), name_buf);
1872 const char* key = PyUpb_GetStrData(py_key);
1873 PyObject* ret = NULL;
1874 const upb_MessageDef* nested = upb_DefPool_FindMessageByName(symtab, key);
1875 const upb_EnumDef* enumdef;
1876 const upb_EnumValueDef* enumval;
1877 const upb_FieldDef* ext;
1878
1879 if (nested) {
1880 ret = PyUpb_Descriptor_GetClass(nested);
1881 } else if ((enumdef = upb_DefPool_FindEnumByName(symtab, key))) {
1882 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
1883 PyObject* klass = state->enum_type_wrapper_class;
1884 ret = PyUpb_EnumDescriptor_Get(enumdef);
1885 ret = PyObject_CallFunctionObjArgs(klass, ret, NULL);
1886 } else if ((enumval = upb_DefPool_FindEnumByNameval(symtab, key))) {
1887 ret = PyLong_FromLong(upb_EnumValueDef_Number(enumval));
1888 } else if ((ext = upb_DefPool_FindExtensionByName(symtab, key))) {
1889 ret = PyUpb_FieldDescriptor_Get(ext);
1890 }
1891
1892 Py_DECREF(py_key);
1893
1894 const char* suffix = "_FIELD_NUMBER";
1895 size_t n = strlen(name_buf);
1896 size_t suffix_n = strlen(suffix);
1897 if (n > suffix_n && memcmp(suffix, name_buf + n - suffix_n, suffix_n) == 0) {
1898 // We can't look up field names dynamically, because the <NAME>_FIELD_NUMBER
1899 // naming scheme upper-cases the field name and is therefore non-reversible.
1900 // So we just add all field numbers.
1901 int n = upb_MessageDef_FieldCount(msgdef);
1902 for (int i = 0; i < n; i++) {
1903 PyUpb_MessageMeta_AddFieldNumber(self, upb_MessageDef_Field(msgdef, i));
1904 }
1905 n = upb_MessageDef_NestedExtensionCount(msgdef);
1906 for (int i = 0; i < n; i++) {
1907 PyUpb_MessageMeta_AddFieldNumber(
1908 self, upb_MessageDef_NestedExtension(msgdef, i));
1909 }
1910 ret = PyObject_GenericGetAttr(self, name);
1911 }
1912
1913 return ret;
1914}
1915
1916static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
1917 // We want to first delegate to the type's tp_dict to retrieve any attributes
1918 // that were previously calculated and cached in the type's dict.
1919 PyObject* ret = cpython_bits.type_getattro(self, name);
1920 if (ret) return ret;
1921
1922 // We did not find a cached attribute. Try to calculate the attribute
1923 // dynamically, using the descriptor as an argument.
1924 PyErr_Clear();
1925 ret = PyUpb_MessageMeta_GetDynamicAttr(self, name);
1926
1927 if (ret) {
1928 PyObject_SetAttr(self, name, ret);
1929 PyErr_Clear();
1930 return ret;
1931 }
1932
1933 PyErr_SetObject(PyExc_AttributeError, name);
1934 return NULL;
1935}
1936
1937static int PyUpb_MessageMeta_Traverse(PyObject* self, visitproc visit,
1938 void* arg) {
1939 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1940 Py_VISIT(meta->py_message_descriptor);
1941 return cpython_bits.type_traverse(self, visit, arg);
1942}
1943
1944static int PyUpb_MessageMeta_Clear(PyObject* self, visitproc visit, void* arg) {
Protobuf Team Bote32d0942023-11-06 06:43:06 -08001945 PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
1946 Py_CLEAR(meta->py_message_descriptor);
Adam Cozzette501ecec2023-09-26 14:36:20 -07001947 return cpython_bits.type_clear(self);
1948}
1949
1950static PyType_Slot PyUpb_MessageMeta_Slots[] = {
1951 {Py_tp_new, PyUpb_MessageMeta_New},
1952 {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
1953 {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
1954 {Py_tp_traverse, PyUpb_MessageMeta_Traverse},
1955 {Py_tp_clear, PyUpb_MessageMeta_Clear},
1956 {0, NULL}};
1957
1958static PyType_Spec PyUpb_MessageMeta_Spec = {
1959 PYUPB_MODULE_NAME ".MessageMeta", // tp_name
1960 0, // To be filled in by size of base // tp_basicsize
1961 0, // tp_itemsize
1962 // TODO: remove BASETYPE, Python should just use MessageMeta
1963 // directly instead of subclassing it.
1964 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags
1965 PyUpb_MessageMeta_Slots,
1966};
1967
1968static PyObject* PyUpb_MessageMeta_CreateType(void) {
1969 PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
1970 if (!bases) return NULL;
1971 PyUpb_MessageMeta_Spec.basicsize =
1972 cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
1973 PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
1974 Py_DECREF(bases);
1975 return type;
1976}
1977
1978bool PyUpb_InitMessage(PyObject* m) {
1979 if (!PyUpb_CPythonBits_Init(&cpython_bits)) return false;
1980 PyObject* message_meta_type = PyUpb_MessageMeta_CreateType();
1981
1982 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
1983 state->cmessage_type = PyUpb_AddClass(m, &PyUpb_Message_Spec);
1984 state->message_meta_type = (PyTypeObject*)message_meta_type;
1985
1986 if (!state->cmessage_type || !state->message_meta_type) return false;
1987 if (PyModule_AddObject(m, "MessageMeta", message_meta_type)) return false;
1988 state->listfields_item_key = PyObject_GetAttrString(
1989 (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
1990
1991 PyObject* mod =
1992 PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
1993 if (mod == NULL) return false;
1994
1995 state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError");
1996 state->decode_error_class = PyObject_GetAttrString(mod, "DecodeError");
1997 state->message_class = PyObject_GetAttrString(mod, "Message");
1998 Py_DECREF(mod);
1999
2000 PyObject* enum_type_wrapper = PyImport_ImportModule(
2001 PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
2002 if (enum_type_wrapper == NULL) return false;
2003
2004 state->enum_type_wrapper_class =
2005 PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
2006 Py_DECREF(enum_type_wrapper);
2007
2008 if (!state->encode_error_class || !state->decode_error_class ||
2009 !state->message_class || !state->listfields_item_key ||
2010 !state->enum_type_wrapper_class) {
2011 return false;
2012 }
2013
2014 return true;
2015}