blob: 88e478d3c1c4ae5fb694f0a81f1086816d192f0d [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
Adam Cozzette501ecec2023-09-26 14:36:20 -07007
8#include "python/protobuf.h"
9
10#include "python/descriptor.h"
11#include "python/descriptor_containers.h"
12#include "python/descriptor_pool.h"
13#include "python/extension_dict.h"
14#include "python/map.h"
15#include "python/message.h"
16#include "python/repeated.h"
17#include "python/unknown_fields.h"
18
19static upb_Arena* PyUpb_NewArena(void);
20
21static void PyUpb_ModuleDealloc(void* module) {
22 PyUpb_ModuleState* s = PyModule_GetState(module);
23 PyUpb_WeakMap_Free(s->obj_cache);
24 if (s->c_descriptor_symtab) {
25 upb_DefPool_Free(s->c_descriptor_symtab);
26 }
27}
28
29PyObject* PyUpb_SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
30 if (!arg || !PyBool_Check(arg)) {
31 PyErr_SetString(PyExc_TypeError,
32 "Argument to SetAllowOversizeProtos must be boolean");
33 return NULL;
34 }
35 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
36 state->allow_oversize_protos = PyObject_IsTrue(arg);
37 Py_INCREF(arg);
38 return arg;
39}
40
41static PyMethodDef PyUpb_ModuleMethods[] = {
42 {"SetAllowOversizeProtos", PyUpb_SetAllowOversizeProtos, METH_O,
43 "Enable/disable oversize proto parsing."},
44 {NULL, NULL}};
45
46static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT,
47 PYUPB_MODULE_NAME,
48 "Protobuf Module",
49 sizeof(PyUpb_ModuleState),
50 PyUpb_ModuleMethods, // m_methods
51 NULL, // m_slots
52 NULL, // m_traverse
53 NULL, // m_clear
54 PyUpb_ModuleDealloc};
55
56// -----------------------------------------------------------------------------
57// ModuleState
58// -----------------------------------------------------------------------------
59
60PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void) {
61 PyObject* module = PyState_FindModule(&module_def);
62 return module ? PyModule_GetState(module) : NULL;
63}
64
65PyUpb_ModuleState* PyUpb_ModuleState_GetFromModule(PyObject* module) {
66 PyUpb_ModuleState* state = PyModule_GetState(module);
67 assert(state);
68 assert(PyModule_GetDef(module) == &module_def);
69 return state;
70}
71
72PyUpb_ModuleState* PyUpb_ModuleState_Get(void) {
73 PyObject* module = PyState_FindModule(&module_def);
74 assert(module);
75 return PyUpb_ModuleState_GetFromModule(module);
76}
77
78PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) {
79 if (!state->wkt_bases) {
80 PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE
81 ".well_known_types");
82
83 if (wkt_module == NULL) {
84 return false;
85 }
86
87 state->wkt_bases = PyObject_GetAttrString(wkt_module, "WKTBASES");
88 PyObject* m = PyState_FindModule(&module_def);
89 // Reparent ownership to m.
90 PyModule_AddObject(m, "__internal_wktbases", state->wkt_bases);
91 Py_DECREF(wkt_module);
92 }
93
94 return state->wkt_bases;
95}
96
97// -----------------------------------------------------------------------------
98// WeakMap
99// -----------------------------------------------------------------------------
100
101struct PyUpb_WeakMap {
102 upb_inttable table;
103 upb_Arena* arena;
104};
105
106PyUpb_WeakMap* PyUpb_WeakMap_New(void) {
107 upb_Arena* arena = PyUpb_NewArena();
108 PyUpb_WeakMap* map = upb_Arena_Malloc(arena, sizeof(*map));
109 map->arena = arena;
110 upb_inttable_init(&map->table, map->arena);
111 return map;
112}
113
114void PyUpb_WeakMap_Free(PyUpb_WeakMap* map) { upb_Arena_Free(map->arena); }
115
// Number of low pointer bits that are always zero and are therefore shifted
// away to give the table key better entropy: 2 bits when pointers are 4 bytes
// wide, 3 bits otherwise.
static const int PyUpb_PtrShift = (sizeof(void*) == 4) ? 2 : 3;

// Converts a pointer into the integer key used by the weak map by dropping
// its low PyUpb_PtrShift bits. Those bits are asserted to be zero.
uintptr_t PyUpb_WeakMap_GetKey(const void* key) {
  const uintptr_t addr = (uintptr_t)key;
  const uintptr_t low_mask = ((uintptr_t)1 << PyUpb_PtrShift) - 1;
  (void)low_mask;  // Only read by the assert below.
  assert((addr & low_mask) == 0);
  return addr >> PyUpb_PtrShift;
}
125
126void PyUpb_WeakMap_Add(PyUpb_WeakMap* map, const void* key, PyObject* py_obj) {
127 upb_inttable_insert(&map->table, PyUpb_WeakMap_GetKey(key),
128 upb_value_ptr(py_obj), map->arena);
129}
130
131void PyUpb_WeakMap_Delete(PyUpb_WeakMap* map, const void* key) {
132 upb_value val;
133 bool removed =
134 upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), &val);
135 (void)removed;
136 assert(removed);
137}
138
139void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap* map, const void* key) {
140 upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), NULL);
141}
142
143PyObject* PyUpb_WeakMap_Get(PyUpb_WeakMap* map, const void* key) {
144 upb_value val;
145 if (upb_inttable_lookup(&map->table, PyUpb_WeakMap_GetKey(key), &val)) {
146 PyObject* ret = upb_value_getptr(val);
147 Py_INCREF(ret);
148 return ret;
149 } else {
150 return NULL;
151 }
152}
153
154bool PyUpb_WeakMap_Next(PyUpb_WeakMap* map, const void** key, PyObject** obj,
155 intptr_t* iter) {
156 uintptr_t u_key;
157 upb_value val;
158 if (!upb_inttable_next(&map->table, &u_key, &val, iter)) return false;
159 *key = (void*)(u_key << PyUpb_PtrShift);
160 *obj = upb_value_getptr(val);
161 return true;
162}
163
164void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap* map, intptr_t* iter) {
165 upb_inttable_removeiter(&map->table, iter);
166}
167
168// -----------------------------------------------------------------------------
169// ObjCache
170// -----------------------------------------------------------------------------
171
172PyUpb_WeakMap* PyUpb_ObjCache_Instance(void) {
173 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
174 return state->obj_cache;
175}
176
177void PyUpb_ObjCache_Add(const void* key, PyObject* py_obj) {
178 PyUpb_WeakMap_Add(PyUpb_ObjCache_Instance(), key, py_obj);
179}
180
181void PyUpb_ObjCache_Delete(const void* key) {
182 PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
183 if (!state) {
184 // During the shutdown sequence, our object's Dealloc() methods can be
185 // called *after* our module Dealloc() method has been called. At that
186 // point our state will be NULL and there is nothing to delete out of the
187 // map.
188 return;
189 }
190 PyUpb_WeakMap_Delete(state->obj_cache, key);
191}
192
193PyObject* PyUpb_ObjCache_Get(const void* key) {
194 return PyUpb_WeakMap_Get(PyUpb_ObjCache_Instance(), key);
195}
196
197// -----------------------------------------------------------------------------
198// Arena
199// -----------------------------------------------------------------------------
200
201typedef struct {
202 PyObject_HEAD;
203 upb_Arena* arena;
204} PyUpb_Arena;
205
Adam Cozzette501ecec2023-09-26 14:36:20 -0700206#ifdef __GLIBC__
207#include <malloc.h> // malloc_trim()
208#endif
209
210// A special allocator that calls malloc_trim() periodically to release
211// memory to the OS. Without this call, we appear to leak memory, at least
212// as measured in RSS.
213//
Joshua Habermanbffd01c2024-06-11 11:42:00 -0700214// We opt to use this instead of PyMalloc (which would also solve the
Adam Cozzette501ecec2023-09-26 14:36:20 -0700215// problem) because the latter requires the GIL to be held. This would make
216// our messages unsafe to share with other languages that could free at
217// unpredictable
218// times.
219static void* upb_trim_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
220 size_t size) {
221 (void)alloc;
222 (void)oldsize;
223 if (size == 0) {
224 free(ptr);
225#ifdef __GLIBC__
226 static int count = 0;
227 if (++count == 10000) {
228 malloc_trim(0);
229 count = 0;
230 }
231#endif
232 return NULL;
233 } else {
234 return realloc(ptr, size);
235 }
236}
237static upb_alloc trim_alloc = {&upb_trim_allocfunc};
Joshua Habermanbffd01c2024-06-11 11:42:00 -0700238static upb_alloc* global_alloc = &trim_alloc;
Adam Cozzette501ecec2023-09-26 14:36:20 -0700239
240static upb_Arena* PyUpb_NewArena(void) {
241 return upb_Arena_Init(NULL, 0, global_alloc);
242}
243
244PyObject* PyUpb_Arena_New(void) {
245 PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
246 PyUpb_Arena* arena = (void*)PyType_GenericAlloc(state->arena_type, 0);
247 arena->arena = PyUpb_NewArena();
248 return &arena->ob_base;
249}
250
251static void PyUpb_Arena_Dealloc(PyObject* self) {
252 upb_Arena_Free(PyUpb_Arena_Get(self));
253 PyUpb_Dealloc(self);
254}
255
256upb_Arena* PyUpb_Arena_Get(PyObject* arena) {
257 return ((PyUpb_Arena*)arena)->arena;
258}
259
260static PyType_Slot PyUpb_Arena_Slots[] = {
261 {Py_tp_dealloc, PyUpb_Arena_Dealloc},
262 {0, NULL},
263};
264
265static PyType_Spec PyUpb_Arena_Spec = {
266 PYUPB_MODULE_NAME ".Arena",
267 sizeof(PyUpb_Arena),
268 0, // itemsize
269 Py_TPFLAGS_DEFAULT,
270 PyUpb_Arena_Slots,
271};
272
273static bool PyUpb_InitArena(PyObject* m) {
274 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
275 state->arena_type = PyUpb_AddClass(m, &PyUpb_Arena_Spec);
276 return state->arena_type;
277}
278
279// -----------------------------------------------------------------------------
280// Utilities
281// -----------------------------------------------------------------------------
282
283PyTypeObject* AddObject(PyObject* m, const char* name, PyType_Spec* spec) {
284 PyObject* type = PyType_FromSpec(spec);
285 return type && PyModule_AddObject(m, name, type) == 0 ? (PyTypeObject*)type
286 : NULL;
287}
288
289static const char* PyUpb_GetClassName(PyType_Spec* spec) {
290 // spec->name contains a fully-qualified name, like:
291 // google.protobuf.pyext._message.FooBar
292 //
293 // Find the rightmost '.' to get "FooBar".
294 const char* name = strrchr(spec->name, '.');
295 assert(name);
296 return name + 1;
297}
298
299PyTypeObject* PyUpb_AddClass(PyObject* m, PyType_Spec* spec) {
300 PyObject* type = PyType_FromSpec(spec);
301 const char* name = PyUpb_GetClassName(spec);
302 if (PyModule_AddObject(m, name, type) < 0) {
303 Py_XDECREF(type);
304 return NULL;
305 }
306 return (PyTypeObject*)type;
307}
308
309PyTypeObject* PyUpb_AddClassWithBases(PyObject* m, PyType_Spec* spec,
310 PyObject* bases) {
311 PyObject* type = PyType_FromSpecWithBases(spec, bases);
312 const char* name = PyUpb_GetClassName(spec);
313 if (PyModule_AddObject(m, name, type) < 0) {
314 Py_XDECREF(type);
315 return NULL;
316 }
317 return (PyTypeObject*)type;
318}
319
Sandy Zhang5b329362024-03-05 17:54:43 -0800320PyTypeObject* PyUpb_AddClassWithRegister(PyObject* m, PyType_Spec* spec,
321 PyObject* virtual_base,
322 const char** methods) {
323 PyObject* type = PyType_FromSpec(spec);
324 PyObject* ret1 = PyObject_CallMethod(virtual_base, "register", "O", type);
325 if (!ret1) {
326 Py_XDECREF(type);
327 return NULL;
328 }
329 for (size_t i = 0; methods[i] != NULL; i++) {
330 PyObject* method = PyObject_GetAttrString(virtual_base, methods[i]);
331 if (!method) {
332 Py_XDECREF(type);
333 return NULL;
334 }
335 int ret2 = PyObject_SetAttrString(type, methods[i], method);
336 if (ret2 < 0) {
337 Py_XDECREF(type);
338 return NULL;
339 }
340 }
341
342 return (PyTypeObject*)type;
343}
344
Adam Cozzette501ecec2023-09-26 14:36:20 -0700345const char* PyUpb_GetStrData(PyObject* obj) {
346 if (PyUnicode_Check(obj)) {
347 return PyUnicode_AsUTF8AndSize(obj, NULL);
348 } else if (PyBytes_Check(obj)) {
349 return PyBytes_AsString(obj);
350 } else {
351 return NULL;
352 }
353}
354
355const char* PyUpb_VerifyStrData(PyObject* obj) {
356 const char* ret = PyUpb_GetStrData(obj);
357 if (ret) return ret;
358 PyErr_Format(PyExc_TypeError, "Expected string: %S", obj);
359 return NULL;
360}
361
362PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds) {
363 PyObject* name = PyObject_GetAttrString(cls, "__name__");
364 PyErr_Format(PyExc_RuntimeError,
365 "Objects of type %U may not be created directly.", name);
366 Py_XDECREF(name);
367 return NULL;
368}
369
370bool PyUpb_IndexToRange(PyObject* index, Py_ssize_t size, Py_ssize_t* i,
371 Py_ssize_t* count, Py_ssize_t* step) {
372 assert(i && count && step);
373 if (PySlice_Check(index)) {
374 Py_ssize_t start, stop;
375 if (PySlice_Unpack(index, &start, &stop, step) < 0) return false;
376 *count = PySlice_AdjustIndices(size, &start, &stop, *step);
377 *i = start;
378 } else {
379 *i = PyNumber_AsSsize_t(index, PyExc_IndexError);
380
381 if (*i == -1 && PyErr_Occurred()) {
382 PyErr_SetString(PyExc_TypeError, "list indices must be integers");
383 return false;
384 }
385
386 if (*i < 0) *i += size;
387 *step = 0;
388 *count = 1;
389
390 if (*i < 0 || size <= *i) {
391 PyErr_Format(PyExc_IndexError, "list index out of range");
392 return false;
393 }
394 }
395 return true;
396}
397
398// -----------------------------------------------------------------------------
399// Module Entry Point
400// -----------------------------------------------------------------------------
401
402__attribute__((visibility("default"))) PyMODINIT_FUNC PyInit__message(void) {
403 PyObject* m = PyModule_Create(&module_def);
404 if (!m) return NULL;
405
406 PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
407
408 state->allow_oversize_protos = false;
409 state->wkt_bases = NULL;
410 state->obj_cache = PyUpb_WeakMap_New();
411 state->c_descriptor_symtab = NULL;
412
413 if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) ||
414 !PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m) ||
415 !PyUpb_InitExtensionDict(m) || !PyUpb_Map_Init(m) ||
416 !PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m) ||
417 !PyUpb_UnknownFields_Init(m)) {
418 Py_DECREF(m);
419 return NULL;
420 }
421
422 // Temporary: an cookie we can use in the tests to ensure we are testing upb
423 // and not another protobuf library on the system.
424 PyModule_AddIntConstant(m, "_IS_UPB", 1);
425
426 return m;
427}