// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

8#include "python/unknown_fields.h"
9
10#include "python/message.h"
11#include "python/protobuf.h"
12#include "upb/wire/eps_copy_input_stream.h"
13#include "upb/wire/reader.h"
14#include "upb/wire/types.h"
15
// -----------------------------------------------------------------------------
// UnknownFieldSet
// -----------------------------------------------------------------------------
19
20typedef struct {
21 PyObject_HEAD;
22 PyObject* fields;
23} PyUpb_UnknownFieldSet;
24
25static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
26 PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
27 Py_XDECREF(self->fields);
28 PyUpb_Dealloc(self);
29}
30
31PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) {
32 PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
33 PyUpb_UnknownFieldSet* self =
34 (void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
35 return self;
36}
37
// For MessageSet the established behavior is for UnknownFieldSet to interpret
// the MessageSet wire format:
//    message MessageSet {
//      repeated group Item = 1 {
//        required int32 type_id = 2;
//        required bytes message = 3;
//      }
//    }
//
// And create unknown fields like:
//   UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
//
// For any unknown fields that are unexpected per the wire format defined above,
// we drop them on the floor.
52
53enum {
54 kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
55 kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
56 kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
57 kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
58};
59
60static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
61 PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
62 const char* ptr) {
63 PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
64 int type_id = 0;
65 PyObject* msg = NULL;
66 while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
67 uint32_t tag;
68 ptr = upb_WireReader_ReadTag(ptr, &tag);
69 if (!ptr) goto err;
70 switch (tag) {
71 case kUpb_MessageSet_EndItemTag:
72 goto done;
73 case kUpb_MessageSet_TypeIdTag: {
74 uint64_t tmp;
75 ptr = upb_WireReader_ReadVarint(ptr, &tmp);
76 if (!ptr) goto err;
77 if (!type_id) type_id = tmp;
78 break;
79 }
80 case kUpb_MessageSet_MessageTag: {
81 int size;
82 ptr = upb_WireReader_ReadSize(ptr, &size);
83 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
84 goto err;
85 }
86 const char* str = ptr;
87 ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
88 if (!msg) {
89 msg = PyBytes_FromStringAndSize(str, size);
90 if (!msg) goto err;
91 } else {
92 // already saw a message here so deliberately skipping the duplicate
93 }
94 break;
95 }
96 default:
97 ptr = upb_WireReader_SkipValue(ptr, tag, stream);
98 if (!ptr) goto err;
99 }
100 }
101
102done:
103 if (type_id && msg) {
104 PyObject* field = PyObject_CallFunction(
105 s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
106 if (!field) goto err;
107 PyList_Append(self->fields, field);
108 Py_DECREF(field);
109 }
110 Py_XDECREF(msg);
111 return ptr;
112
113err:
114 Py_XDECREF(msg);
115 return NULL;
116}
117
118static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
119 PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
120 const char* ptr) {
121 self->fields = PyList_New(0);
122 while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
123 uint32_t tag;
124 ptr = upb_WireReader_ReadTag(ptr, &tag);
125 if (!ptr) goto err;
126 if (tag == kUpb_MessageSet_StartItemTag) {
127 ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr);
128 } else {
129 ptr = upb_WireReader_SkipValue(ptr, tag, stream);
130 }
131 if (!ptr) goto err;
132 }
133 if (upb_EpsCopyInputStream_IsError(stream)) goto err;
134 return ptr;
135
136err:
137 Py_DECREF(self->fields);
138 self->fields = NULL;
139 return NULL;
140}
141
142static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
143 upb_EpsCopyInputStream* stream,
144 const char* ptr,
145 int group_number);
146
147static const char* PyUpb_UnknownFieldSet_BuildValue(
148 PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
149 const char* ptr, int field_number, int wire_type, int group_number,
150 PyObject** data) {
151 switch (wire_type) {
152 case kUpb_WireType_Varint: {
153 uint64_t val;
154 ptr = upb_WireReader_ReadVarint(ptr, &val);
155 if (!ptr) return NULL;
156 *data = PyLong_FromUnsignedLongLong(val);
157 return ptr;
158 }
159 case kUpb_WireType_64Bit: {
160 uint64_t val;
161 ptr = upb_WireReader_ReadFixed64(ptr, &val);
162 *data = PyLong_FromUnsignedLongLong(val);
163 return ptr;
164 }
165 case kUpb_WireType_32Bit: {
166 uint32_t val;
167 ptr = upb_WireReader_ReadFixed32(ptr, &val);
168 *data = PyLong_FromUnsignedLongLong(val);
169 return ptr;
170 }
171 case kUpb_WireType_Delimited: {
172 int size;
173 ptr = upb_WireReader_ReadSize(ptr, &size);
174 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
175 return NULL;
176 }
177 const char* str = ptr;
178 ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
179 *data = PyBytes_FromStringAndSize(str, size);
180 return ptr;
181 }
182 case kUpb_WireType_StartGroup: {
183 PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
184 if (!sub) return NULL;
185 *data = &sub->ob_base;
186 return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number);
187 }
188 default:
189 assert(0);
190 *data = NULL;
191 return NULL;
192 }
193}
194
195// For non-MessageSet we just build the unknown fields exactly as they exist on
196// the wire.
197static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
198 upb_EpsCopyInputStream* stream,
199 const char* ptr,
200 int group_number) {
201 PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
202 self->fields = PyList_New(0);
203 while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
204 uint32_t tag;
205 ptr = upb_WireReader_ReadTag(ptr, &tag);
206 if (!ptr) goto err;
207 PyObject* data = NULL;
208 int field_number = upb_WireReader_GetFieldNumber(tag);
209 int wire_type = upb_WireReader_GetWireType(tag);
210 if (wire_type == kUpb_WireType_EndGroup) {
211 if (field_number != group_number) return NULL;
212 return ptr;
213 }
214 ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number,
215 wire_type, group_number, &data);
216 if (!ptr) {
217 Py_XDECREF(data);
218 goto err;
219 }
220 assert(data);
221 PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
222 field_number, wire_type, data);
223 PyList_Append(self->fields, field);
224 Py_DECREF(field);
225 }
226 if (upb_EpsCopyInputStream_IsError(stream)) goto err;
227 return ptr;
228
229err:
230 Py_DECREF(self->fields);
231 self->fields = NULL;
232 return NULL;
233}
234
235static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
236 PyObject* kwargs) {
237 char* kwlist[] = {"message", 0};
238 PyObject* py_msg = NULL;
239
240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
241 return NULL;
242 }
243
244 if (!PyUpb_Message_Verify(py_msg)) return NULL;
245 PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
246 upb_Message* msg = PyUpb_Message_GetIfReified(py_msg);
247 if (!msg) return &self->ob_base;
248
249 size_t size;
250 const char* ptr = upb_Message_GetUnknown(msg, &size);
251 if (size == 0) return &self->ob_base;
252
253 upb_EpsCopyInputStream stream;
254 upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
255 const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg);
256
257 bool ok;
258 if (upb_MessageDef_IsMessageSet(msgdef)) {
259 ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL;
260 } else {
261 ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL;
262 }
263
264 if (!ok) {
265 Py_DECREF(&self->ob_base);
266 return NULL;
267 }
268
269 return &self->ob_base;
270}
271
272static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
273 PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
274 return self->fields ? PyObject_Length(self->fields) : 0;
275}
276
277static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
278 Py_ssize_t index) {
279 PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
280 if (!self->fields) {
281 PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
282 return NULL;
283 }
284 PyObject* ret = PyList_GetItem(self->fields, index);
285 if (ret) Py_INCREF(ret);
286 return ret;
287}
288
289static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
290 {Py_tp_new, &PyUpb_UnknownFieldSet_New},
291 {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc},
292 {Py_sq_length, PyUpb_UnknownFieldSet_Length},
293 {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
294 {Py_tp_hash, PyObject_HashNotImplemented},
295 {0, NULL},
296};
297
298static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
299 PYUPB_MODULE_NAME ".UnknownFieldSet", // tp_name
300 sizeof(PyUpb_UnknownFieldSet), // tp_basicsize
301 0, // tp_itemsize
302 Py_TPFLAGS_DEFAULT, // tp_flags
303 PyUpb_UnknownFieldSet_Slots,
304};
305
// -----------------------------------------------------------------------------
// Top Level
// -----------------------------------------------------------------------------
309
310PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
311 PyObject* mod = NULL;
312 PyObject* namedtuple = NULL;
313 PyObject* ret = NULL;
314
315 mod = PyImport_ImportModule("collections");
316 if (!mod) goto done;
317 namedtuple = PyObject_GetAttrString(mod, "namedtuple");
318 if (!namedtuple) goto done;
319 ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField",
320 "field_number", "wire_type", "data");
321
322done:
323 Py_XDECREF(mod);
324 Py_XDECREF(namedtuple);
325 return ret;
326}
327
328bool PyUpb_UnknownFields_Init(PyObject* m) {
329 PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
330
331 s->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
332 s->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();
333
334 return s->unknown_fields_type && s->unknown_field_type;
335}