blob: 8804bc52d73296ea5d55f589c6d825f99b3ed160 [file] [log] [blame]
Protobuf Team Bot15596be2022-06-24 10:38:27 -07001/*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * upb_table Implementation
30 *
31 * Implementation is heavily inspired by Lua's ltable.c.
32 */
33
Protobuf Team Bot15596be2022-06-24 10:38:27 -070034#include <string.h>
35
Eric Saloe1e8f1e2023-07-28 19:59:51 -070036#include "upb/base/internal/log2.h"
Eric Salob3cb3fb2022-11-14 09:33:03 -080037#include "upb/hash/int_table.h"
38#include "upb/hash/str_table.h"
Eric Saloff8e1b42022-11-13 20:46:22 -080039
Protobuf Team Bot15596be2022-06-24 10:38:27 -070040// Must be last.
Eric Salof6307872022-11-05 16:16:27 -070041#include "upb/port/def.inc"
Protobuf Team Bot15596be2022-06-24 10:38:27 -070042
Eric Salo70566462022-11-16 18:47:10 -080043#define UPB_MAXARRSIZE 16 // 2**16 = 64k.
Protobuf Team Bot15596be2022-06-24 10:38:27 -070044
Eric Salo70566462022-11-16 18:47:10 -080045// From Chromium.
Protobuf Team Bot15596be2022-06-24 10:38:27 -070046#define ARRAY_SIZE(x) \
47 ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
48
49static const double MAX_LOAD = 0.85;
50
51/* The minimum utilization of the array part of a mixed hash/array table. This
52 * is a speed/memory-usage tradeoff (though it's not straightforward because of
53 * cache effects). The lower this is, the more memory we'll use. */
54static const double MIN_DENSITY = 0.1;
55
56static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
57
58static upb_value _upb_value_val(uint64_t val) {
59 upb_value ret;
60 _upb_value_setval(&ret, val);
61 return ret;
62}
63
64static int log2ceil(uint64_t v) {
65 int ret = 0;
66 bool pow2 = is_pow2(v);
67 while (v >>= 1) ret++;
Eric Salo70566462022-11-16 18:47:10 -080068 ret = pow2 ? ret : ret + 1; // Ceiling.
Protobuf Team Bot15596be2022-06-24 10:38:27 -070069 return UPB_MIN(UPB_MAXARRSIZE, ret);
70}
71
72char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
73 size_t n;
74 char* p;
75
76 /* Prevent overflow errors. */
77 if (len == SIZE_MAX) return NULL;
78 /* Always null-terminate, even if binary data; but don't rely on the input to
79 * have a null-terminating byte since it may be a raw binary buffer. */
80 n = len + 1;
81 p = upb_Arena_Malloc(a, n);
82 if (p) {
Joshua Haberman6e1cbdf2023-02-03 14:37:58 -080083 if (len != 0) memcpy(p, s, len);
Protobuf Team Bot15596be2022-06-24 10:38:27 -070084 p[len] = 0;
85 }
86 return p;
87}
88
89/* A type to represent the lookup key of either a strtable or an inttable. */
90typedef union {
91 uintptr_t num;
92 struct {
93 const char* str;
94 size_t len;
95 } str;
96} lookupkey_t;
97
98static lookupkey_t strkey2(const char* str, size_t len) {
99 lookupkey_t k;
100 k.str.str = str;
101 k.str.len = len;
102 return k;
103}
104
105static lookupkey_t intkey(uintptr_t key) {
106 lookupkey_t k;
107 k.num = key;
108 return k;
109}
110
111typedef uint32_t hashfunc_t(upb_tabkey key);
112typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
113
114/* Base table (shared code) ***************************************************/
115
116static uint32_t upb_inthash(uintptr_t key) { return (uint32_t)key; }
117
118static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
119 return t->entries + (hash & t->mask);
120}
121
122static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }
123
124static bool isfull(upb_table* t) { return t->count == t->max_count; }
125
126static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
127 size_t bytes;
128
129 t->count = 0;
130 t->size_lg2 = size_lg2;
131 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
132 t->max_count = upb_table_size(t) * MAX_LOAD;
133 bytes = upb_table_size(t) * sizeof(upb_tabent);
134 if (bytes > 0) {
135 t->entries = upb_Arena_Malloc(a, bytes);
136 if (!t->entries) return false;
137 memset(t->entries, 0, bytes);
138 } else {
139 t->entries = NULL;
140 }
141 return true;
142}
143
144static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
145 upb_tabent* begin = t->entries;
146 upb_tabent* end = begin + upb_table_size(t);
147 for (e = e + 1; e < end; e++) {
148 if (upb_tabent_isempty(e)) return e;
149 }
150 for (e = begin; e < end; e++) {
151 if (upb_tabent_isempty(e)) return e;
152 }
153 UPB_ASSERT(false);
154 return NULL;
155}
156
157static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
158 return (upb_tabent*)upb_getentry(t, hash);
159}
160
161static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
162 uint32_t hash, eqlfunc_t* eql) {
163 const upb_tabent* e;
164
165 if (t->size_lg2 == 0) return NULL;
166 e = upb_getentry(t, hash);
167 if (upb_tabent_isempty(e)) return NULL;
168 while (1) {
169 if (eql(e->key, key)) return e;
170 if ((e = e->next) == NULL) return NULL;
171 }
172}
173
174static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
175 uint32_t hash, eqlfunc_t* eql) {
176 return (upb_tabent*)findentry(t, key, hash, eql);
177}
178
179static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
180 uint32_t hash, eqlfunc_t* eql) {
181 const upb_tabent* e = findentry(t, key, hash, eql);
182 if (e) {
183 if (v) {
184 _upb_value_setval(v, e->val.val);
185 }
186 return true;
187 } else {
188 return false;
189 }
190}
191
192/* The given key must not already exist in the table. */
193static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
194 upb_value val, uint32_t hash, hashfunc_t* hashfunc,
195 eqlfunc_t* eql) {
196 upb_tabent* mainpos_e;
197 upb_tabent* our_e;
198
199 UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
200
201 t->count++;
202 mainpos_e = getentry_mutable(t, hash);
203 our_e = mainpos_e;
204
205 if (upb_tabent_isempty(mainpos_e)) {
206 /* Our main position is empty; use it. */
207 our_e->next = NULL;
208 } else {
209 /* Collision. */
210 upb_tabent* new_e = emptyent(t, mainpos_e);
211 /* Head of collider's chain. */
212 upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
213 if (chain == mainpos_e) {
214 /* Existing ent is in its main position (it has the same hash as us, and
215 * is the head of our chain). Insert to new ent and append to this chain.
216 */
217 new_e->next = mainpos_e->next;
218 mainpos_e->next = new_e;
219 our_e = new_e;
220 } else {
221 /* Existing ent is not in its main position (it is a node in some other
222 * chain). This implies that no existing ent in the table has our hash.
223 * Evict it (updating its chain) and use its ent for head of our chain. */
224 *new_e = *mainpos_e; /* copies next. */
225 while (chain->next != mainpos_e) {
226 chain = (upb_tabent*)chain->next;
227 UPB_ASSERT(chain);
228 }
229 chain->next = new_e;
230 our_e = mainpos_e;
231 our_e->next = NULL;
232 }
233 }
234 our_e->key = tabkey;
235 our_e->val.val = val.val;
236 UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
237}
238
239static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
240 upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
241 upb_tabent* chain = getentry_mutable(t, hash);
242 if (upb_tabent_isempty(chain)) return false;
243 if (eql(chain->key, key)) {
244 /* Element to remove is at the head of its chain. */
245 t->count--;
246 if (val) _upb_value_setval(val, chain->val.val);
247 if (removed) *removed = chain->key;
248 if (chain->next) {
249 upb_tabent* move = (upb_tabent*)chain->next;
250 *chain = *move;
251 move->key = 0; /* Make the slot empty. */
252 } else {
253 chain->key = 0; /* Make the slot empty. */
254 }
255 return true;
256 } else {
257 /* Element to remove is either in a non-head position or not in the
258 * table. */
259 while (chain->next && !eql(chain->next->key, key)) {
260 chain = (upb_tabent*)chain->next;
261 }
262 if (chain->next) {
263 /* Found element to remove. */
264 upb_tabent* rm = (upb_tabent*)chain->next;
265 t->count--;
266 if (val) _upb_value_setval(val, chain->next->val.val);
267 if (removed) *removed = rm->key;
268 rm->key = 0; /* Make the slot empty. */
269 chain->next = rm->next;
270 return true;
271 } else {
272 /* Element to remove is not in the table. */
273 return false;
274 }
275 }
276}
277
278static size_t next(const upb_table* t, size_t i) {
279 do {
280 if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
281 } while (upb_tabent_isempty(&t->entries[i]));
282
283 return i;
284}
285
286static size_t begin(const upb_table* t) { return next(t, -1); }
287
288/* upb_strtable ***************************************************************/
289
290/* A simple "subclass" of upb_table that only adds a hash function for strings.
291 */
292
293static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
294 uint32_t len = (uint32_t)k2.str.len;
295 char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
296 if (str == NULL) return 0;
297 memcpy(str, &len, sizeof(uint32_t));
298 if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
299 str[sizeof(uint32_t) + k2.str.len] = '\0';
300 return (uintptr_t)str;
301}
302
303/* Adapted from ABSL's wyhash. */
304
305static uint64_t UnalignedLoad64(const void* p) {
306 uint64_t val;
307 memcpy(&val, p, 8);
308 return val;
309}
310
311static uint32_t UnalignedLoad32(const void* p) {
312 uint32_t val;
313 memcpy(&val, p, 4);
314 return val;
315}
316
317#if defined(_MSC_VER) && defined(_M_X64)
318#include <intrin.h>
319#endif
320
321/* Computes a * b, returning the low 64 bits of the result and storing the high
322 * 64 bits in |*high|. */
323static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
324#ifdef __SIZEOF_INT128__
325 __uint128_t p = v0;
326 p *= v1;
327 *out_high = (uint64_t)(p >> 64);
328 return (uint64_t)p;
329#elif defined(_MSC_VER) && defined(_M_X64)
330 return _umul128(v0, v1, out_high);
331#else
332 uint64_t a32 = v0 >> 32;
333 uint64_t a00 = v0 & 0xffffffff;
334 uint64_t b32 = v1 >> 32;
335 uint64_t b00 = v1 & 0xffffffff;
336 uint64_t high = a32 * b32;
337 uint64_t low = a00 * b00;
338 uint64_t mid1 = a32 * b00;
339 uint64_t mid2 = a00 * b32;
340 low += (mid1 << 32) + (mid2 << 32);
341 // Omit carry bit, for mixing we do not care about exact numerical precision.
342 high += (mid1 >> 32) + (mid2 >> 32);
343 *out_high = high;
344 return low;
345#endif
346}
347
348static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
349 uint64_t high;
350 uint64_t low = upb_umul128(v0, v1, &high);
351 return low ^ high;
352}
353
354static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
355 const uint64_t salt[]) {
356 const uint8_t* ptr = (const uint8_t*)data;
357 uint64_t starting_length = (uint64_t)len;
358 uint64_t current_state = seed ^ salt[0];
359
360 if (len > 64) {
361 // If we have more than 64 bytes, we're going to handle chunks of 64
362 // bytes at a time. We're going to build up two separate hash states
363 // which we will then hash together.
364 uint64_t duplicated_state = current_state;
365
366 do {
367 uint64_t a = UnalignedLoad64(ptr);
368 uint64_t b = UnalignedLoad64(ptr + 8);
369 uint64_t c = UnalignedLoad64(ptr + 16);
370 uint64_t d = UnalignedLoad64(ptr + 24);
371 uint64_t e = UnalignedLoad64(ptr + 32);
372 uint64_t f = UnalignedLoad64(ptr + 40);
373 uint64_t g = UnalignedLoad64(ptr + 48);
374 uint64_t h = UnalignedLoad64(ptr + 56);
375
376 uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
377 uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
378 current_state = (cs0 ^ cs1);
379
380 uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
381 uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
382 duplicated_state = (ds0 ^ ds1);
383
384 ptr += 64;
385 len -= 64;
386 } while (len > 64);
387
388 current_state = current_state ^ duplicated_state;
389 }
390
391 // We now have a data `ptr` with at most 64 bytes and the current state
392 // of the hashing state machine stored in current_state.
393 while (len > 16) {
394 uint64_t a = UnalignedLoad64(ptr);
395 uint64_t b = UnalignedLoad64(ptr + 8);
396
397 current_state = WyhashMix(a ^ salt[1], b ^ current_state);
398
399 ptr += 16;
400 len -= 16;
401 }
402
403 // We now have a data `ptr` with at most 16 bytes.
404 uint64_t a = 0;
405 uint64_t b = 0;
406 if (len > 8) {
407 // When we have at least 9 and at most 16 bytes, set A to the first 64
408 // bits of the input and B to the last 64 bits of the input. Yes, they will
409 // overlap in the middle if we are working with less than the full 16
410 // bytes.
411 a = UnalignedLoad64(ptr);
412 b = UnalignedLoad64(ptr + len - 8);
413 } else if (len > 3) {
414 // If we have at least 4 and at most 8 bytes, set A to the first 32
415 // bits and B to the last 32 bits.
416 a = UnalignedLoad32(ptr);
417 b = UnalignedLoad32(ptr + len - 4);
418 } else if (len > 0) {
419 // If we have at least 1 and at most 3 bytes, read all of the provided
420 // bits into A, with some adjustments.
421 a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
422 b = 0;
423 } else {
424 a = 0;
425 b = 0;
426 }
427
428 uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
429 uint64_t z = salt[1] ^ starting_length;
430 return WyhashMix(w, z);
431}
432
433const uint64_t kWyhashSalt[5] = {
434 0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
435 0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
436};
437
438uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
439 return Wyhash(p, n, seed, kWyhashSalt);
440}
441
442static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
443 return _upb_Hash(p, n, 0);
444}
445
446static uint32_t strhash(upb_tabkey key) {
447 uint32_t len;
448 char* str = upb_tabstr(key, &len);
449 return _upb_Hash_NoSeed(str, len);
450}
451
452static bool streql(upb_tabkey k1, lookupkey_t k2) {
453 uint32_t len;
454 char* str = upb_tabstr(k1, &len);
455 return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
456}
457
458bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
459 // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
460 // denominator.
461 size_t need_entries = (expected_size + 1) * 1204 / 1024;
462 UPB_ASSERT(need_entries >= expected_size * 0.85);
Eric Saloff8e1b42022-11-13 20:46:22 -0800463 int size_lg2 = upb_Log2Ceiling(need_entries);
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700464 return init(&t->t, size_lg2, a);
465}
466
467void upb_strtable_clear(upb_strtable* t) {
468 size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
469 t->t.count = 0;
470 memset((char*)t->t.entries, 0, bytes);
471}
472
473bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
474 upb_strtable new_table;
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700475 if (!init(&new_table.t, size_lg2, a)) return false;
Eric Salo70566462022-11-16 18:47:10 -0800476
477 intptr_t iter = UPB_STRTABLE_BEGIN;
478 upb_StringView key;
479 upb_value val;
480 while (upb_strtable_next2(t, &key, &val, &iter)) {
481 upb_strtable_insert(&new_table, key.data, key.size, val, a);
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700482 }
483 *t = new_table;
484 return true;
485}
486
487bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
488 upb_value v, upb_Arena* a) {
489 lookupkey_t key;
490 upb_tabkey tabkey;
491 uint32_t hash;
492
493 if (isfull(&t->t)) {
494 /* Need to resize. New table of double the size, add old elements to it. */
495 if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
496 return false;
497 }
498 }
499
500 key = strkey2(k, len);
501 tabkey = strcopy(key, a);
502 if (tabkey == 0) return false;
503
504 hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
505 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
506 return true;
507}
508
509bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
510 upb_value* v) {
511 uint32_t hash = _upb_Hash_NoSeed(key, len);
512 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
513}
514
515bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
516 upb_value* val) {
517 uint32_t hash = _upb_Hash_NoSeed(key, len);
518 upb_tabkey tabkey;
519 return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
520}
521
522/* Iteration */
523
524void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
525 i->t = t;
526 i->index = begin(&t->t);
527}
528
529void upb_strtable_next(upb_strtable_iter* i) {
530 i->index = next(&i->t->t, i->index);
531}
532
533bool upb_strtable_done(const upb_strtable_iter* i) {
534 if (!i->t) return true;
535 return i->index >= upb_table_size(&i->t->t) ||
536 upb_tabent_isempty(str_tabent(i));
537}
538
539upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
540 upb_StringView key;
541 uint32_t len;
542 UPB_ASSERT(!upb_strtable_done(i));
543 key.data = upb_tabstr(str_tabent(i)->key, &len);
544 key.size = len;
545 return key;
546}
547
548upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
549 UPB_ASSERT(!upb_strtable_done(i));
550 return _upb_value_val(str_tabent(i)->val.val);
551}
552
553void upb_strtable_iter_setdone(upb_strtable_iter* i) {
554 i->t = NULL;
555 i->index = SIZE_MAX;
556}
557
558bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
559 const upb_strtable_iter* i2) {
560 if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
561 return i1->t == i2->t && i1->index == i2->index;
562}
563
564/* upb_inttable ***************************************************************/
565
566/* For inttables we use a hybrid structure where small keys are kept in an
567 * array and large keys are put in the hash table. */
568
569static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
570
571static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }
572
573static upb_tabval* mutable_array(upb_inttable* t) {
574 return (upb_tabval*)t->array;
575}
576
577static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
578 if (key < t->array_size) {
579 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
580 } else {
581 upb_tabent* e =
582 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
583 return e ? &e->val : NULL;
584 }
585}
586
587static const upb_tabval* inttable_val_const(const upb_inttable* t,
588 uintptr_t key) {
589 return inttable_val((upb_inttable*)t, key);
590}
591
592size_t upb_inttable_count(const upb_inttable* t) {
593 return t->t.count + t->array_count;
594}
595
596static void check(upb_inttable* t) {
597 UPB_UNUSED(t);
598#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
599 {
Eric Salo70566462022-11-16 18:47:10 -0800600 // This check is very expensive (makes inserts/deletes O(N)).
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700601 size_t count = 0;
Eric Salo70566462022-11-16 18:47:10 -0800602 intptr_t iter = UPB_INTTABLE_BEGIN;
603 uintptr_t key;
604 upb_value val;
605 while (upb_inttable_next(t, &key, &val, &iter)) {
606 UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700607 }
608 UPB_ASSERT(count == upb_inttable_count(t));
609 }
610#endif
611}
612
613bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
614 upb_Arena* a) {
615 size_t array_bytes;
616
617 if (!init(&t->t, hsize_lg2, a)) return false;
618 /* Always make the array part at least 1 long, so that we know key 0
619 * won't be in the hash part, which simplifies things. */
620 t->array_size = UPB_MAX(1, asize);
621 t->array_count = 0;
622 array_bytes = t->array_size * sizeof(upb_value);
623 t->array = upb_Arena_Malloc(a, array_bytes);
624 if (!t->array) {
625 return false;
626 }
627 memset(mutable_array(t), 0xff, array_bytes);
628 check(t);
629 return true;
630}
631
632bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
633 return upb_inttable_sizedinit(t, 0, 4, a);
634}
635
636bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
637 upb_Arena* a) {
638 upb_tabval tabval;
639 tabval.val = val.val;
640 UPB_ASSERT(
641 upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
642
643 if (key < t->array_size) {
644 UPB_ASSERT(!upb_arrhas(t->array[key]));
645 t->array_count++;
646 mutable_array(t)[key].val = val.val;
647 } else {
648 if (isfull(&t->t)) {
649 /* Need to resize the hash part, but we re-use the array part. */
650 size_t i;
651 upb_table new_table;
652
653 if (!init(&new_table, t->t.size_lg2 + 1, a)) {
654 return false;
655 }
656
657 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
658 const upb_tabent* e = &t->t.entries[i];
659 uint32_t hash;
660 upb_value v;
661
662 _upb_value_setval(&v, e->val.val);
663 hash = upb_inthash(e->key);
664 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
665 }
666
667 UPB_ASSERT(t->t.count == new_table.count);
668
669 t->t = new_table;
670 }
671 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
672 }
673 check(t);
674 return true;
675}
676
677bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
678 const upb_tabval* table_v = inttable_val_const(t, key);
679 if (!table_v) return false;
680 if (v) _upb_value_setval(v, table_v->val);
681 return true;
682}
683
684bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
685 upb_tabval* table_v = inttable_val(t, key);
686 if (!table_v) return false;
687 table_v->val = val.val;
688 return true;
689}
690
691bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
692 bool success;
693 if (key < t->array_size) {
694 if (upb_arrhas(t->array[key])) {
695 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
696 t->array_count--;
697 if (val) {
698 _upb_value_setval(val, t->array[key].val);
699 }
700 mutable_array(t)[key] = empty;
701 success = true;
702 } else {
703 success = false;
704 }
705 } else {
706 success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
707 }
708 check(t);
709 return success;
710}
711
712void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
713 /* A power-of-two histogram of the table keys. */
714 size_t counts[UPB_MAXARRSIZE + 1] = {0};
715
716 /* The max key in each bucket. */
717 uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
718
Eric Salo70566462022-11-16 18:47:10 -0800719 {
720 intptr_t iter = UPB_INTTABLE_BEGIN;
721 uintptr_t key;
722 upb_value val;
723 while (upb_inttable_next(t, &key, &val, &iter)) {
724 int bucket = log2ceil(key);
725 max[bucket] = UPB_MAX(max[bucket], key);
726 counts[bucket]++;
727 }
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700728 }
729
730 /* Find the largest power of two that satisfies the MIN_DENSITY
731 * definition (while actually having some keys). */
Eric Salo70566462022-11-16 18:47:10 -0800732 size_t arr_count = upb_inttable_count(t);
733 int size_lg2;
734 upb_inttable new_t;
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700735
736 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
737 if (counts[size_lg2] == 0) {
738 /* We can halve again without losing any entries. */
739 continue;
740 } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
741 break;
742 }
743
744 arr_count -= counts[size_lg2];
745 }
746
747 UPB_ASSERT(arr_count <= upb_inttable_count(t));
748
749 {
750 /* Insert all elements into new, perfectly-sized table. */
751 size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
752 size_t hash_count = upb_inttable_count(t) - arr_count;
753 size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
754 int hashsize_lg2 = log2ceil(hash_size);
755
756 upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
Eric Salo70566462022-11-16 18:47:10 -0800757
758 {
759 intptr_t iter = UPB_INTTABLE_BEGIN;
760 uintptr_t key;
761 upb_value val;
762 while (upb_inttable_next(t, &key, &val, &iter)) {
763 upb_inttable_insert(&new_t, key, val, a);
764 }
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700765 }
Eric Salo70566462022-11-16 18:47:10 -0800766
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700767 UPB_ASSERT(new_t.array_size == arr_size);
768 UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
769 }
770 *t = new_t;
771}
772
Eric Salo70566462022-11-16 18:47:10 -0800773// Iteration.
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700774
Eric Salo70566462022-11-16 18:47:10 -0800775bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
776 intptr_t* iter) {
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700777 intptr_t i = *iter;
Eric Salo70566462022-11-16 18:47:10 -0800778 if ((size_t)(i + 1) <= t->array_size) {
Protobuf Team Botfdb45f02023-03-13 14:41:32 -0700779 while ((size_t)++i < t->array_size) {
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700780 upb_tabval ent = t->array[i];
781 if (upb_arrhas(ent)) {
782 *key = i;
783 *val = _upb_value_val(ent.val);
784 *iter = i;
785 return true;
786 }
787 }
Eric Salo70566462022-11-16 18:47:10 -0800788 i--; // Back up to exactly one position before the start of the table.
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700789 }
790
Eric Salo70566462022-11-16 18:47:10 -0800791 size_t tab_idx = next(&t->t, i - t->array_size);
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700792 if (tab_idx < upb_table_size(&t->t)) {
793 upb_tabent* ent = &t->t.entries[tab_idx];
794 *key = ent->key;
795 *val = _upb_value_val(ent->val.val);
796 *iter = tab_idx + t->array_size;
797 return true;
798 }
799
800 return false;
801}
802
803void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
804 intptr_t i = *iter;
Protobuf Team Botfdb45f02023-03-13 14:41:32 -0700805 if ((size_t)i < t->array_size) {
Protobuf Team Bot15596be2022-06-24 10:38:27 -0700806 t->array_count--;
807 mutable_array(t)[i].val = -1;
808 } else {
809 upb_tabent* ent = &t->t.entries[i - t->array_size];
810 upb_tabent* prev = NULL;
811
812 // Linear search, not great.
813 upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
814 for (upb_tabent* e = t->t.entries; e != end; e++) {
815 if (e->next == ent) {
816 prev = e;
817 break;
818 }
819 }
820
821 if (prev) {
822 prev->next = ent->next;
823 }
824
825 t->t.count--;
826 ent->key = 0;
827 ent->next = NULL;
828 }
829}
830
831bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
832 upb_value* val, intptr_t* iter) {
833 size_t tab_idx = next(&t->t, *iter);
834 if (tab_idx < upb_table_size(&t->t)) {
835 upb_tabent* ent = &t->t.entries[tab_idx];
836 uint32_t len;
837 key->data = upb_tabstr(ent->key, &len);
838 key->size = len;
839 *val = _upb_value_val(ent->val.val);
840 *iter = tab_idx;
841 return true;
842 }
843
844 return false;
845}
846
847void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
848 intptr_t i = *iter;
849 upb_tabent* ent = &t->t.entries[i];
850 upb_tabent* prev = NULL;
851
852 // Linear search, not great.
853 upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
854 for (upb_tabent* e = t->t.entries; e != end; e++) {
855 if (e->next == ent) {
856 prev = e;
857 break;
858 }
859 }
860
861 if (prev) {
862 prev->next = ent->next;
863 }
864
865 t->t.count--;
866 ent->key = 0;
867 ent->next = NULL;
868}
Joshua Habermana0f520d2023-05-25 09:47:14 -0700869
870void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v) {
871 upb_tabent* ent = &t->t.entries[iter];
872 ent->val.val = v.val;
873}