blob: dfaa51918f90ac9f9a8ea52872c215b40b7caf86 [file] [log] [blame]
Protobuf Team Bota75def62023-07-11 11:11:59 -07001// Protocol Buffers - Google's data interchange format
2// Copyright 2023 Google LLC. All rights reserved.
Protobuf Team Bota75def62023-07-11 11:11:59 -07003//
Joshua Haberman4a513032023-09-08 17:12:50 -07004// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file or at
6// https://developers.google.com/open-source/licenses/bsd
Protobuf Team Bota75def62023-07-11 11:11:59 -07007
8//! Items specific to `bytes` and `string` fields.
9#![allow(dead_code)]
10#![allow(unused)]
11
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -070012use crate::__internal::{Private, PtrAndLen, RawMessage};
13use crate::__runtime::{BytesAbsentMutData, BytesPresentMutData, InnerBytesMut};
Alyssa Haroldsend5d207f2023-08-24 13:39:23 -070014use crate::macros::impl_forwarding_settable_value;
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -070015use crate::{
16 AbsentField, FieldEntry, Mut, MutProxy, Optional, PresentField, Proxied, ProxiedWithPresence,
17 SettableValue, View, ViewProxy,
18};
Protobuf Team Bota75def62023-07-11 11:11:59 -070019use std::borrow::Cow;
20use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
21use std::convert::{AsMut, AsRef};
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -070022use std::fmt;
Protobuf Team Bota75def62023-07-11 11:11:59 -070023use std::hash::{Hash, Hasher};
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -070024use std::iter;
Protobuf Team Bota75def62023-07-11 11:11:59 -070025use std::ops::{Deref, DerefMut};
Protobuf Team Bot4b0e7632023-07-26 09:09:44 -070026use utf8::Utf8Chunks;
Protobuf Team Bota75def62023-07-11 11:11:59 -070027
Protobuf Team Bota75def62023-07-11 11:11:59 -070028/// A mutator for `bytes` fields - this type is `protobuf::Mut<'msg, [u8]>`.
29///
Protobuf Team Bot230a9a72023-07-12 14:34:42 -070030/// This type implements `Deref<Target = [u8]>`, so many operations are
Protobuf Team Bota75def62023-07-11 11:11:59 -070031/// provided through that, including indexing and slicing.
32///
Protobuf Team Bot230a9a72023-07-12 14:34:42 -070033/// Conceptually, this type is like a `&'msg mut &'msg str`, though the actual
34/// implementation is dependent on runtime and `'msg` is covariant.
35///
36/// Unlike `Vec<u8>`, this type has no in-place concatenation functions like
37/// `extend_from_slice`.
38///
39/// `BytesMut` is not intended to be grown and reallocated like a `Vec`. It's
40/// recommended to instead build a `Vec<u8>` or `String` and pass that directly
41/// to `set`, which will reuse the allocation if supported by the runtime.
Protobuf Team Bota75def62023-07-11 11:11:59 -070042#[derive(Debug)]
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -070043pub struct BytesMut<'msg> {
44 inner: InnerBytesMut<'msg>,
45}
46
47// SAFETY:
48// - Protobuf Rust messages don't allow shared mutation across threads.
49// - Protobuf Rust messages don't share arenas.
50// - All access that touches an arena occurs behind a `&mut`.
51// - All mutators that store an arena are `!Send`.
52unsafe impl Sync for BytesMut<'_> {}
Protobuf Team Bota75def62023-07-11 11:11:59 -070053
54impl<'msg> BytesMut<'msg> {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -070055 /// Constructs a new `BytesMut` from its internal, runtime-dependent part.
56 #[doc(hidden)]
57 pub fn from_inner(_private: Private, inner: InnerBytesMut<'msg>) -> Self {
58 Self { inner }
59 }
60
61 /// Gets the current value of the field.
62 pub fn get(&self) -> &[u8] {
63 self.as_view()
64 }
65
Protobuf Team Bota75def62023-07-11 11:11:59 -070066 /// Sets the byte string to the given `val`, cloning any borrowed data.
67 ///
68 /// This method accepts both owned and borrowed byte strings; if the runtime
69 /// supports it, an owned value will not reallocate when setting the
70 /// string.
71 pub fn set(&mut self, val: impl SettableValue<[u8]>) {
72 val.set_on(Private, MutProxy::as_mut(self))
73 }
74
Protobuf Team Bot230a9a72023-07-12 14:34:42 -070075 /// Truncates the byte string.
Protobuf Team Bota75def62023-07-11 11:11:59 -070076 ///
77 /// Has no effect if `new_len` is larger than the current `len`.
78 pub fn truncate(&mut self, new_len: usize) {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -070079 self.inner.truncate(new_len)
Protobuf Team Bota75def62023-07-11 11:11:59 -070080 }
81
82 /// Clears the byte string to the empty string.
83 ///
84 /// # Compared with `FieldEntry::clear`
85 ///
86 /// Note that this is different than marking an `optional bytes` field as
87 /// absent; if these `bytes` are in an `optional`, `FieldEntry::is_set`
88 /// will still return `true` after this method is invoked.
89 ///
90 /// This also means that if the field has a non-empty default,
91 /// `BytesMut::clear` results in the accessor returning an empty string
92 /// while `FieldEntry::clear` results in the non-empty default.
93 ///
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -070094 /// However, for a proto3 `bytes` that has implicit presence, there is no
Protobuf Team Bota75def62023-07-11 11:11:59 -070095 /// distinction between these states: unset `bytes` is the same as empty
96 /// `bytes` and the default is always the empty string.
97 ///
98 /// In the C++ API, this is the difference between `msg.clear_bytes_field()`
99 /// and `msg.mutable_bytes_field()->clear()`.
100 ///
101 /// Having the same name and signature as `FieldEntry::clear` makes code
102 /// that calls `field_mut().clear()` easier to migrate from implicit
103 /// to explicit presence.
104 pub fn clear(&mut self) {
105 self.truncate(0);
106 }
107}
108
109impl Deref for BytesMut<'_> {
110 type Target = [u8];
111 fn deref(&self) -> &[u8] {
112 self.as_ref()
113 }
114}
115
Protobuf Team Bota75def62023-07-11 11:11:59 -0700116impl AsRef<[u8]> for BytesMut<'_> {
117 fn as_ref(&self) -> &[u8] {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700118 unsafe { self.inner.get() }
Protobuf Team Bota75def62023-07-11 11:11:59 -0700119 }
120}
121
Protobuf Team Bota75def62023-07-11 11:11:59 -0700122impl Proxied for [u8] {
123 type View<'msg> = &'msg [u8];
124 type Mut<'msg> = BytesMut<'msg>;
125}
126
Protobuf Team Bota75def62023-07-11 11:11:59 -0700127impl ProxiedWithPresence for [u8] {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700128 type PresentMutData<'msg> = BytesPresentMutData<'msg>;
129 type AbsentMutData<'msg> = BytesAbsentMutData<'msg>;
Protobuf Team Bota75def62023-07-11 11:11:59 -0700130
Alyssa Haroldsen1dd6a7d2023-12-11 11:02:23 -0800131 fn clear_present_field(present_mutator: Self::PresentMutData<'_>) -> Self::AbsentMutData<'_> {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700132 present_mutator.clear()
Protobuf Team Bota75def62023-07-11 11:11:59 -0700133 }
134
Alyssa Haroldsen1dd6a7d2023-12-11 11:02:23 -0800135 fn set_absent_to_default(absent_mutator: Self::AbsentMutData<'_>) -> Self::PresentMutData<'_> {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700136 absent_mutator.set_absent_to_default()
Protobuf Team Bota75def62023-07-11 11:11:59 -0700137 }
138}
139
140impl<'msg> ViewProxy<'msg> for &'msg [u8] {
141 type Proxied = [u8];
142
143 fn as_view(&self) -> &[u8] {
144 self
145 }
146
147 fn into_view<'shorter>(self) -> &'shorter [u8]
148 where
149 'msg: 'shorter,
150 {
151 self
152 }
153}
154
155impl<'msg> ViewProxy<'msg> for BytesMut<'msg> {
156 type Proxied = [u8];
157
158 fn as_view(&self) -> &[u8] {
159 self.as_ref()
160 }
161
162 fn into_view<'shorter>(self) -> &'shorter [u8]
163 where
164 'msg: 'shorter,
165 {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700166 self.inner.get()
Protobuf Team Bota75def62023-07-11 11:11:59 -0700167 }
168}
169
170impl<'msg> MutProxy<'msg> for BytesMut<'msg> {
171 fn as_mut(&mut self) -> BytesMut<'_> {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700172 BytesMut { inner: self.inner }
Protobuf Team Bota75def62023-07-11 11:11:59 -0700173 }
174
175 fn into_mut<'shorter>(self) -> BytesMut<'shorter>
176 where
177 'msg: 'shorter,
178 {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700179 BytesMut { inner: self.inner }
Protobuf Team Bota75def62023-07-11 11:11:59 -0700180 }
181}
182
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700183impl SettableValue<[u8]> for &'_ [u8] {
Alyssa Haroldsen1dd6a7d2023-12-11 11:02:23 -0800184 fn set_on<'msg>(self, _private: Private, mutator: Mut<'msg, [u8]>)
Jakob Buchgraberab11a0d2023-11-27 08:14:21 -0800185 where
Alyssa Haroldsen1dd6a7d2023-12-11 11:02:23 -0800186 [u8]: 'msg,
Jakob Buchgraberab11a0d2023-11-27 08:14:21 -0800187 {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700188 // SAFETY: this is a `bytes` field with no restriction on UTF-8.
189 unsafe { mutator.inner.set(self) }
190 }
191
192 fn set_on_absent(
193 self,
194 _private: Private,
195 absent_mutator: <[u8] as ProxiedWithPresence>::AbsentMutData<'_>,
196 ) -> <[u8] as ProxiedWithPresence>::PresentMutData<'_> {
197 // SAFETY: this is a `bytes` field with no restriction on UTF-8.
198 unsafe { absent_mutator.set(self) }
199 }
200
201 fn set_on_present(
202 self,
203 _private: Private,
204 present_mutator: <[u8] as ProxiedWithPresence>::PresentMutData<'_>,
205 ) {
206 // SAFETY: this is a `bytes` field with no restriction on UTF-8.
207 unsafe {
208 present_mutator.set(self);
209 }
Protobuf Team Bota75def62023-07-11 11:11:59 -0700210 }
211}
212
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700213impl<const N: usize> SettableValue<[u8]> for &'_ [u8; N] {
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700214 // forward to `self[..]`
215 impl_forwarding_settable_value!([u8], self => &self[..]);
Protobuf Team Bota75def62023-07-11 11:11:59 -0700216}
217
218impl SettableValue<[u8]> for Vec<u8> {
Sandy Zhang81068e82023-09-18 15:13:49 -0700219 // TODO: Investigate taking ownership of this when allowed by the
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700220 // runtime.
221 impl_forwarding_settable_value!([u8], self => &self[..]);
Protobuf Team Bota75def62023-07-11 11:11:59 -0700222}
223
224impl SettableValue<[u8]> for Cow<'_, [u8]> {
Sandy Zhang81068e82023-09-18 15:13:49 -0700225 // TODO: Investigate taking ownership of this when allowed by the
Alyssa Haroldsen9a0bc392023-08-23 11:05:31 -0700226 // runtime.
227 impl_forwarding_settable_value!([u8], self => &self[..]);
Protobuf Team Bota75def62023-07-11 11:11:59 -0700228}
229
230impl Hash for BytesMut<'_> {
231 fn hash<H: Hasher>(&self, state: &mut H) {
232 self.deref().hash(state)
233 }
234}
235
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700236impl Eq for BytesMut<'_> {}
237impl<'msg> Ord for BytesMut<'msg> {
238 fn cmp(&self, other: &BytesMut<'msg>) -> Ordering {
239 self.deref().cmp(other.deref())
240 }
241}
242
/// The bytes were not valid UTF-8.
///
/// This is an opaque marker: it carries no detail about where the invalid
/// sequence occurred. It implements [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, PartialEq)]
pub struct Utf8Error(pub(crate) ());

impl fmt::Display for Utf8Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid UTF-8 sequence in protobuf `string` data")
    }
}

impl std::error::Error for Utf8Error {}

impl From<std::str::Utf8Error> for Utf8Error {
    /// Discards the position information of the standard-library error.
    fn from(_: std::str::Utf8Error) -> Utf8Error {
        Utf8Error(())
    }
}
252
/// A shared immutable view of a protobuf `string` field's contents.
///
/// Like a `str`, it can be cheaply accessed as bytes and
/// is [dynamically sized][dst], requiring it be accessed through a pointer.
///
/// # UTF-8 and `&str` access
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// Because of this, in order to access the contents as a `&str`, users must
/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
/// validation check. However, the Rust API only allows `set()`ting a `string`
/// field with data that should be valid UTF-8, like a `&str` or a
/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
/// to prevent UB when interacting with C++, which has looser restrictions.
///
/// Most of the time, users should not perform direct `&str` access to the
/// contents - this type implements `Display` and comparison with `str`,
/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
/// or converting to `&[u8]`.
///
/// # `Display` and `ToString`
/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
/// CHARACTER`]. Because anything implementing `Display` also implements
/// `ToString`, `proto_str.to_string()` is equivalent to
/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
#[repr(transparent)]
pub struct ProtoStr([u8]);
288
289impl ProtoStr {
290 /// Converts `self` to a byte slice.
291 ///
292 /// Note: this type does not implement `Deref`; you must call `as_bytes()`
293 /// or `AsRef<[u8]>` to get access to bytes.
294 pub fn as_bytes(&self) -> &[u8] {
295 &self.0
296 }
297
298 /// Yields a `&str` slice if `self` contains valid UTF-8.
299 ///
300 /// This may perform a runtime check, dependent on runtime.
301 ///
302 /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to
303 /// infallibly construct a string, replacing invalid UTF-8 with
304 /// [`U+FFFD REPLACEMENT CHARACTER`].
305 ///
306 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
307 // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent.
308 pub fn to_str(&self) -> Result<&str, Utf8Error> {
309 Ok(std::str::from_utf8(&self.0)?)
310 }
311
312 /// Converts `self` to a string, including invalid characters.
313 ///
314 /// Invalid UTF-8 sequences are replaced with
315 /// [`U+FFFD REPLACEMENT CHARACTER`].
316 ///
317 /// Users should be prefer this to `.to_string()` provided by `Display`.
318 /// `.to_cow_lossy()` is the same operation, but it may avoid an
319 /// allocation if the string is already UTF-8.
320 ///
321 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
322 //
323 // This method is named `to_string_lossy` in `CStr`, but since `to_string`
324 // also exists on this type, this name was chosen to avoid confusion.
325 pub fn to_cow_lossy(&self) -> Cow<'_, str> {
326 String::from_utf8_lossy(&self.0)
327 }
328
329 /// Returns `true` if `self` has a length of zero bytes.
330 pub fn is_empty(&self) -> bool {
331 self.0.is_empty()
332 }
333
334 /// Returns the length of `self`.
335 ///
336 /// Like `&str`, this is a length in bytes, not `char`s or graphemes.
337 pub fn len(&self) -> usize {
338 self.0.len()
339 }
340
341 /// Iterates over the `char`s in this protobuf `string`.
342 ///
343 /// Invalid UTF-8 sequences are replaced with
344 /// [`U+FFFD REPLACEMENT CHARACTER`].
345 ///
346 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
347 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
Protobuf Team Bot4b0e7632023-07-26 09:09:44 -0700348 Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
349 let mut yield_replacement_char = !chunk.invalid().is_empty();
350 chunk.valid().chars().chain(iter::from_fn(move || {
351 // Yield a single replacement character for every
352 // non-empty invalid sequence.
353 yield_replacement_char.then(|| {
354 yield_replacement_char = false;
355 char::REPLACEMENT_CHARACTER
356 })
357 }))
358 })
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700359 }
360
361 /// Returns an iterator over chunks of UTF-8 data in the string.
362 ///
363 /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an
Protobuf Team Bot4b0e7632023-07-26 09:09:44 -0700364 /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted
365 /// multiple times in a row for contiguous invalid chunks. Each invalid
366 /// chunk in an `Err` has a maximum length of 3 bytes.
367 pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ {
368 Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| {
369 let valid = chunk.valid();
370 let invalid = chunk.invalid();
371 (!valid.is_empty())
372 .then_some(Ok(valid))
373 .into_iter()
374 .chain((!invalid.is_empty()).then_some(Err(invalid)))
375 })
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700376 }
377
378 /// Converts known-UTF-8 bytes to a `ProtoStr` without a check.
379 ///
380 /// # Safety
381 /// `bytes` must be valid UTF-8 if the current runtime requires it.
382 pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self {
383 // SAFETY:
384 // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same
385 // layout.
386 // - `ProtoStr` has the same pointer metadata and element size as `[u8]`.
387 unsafe { &*(bytes as *const [u8] as *const Self) }
388 }
389
390 /// Interprets a string slice as a `&ProtoStr`.
391 pub fn from_str(string: &str) -> &Self {
392 // SAFETY: `string.as_bytes()` is valid UTF-8.
393 unsafe { Self::from_utf8_unchecked(string.as_bytes()) }
394 }
395}
396
397impl AsRef<[u8]> for ProtoStr {
398 fn as_ref(&self) -> &[u8] {
399 self.as_bytes()
400 }
401}
402
403impl<'msg> From<&'msg ProtoStr> for &'msg [u8] {
404 fn from(val: &'msg ProtoStr) -> &'msg [u8] {
405 val.as_bytes()
406 }
407}
408
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700409impl<'msg> From<&'msg str> for &'msg ProtoStr {
410 fn from(val: &'msg str) -> &'msg ProtoStr {
411 ProtoStr::from_str(val)
412 }
413}
414
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700415impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str {
416 type Error = Utf8Error;
417
418 fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> {
419 val.to_str()
420 }
421}
422
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700423impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr {
424 type Error = Utf8Error;
425
426 fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> {
427 Ok(ProtoStr::from_str(std::str::from_utf8(val)?))
428 }
429}
430
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700431impl fmt::Debug for ProtoStr {
Protobuf Team Bot4b0e7632023-07-26 09:09:44 -0700432 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433 fmt::Debug::fmt(&Utf8Chunks::new(self.as_bytes()).debug(), f)
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700434 }
435}
436
437impl fmt::Display for ProtoStr {
Protobuf Team Bot4b0e7632023-07-26 09:09:44 -0700438 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
439 use std::fmt::Write as _;
440 for chunk in Utf8Chunks::new(self.as_bytes()) {
441 fmt::Display::fmt(chunk.valid(), f)?;
442 if !chunk.invalid().is_empty() {
443 // One invalid chunk is emitted per detected invalid sequence.
444 f.write_char(char::REPLACEMENT_CHARACTER)?;
445 }
446 }
447 Ok(())
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700448 }
449}
450
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700451impl Hash for ProtoStr {
452 fn hash<H: Hasher>(&self, state: &mut H) {
453 self.as_bytes().hash(state)
454 }
455}
456
457impl Eq for ProtoStr {}
458impl Ord for ProtoStr {
459 fn cmp(&self, other: &ProtoStr) -> Ordering {
460 self.as_bytes().cmp(other.as_bytes())
461 }
462}
463
464impl Proxied for ProtoStr {
465 type View<'msg> = &'msg ProtoStr;
466 type Mut<'msg> = ProtoStrMut<'msg>;
467}
468
469impl ProxiedWithPresence for ProtoStr {
470 type PresentMutData<'msg> = StrPresentMutData<'msg>;
471 type AbsentMutData<'msg> = StrAbsentMutData<'msg>;
472
473 fn clear_present_field(present_mutator: Self::PresentMutData<'_>) -> Self::AbsentMutData<'_> {
474 StrAbsentMutData(present_mutator.0.clear())
475 }
476
477 fn set_absent_to_default(absent_mutator: Self::AbsentMutData<'_>) -> Self::PresentMutData<'_> {
478 StrPresentMutData(absent_mutator.0.set_absent_to_default())
479 }
480}
481
482impl<'msg> ViewProxy<'msg> for &'msg ProtoStr {
483 type Proxied = ProtoStr;
484
485 fn as_view(&self) -> &ProtoStr {
486 self
487 }
488
489 fn into_view<'shorter>(self) -> &'shorter ProtoStr
490 where
491 'msg: 'shorter,
492 {
493 self
494 }
495}
496
497/// Non-exported newtype for `ProxiedWithPresence::PresentData`
498#[derive(Debug)]
499pub struct StrPresentMutData<'msg>(BytesPresentMutData<'msg>);
500
501impl<'msg> ViewProxy<'msg> for StrPresentMutData<'msg> {
502 type Proxied = ProtoStr;
503
504 fn as_view(&self) -> View<'_, ProtoStr> {
505 // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does
506 // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these
507 // guards, such as through FFI.
508 unsafe { ProtoStr::from_utf8_unchecked(self.0.as_view()) }
509 }
510
511 fn into_view<'shorter>(self) -> View<'shorter, ProtoStr>
512 where
513 'msg: 'shorter,
514 {
515 // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does
516 // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these
517 // guards, such as through FFI.
518 unsafe { ProtoStr::from_utf8_unchecked(self.0.into_view()) }
519 }
520}
521
522impl<'msg> MutProxy<'msg> for StrPresentMutData<'msg> {
523 fn as_mut(&mut self) -> Mut<'_, ProtoStr> {
524 ProtoStrMut { bytes: self.0.as_mut() }
525 }
526
527 fn into_mut<'shorter>(self) -> Mut<'shorter, ProtoStr>
528 where
529 'msg: 'shorter,
530 {
531 ProtoStrMut { bytes: self.0.into_mut() }
532 }
533}
534
535/// Non-exported newtype for `ProxiedWithPresence::AbsentData`
536#[derive(Debug)]
537pub struct StrAbsentMutData<'msg>(BytesAbsentMutData<'msg>);
538
539impl<'msg> ViewProxy<'msg> for StrAbsentMutData<'msg> {
540 type Proxied = ProtoStr;
541
542 fn as_view(&self) -> View<'_, ProtoStr> {
543 // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does
544 // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these
545 // guards, such as through FFI.
546 unsafe { ProtoStr::from_utf8_unchecked(self.0.as_view()) }
547 }
548
549 fn into_view<'shorter>(self) -> View<'shorter, ProtoStr>
550 where
551 'msg: 'shorter,
552 {
553 // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does
554 // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these
555 // guards, such as through FFI.
556 unsafe { ProtoStr::from_utf8_unchecked(self.0.into_view()) }
557 }
558}
559
560#[derive(Debug)]
561pub struct ProtoStrMut<'msg> {
562 bytes: BytesMut<'msg>,
563}
564
565impl<'msg> ProtoStrMut<'msg> {
566 /// Constructs a new `ProtoStrMut` from its internal, runtime-dependent
567 /// part.
568 #[doc(hidden)]
569 pub fn from_inner(_private: Private, inner: InnerBytesMut<'msg>) -> Self {
570 Self { bytes: BytesMut { inner } }
571 }
572
573 /// Converts a `bytes` `FieldEntry` into a `string` one. Used by gencode.
574 #[doc(hidden)]
575 pub fn field_entry_from_bytes(
576 _private: Private,
577 field_entry: FieldEntry<'_, [u8]>,
578 ) -> FieldEntry<ProtoStr> {
579 match field_entry {
580 Optional::Set(present) => {
581 Optional::Set(PresentField::from_inner(Private, StrPresentMutData(present.inner)))
582 }
583 Optional::Unset(absent) => {
584 Optional::Unset(AbsentField::from_inner(Private, StrAbsentMutData(absent.inner)))
585 }
586 }
587 }
588
589 /// Gets the current value of the field.
590 pub fn get(&self) -> &ProtoStr {
591 self.as_view()
592 }
593
594 /// Sets the string to the given `val`, cloning any borrowed data.
595 ///
596 /// This method accepts both owned and borrowed strings; if the runtime
597 /// supports it, an owned value will not reallocate when setting the
598 /// string.
599 pub fn set(&mut self, val: impl SettableValue<ProtoStr>) {
600 val.set_on(Private, MutProxy::as_mut(self))
601 }
602
603 /// Truncates the string.
604 ///
605 /// Has no effect if `new_len` is larger than the current `len`.
606 ///
607 /// If `new_len` does not lie on a UTF-8 `char` boundary, behavior is
608 /// runtime-dependent. If this occurs, the runtime may:
609 ///
610 /// - Panic
611 /// - Truncate the string further to be on a `char` boundary.
612 /// - Truncate to `new_len`, resulting in a `ProtoStr` with a non-UTF8 tail.
613 pub fn truncate(&mut self, new_len: usize) {
614 self.bytes.truncate(new_len)
615 }
616
617 /// Clears the string, setting it to the empty string.
618 ///
619 /// # Compared with `FieldEntry::clear`
620 ///
621 /// Note that this is different than marking an `optional string` field as
622 /// absent; if this cleared `string` is in an `optional`,
623 /// `FieldEntry::is_set` will still return `true` after this method is
624 /// invoked.
625 ///
626 /// This also means that if the field has a non-empty default,
627 /// `ProtoStrMut::clear` results in the accessor returning an empty string
628 /// while `FieldEntry::clear` results in the non-empty default.
629 ///
630 /// However, for a proto3 `string` that has implicit presence, there is no
631 /// distinction between these states: unset `string` is the same as empty
632 /// `string` and the default is always the empty string.
633 ///
634 /// In the C++ API, this is the difference between
635 /// `msg.clear_string_field()`
636 /// and `msg.mutable_string_field()->clear()`.
637 ///
638 /// Having the same name and signature as `FieldEntry::clear` makes code
639 /// that calls `field_mut().clear()` easier to migrate from implicit
640 /// to explicit presence.
641 pub fn clear(&mut self) {
642 self.truncate(0);
643 }
644}
645
646impl Deref for ProtoStrMut<'_> {
647 type Target = ProtoStr;
648 fn deref(&self) -> &ProtoStr {
649 self.as_view()
650 }
651}
652
653impl AsRef<ProtoStr> for ProtoStrMut<'_> {
654 fn as_ref(&self) -> &ProtoStr {
655 self.as_view()
656 }
657}
658
659impl AsRef<[u8]> for ProtoStrMut<'_> {
660 fn as_ref(&self) -> &[u8] {
661 self.as_view().as_bytes()
662 }
663}
664
665impl<'msg> ViewProxy<'msg> for ProtoStrMut<'msg> {
666 type Proxied = ProtoStr;
667
668 fn as_view(&self) -> &ProtoStr {
669 // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does
670 // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these
671 // guards, such as through FFI.
672 unsafe { ProtoStr::from_utf8_unchecked(self.bytes.as_view()) }
673 }
674
675 fn into_view<'shorter>(self) -> &'shorter ProtoStr
676 where
677 'msg: 'shorter,
678 {
679 unsafe { ProtoStr::from_utf8_unchecked(self.bytes.into_view()) }
680 }
681}
682
683impl<'msg> MutProxy<'msg> for ProtoStrMut<'msg> {
684 fn as_mut(&mut self) -> ProtoStrMut<'_> {
685 ProtoStrMut { bytes: BytesMut { inner: self.bytes.inner } }
686 }
687
688 fn into_mut<'shorter>(self) -> ProtoStrMut<'shorter>
689 where
690 'msg: 'shorter,
691 {
692 ProtoStrMut { bytes: BytesMut { inner: self.bytes.inner } }
693 }
694}
695
696impl SettableValue<ProtoStr> for &'_ ProtoStr {
Jakob Buchgraberab11a0d2023-11-27 08:14:21 -0800697 fn set_on<'b>(self, _private: Private, mutator: Mut<'b, ProtoStr>)
698 where
699 ProtoStr: 'b,
700 {
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700701 // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime.
702 unsafe { mutator.bytes.inner.set(self.as_bytes()) }
703 }
704
705 fn set_on_absent(
706 self,
707 _private: Private,
708 absent_mutator: <ProtoStr as ProxiedWithPresence>::AbsentMutData<'_>,
709 ) -> <ProtoStr as ProxiedWithPresence>::PresentMutData<'_> {
710 // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime.
711 StrPresentMutData(unsafe { absent_mutator.0.set(self.as_bytes()) })
712 }
713
714 fn set_on_present(
715 self,
716 _private: Private,
717 present_mutator: <ProtoStr as ProxiedWithPresence>::PresentMutData<'_>,
718 ) {
719 // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime.
720 unsafe {
721 present_mutator.0.set(self.as_bytes());
722 }
723 }
724}
725
726impl SettableValue<ProtoStr> for &'_ str {
727 impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(self));
728}
729
730impl SettableValue<ProtoStr> for String {
Sandy Zhang81068e82023-09-18 15:13:49 -0700731 // TODO: Investigate taking ownership of this when allowed by the
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700732 // runtime.
733 impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(&self));
734}
735
736impl SettableValue<ProtoStr> for Cow<'_, str> {
Sandy Zhang81068e82023-09-18 15:13:49 -0700737 // TODO: Investigate taking ownership of this when allowed by the
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700738 // runtime.
739 impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(&self));
740}
741
742impl Hash for ProtoStrMut<'_> {
743 fn hash<H: Hasher>(&self, state: &mut H) {
744 self.deref().hash(state)
745 }
746}
747
748impl Eq for ProtoStrMut<'_> {}
749impl<'msg> Ord for ProtoStrMut<'msg> {
750 fn cmp(&self, other: &ProtoStrMut<'msg>) -> Ordering {
751 self.deref().cmp(other.deref())
752 }
753}
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700754
/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
/// using `AsRef<[u8]>`.
///
/// Each entry has the form `<(generics)> Lhs => Rhs`; the token trees inside
/// the parentheses become the generic parameter list of both generated
/// impls. Both types must implement `AsRef<[u8]>`, and `Ordering` must be
/// in scope at the call site.
// TODO: consider improving to not require a `<()>` if no generics are
// needed
macro_rules! impl_bytes_partial_cmp {
    ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
        // The transcriber repeats with `+` to match the matcher above.
        $(
            impl<$($generics)*> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
                }
            }
            impl<$($generics)*> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
                }
            }
        )+
    };
}
775
776impl_bytes_partial_cmp!(
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700777 // Should `BytesMut` compare with `str` and `ProtoStr[Mut]` with `[u8]`?
778 // `[u8]` and `str` do not compare with each other in the stdlib.
779
780 // `BytesMut` against protobuf types
Protobuf Team Bota75def62023-07-11 11:11:59 -0700781 <('a, 'b)> BytesMut<'a> => BytesMut<'b>,
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700782
783 // `BytesMut` against foreign types
Protobuf Team Bota75def62023-07-11 11:11:59 -0700784 <('a)> BytesMut<'a> => [u8],
Protobuf Team Bota75def62023-07-11 11:11:59 -0700785 <('a)> [u8] => BytesMut<'a>,
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700786 <('a, const N: usize)> BytesMut<'a> => [u8; N],
Protobuf Team Bota75def62023-07-11 11:11:59 -0700787 <('a, const N: usize)> [u8; N] => BytesMut<'a>,
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700788
789 // `ProtoStr` against protobuf types
790 <()> ProtoStr => ProtoStr,
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700791 <('a)> ProtoStr => ProtoStrMut<'a>,
Protobuf Team Bot8fe9d8e2023-07-13 11:47:47 -0700792
793 // `ProtoStr` against foreign types
794 <()> ProtoStr => str,
795 <()> str => ProtoStr,
796
Alyssa Haroldsen614e29f2023-08-30 12:51:55 -0700797 // `ProtoStrMut` against protobuf types
798 <('a, 'b)> ProtoStrMut<'a> => ProtoStrMut<'b>,
799 <('a)> ProtoStrMut<'a> => ProtoStr,
800
801 // `ProtoStrMut` against foreign types
802 <('a)> ProtoStrMut<'a> => str,
803 <('a)> str => ProtoStrMut<'a>,
Protobuf Team Bota75def62023-07-11 11:11:59 -0700804);
805
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: Add unit tests

    // Test-only shorthand for viewing raw bytes as a `ProtoStr`. Keeps the
    // `unsafe` call in one place so the individual tests stay safe code.
    fn test_proto_str(raw: &[u8]) -> &ProtoStr {
        // SAFETY: The runtime that this test executes under does not elide UTF-8 checks
        // inside of `ProtoStr`.
        unsafe { ProtoStr::from_utf8_unchecked(raw) }
    }

    // UTF-8 test cases copied from:
    // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs

    // `Debug` output is quoted; malformed bytes are rendered as `\xNN` escapes.
    #[test]
    fn proto_str_debug() {
        let well_formed = test_proto_str(b"Hello There");
        assert_eq!(format!("{well_formed:?}"), "\"Hello There\"");

        let malformed = test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa");
        assert_eq!(
            format!("{malformed:?}"),
            "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
        );
    }

    // `Display` output substitutes U+FFFD for each malformed sequence.
    #[test]
    fn proto_str_display() {
        let well_formed = test_proto_str(b"Hello There").to_string();
        assert_eq!(well_formed, "Hello There");

        let malformed =
            test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").to_string();
        assert_eq!(malformed, "Hello�� There� Goodbye\u{10d4ea}");
    }

    // `to_str` hands back the text unchanged for valid UTF-8 and an error otherwise.
    #[test]
    fn proto_str_to_rust_str() {
        for valid in ["hello", "ศไทย中华Việt Nam"] {
            assert_eq!(test_proto_str(valid.as_bytes()).to_str(), Ok(valid));
        }

        let malformed_inputs: [&[u8]; 7] = [
            b"Hello\xC2 There\xFF Goodbye",
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            b"\xF5foo\xF5\x80bar",
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
        ];
        for input in malformed_inputs {
            assert_eq!(test_proto_str(input).to_str(), Err(Utf8Error(())), "{input:?}");
        }
    }

    // `to_cow_lossy` yields the original text for valid UTF-8, and an owned,
    // U+FFFD-patched copy for anything malformed.
    #[test]
    fn proto_str_to_cow() {
        for valid in ["hello", "ศไทย中华Việt Nam"] {
            assert_eq!(test_proto_str(valid.as_bytes()).to_cow_lossy(), Cow::Borrowed(valid));
        }

        let lossy_cases: [(&[u8], &str); 7] = [
            (b"Hello\xC2 There\xFF Goodbye", "Hello� There� Goodbye"),
            (b"Hello\xC0\x80 There\xE6\x83 Goodbye", "Hello�� There� Goodbye"),
            (b"\xF5foo\xF5\x80bar", "�foo��bar"),
            (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "�foo�bar�baz"),
            (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "�foo�bar��baz"),
            (b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", "����foo\u{10000}bar"),
            (b"\xED\xA0\x80foo\xED\xBF\xBFbar", "���foo���bar"),
        ];
        for (input, expected) in lossy_cases {
            let lossy = test_proto_str(input).to_cow_lossy();
            assert!(matches!(lossy, Cow::Owned(_)));
            assert_eq!(&*lossy, expected, "{input:?}");
        }
    }

    // `utf8_chunks` alternates `Ok` runs of valid text with `Err` runs of the
    // offending bytes.
    #[test]
    fn proto_str_utf8_chunks() {
        // Checks that chunking `bytes` produces exactly `expected`, in order.
        // `#[track_caller]` makes a failure point at the calling line.
        #[track_caller]
        fn assert_chunks(bytes: &[u8], expected: &[Result<&str, &[u8]>]) {
            let actual = test_proto_str(bytes).utf8_chunks();
            assert!(actual.eq(expected.iter().copied()), "{bytes:?} -> {expected:?}");
        }

        assert_chunks(b"hello", &[Ok("hello")]);
        assert_chunks("ศไทย中华Việt Nam".as_bytes(), &[Ok("ศไทย中华Việt Nam")]);
        assert_chunks(
            b"Hello\xC2 There\xFF Goodbye",
            &[Ok("Hello"), Err(b"\xC2"), Ok(" There"), Err(b"\xFF"), Ok(" Goodbye")],
        );
        assert_chunks(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            &[
                Ok("Hello"),
                Err(b"\xC0"),
                Err(b"\x80"),
                Ok(" There"),
                Err(b"\xE6\x83"),
                Ok(" Goodbye"),
            ],
        );
        assert_chunks(
            b"\xF5foo\xF5\x80bar",
            &[Err(b"\xF5"), Ok("foo"), Err(b"\xF5"), Err(b"\x80"), Ok("bar")],
        );
        assert_chunks(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            &[
                Err(b"\xF1"),
                Ok("foo"),
                Err(b"\xF1\x80"),
                Ok("bar"),
                Err(b"\xF1\x80\x80"),
                Ok("baz"),
            ],
        );
        assert_chunks(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            &[
                Err(b"\xF4"),
                Ok("foo"),
                Err(b"\xF4\x80"),
                Ok("bar"),
                Err(b"\xF4"),
                Err(b"\xBF"),
                Ok("baz"),
            ],
        );
        assert_chunks(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            &[Err(b"\xF0"), Err(b"\x80"), Err(b"\x80"), Err(b"\x80"), Ok("foo\u{10000}bar")],
        );
        assert_chunks(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            &[
                Err(b"\xED"),
                Err(b"\xA0"),
                Err(b"\x80"),
                Ok("foo"),
                Err(b"\xED"),
                Err(b"\xBF"),
                Err(b"\xBF"),
                Ok("bar"),
            ],
        );
    }

    // `chars` yields the decoded characters, with U+FFFD standing in for each
    // malformed sequence.
    #[test]
    fn proto_str_chars() {
        // Checks that iterating `bytes` by character produces exactly `expected`.
        // `#[track_caller]` makes a failure point at the calling line.
        #[track_caller]
        fn assert_chars(bytes: &[u8], expected: &[char]) {
            let actual = test_proto_str(bytes).chars();
            assert!(actual.eq(expected.iter().copied()), "{bytes:?} -> {expected:?}");
        }

        assert_chars(b"hello", &['h', 'e', 'l', 'l', 'o']);
        assert_chars(
            "ศไทย中华Việt Nam".as_bytes(),
            &['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm'],
        );
        assert_chars(
            b"Hello\xC2 There\xFF Goodbye",
            &[
                'H', 'e', 'l', 'l', 'o', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', 'o',
                'o', 'd', 'b', 'y', 'e',
            ],
        );
        assert_chars(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            &[
                'H', 'e', 'l', 'l', 'o', '�', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G',
                'o', 'o', 'd', 'b', 'y', 'e',
            ],
        );
        assert_chars(b"\xF5foo\xF5\x80bar", &['�', 'f', 'o', 'o', '�', '�', 'b', 'a', 'r']);
        assert_chars(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            &['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', 'b', 'a', 'z'],
        );
        assert_chars(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            &['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', '�', 'b', 'a', 'z'],
        );
        assert_chars(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            &['�', '�', '�', '�', 'f', 'o', 'o', '\u{10000}', 'b', 'a', 'r'],
        );
        assert_chars(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            &['�', '�', '�', 'f', 'o', 'o', '�', '�', '�', 'b', 'a', 'r'],
        );
    }
}