Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 1 | // Protocol Buffers - Google's data interchange format |
| 2 | // Copyright 2023 Google LLC. All rights reserved. |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 3 | // |
Joshua Haberman | 4a51303 | 2023-09-08 17:12:50 -0700 | [diff] [blame] | 4 | // Use of this source code is governed by a BSD-style |
| 5 | // license that can be found in the LICENSE file or at |
| 6 | // https://developers.google.com/open-source/licenses/bsd |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 7 | |
| 8 | //! Items specific to `bytes` and `string` fields. |
| 9 | #![allow(dead_code)] |
| 10 | #![allow(unused)] |
| 11 | |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 12 | use crate::__internal::{Private, PtrAndLen, RawMessage}; |
| 13 | use crate::__runtime::{BytesAbsentMutData, BytesPresentMutData, InnerBytesMut}; |
Alyssa Haroldsen | d5d207f | 2023-08-24 13:39:23 -0700 | [diff] [blame] | 14 | use crate::macros::impl_forwarding_settable_value; |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 15 | use crate::{ |
| 16 | AbsentField, FieldEntry, Mut, MutProxy, Optional, PresentField, Proxied, ProxiedWithPresence, |
| 17 | SettableValue, View, ViewProxy, |
| 18 | }; |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 19 | use std::borrow::Cow; |
| 20 | use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}; |
| 21 | use std::convert::{AsMut, AsRef}; |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 22 | use std::fmt; |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 23 | use std::hash::{Hash, Hasher}; |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 24 | use std::iter; |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 25 | use std::ops::{Deref, DerefMut}; |
Protobuf Team Bot | 4b0e763 | 2023-07-26 09:09:44 -0700 | [diff] [blame] | 26 | use utf8::Utf8Chunks; |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 27 | |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 28 | /// A mutator for `bytes` fields - this type is `protobuf::Mut<'msg, [u8]>`. |
| 29 | /// |
Protobuf Team Bot | 230a9a7 | 2023-07-12 14:34:42 -0700 | [diff] [blame] | 30 | /// This type implements `Deref<Target = [u8]>`, so many operations are |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 31 | /// provided through that, including indexing and slicing. |
| 32 | /// |
Protobuf Team Bot | 230a9a7 | 2023-07-12 14:34:42 -0700 | [diff] [blame] | 33 | /// Conceptually, this type is like a `&'msg mut &'msg str`, though the actual |
| 34 | /// implementation is dependent on runtime and `'msg` is covariant. |
| 35 | /// |
| 36 | /// Unlike `Vec<u8>`, this type has no in-place concatenation functions like |
| 37 | /// `extend_from_slice`. |
| 38 | /// |
| 39 | /// `BytesMut` is not intended to be grown and reallocated like a `Vec`. It's |
| 40 | /// recommended to instead build a `Vec<u8>` or `String` and pass that directly |
| 41 | /// to `set`, which will reuse the allocation if supported by the runtime. |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 42 | #[derive(Debug)] |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 43 | pub struct BytesMut<'msg> { |
| 44 | inner: InnerBytesMut<'msg>, |
| 45 | } |
| 46 | |
| 47 | // SAFETY: |
| 48 | // - Protobuf Rust messages don't allow shared mutation across threads. |
| 49 | // - Protobuf Rust messages don't share arenas. |
| 50 | // - All access that touches an arena occurs behind a `&mut`. |
| 51 | // - All mutators that store an arena are `!Send`. |
| 52 | unsafe impl Sync for BytesMut<'_> {} |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 53 | |
| 54 | impl<'msg> BytesMut<'msg> { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 55 | /// Constructs a new `BytesMut` from its internal, runtime-dependent part. |
| 56 | #[doc(hidden)] |
| 57 | pub fn from_inner(_private: Private, inner: InnerBytesMut<'msg>) -> Self { |
| 58 | Self { inner } |
| 59 | } |
| 60 | |
| 61 | /// Gets the current value of the field. |
| 62 | pub fn get(&self) -> &[u8] { |
| 63 | self.as_view() |
| 64 | } |
| 65 | |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 66 | /// Sets the byte string to the given `val`, cloning any borrowed data. |
| 67 | /// |
| 68 | /// This method accepts both owned and borrowed byte strings; if the runtime |
| 69 | /// supports it, an owned value will not reallocate when setting the |
| 70 | /// string. |
| 71 | pub fn set(&mut self, val: impl SettableValue<[u8]>) { |
| 72 | val.set_on(Private, MutProxy::as_mut(self)) |
| 73 | } |
| 74 | |
Protobuf Team Bot | 230a9a7 | 2023-07-12 14:34:42 -0700 | [diff] [blame] | 75 | /// Truncates the byte string. |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 76 | /// |
| 77 | /// Has no effect if `new_len` is larger than the current `len`. |
| 78 | pub fn truncate(&mut self, new_len: usize) { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 79 | self.inner.truncate(new_len) |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 80 | } |
| 81 | |
| 82 | /// Clears the byte string to the empty string. |
| 83 | /// |
| 84 | /// # Compared with `FieldEntry::clear` |
| 85 | /// |
| 86 | /// Note that this is different than marking an `optional bytes` field as |
| 87 | /// absent; if these `bytes` are in an `optional`, `FieldEntry::is_set` |
| 88 | /// will still return `true` after this method is invoked. |
| 89 | /// |
| 90 | /// This also means that if the field has a non-empty default, |
| 91 | /// `BytesMut::clear` results in the accessor returning an empty string |
| 92 | /// while `FieldEntry::clear` results in the non-empty default. |
| 93 | /// |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 94 | /// However, for a proto3 `bytes` that has implicit presence, there is no |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 95 | /// distinction between these states: unset `bytes` is the same as empty |
| 96 | /// `bytes` and the default is always the empty string. |
| 97 | /// |
| 98 | /// In the C++ API, this is the difference between `msg.clear_bytes_field()` |
| 99 | /// and `msg.mutable_bytes_field()->clear()`. |
| 100 | /// |
| 101 | /// Having the same name and signature as `FieldEntry::clear` makes code |
| 102 | /// that calls `field_mut().clear()` easier to migrate from implicit |
| 103 | /// to explicit presence. |
| 104 | pub fn clear(&mut self) { |
| 105 | self.truncate(0); |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | impl Deref for BytesMut<'_> { |
| 110 | type Target = [u8]; |
| 111 | fn deref(&self) -> &[u8] { |
| 112 | self.as_ref() |
| 113 | } |
| 114 | } |
| 115 | |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 116 | impl AsRef<[u8]> for BytesMut<'_> { |
| 117 | fn as_ref(&self) -> &[u8] { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 118 | unsafe { self.inner.get() } |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 119 | } |
| 120 | } |
| 121 | |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 122 | impl Proxied for [u8] { |
| 123 | type View<'msg> = &'msg [u8]; |
| 124 | type Mut<'msg> = BytesMut<'msg>; |
| 125 | } |
| 126 | |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 127 | impl ProxiedWithPresence for [u8] { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 128 | type PresentMutData<'msg> = BytesPresentMutData<'msg>; |
| 129 | type AbsentMutData<'msg> = BytesAbsentMutData<'msg>; |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 130 | |
Alyssa Haroldsen | 1dd6a7d | 2023-12-11 11:02:23 -0800 | [diff] [blame] | 131 | fn clear_present_field(present_mutator: Self::PresentMutData<'_>) -> Self::AbsentMutData<'_> { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 132 | present_mutator.clear() |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 133 | } |
| 134 | |
Alyssa Haroldsen | 1dd6a7d | 2023-12-11 11:02:23 -0800 | [diff] [blame] | 135 | fn set_absent_to_default(absent_mutator: Self::AbsentMutData<'_>) -> Self::PresentMutData<'_> { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 136 | absent_mutator.set_absent_to_default() |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 137 | } |
| 138 | } |
| 139 | |
| 140 | impl<'msg> ViewProxy<'msg> for &'msg [u8] { |
| 141 | type Proxied = [u8]; |
| 142 | |
| 143 | fn as_view(&self) -> &[u8] { |
| 144 | self |
| 145 | } |
| 146 | |
| 147 | fn into_view<'shorter>(self) -> &'shorter [u8] |
| 148 | where |
| 149 | 'msg: 'shorter, |
| 150 | { |
| 151 | self |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | impl<'msg> ViewProxy<'msg> for BytesMut<'msg> { |
| 156 | type Proxied = [u8]; |
| 157 | |
| 158 | fn as_view(&self) -> &[u8] { |
| 159 | self.as_ref() |
| 160 | } |
| 161 | |
| 162 | fn into_view<'shorter>(self) -> &'shorter [u8] |
| 163 | where |
| 164 | 'msg: 'shorter, |
| 165 | { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 166 | self.inner.get() |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 167 | } |
| 168 | } |
| 169 | |
| 170 | impl<'msg> MutProxy<'msg> for BytesMut<'msg> { |
| 171 | fn as_mut(&mut self) -> BytesMut<'_> { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 172 | BytesMut { inner: self.inner } |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 173 | } |
| 174 | |
| 175 | fn into_mut<'shorter>(self) -> BytesMut<'shorter> |
| 176 | where |
| 177 | 'msg: 'shorter, |
| 178 | { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 179 | BytesMut { inner: self.inner } |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 180 | } |
| 181 | } |
| 182 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 183 | impl SettableValue<[u8]> for &'_ [u8] { |
Alyssa Haroldsen | 1dd6a7d | 2023-12-11 11:02:23 -0800 | [diff] [blame] | 184 | fn set_on<'msg>(self, _private: Private, mutator: Mut<'msg, [u8]>) |
Jakob Buchgraber | ab11a0d | 2023-11-27 08:14:21 -0800 | [diff] [blame] | 185 | where |
Alyssa Haroldsen | 1dd6a7d | 2023-12-11 11:02:23 -0800 | [diff] [blame] | 186 | [u8]: 'msg, |
Jakob Buchgraber | ab11a0d | 2023-11-27 08:14:21 -0800 | [diff] [blame] | 187 | { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 188 | // SAFETY: this is a `bytes` field with no restriction on UTF-8. |
| 189 | unsafe { mutator.inner.set(self) } |
| 190 | } |
| 191 | |
| 192 | fn set_on_absent( |
| 193 | self, |
| 194 | _private: Private, |
| 195 | absent_mutator: <[u8] as ProxiedWithPresence>::AbsentMutData<'_>, |
| 196 | ) -> <[u8] as ProxiedWithPresence>::PresentMutData<'_> { |
| 197 | // SAFETY: this is a `bytes` field with no restriction on UTF-8. |
| 198 | unsafe { absent_mutator.set(self) } |
| 199 | } |
| 200 | |
| 201 | fn set_on_present( |
| 202 | self, |
| 203 | _private: Private, |
| 204 | present_mutator: <[u8] as ProxiedWithPresence>::PresentMutData<'_>, |
| 205 | ) { |
| 206 | // SAFETY: this is a `bytes` field with no restriction on UTF-8. |
| 207 | unsafe { |
| 208 | present_mutator.set(self); |
| 209 | } |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 210 | } |
| 211 | } |
| 212 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 213 | impl<const N: usize> SettableValue<[u8]> for &'_ [u8; N] { |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 214 | // forward to `self[..]` |
| 215 | impl_forwarding_settable_value!([u8], self => &self[..]); |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 216 | } |
| 217 | |
| 218 | impl SettableValue<[u8]> for Vec<u8> { |
Sandy Zhang | 81068e8 | 2023-09-18 15:13:49 -0700 | [diff] [blame] | 219 | // TODO: Investigate taking ownership of this when allowed by the |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 220 | // runtime. |
| 221 | impl_forwarding_settable_value!([u8], self => &self[..]); |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 222 | } |
| 223 | |
| 224 | impl SettableValue<[u8]> for Cow<'_, [u8]> { |
Sandy Zhang | 81068e8 | 2023-09-18 15:13:49 -0700 | [diff] [blame] | 225 | // TODO: Investigate taking ownership of this when allowed by the |
Alyssa Haroldsen | 9a0bc39 | 2023-08-23 11:05:31 -0700 | [diff] [blame] | 226 | // runtime. |
| 227 | impl_forwarding_settable_value!([u8], self => &self[..]); |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 228 | } |
| 229 | |
| 230 | impl Hash for BytesMut<'_> { |
| 231 | fn hash<H: Hasher>(&self, state: &mut H) { |
| 232 | self.deref().hash(state) |
| 233 | } |
| 234 | } |
| 235 | |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 236 | impl Eq for BytesMut<'_> {} |
| 237 | impl<'msg> Ord for BytesMut<'msg> { |
| 238 | fn cmp(&self, other: &BytesMut<'msg>) -> Ordering { |
| 239 | self.deref().cmp(other.deref()) |
| 240 | } |
| 241 | } |
| 242 | |
/// The bytes were not valid UTF-8.
///
/// This is an opaque marker: it carries no detail about where in the input
/// the invalid sequence was found.
#[derive(Debug, PartialEq)]
pub struct Utf8Error(pub(crate) ());

/// Converts the standard library's UTF-8 error, discarding its position
/// information.
impl From<std::str::Utf8Error> for Utf8Error {
    fn from(_: std::str::Utf8Error) -> Utf8Error {
        Utf8Error(())
    }
}

/// Human-readable description, required for `std::error::Error`.
impl fmt::Display for Utf8Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("the bytes were not valid UTF-8")
    }
}

/// Lets `Utf8Error` be boxed as `dyn Error` and propagated with `?` into
/// generic error types.
impl std::error::Error for Utf8Error {}
| 252 | |
/// A shared immutable view of a protobuf `string` field's contents.
///
/// Like a `str`, it can be cheaply accessed as bytes and
/// is [dynamically sized][dst], requiring it be accessed through a pointer.
///
/// # UTF-8 and `&str` access
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// Because of this, in order to access the contents as a `&str`, users must
/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
/// validation check. However, the Rust API only allows `set()`ting a `string`
/// field with data that should be valid UTF-8, like a `&str` or a
/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
/// to prevent UB when interacting with C++, which has looser restrictions.
///
/// Most of the time, users should not perform direct `&str` access to the
/// contents - this type implements `Display` and comparison with `str`,
/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
/// or converting to `&[u8]`.
///
/// # `Display` and `ToString`
/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
/// CHARACTER`]. Because anything implementing `Display` also implements
/// `ToString`, `proto_str.to_string()` is equivalent to
/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
// `repr(transparent)` gives `ProtoStr` the same layout as `[u8]`, which the
// pointer cast in `ProtoStr::from_utf8_unchecked` relies on.
#[repr(transparent)]
pub struct ProtoStr([u8]);
| 288 | |
| 289 | impl ProtoStr { |
| 290 | /// Converts `self` to a byte slice. |
| 291 | /// |
| 292 | /// Note: this type does not implement `Deref`; you must call `as_bytes()` |
| 293 | /// or `AsRef<[u8]>` to get access to bytes. |
| 294 | pub fn as_bytes(&self) -> &[u8] { |
| 295 | &self.0 |
| 296 | } |
| 297 | |
| 298 | /// Yields a `&str` slice if `self` contains valid UTF-8. |
| 299 | /// |
| 300 | /// This may perform a runtime check, dependent on runtime. |
| 301 | /// |
| 302 | /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to |
| 303 | /// infallibly construct a string, replacing invalid UTF-8 with |
| 304 | /// [`U+FFFD REPLACEMENT CHARACTER`]. |
| 305 | /// |
| 306 | /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER |
| 307 | // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent. |
| 308 | pub fn to_str(&self) -> Result<&str, Utf8Error> { |
| 309 | Ok(std::str::from_utf8(&self.0)?) |
| 310 | } |
| 311 | |
| 312 | /// Converts `self` to a string, including invalid characters. |
| 313 | /// |
| 314 | /// Invalid UTF-8 sequences are replaced with |
| 315 | /// [`U+FFFD REPLACEMENT CHARACTER`]. |
| 316 | /// |
| 317 | /// Users should be prefer this to `.to_string()` provided by `Display`. |
| 318 | /// `.to_cow_lossy()` is the same operation, but it may avoid an |
| 319 | /// allocation if the string is already UTF-8. |
| 320 | /// |
| 321 | /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER |
| 322 | // |
| 323 | // This method is named `to_string_lossy` in `CStr`, but since `to_string` |
| 324 | // also exists on this type, this name was chosen to avoid confusion. |
| 325 | pub fn to_cow_lossy(&self) -> Cow<'_, str> { |
| 326 | String::from_utf8_lossy(&self.0) |
| 327 | } |
| 328 | |
| 329 | /// Returns `true` if `self` has a length of zero bytes. |
| 330 | pub fn is_empty(&self) -> bool { |
| 331 | self.0.is_empty() |
| 332 | } |
| 333 | |
| 334 | /// Returns the length of `self`. |
| 335 | /// |
| 336 | /// Like `&str`, this is a length in bytes, not `char`s or graphemes. |
| 337 | pub fn len(&self) -> usize { |
| 338 | self.0.len() |
| 339 | } |
| 340 | |
| 341 | /// Iterates over the `char`s in this protobuf `string`. |
| 342 | /// |
| 343 | /// Invalid UTF-8 sequences are replaced with |
| 344 | /// [`U+FFFD REPLACEMENT CHARACTER`]. |
| 345 | /// |
| 346 | /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER |
| 347 | pub fn chars(&self) -> impl Iterator<Item = char> + '_ { |
Protobuf Team Bot | 4b0e763 | 2023-07-26 09:09:44 -0700 | [diff] [blame] | 348 | Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { |
| 349 | let mut yield_replacement_char = !chunk.invalid().is_empty(); |
| 350 | chunk.valid().chars().chain(iter::from_fn(move || { |
| 351 | // Yield a single replacement character for every |
| 352 | // non-empty invalid sequence. |
| 353 | yield_replacement_char.then(|| { |
| 354 | yield_replacement_char = false; |
| 355 | char::REPLACEMENT_CHARACTER |
| 356 | }) |
| 357 | })) |
| 358 | }) |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 359 | } |
| 360 | |
| 361 | /// Returns an iterator over chunks of UTF-8 data in the string. |
| 362 | /// |
| 363 | /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an |
Protobuf Team Bot | 4b0e763 | 2023-07-26 09:09:44 -0700 | [diff] [blame] | 364 | /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted |
| 365 | /// multiple times in a row for contiguous invalid chunks. Each invalid |
| 366 | /// chunk in an `Err` has a maximum length of 3 bytes. |
| 367 | pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ { |
| 368 | Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { |
| 369 | let valid = chunk.valid(); |
| 370 | let invalid = chunk.invalid(); |
| 371 | (!valid.is_empty()) |
| 372 | .then_some(Ok(valid)) |
| 373 | .into_iter() |
| 374 | .chain((!invalid.is_empty()).then_some(Err(invalid))) |
| 375 | }) |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 376 | } |
| 377 | |
| 378 | /// Converts known-UTF-8 bytes to a `ProtoStr` without a check. |
| 379 | /// |
| 380 | /// # Safety |
| 381 | /// `bytes` must be valid UTF-8 if the current runtime requires it. |
| 382 | pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self { |
| 383 | // SAFETY: |
| 384 | // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same |
| 385 | // layout. |
| 386 | // - `ProtoStr` has the same pointer metadata and element size as `[u8]`. |
| 387 | unsafe { &*(bytes as *const [u8] as *const Self) } |
| 388 | } |
| 389 | |
| 390 | /// Interprets a string slice as a `&ProtoStr`. |
| 391 | pub fn from_str(string: &str) -> &Self { |
| 392 | // SAFETY: `string.as_bytes()` is valid UTF-8. |
| 393 | unsafe { Self::from_utf8_unchecked(string.as_bytes()) } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | impl AsRef<[u8]> for ProtoStr { |
| 398 | fn as_ref(&self) -> &[u8] { |
| 399 | self.as_bytes() |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | impl<'msg> From<&'msg ProtoStr> for &'msg [u8] { |
| 404 | fn from(val: &'msg ProtoStr) -> &'msg [u8] { |
| 405 | val.as_bytes() |
| 406 | } |
| 407 | } |
| 408 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 409 | impl<'msg> From<&'msg str> for &'msg ProtoStr { |
| 410 | fn from(val: &'msg str) -> &'msg ProtoStr { |
| 411 | ProtoStr::from_str(val) |
| 412 | } |
| 413 | } |
| 414 | |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 415 | impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str { |
| 416 | type Error = Utf8Error; |
| 417 | |
| 418 | fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> { |
| 419 | val.to_str() |
| 420 | } |
| 421 | } |
| 422 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 423 | impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr { |
| 424 | type Error = Utf8Error; |
| 425 | |
| 426 | fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> { |
| 427 | Ok(ProtoStr::from_str(std::str::from_utf8(val)?)) |
| 428 | } |
| 429 | } |
| 430 | |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 431 | impl fmt::Debug for ProtoStr { |
Protobuf Team Bot | 4b0e763 | 2023-07-26 09:09:44 -0700 | [diff] [blame] | 432 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 433 | fmt::Debug::fmt(&Utf8Chunks::new(self.as_bytes()).debug(), f) |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 434 | } |
| 435 | } |
| 436 | |
| 437 | impl fmt::Display for ProtoStr { |
Protobuf Team Bot | 4b0e763 | 2023-07-26 09:09:44 -0700 | [diff] [blame] | 438 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 439 | use std::fmt::Write as _; |
| 440 | for chunk in Utf8Chunks::new(self.as_bytes()) { |
| 441 | fmt::Display::fmt(chunk.valid(), f)?; |
| 442 | if !chunk.invalid().is_empty() { |
| 443 | // One invalid chunk is emitted per detected invalid sequence. |
| 444 | f.write_char(char::REPLACEMENT_CHARACTER)?; |
| 445 | } |
| 446 | } |
| 447 | Ok(()) |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 448 | } |
| 449 | } |
| 450 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 451 | impl Hash for ProtoStr { |
| 452 | fn hash<H: Hasher>(&self, state: &mut H) { |
| 453 | self.as_bytes().hash(state) |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | impl Eq for ProtoStr {} |
| 458 | impl Ord for ProtoStr { |
| 459 | fn cmp(&self, other: &ProtoStr) -> Ordering { |
| 460 | self.as_bytes().cmp(other.as_bytes()) |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | impl Proxied for ProtoStr { |
| 465 | type View<'msg> = &'msg ProtoStr; |
| 466 | type Mut<'msg> = ProtoStrMut<'msg>; |
| 467 | } |
| 468 | |
| 469 | impl ProxiedWithPresence for ProtoStr { |
| 470 | type PresentMutData<'msg> = StrPresentMutData<'msg>; |
| 471 | type AbsentMutData<'msg> = StrAbsentMutData<'msg>; |
| 472 | |
| 473 | fn clear_present_field(present_mutator: Self::PresentMutData<'_>) -> Self::AbsentMutData<'_> { |
| 474 | StrAbsentMutData(present_mutator.0.clear()) |
| 475 | } |
| 476 | |
| 477 | fn set_absent_to_default(absent_mutator: Self::AbsentMutData<'_>) -> Self::PresentMutData<'_> { |
| 478 | StrPresentMutData(absent_mutator.0.set_absent_to_default()) |
| 479 | } |
| 480 | } |
| 481 | |
| 482 | impl<'msg> ViewProxy<'msg> for &'msg ProtoStr { |
| 483 | type Proxied = ProtoStr; |
| 484 | |
| 485 | fn as_view(&self) -> &ProtoStr { |
| 486 | self |
| 487 | } |
| 488 | |
| 489 | fn into_view<'shorter>(self) -> &'shorter ProtoStr |
| 490 | where |
| 491 | 'msg: 'shorter, |
| 492 | { |
| 493 | self |
| 494 | } |
| 495 | } |
| 496 | |
| 497 | /// Non-exported newtype for `ProxiedWithPresence::PresentData` |
| 498 | #[derive(Debug)] |
| 499 | pub struct StrPresentMutData<'msg>(BytesPresentMutData<'msg>); |
| 500 | |
| 501 | impl<'msg> ViewProxy<'msg> for StrPresentMutData<'msg> { |
| 502 | type Proxied = ProtoStr; |
| 503 | |
| 504 | fn as_view(&self) -> View<'_, ProtoStr> { |
| 505 | // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does |
| 506 | // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these |
| 507 | // guards, such as through FFI. |
| 508 | unsafe { ProtoStr::from_utf8_unchecked(self.0.as_view()) } |
| 509 | } |
| 510 | |
| 511 | fn into_view<'shorter>(self) -> View<'shorter, ProtoStr> |
| 512 | where |
| 513 | 'msg: 'shorter, |
| 514 | { |
| 515 | // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does |
| 516 | // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these |
| 517 | // guards, such as through FFI. |
| 518 | unsafe { ProtoStr::from_utf8_unchecked(self.0.into_view()) } |
| 519 | } |
| 520 | } |
| 521 | |
| 522 | impl<'msg> MutProxy<'msg> for StrPresentMutData<'msg> { |
| 523 | fn as_mut(&mut self) -> Mut<'_, ProtoStr> { |
| 524 | ProtoStrMut { bytes: self.0.as_mut() } |
| 525 | } |
| 526 | |
| 527 | fn into_mut<'shorter>(self) -> Mut<'shorter, ProtoStr> |
| 528 | where |
| 529 | 'msg: 'shorter, |
| 530 | { |
| 531 | ProtoStrMut { bytes: self.0.into_mut() } |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | /// Non-exported newtype for `ProxiedWithPresence::AbsentData` |
| 536 | #[derive(Debug)] |
| 537 | pub struct StrAbsentMutData<'msg>(BytesAbsentMutData<'msg>); |
| 538 | |
| 539 | impl<'msg> ViewProxy<'msg> for StrAbsentMutData<'msg> { |
| 540 | type Proxied = ProtoStr; |
| 541 | |
| 542 | fn as_view(&self) -> View<'_, ProtoStr> { |
| 543 | // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does |
| 544 | // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these |
| 545 | // guards, such as through FFI. |
| 546 | unsafe { ProtoStr::from_utf8_unchecked(self.0.as_view()) } |
| 547 | } |
| 548 | |
| 549 | fn into_view<'shorter>(self) -> View<'shorter, ProtoStr> |
| 550 | where |
| 551 | 'msg: 'shorter, |
| 552 | { |
| 553 | // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does |
| 554 | // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these |
| 555 | // guards, such as through FFI. |
| 556 | unsafe { ProtoStr::from_utf8_unchecked(self.0.into_view()) } |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | #[derive(Debug)] |
| 561 | pub struct ProtoStrMut<'msg> { |
| 562 | bytes: BytesMut<'msg>, |
| 563 | } |
| 564 | |
| 565 | impl<'msg> ProtoStrMut<'msg> { |
| 566 | /// Constructs a new `ProtoStrMut` from its internal, runtime-dependent |
| 567 | /// part. |
| 568 | #[doc(hidden)] |
| 569 | pub fn from_inner(_private: Private, inner: InnerBytesMut<'msg>) -> Self { |
| 570 | Self { bytes: BytesMut { inner } } |
| 571 | } |
| 572 | |
| 573 | /// Converts a `bytes` `FieldEntry` into a `string` one. Used by gencode. |
| 574 | #[doc(hidden)] |
| 575 | pub fn field_entry_from_bytes( |
| 576 | _private: Private, |
| 577 | field_entry: FieldEntry<'_, [u8]>, |
| 578 | ) -> FieldEntry<ProtoStr> { |
| 579 | match field_entry { |
| 580 | Optional::Set(present) => { |
| 581 | Optional::Set(PresentField::from_inner(Private, StrPresentMutData(present.inner))) |
| 582 | } |
| 583 | Optional::Unset(absent) => { |
| 584 | Optional::Unset(AbsentField::from_inner(Private, StrAbsentMutData(absent.inner))) |
| 585 | } |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | /// Gets the current value of the field. |
| 590 | pub fn get(&self) -> &ProtoStr { |
| 591 | self.as_view() |
| 592 | } |
| 593 | |
| 594 | /// Sets the string to the given `val`, cloning any borrowed data. |
| 595 | /// |
| 596 | /// This method accepts both owned and borrowed strings; if the runtime |
| 597 | /// supports it, an owned value will not reallocate when setting the |
| 598 | /// string. |
| 599 | pub fn set(&mut self, val: impl SettableValue<ProtoStr>) { |
| 600 | val.set_on(Private, MutProxy::as_mut(self)) |
| 601 | } |
| 602 | |
| 603 | /// Truncates the string. |
| 604 | /// |
| 605 | /// Has no effect if `new_len` is larger than the current `len`. |
| 606 | /// |
| 607 | /// If `new_len` does not lie on a UTF-8 `char` boundary, behavior is |
| 608 | /// runtime-dependent. If this occurs, the runtime may: |
| 609 | /// |
| 610 | /// - Panic |
| 611 | /// - Truncate the string further to be on a `char` boundary. |
| 612 | /// - Truncate to `new_len`, resulting in a `ProtoStr` with a non-UTF8 tail. |
| 613 | pub fn truncate(&mut self, new_len: usize) { |
| 614 | self.bytes.truncate(new_len) |
| 615 | } |
| 616 | |
| 617 | /// Clears the string, setting it to the empty string. |
| 618 | /// |
| 619 | /// # Compared with `FieldEntry::clear` |
| 620 | /// |
| 621 | /// Note that this is different than marking an `optional string` field as |
| 622 | /// absent; if this cleared `string` is in an `optional`, |
| 623 | /// `FieldEntry::is_set` will still return `true` after this method is |
| 624 | /// invoked. |
| 625 | /// |
| 626 | /// This also means that if the field has a non-empty default, |
| 627 | /// `ProtoStrMut::clear` results in the accessor returning an empty string |
| 628 | /// while `FieldEntry::clear` results in the non-empty default. |
| 629 | /// |
| 630 | /// However, for a proto3 `string` that has implicit presence, there is no |
| 631 | /// distinction between these states: unset `string` is the same as empty |
| 632 | /// `string` and the default is always the empty string. |
| 633 | /// |
| 634 | /// In the C++ API, this is the difference between |
| 635 | /// `msg.clear_string_field()` |
| 636 | /// and `msg.mutable_string_field()->clear()`. |
| 637 | /// |
| 638 | /// Having the same name and signature as `FieldEntry::clear` makes code |
| 639 | /// that calls `field_mut().clear()` easier to migrate from implicit |
| 640 | /// to explicit presence. |
| 641 | pub fn clear(&mut self) { |
| 642 | self.truncate(0); |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | impl Deref for ProtoStrMut<'_> { |
| 647 | type Target = ProtoStr; |
| 648 | fn deref(&self) -> &ProtoStr { |
| 649 | self.as_view() |
| 650 | } |
| 651 | } |
| 652 | |
| 653 | impl AsRef<ProtoStr> for ProtoStrMut<'_> { |
| 654 | fn as_ref(&self) -> &ProtoStr { |
| 655 | self.as_view() |
| 656 | } |
| 657 | } |
| 658 | |
| 659 | impl AsRef<[u8]> for ProtoStrMut<'_> { |
| 660 | fn as_ref(&self) -> &[u8] { |
| 661 | self.as_view().as_bytes() |
| 662 | } |
| 663 | } |
| 664 | |
| 665 | impl<'msg> ViewProxy<'msg> for ProtoStrMut<'msg> { |
| 666 | type Proxied = ProtoStr; |
| 667 | |
| 668 | fn as_view(&self) -> &ProtoStr { |
| 669 | // SAFETY: The `ProtoStr` API guards against non-UTF-8 data. The runtime does |
| 670 | // not require `ProtoStr` to be UTF-8 if it could be mutated outside of these |
| 671 | // guards, such as through FFI. |
| 672 | unsafe { ProtoStr::from_utf8_unchecked(self.bytes.as_view()) } |
| 673 | } |
| 674 | |
| 675 | fn into_view<'shorter>(self) -> &'shorter ProtoStr |
| 676 | where |
| 677 | 'msg: 'shorter, |
| 678 | { |
| 679 | unsafe { ProtoStr::from_utf8_unchecked(self.bytes.into_view()) } |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | impl<'msg> MutProxy<'msg> for ProtoStrMut<'msg> { |
| 684 | fn as_mut(&mut self) -> ProtoStrMut<'_> { |
| 685 | ProtoStrMut { bytes: BytesMut { inner: self.bytes.inner } } |
| 686 | } |
| 687 | |
| 688 | fn into_mut<'shorter>(self) -> ProtoStrMut<'shorter> |
| 689 | where |
| 690 | 'msg: 'shorter, |
| 691 | { |
| 692 | ProtoStrMut { bytes: BytesMut { inner: self.bytes.inner } } |
| 693 | } |
| 694 | } |
| 695 | |
| 696 | impl SettableValue<ProtoStr> for &'_ ProtoStr { |
Jakob Buchgraber | ab11a0d | 2023-11-27 08:14:21 -0800 | [diff] [blame] | 697 | fn set_on<'b>(self, _private: Private, mutator: Mut<'b, ProtoStr>) |
| 698 | where |
| 699 | ProtoStr: 'b, |
| 700 | { |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 701 | // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime. |
| 702 | unsafe { mutator.bytes.inner.set(self.as_bytes()) } |
| 703 | } |
| 704 | |
| 705 | fn set_on_absent( |
| 706 | self, |
| 707 | _private: Private, |
| 708 | absent_mutator: <ProtoStr as ProxiedWithPresence>::AbsentMutData<'_>, |
| 709 | ) -> <ProtoStr as ProxiedWithPresence>::PresentMutData<'_> { |
| 710 | // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime. |
| 711 | StrPresentMutData(unsafe { absent_mutator.0.set(self.as_bytes()) }) |
| 712 | } |
| 713 | |
| 714 | fn set_on_present( |
| 715 | self, |
| 716 | _private: Private, |
| 717 | present_mutator: <ProtoStr as ProxiedWithPresence>::PresentMutData<'_>, |
| 718 | ) { |
| 719 | // SAFETY: A `ProtoStr` has the same UTF-8 validity requirement as the runtime. |
| 720 | unsafe { |
| 721 | present_mutator.0.set(self.as_bytes()); |
| 722 | } |
| 723 | } |
| 724 | } |
| 725 | |
| 726 | impl SettableValue<ProtoStr> for &'_ str { |
| 727 | impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(self)); |
| 728 | } |
| 729 | |
| 730 | impl SettableValue<ProtoStr> for String { |
Sandy Zhang | 81068e8 | 2023-09-18 15:13:49 -0700 | [diff] [blame] | 731 | // TODO: Investigate taking ownership of this when allowed by the |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 732 | // runtime. |
| 733 | impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(&self)); |
| 734 | } |
| 735 | |
| 736 | impl SettableValue<ProtoStr> for Cow<'_, str> { |
Sandy Zhang | 81068e8 | 2023-09-18 15:13:49 -0700 | [diff] [blame] | 737 | // TODO: Investigate taking ownership of this when allowed by the |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 738 | // runtime. |
| 739 | impl_forwarding_settable_value!(ProtoStr, self => ProtoStr::from_str(&self)); |
| 740 | } |
| 741 | |
| 742 | impl Hash for ProtoStrMut<'_> { |
| 743 | fn hash<H: Hasher>(&self, state: &mut H) { |
| 744 | self.deref().hash(state) |
| 745 | } |
| 746 | } |
| 747 | |
| 748 | impl Eq for ProtoStrMut<'_> {} |
| 749 | impl<'msg> Ord for ProtoStrMut<'msg> { |
| 750 | fn cmp(&self, other: &ProtoStrMut<'msg>) -> Ordering { |
| 751 | self.deref().cmp(other.deref()) |
| 752 | } |
| 753 | } |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 754 | |
/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
/// using `AsRef<[u8]>`.
///
/// Each comma-separated entry has the form `<(generics)> Lhs => Rhs` and
/// expands to `impl<generics> PartialEq<Rhs> for Lhs` plus the matching
/// `PartialOrd` impl. Both sides are converted with `AsRef<[u8]>` and the
/// resulting byte slices are compared, so both types must implement
/// `AsRef<[u8]>`. At least one entry is required; a trailing comma is
/// accepted. `Ordering` must be in scope at the expansion site.
// TODO: consider improving to not require a `<()>` if no generics are
// needed
macro_rules! impl_bytes_partial_cmp {
    ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
        // The transcriber repeats with `+`, mirroring the matcher above.
        $(
            impl<$($generics)*> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
                }
            }
            impl<$($generics)*> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
                }
            }
        )+
    };
}
| 775 | |
| 776 | impl_bytes_partial_cmp!( |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 777 | // Should `BytesMut` compare with `str` and `ProtoStr[Mut]` with `[u8]`? |
| 778 | // `[u8]` and `str` do not compare with each other in the stdlib. |
| 779 | |
| 780 | // `BytesMut` against protobuf types |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 781 | <('a, 'b)> BytesMut<'a> => BytesMut<'b>, |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 782 | |
| 783 | // `BytesMut` against foreign types |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 784 | <('a)> BytesMut<'a> => [u8], |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 785 | <('a)> [u8] => BytesMut<'a>, |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 786 | <('a, const N: usize)> BytesMut<'a> => [u8; N], |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 787 | <('a, const N: usize)> [u8; N] => BytesMut<'a>, |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 788 | |
| 789 | // `ProtoStr` against protobuf types |
| 790 | <()> ProtoStr => ProtoStr, |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 791 | <('a)> ProtoStr => ProtoStrMut<'a>, |
Protobuf Team Bot | 8fe9d8e | 2023-07-13 11:47:47 -0700 | [diff] [blame] | 792 | |
| 793 | // `ProtoStr` against foreign types |
| 794 | <()> ProtoStr => str, |
| 795 | <()> str => ProtoStr, |
| 796 | |
Alyssa Haroldsen | 614e29f | 2023-08-30 12:51:55 -0700 | [diff] [blame] | 797 | // `ProtoStrMut` against protobuf types |
| 798 | <('a, 'b)> ProtoStrMut<'a> => ProtoStrMut<'b>, |
| 799 | <('a)> ProtoStrMut<'a> => ProtoStr, |
| 800 | |
| 801 | // `ProtoStrMut` against foreign types |
| 802 | <('a)> ProtoStrMut<'a> => str, |
| 803 | <('a)> str => ProtoStrMut<'a>, |
Protobuf Team Bot | a75def6 | 2023-07-11 11:11:59 -0700 | [diff] [blame] | 804 | ); |
| 805 | |
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: Add unit tests

    // Test-only shorthand that views arbitrary bytes as a `ProtoStr` so the
    // individual tests do not have to repeat the `unsafe` block.
    fn test_proto_str(bytes: &[u8]) -> &ProtoStr {
        // SAFETY: The runtime that this test executes under does not elide UTF-8 checks
        // inside of `ProtoStr`.
        unsafe { ProtoStr::from_utf8_unchecked(bytes) }
    }

    // UTF-8 test cases copied from:
    // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs

    // `Debug` output escapes invalid bytes as `\xNN` and quotes the string.
    #[test]
    fn proto_str_debug() {
        let valid = test_proto_str(b"Hello There");
        assert_eq!(&format!("{:?}", valid), "\"Hello There\"");

        let mixed = test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa");
        assert_eq!(
            &format!("{:?}", mixed),
            "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
        );
    }

    // `Display` output substitutes U+FFFD for invalid sequences.
    #[test]
    fn proto_str_display() {
        let valid = test_proto_str(b"Hello There");
        assert_eq!(&valid.to_string(), "Hello There");

        let mixed = test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa");
        assert_eq!(&mixed.to_string(), "Hello�� There� Goodbye\u{10d4ea}");
    }

    // `to_str` succeeds only when the whole payload is valid UTF-8.
    #[test]
    fn proto_str_to_rust_str() {
        assert_eq!(test_proto_str(b"hello").to_str(), Ok("hello"));
        assert_eq!(test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_str(), Ok("ศไทย中华Việt Nam"));

        let invalid_inputs = [
            &b"Hello\xC2 There\xFF Goodbye"[..],
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            b"\xF5foo\xF5\x80bar",
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
        ];
        for expect_fail in invalid_inputs {
            assert_eq!(test_proto_str(expect_fail).to_str(), Err(Utf8Error(())), "{expect_fail:?}");
        }
    }

    // `to_cow_lossy` borrows valid input and allocates a repaired copy
    // otherwise.
    #[test]
    fn proto_str_to_cow() {
        assert_eq!(test_proto_str(b"hello").to_cow_lossy(), Cow::Borrowed("hello"));
        assert_eq!(
            test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_cow_lossy(),
            Cow::Borrowed("ศไทย中华Việt Nam")
        );

        let lossy_cases = [
            (&b"Hello\xC2 There\xFF Goodbye"[..], "Hello� There� Goodbye"),
            (b"Hello\xC0\x80 There\xE6\x83 Goodbye", "Hello�� There� Goodbye"),
            (b"\xF5foo\xF5\x80bar", "�foo��bar"),
            (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "�foo�bar�baz"),
            (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "�foo�bar��baz"),
            (b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", "����foo\u{10000}bar"),
            (b"\xED\xA0\x80foo\xED\xBF\xBFbar", "���foo���bar"),
        ];
        for (bytes, lossy_str) in lossy_cases {
            let cow = test_proto_str(bytes).to_cow_lossy();
            // Invalid input can never be returned as a borrow.
            assert!(matches!(cow, Cow::Owned(_)));
            assert_eq!(&*cow, lossy_str, "{bytes:?}");
        }
    }

    // `utf8_chunks` yields `Ok` for valid runs and `Err` for each invalid
    // sequence.
    #[test]
    fn proto_str_utf8_chunks() {
        // Compares the chunk iterator for `$bytes` against the listed chunks.
        macro_rules! assert_chunks {
            ($bytes:expr, $($chunks:expr),* $(,)?) => {
                let bytes = $bytes;
                let chunks: &[Result<&str, &[u8]>] = &[$($chunks),*];
                let got_chunks = test_proto_str(bytes).utf8_chunks();
                let expected_chunks = chunks.iter().copied();
                assert!(got_chunks.eq(expected_chunks), "{bytes:?} -> {chunks:?}");
            };
        }

        assert_chunks!(b"hello", Ok("hello"));
        assert_chunks!("ศไทย中华Việt Nam".as_bytes(), Ok("ศไทย中华Việt Nam"));
        assert_chunks!(
            b"Hello\xC2 There\xFF Goodbye",
            Ok("Hello"),
            Err(b"\xC2"),
            Ok(" There"),
            Err(b"\xFF"),
            Ok(" Goodbye"),
        );
        assert_chunks!(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            Ok("Hello"),
            Err(b"\xC0"),
            Err(b"\x80"),
            Ok(" There"),
            Err(b"\xE6\x83"),
            Ok(" Goodbye"),
        );
        assert_chunks!(
            b"\xF5foo\xF5\x80bar",
            Err(b"\xF5"),
            Ok("foo"),
            Err(b"\xF5"),
            Err(b"\x80"),
            Ok("bar"),
        );
        assert_chunks!(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            Err(b"\xF1"),
            Ok("foo"),
            Err(b"\xF1\x80"),
            Ok("bar"),
            Err(b"\xF1\x80\x80"),
            Ok("baz"),
        );
        assert_chunks!(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            Err(b"\xF4"),
            Ok("foo"),
            Err(b"\xF4\x80"),
            Ok("bar"),
            Err(b"\xF4"),
            Err(b"\xBF"),
            Ok("baz"),
        );
        assert_chunks!(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            Err(b"\xF0"),
            Err(b"\x80"),
            Err(b"\x80"),
            Err(b"\x80"),
            Ok("foo\u{10000}bar"),
        );
        assert_chunks!(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            Err(b"\xED"),
            Err(b"\xA0"),
            Err(b"\x80"),
            Ok("foo"),
            Err(b"\xED"),
            Err(b"\xBF"),
            Err(b"\xBF"),
            Ok("bar"),
        );
    }

    // `chars` yields one U+FFFD per invalid sequence.
    #[test]
    fn proto_str_chars() {
        // Compares the char iterator for `$bytes` against the expected chars.
        macro_rules! assert_chars {
            ($bytes:expr, $chars:expr) => {
                let bytes = $bytes;
                let chars = $chars;
                let got_chars = test_proto_str(bytes).chars();
                let expected_chars = chars.into_iter();
                assert!(got_chars.eq(expected_chars), "{bytes:?} -> {chars:?}");
            };
        }

        assert_chars!(b"hello", ['h', 'e', 'l', 'l', 'o']);
        assert_chars!(
            "ศไทย中华Việt Nam".as_bytes(),
            ['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm']
        );
        assert_chars!(
            b"Hello\xC2 There\xFF Goodbye",
            [
                'H', 'e', 'l', 'l', 'o', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', 'o',
                'o', 'd', 'b', 'y', 'e'
            ]
        );
        assert_chars!(
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            [
                'H', 'e', 'l', 'l', 'o', '�', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G',
                'o', 'o', 'd', 'b', 'y', 'e'
            ]
        );
        assert_chars!(b"\xF5foo\xF5\x80bar", ['�', 'f', 'o', 'o', '�', '�', 'b', 'a', 'r']);
        assert_chars!(
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', 'b', 'a', 'z']
        );
        assert_chars!(
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', '�', 'b', 'a', 'z']
        );
        assert_chars!(
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            ['�', '�', '�', '�', 'f', 'o', 'o', '\u{10000}', 'b', 'a', 'r']
        );
        assert_chars!(
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
            ['�', '�', '�', 'f', 'o', 'o', '�', '�', '�', 'b', 'a', 'r']
        );
    }
}