blob: 3266a5250a84ff8fc9de2340f94f14e47cefac9b [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//! Items specific to `bytes` and `string` fields.
#![allow(dead_code)]
#![allow(unused)]
use crate::__internal::runtime::InnerProtoString;
use crate::__internal::{Private, SealedInternal};
use crate::{AsView, IntoProxied, IntoView, Mut, MutProxied, Optional, Proxied, View};
use std::borrow::Cow;
use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
use std::convert::{AsMut, AsRef};
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::iter;
use std::ops::{Deref, DerefMut};
use std::ptr;
use std::rc::Rc;
use std::sync::Arc;
/// Owned contents of a protobuf `bytes` field.
///
/// A thin wrapper around the kernel-specific `InnerProtoString` container; the
/// payload is handed out to callers as a plain `&[u8]`.
pub struct ProtoBytes {
    pub(crate) inner: InnerProtoString,
}

impl ProtoBytes {
    /// Borrows the stored bytes.
    pub fn as_view(&self) -> &[u8] {
        let Self { inner } = self;
        inner.as_bytes()
    }

    // Wraps a kernel-specific container. This method is private in spirit and
    // must not be called by a user.
    #[doc(hidden)]
    pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoBytes {
        ProtoBytes { inner }
    }

    // Returns the kernel-specific container. This method is private in spirit and
    // must not be called by a user.
    #[doc(hidden)]
    pub fn into_inner(self, _private: Private) -> InnerProtoString {
        let ProtoBytes { inner } = self;
        inner
    }
}
/// Exposes the stored bytes as a slice.
impl AsRef<[u8]> for ProtoBytes {
    fn as_ref(&self) -> &[u8] {
        self.as_view()
    }
}

/// Builds a `ProtoBytes` from a borrowed byte slice.
impl From<&[u8]> for ProtoBytes {
    fn from(v: &[u8]) -> ProtoBytes {
        let inner = InnerProtoString::from(v);
        Self { inner }
    }
}

/// Builds a `ProtoBytes` from a borrowed fixed-size byte array (e.g. a
/// `b"..."` literal) by viewing it as a slice.
impl<const N: usize> From<&[u8; N]> for ProtoBytes {
    fn from(v: &[u8; N]) -> ProtoBytes {
        Self::from(v.as_slice())
    }
}
impl SealedInternal for ProtoBytes {}

/// A `bytes` field is viewed as a borrowed byte slice.
impl Proxied for ProtoBytes {
    type View<'msg> = &'msg [u8];
}

/// The view of an owned `ProtoBytes` is simply a borrow of its bytes.
impl AsView for ProtoBytes {
    type Proxied = ProtoBytes;

    fn as_view(&self) -> &[u8] {
        self.inner.as_bytes()
    }
}
// Every byte-like source below is first borrowed as `&[u8]` and then handed to
// `ProtoBytes::from`; owned inputs are dropped once the conversion returns.

/// Borrowed byte slices convert directly.
impl IntoProxied<ProtoBytes> for &[u8] {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        <ProtoBytes as From<&[u8]>>::from(self)
    }
}

/// Borrowed fixed-size arrays (such as `b"..."` literals).
impl<const N: usize> IntoProxied<ProtoBytes> for &[u8; N] {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = self;
        ProtoBytes::from(bytes)
    }
}

/// Owned vectors.
impl IntoProxied<ProtoBytes> for Vec<u8> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Borrowed vectors.
impl IntoProxied<ProtoBytes> for &Vec<u8> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = self;
        ProtoBytes::from(bytes)
    }
}

/// Boxed slices.
impl IntoProxied<ProtoBytes> for Box<[u8]> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Clone-on-write slices, whether borrowed or owned.
impl IntoProxied<ProtoBytes> for Cow<'_, [u8]> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Reference-counted slices.
impl IntoProxied<ProtoBytes> for Rc<[u8]> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}

/// Atomically reference-counted slices.
impl IntoProxied<ProtoBytes> for Arc<[u8]> {
    fn into_proxied(self, _private: Private) -> ProtoBytes {
        let bytes: &[u8] = &self;
        ProtoBytes::from(bytes)
    }
}
impl SealedInternal for &[u8] {}

/// A byte slice is already the view type of `ProtoBytes`, so viewing it is a
/// plain reborrow.
impl AsView for &[u8] {
    type Proxied = ProtoBytes;

    fn as_view(&self) -> &[u8] {
        *self
    }
}

/// Shortening the lifetime of a byte-slice view needs no work beyond the
/// implicit reborrow.
impl<'msg> IntoView<'msg> for &'msg [u8] {
    fn into_view<'shorter>(self) -> &'shorter [u8]
    where
        'msg: 'shorter,
    {
        let shortened: &'shorter [u8] = self;
        shortened
    }
}
/// The bytes were not valid UTF-8.
///
/// Wraps the standard library's [`std::str::Utf8Error`]; the `Display` output
/// is exactly that of the wrapped error.
#[derive(Debug, PartialEq)]
pub struct Utf8Error {
    pub(crate) inner: std::str::Utf8Error,
}

impl From<std::str::Utf8Error> for Utf8Error {
    fn from(inner: std::str::Utf8Error) -> Utf8Error {
        Self { inner }
    }
}

impl fmt::Display for Utf8Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Forward to the wrapped error's `Display` implementation.
        fmt::Display::fmt(&self.inner, f)
    }
}

impl std::error::Error for Utf8Error {}
/// An owned type representing protobuf `string` field's contents.
///
/// # UTF-8
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// `ProtoString` represents a string type that is expected to contain valid
/// UTF-8. However, `ProtoString` is not validated, so users must take a view
/// with [`ProtoString::as_view`] and call [`ProtoStr::to_str`] on it to perform
/// a (possibly runtime-elided) UTF-8 validation check. This validation should
/// rarely fail in pure Rust programs, but is necessary to prevent UB when
/// interacting with C++, or other languages with looser restrictions.
///
///
/// # `Display` and `ToString`
/// The [`ProtoStr`] view returned by [`ProtoString::as_view`] implements
/// `Display`. If there are any invalid UTF-8 sequences, they are replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`]. Because anything implementing `Display`
/// also implements `ToString`, `proto_string.as_view().to_string()` is
/// equivalent to
/// `String::from_utf8_lossy(proto_string.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
// NOTE(review): the previous wording said `ProtoString` itself implements
// `Display` (and therefore `to_string`). No such impl is visible in this file,
// only `Debug`; confirm whether one exists elsewhere before restoring it.
pub struct ProtoString {
// Kernel-specific storage for the string's bytes.
pub(crate) inner: InnerProtoString,
}
impl ProtoString {
pub fn as_view(&self) -> &ProtoStr {
unsafe { ProtoStr::from_utf8_unchecked(self.as_bytes()) }
}
pub fn as_bytes(&self) -> &[u8] {
self.inner.as_bytes()
}
// Returns the kernel-specific container. This method is private in spirit and
// must not be called by a user.
#[doc(hidden)]
pub fn into_inner(self, _private: Private) -> InnerProtoString {
self.inner
}
#[doc(hidden)]
pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoString {
Self { inner }
}
}
impl SealedInternal for ProtoString {}
impl AsRef<[u8]> for ProtoString {
fn as_ref(&self) -> &[u8] {
self.inner.as_bytes()
}
}
impl From<ProtoString> for ProtoBytes {
fn from(v: ProtoString) -> Self {
ProtoBytes { inner: v.inner }
}
}
impl From<&str> for ProtoString {
fn from(v: &str) -> Self {
Self::from(v.as_bytes())
}
}
impl From<&[u8]> for ProtoString {
fn from(v: &[u8]) -> Self {
Self { inner: InnerProtoString::from(v) }
}
}
impl SealedInternal for &str {}

impl SealedInternal for &ProtoStr {}

// Every string-like source below is first borrowed as `&str` (or as bytes) and
// then handed to `ProtoString::from`; owned inputs are dropped once the
// conversion returns.

/// Borrowed string slices convert directly.
impl IntoProxied<ProtoString> for &str {
    fn into_proxied(self, _private: Private) -> ProtoString {
        <ProtoString as From<&str>>::from(self)
    }
}

/// An existing `ProtoStr` view is converted from its raw bytes.
impl IntoProxied<ProtoString> for &ProtoStr {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let bytes = self.as_bytes();
        ProtoString::from(bytes)
    }
}

/// Owned strings.
impl IntoProxied<ProtoString> for String {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Borrowed strings.
impl IntoProxied<ProtoString> for &String {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = self;
        ProtoString::from(text)
    }
}

/// Owned OS strings defer to the `&OsStr` conversion.
impl IntoProxied<ProtoString> for OsString {
    fn into_proxied(self, private: Private) -> ProtoString {
        IntoProxied::<ProtoString>::into_proxied(self.as_os_str(), private)
    }
}

/// Borrowed OS strings are stored as their encoded bytes.
impl IntoProxied<ProtoString> for &OsStr {
    fn into_proxied(self, _private: Private) -> ProtoString {
        // `as_encoded_bytes` is documented as a platform-specific superset of
        // UTF-8, so the stored contents are not guaranteed to be valid UTF-8.
        let encoded = self.as_encoded_bytes();
        ProtoString::from(encoded)
    }
}

/// Boxed string slices.
impl IntoProxied<ProtoString> for Box<str> {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Clone-on-write strings, whether borrowed or owned.
impl IntoProxied<ProtoString> for Cow<'_, str> {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Reference-counted string slices.
impl IntoProxied<ProtoString> for Rc<str> {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}

/// Atomically reference-counted string slices.
impl IntoProxied<ProtoString> for Arc<str> {
    fn into_proxied(self, _private: Private) -> ProtoString {
        let text: &str = &self;
        ProtoString::from(text)
    }
}
/// A shared immutable view of a protobuf `string` field's contents.
///
/// Like a `str`, it can be cheaply accessed as bytes and
/// is [dynamically sized][dst], requiring it be accessed through a pointer.
///
/// # UTF-8 and `&str` access
///
/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
/// However, not every runtime enforces this, and the Rust runtime is designed
/// to integrate with other runtimes with FFI, like C++.
///
/// Because of this, in order to access the contents as a `&str`, users must
/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
/// validation check. However, the Rust API only allows `set()`ting a `string`
/// field with data that should be valid UTF-8, like a `&str` or a
/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
/// to prevent UB when interacting with C++, which has looser restrictions.
///
/// Most of the time, users should not perform direct `&str` access to the
/// contents - this type implements `Display` and comparison with `str`,
/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
/// or converting to `&[u8]`.
///
/// # `Display` and `ToString`
/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
/// CHARACTER`]. Because anything implementing `Display` also implements
/// `ToString`, `proto_str.to_string()` is equivalent to
/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
///
/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
// `repr(transparent)` gives this wrapper the same layout as `[u8]`, which is
// what `ProtoStr::from_utf8_unchecked` relies on for its pointer cast.
#[repr(transparent)]
pub struct ProtoStr([u8]);
impl ProtoStr {
/// Converts `self` to a byte slice.
///
/// Note: this type does not implement `Deref`; you must call `as_bytes()`
/// or `AsRef<[u8]>` to get access to bytes.
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
/// Yields a `&str` slice if `self` contains valid UTF-8.
///
/// This may perform a runtime check, dependent on runtime.
///
/// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to
/// infallibly construct a string, replacing invalid UTF-8 with
/// [`U+FFFD REPLACEMENT CHARACTER`].
///
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
// This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent.
pub fn to_str(&self) -> Result<&str, Utf8Error> {
Ok(std::str::from_utf8(&self.0)?)
}
/// Converts `self` to a string, including invalid characters.
///
/// Invalid UTF-8 sequences are replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`].
///
/// Users should be prefer this to `.to_string()` provided by `Display`.
/// `.to_cow_lossy()` is the same operation, but it may avoid an
/// allocation if the string is already UTF-8.
///
/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
//
// This method is named `to_string_lossy` in `CStr`, but since `to_string`
// also exists on this type, this name was chosen to avoid confusion.
pub fn to_cow_lossy(&self) -> Cow<'_, str> {
String::from_utf8_lossy(&self.0)
}
/// Returns `true` if `self` has a length of zero bytes.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Returns the length of `self`.
///
/// Like `&str`, this is a length in bytes, not `char`s or graphemes.
pub fn len(&self) -> usize {
self.0.len()
}
/// Converts known-UTF-8 bytes to a `ProtoStr` without a check.
///
/// # Safety
/// `bytes` must be valid UTF-8 if the current runtime requires it.
pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self {
// SAFETY:
// - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same
// layout.
// - `ProtoStr` has the same pointer metadata and element size as `[u8]`.
unsafe { &*(bytes as *const [u8] as *const Self) }
}
/// Interprets a string slice as a `&ProtoStr`.
pub fn from_str(string: &str) -> &Self {
// SAFETY: `string.as_bytes()` is valid UTF-8.
unsafe { Self::from_utf8_unchecked(string.as_bytes()) }
}
}
impl AsRef<[u8]> for ProtoStr {
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
impl<'msg> From<&'msg ProtoStr> for &'msg [u8] {
fn from(val: &'msg ProtoStr) -> &'msg [u8] {
val.as_bytes()
}
}
impl<'msg> From<&'msg str> for &'msg ProtoStr {
fn from(val: &'msg str) -> &'msg ProtoStr {
ProtoStr::from_str(val)
}
}
impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str {
type Error = Utf8Error;
fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> {
val.to_str()
}
}
impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr {
type Error = Utf8Error;
fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> {
Ok(ProtoStr::from_str(std::str::from_utf8(val)?))
}
}
/// Formats the contents as a double-quoted string literal.
///
/// Valid UTF-8 runs are escaped with `char::escape_debug`; every byte that is
/// not part of a valid sequence is written as an upper-case hex escape such as
/// `\xFF`.
impl fmt::Debug for ProtoStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Each write is propagated with `?`: discarding the result would report
        // success even when the underlying writer has already failed.
        f.write_str("\"")?;
        for chunk in self.as_bytes().utf8_chunks() {
            for ch in chunk.valid().chars() {
                write!(f, "{}", ch.escape_debug())?;
            }
            for byte in chunk.invalid() {
                // Format byte as \xFF.
                write!(f, "\\x{:02X}", byte)?;
            }
        }
        f.write_str("\"")
    }
}
impl fmt::Display for ProtoStr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&String::from_utf8_lossy(self.as_bytes()), f)?;
Ok(())
}
}
/// Hashes exactly like the underlying `[u8]`.
impl Hash for ProtoStr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let bytes: &[u8] = self.as_bytes();
        Hash::hash(bytes, state)
    }
}

impl Eq for ProtoStr {}

/// Orders by comparing the underlying bytes.
impl Ord for ProtoStr {
    fn cmp(&self, other: &ProtoStr) -> Ordering {
        let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
        Ord::cmp(lhs, rhs)
    }
}
impl Proxied for ProtoString {
type View<'msg> = &'msg ProtoStr;
}
impl AsView for ProtoString {
type Proxied = Self;
fn as_view(&self) -> &ProtoStr {
self.as_view()
}
}
impl AsView for &ProtoStr {
type Proxied = ProtoString;
fn as_view(&self) -> &ProtoStr {
self
}
}
impl<'msg> IntoView<'msg> for &'msg ProtoStr {
fn into_view<'shorter>(self) -> &'shorter ProtoStr
where
'msg: 'shorter,
{
self
}
}
/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
/// using `AsRef<[u8]>`.
///
/// Each entry has the form `<(generics)> Lhs => Rhs`; both types must
/// implement `AsRef<[u8]>`, and the comparison is done on those byte slices.
// TODO: consider improving to not require a `<()>` if no generics are
// needed
macro_rules! impl_bytes_partial_cmp {
// One or more comma-separated entries, with an optional trailing comma.
($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
$(
// Equality: compare the two sides as byte slices.
impl<$($generics)*> PartialEq<$rhs> for $lhs {
fn eq(&self, other: &$rhs) -> bool {
AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
}
}
// Ordering: defer to the ordering of the byte slices.
impl<$($generics)*> PartialOrd<$rhs> for $lhs {
fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
}
}
)*
};
}
// Generates the byte-wise comparisons for `ProtoStr`; the `ProtoStr => ProtoStr`
// entry also supplies the `PartialEq`/`PartialOrd` that its `Eq` and `Ord` impls
// require.
impl_bytes_partial_cmp!(
// `ProtoStr` against protobuf types
<()> ProtoStr => ProtoStr,
// `ProtoStr` against foreign types
<()> ProtoStr => str,
<()> str => ProtoStr,
);
impl std::fmt::Debug for ProtoString {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
std::fmt::Debug::fmt(self.as_view(), f)
}
}
impl std::fmt::Debug for ProtoBytes {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
std::fmt::Debug::fmt(self.as_view(), f)
}
}
// SAFETY: NOTE(review): these impls assert that `ProtoString` may be shared
// between and moved across threads. Whether that holds depends on
// `InnerProtoString`, whose definition is not visible in this file — confirm
// that it uniquely owns its contents and has no unsynchronized interior
// mutability.
unsafe impl Sync for ProtoString {}
unsafe impl Send for ProtoString {}
// SAFETY: NOTE(review): `ProtoBytes` wraps the same `InnerProtoString`
// container, so the same conditions need confirming here.
unsafe impl Send for ProtoBytes {}
unsafe impl Sync for ProtoBytes {}
// Unit-test module; currently it only holds a helper and no test functions.
#[cfg(test)]
mod tests {
use super::*;
use googletest::prelude::*;
// TODO: Add unit tests
// Shorter and safe utility function to construct `ProtoStr` from bytes for
// testing.
//
// Wraps the unsafe `ProtoStr::from_utf8_unchecked` so test bodies can build
// views over arbitrary (including non-UTF-8) byte strings.
fn test_proto_str(bytes: &[u8]) -> &ProtoStr {
// SAFETY: The runtime that this test executes under does not elide UTF-8 checks
// inside of `ProtoStr`.
unsafe { ProtoStr::from_utf8_unchecked(bytes) }
}
}