blob: dc081e098c525d00409bbd93249a34d88463566e [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__
#define GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "google/protobuf/stubs/logging.h"
#include "google/protobuf/stubs/common.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/stubs/status_macros.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
// Utilities for parsing contiguous buffers out of ZeroCopyInputStreams.
namespace google {
namespace protobuf {
namespace json_internal {
// Forward decl. for use by helper types below.
class ZeroCopyBufferedStream;
// An RAII type that represents holding a reference into the backing buffer
// of a ZeroCopyBufferedStream. This allows for automatic management of the
// backing buffer.
class BufferingGuard {
public:
explicit BufferingGuard(ZeroCopyBufferedStream* owner = nullptr);
~BufferingGuard();
BufferingGuard(const BufferingGuard& other) : BufferingGuard(other.owner_) {}
BufferingGuard& operator=(const BufferingGuard& other) {
this->~BufferingGuard();
new (this) BufferingGuard(other);
return *this;
}
private:
friend class Mark;
ZeroCopyBufferedStream* owner_ = nullptr;
};
// A string that may own its contents, or live inside of a buffer owned by
// a ZeroCopyBufferedStream.
//
// Note that this type holds onto a reference to the owning
// ZeroCopyBufferedStream; this allows it to be durable against strings being
// moved around for buffering puroses.
class MaybeOwnedString {
public:
explicit MaybeOwnedString(std::string value) : data_(std::move(value)) {}
MaybeOwnedString(ZeroCopyBufferedStream* stream, size_t start, size_t len,
BufferingGuard token)
: data_(StreamOwned{stream, start, len}), token_(token) {}
// Returns the string as a view, regardless of whether it is owned or not.
absl::string_view AsView() const {
if (auto* unowned = absl::get_if<StreamOwned>(&data_)) {
return unowned->AsView();
}
return absl::get<std::string>(data_);
}
operator absl::string_view() const { return AsView(); } // NOLINT
// Returns a reference to an owned string; if the wrapped string is not
// owned, this function will perform a copy and make it owned.
std::string& ToString() {
if (auto* unowned = absl::get_if<StreamOwned>(&data_)) {
data_ = std::string(unowned->AsView());
token_ = BufferingGuard{};
}
return absl::get<std::string>(data_);
}
template <typename String>
friend bool operator==(const MaybeOwnedString& lhs, const String& rhs) {
return lhs.AsView() == rhs;
}
template <typename String>
friend bool operator!=(const MaybeOwnedString& lhs, const String& rhs) {
return !(lhs == rhs);
}
private:
struct StreamOwned {
ZeroCopyBufferedStream* stream;
size_t start, len;
absl::string_view AsView() const;
};
absl::variant<std::string, StreamOwned> data_;
BufferingGuard token_;
};
// A mark in a stream. See ZeroCopyBufferedStream::Mark().
class Mark {
public:
// Returns a maybe-owned string up to the unread bytes boundary, except for
// the last `clip` bytes.
MaybeOwnedString UpToUnread(size_t clip = 0) const;
// Discards this mark and its hold on the buffer.
void Discard() && { guard_ = BufferingGuard(); }
private:
friend ZeroCopyBufferedStream;
Mark(size_t offset, BufferingGuard guard) : offset_(offset), guard_(guard) {}
size_t offset_;
BufferingGuard guard_;
};
// A wrapper over a ZeroCopyInputStream that allows doing as-needed buffer for
// obtaining contiguous chunks larger than those the underlying stream might
// provide, while minimizing the amount of actual copying.
class ZeroCopyBufferedStream {
public:
explicit ZeroCopyBufferedStream(io::ZeroCopyInputStream* stream)
: stream_(stream) {}
// Returns whether the stream is currently at eof.
//
// This function will buffer at least one character to verify whether it
// actually *is* at EOF.
bool AtEof() {
(void)BufferAtLeast(1);
return eof_;
}
// Takes exactly n characters from a string.
absl::StatusOr<MaybeOwnedString> Take(size_t len) {
auto buffering = BufferAtLeast(len);
RETURN_IF_ERROR(buffering.status());
size_t start = cursor_;
RETURN_IF_ERROR(Advance(len));
return MaybeOwnedString(this, start, len, *buffering);
}
// Takes characters to form a string, according to the given predicate. Stops
// early if an EOF is hit.
//
// The predicate must have type `(int, char) -> bool`; the first argument
// is the index of the character.
template <typename Pred>
absl::StatusOr<MaybeOwnedString> TakeWhile(Pred p);
// Places a mark in the stream, ensuring that all characters consumed after
// the mark are buffered. This can be used to parse some characters and then
// recover everything that follows as a contiguous string_view so that it may
// be processed a second time.
//
// The returned value is an RAII type that ensure the buffer sticks around
// long enough.
Mark BeginMark() { return Mark(cursor_, BufferingGuard(this)); }
// Peeks the next character in the stream.
//
// This function will not enable buffering on its own, and will read past the
// end of the buffer if at EOF; BufferAtLeast() should be called before
// calling this function.
char PeekChar() {
GOOGLE_DCHECK(!Unread().empty());
return Unread()[0];
}
// Advances the cursor by the given number of bytes.
absl::Status Advance(size_t bytes);
// Returns a view of the current buffer, which may be either the owned
// `buf_` or the stream-owned `last_chunk_`.
//
// The returned view is unstable: calling any function may invalidate it,
// because there will not be a `BufferingGuard` to guard it.
absl::string_view RawBuffer(size_t start,
size_t len = absl::string_view::npos) const;
// Returns a view of RawBuffer, unread bytes; this will not be the entirety
// of the underlying stream.
absl::string_view Unread() const { return RawBuffer(cursor_); }
bool IsBuffering() const { return using_buf_; }
// Buffers at least `bytes` bytes ahead of the current cursor position,
// possibly enabling buffering.
//
// Returns an error if that many bytes could not be RawBuffer.
absl::StatusOr<BufferingGuard> BufferAtLeast(size_t bytes);
private:
friend BufferingGuard;
friend Mark;
friend MaybeOwnedString;
// Increments the buffering refcount; this will also update `buffer_start_` if
// necessary.
void UpRefBuffer() {
if (outstanding_buffer_borrows_++ == 0) {
buffer_start_ = cursor_;
}
}
// Decrements the buffering refcount; calling this function if the refcount is
// zero is undefined behavior.
//
// This function should not be called directly; it is called automatically
// by the destructor of `BufferingGuard`.
void DownRefBuffer();
// Obtains a new chunk from the underlying stream; returns whether there is
// still more data to read.
bool ReadChunk();
// The streamer implements a buffering stream on top of the given stream, by
// the following mechanism:
// - `cursor_` is an offset into either `last_chunk_` or `buf_`, which can
// be obtained via RawBuffer() and Unread():
// - If `using_buf_` is true, it is an offset into `buf_`.
// - Otherwise it is an offset into `last_chunk_`.
// - If `outstanding_buffer_borrows_ > 0`, someone needs the buffer to stick
// around. MaybeUnownedString::StreamOwned is implemented such that it does
// not hold onto `last_chunk_` directly, so we can freely copy it into
// `buf_` as needed arises.
// - Note that we can copy only part if we update `buffer_start_`; see
// RawBuffer().
// - If we would read more data and `outstanding_buffer_borrows_ > 0`, instead
// of trashing `last_chunk_`, we copy it into `buf_` and append to `buf_`
// each time we read.
// - If `outstanding_buffer_borrows_ == 0`, we can trash `buf_` and go back to
// using `last_chunk_` directly. See `DownRefBuffer()`.
io::ZeroCopyInputStream* stream_;
absl::string_view last_chunk_;
std::vector<char> buf_;
bool using_buf_ = false;
size_t cursor_ = 0;
// Invariant: this always refers to the earliest point at which we requested
// buffering, since the last time outstanding_buffer_borrows_ was zero.
size_t buffer_start_ = 0;
bool eof_ = false;
int outstanding_buffer_borrows_ = 0;
};
// These functions all rely on the definition of ZeroCopyBufferedStream, so must
// come after it.
inline BufferingGuard::BufferingGuard(ZeroCopyBufferedStream* owner)
: owner_(owner) {
if (owner_ != nullptr) {
owner_->UpRefBuffer();
}
}
inline BufferingGuard::~BufferingGuard() {
if (owner_ != nullptr) {
owner_->DownRefBuffer();
owner_ = nullptr;
}
}
inline absl::string_view MaybeOwnedString::StreamOwned::AsView() const {
return stream->RawBuffer(start, len);
}
inline MaybeOwnedString Mark::UpToUnread(size_t clip) const {
return MaybeOwnedString(guard_.owner_, offset_,
guard_.owner_->cursor_ - offset_ - clip, guard_);
}
template <typename Pred>
absl::StatusOr<MaybeOwnedString> ZeroCopyBufferedStream::TakeWhile(Pred p) {
size_t start = cursor_;
BufferingGuard guard(this);
while (true) {
if (!BufferAtLeast(1).ok()) {
// We treat EOF as ending the take, rather than being an error.
break;
}
if (!p(cursor_ - start, PeekChar())) {
break;
}
RETURN_IF_ERROR(Advance(1));
}
return MaybeOwnedString(this, start, cursor_ - start, guard);
}
inline absl::string_view ZeroCopyBufferedStream::RawBuffer(size_t start,
size_t len) const {
absl::string_view view = last_chunk_;
if (using_buf_) {
GOOGLE_DCHECK_LE(buffer_start_, start);
start -= buffer_start_;
view = absl::string_view(buf_.data(), buf_.size());
}
#if 0
// This print statement is especially useful for trouble-shooting low-level
// bugs in the buffering logic.
GOOGLE_LOG(INFO) << absl::StreamFormat("%s(\"%s\")[%d:%d]/%d:%d @ %p",
using_buf_ ? "buf_" : "last_chunk_",
view, start, static_cast<int>(len),
buffer_start_, cursor_, this);
#endif
GOOGLE_DCHECK_LE(start, view.size());
if (len == absl::string_view::npos) {
return view.substr(start);
}
GOOGLE_DCHECK_LE(start + len, view.size());
return view.substr(start, len);
}
} // namespace json_internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"
#endif // GOOGLE_PROTOBUF_JSON_INTERNAL_ZERO_COPY_BUFFERED_STREAM_H__