// blob: 913838f00cdbd7c016eaac9a172dcb3f950a5f0a [file] [log] [blame]
// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
// decode.h defines classes that implement tokenized string decoding. These
// classes should not be used directly; instead decode tokenized messages with
// the Detokenizer class, defined in pw_tokenizer/detokenize.h.
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
// Decoding errors are wrapped in a prefix and a suffix so that they stand out
// from the rest of the decoded string. These macros are used to build decoding
// error strings.
//
// PW_TOKENIZER_ARG_DECODING_ERROR(message) expands to the prefix, the message
// and the suffix as adjacent tokens. When message is a string literal, the
// compiler's string literal concatenation joins them into a single literal;
// for example, PW_TOKENIZER_ARG_DECODING_ERROR("missing") is "<[missing]>".
#define PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "<["
#define PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX "]>"
#define PW_TOKENIZER_ARG_DECODING_ERROR(message) \
PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX message \
PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX
namespace pw::tokenizer {
// Records the outcome of decoding one argument from an encoded tokenized
// string. The enumerator values should be kept in sync with decode.py's
// DecodedArg class.
class ArgStatus {
 public:
  // Problems that can arise while decoding a tokenized string argument. Every
  // enumerator occupies its own bit, so a single ArgStatus can record several
  // problems at the same time.
  enum Code : unsigned {
    kOk = 0,                // Decoding was successful.
    kMissing = 1u << 0,     // The argument was not present in the data.
    kTruncated = 1u << 1,   // The argument was truncated during encoding.
    kDecodeError = 1u << 2, // An error occurred while decoding the argument.
    kSkipped = 1u << 3,     // Argument was skipped due to a previous error.
  };

  // Starts out with exactly the bits of the given code (kOk by default).
  constexpr ArgStatus(Code code = kOk) : status_(code) {}

  // Merges the bits of another status into this one; bits are never cleared.
  constexpr void Update(ArgStatus status) {
    status_ = status_ | status.status_;
  }

  // True when nothing went wrong while decoding. The truncated bit alone does
  // not count as a failure, because a truncated string is still encoded and
  // decoded successfully.
  constexpr bool ok() const {
    // Mask out the truncated bit; any bit that is left is a real error.
    return (status_ & ~static_cast<unsigned>(kTruncated)) == 0u;
  }

  // Reports whether any bit of the given code is set in this status.
  constexpr bool HasError(Code code) const {
    const unsigned overlap = status_ & static_cast<unsigned>(code);
    return overlap != 0u;
  }

 private:
  // Kept as a plain unsigned rather than a Code, since several Code bits may
  // be set at once.
  unsigned status_;
};
// One piece of a decoded tokenized message: either literal text taken from
// the format string or the result of decoding a single argument.
class DecodedArg {
 public:
  // Builds a DecodedArg by formatting an already-decoded value with the given
  // format string. raw_size_bytes is the number of encoded bytes that were
  // decoded to obtain the value.
  template <typename ArgumentType>
  static DecodedArg FromValue(const char* format_string,
                              ArgumentType value,
                              size_t raw_size_bytes,
                              ArgStatus arg_status = ArgStatus::kOk);

  // Wraps a string literal from the format string (plain text or a %
  // character). Literals have no spec and a raw size of zero bytes.
  DecodedArg(const std::string& literal)
      : value_(literal), raw_data_size_bytes_(0u) {}

  // Builds a DecodedArg for an argument that hit an error during decoding.
  DecodedArg(ArgStatus error,
             const std::string_view& spec,
             size_t raw_size_bytes = 0u,
             const std::string_view& value = {});

  // The argument rendered as a string. When decoding this argument failed,
  // this holds an error message instead.
  const std::string& value() const { return value_; }

  // The conversion specification of this argument (e.g. %02x). Empty for
  // literals and for "%%".
  const std::string& spec() const { return spec_; }

  // Whether this argument decoded successfully.
  bool ok() const { return status_.ok(); }

  // Number of bytes this argument occupied in the encoded arguments.
  size_t raw_size_bytes() const { return raw_data_size_bytes_; }

 private:
  // Records the spec, raw size and status while leaving the value empty.
  DecodedArg(const char* format, size_t raw_size_bytes, ArgStatus status)
      : spec_(format),
        raw_data_size_bytes_(raw_size_bytes),
        status_(status) {}

  std::string value_;
  std::string spec_;
  size_t raw_data_size_bytes_;
  ArgStatus status_;
};
// Represents a segment of a printf-style format string. Each StringSegment
// contains either literal text or a format specifier.
class StringSegment {
 public:
  // Parses a format specifier from the text and returns a StringSegment that
  // represents it. Returns an empty StringSegment if no valid format specifier
  // was found.
  static StringSegment ParseFormatSpec(const char* format);

  // Creates a StringSegment that represents a piece of plain text.
  StringSegment(const std::string_view& text) : StringSegment(text, kLiteral) {}

  // Returns the DecodedArg with this StringSegment decoded according to the
  // provided arguments.
  DecodedArg Decode(const std::span<const uint8_t>& arguments) const;

  // Skips decoding this StringSegment. Literals and %% are expanded as normal.
  DecodedArg Skip() const;

  // True if this segment holds no text.
  bool empty() const { return text_.empty(); }

  // The text of this segment, as passed to the constructor.
  const std::string& text() const { return text_; }

 private:
  // The kind of content a segment holds; stored in type_.
  enum Type {
    kLiteral,
    kPercent,  // %% format specifier
    kString,
    kSignedInt,
    kUnsigned32,
    kUnsigned64,
    kFloatingPoint,
  };

  // Varargs-promoted size of args on this machine; only needed for ints or %p.
  enum ArgSize : bool { k32Bit, k64Bit };

  // Maps a type to k64Bit when it is as wide as int64_t, and k32Bit otherwise.
  template <typename T>
  static constexpr ArgSize VarargSize() {
    return sizeof(T) == sizeof(int64_t) ? k64Bit : k32Bit;
  }

  // Overload that takes a length modifier and a conversion specifier
  // character; it is defined outside of this header.
  static ArgSize VarargSize(std::array<char, 2> length, char spec);

  // Creates an empty literal segment. Every member is initialized here,
  // including local_size_: copying or moving a segment reads all members, and
  // reading an uninitialized enum value is undefined behavior. The value used
  // matches what the (text, type) constructor picks.
  StringSegment() : type_(kLiteral), local_size_(VarargSize<void*>()) {}

  // Creates a segment of the given type, using the pointer-sized ArgSize.
  StringSegment(const std::string_view& text, Type type)
      : StringSegment(text, type, VarargSize<void*>()) {}

  // Creates a segment with every field given explicitly.
  StringSegment(const std::string_view& text, Type type, ArgSize local_size)
      : text_(text), type_(type), local_size_(local_size) {}

  // Per-type decoding helpers; they are defined outside of this header.
  DecodedArg DecodeString(const std::span<const uint8_t>& arguments) const;
  DecodedArg DecodeInteger(const std::span<const uint8_t>& arguments) const;
  DecodedArg DecodeFloatingPoint(
      const std::span<const uint8_t>& arguments) const;

  std::string text_;
  Type type_;
  ArgSize local_size_;  // Arg size to use for snprintf on this machine.
};
// Holds the outcome of decoding a tokenized message with a FormatString: the
// decoded arguments plus the count of bytes that were left undecoded. A
// FormatString::Format call returns one of these.
class DecodedFormatString {
 public:
  // Takes ownership of the decoded segments and records how many bytes of the
  // encoded data remained after decoding.
  DecodedFormatString(std::vector<DecodedArg>&& segments,
                      size_t remaining_bytes)
      : segments_(std::move(segments)), remaining_bytes_(remaining_bytes) {}

  // Copyable and movable.
  DecodedFormatString(const DecodedFormatString&) = default;
  DecodedFormatString(DecodedFormatString&&) = default;
  DecodedFormatString& operator=(const DecodedFormatString&) = default;
  DecodedFormatString& operator=(DecodedFormatString&&) = default;

  // The decoded format string. Where an argument failed to decode, its %
  // conversion specifier is included unmodified.
  std::string value() const;

  // The decoded format string, with an error message in place of every
  // argument that failed to decode.
  std::string value_with_errors() const;

  // True only when no bytes were left over and no argument failed to decode.
  bool ok() const {
    if (remaining_bytes_ != 0u) {
      return false;
    }
    return decoding_errors() == 0u;
  }

  // Number of bytes that remained after decoding.
  size_t remaining_bytes() const { return remaining_bytes_; }

  // Number of arguments in the format string; %% is not counted.
  size_t argument_count() const;

  // Number of arguments that failed to decode.
  size_t decoding_errors() const;

 private:
  std::vector<DecodedArg> segments_;
  size_t remaining_bytes_;
};
// A printf-style format string, held as a vector of StringSegments.
class FormatString {
 public:
  // Builds a FormatString from a null-terminated format string.
  FormatString(const char* format_string);

  // Decodes the encoded arguments according to this format string and returns
  // the result.
  DecodedFormatString Format(std::span<const uint8_t> arguments) const;

  // Convenience overload for encoded arguments held in a string_view: the
  // characters are reinterpreted as bytes and passed to the span overload.
  DecodedFormatString Format(const std::string_view& arguments) const {
    const uint8_t* const bytes =
        reinterpret_cast<const uint8_t*>(arguments.data());
    return Format(std::span<const uint8_t>(bytes, arguments.size()));
  }

 private:
  std::vector<StringSegment> segments_;
};
// Out-of-line definition of the DecodedArg::FromValue function template.
template <typename ArgumentType>
DecodedArg DecodedArg::FromValue(const char* format,
                                 ArgumentType value,
                                 size_t raw_size_bytes,
                                 ArgStatus status) {
  DecodedArg result(format, raw_size_bytes, status);

  // Dry run with no buffer: snprintf reports how many characters the formatted
  // value needs (excluding the null terminator), or a negative value on
  // failure.
  const int formatted_length = std::snprintf(nullptr, 0u, format, value);

  if (formatted_length >= 0) {
    std::string& text = result.value_;
    // Make room for the formatted characters plus snprintf's null terminator.
    text.append(static_cast<size_t>(formatted_length) + 1u, '\0');
    // Format straight into the string's storage.
    std::snprintf(text.data(), text.size(), format, value);
    // Drop the null terminator that snprintf wrote into the last slot.
    text.pop_back();
  } else {
    // Formatting failed; flag the argument and leave its value untouched.
    result.status_.Update(ArgStatus::kDecodeError);
  }

  return result;
}
} // namespace pw::tokenizer