pw_log: Update proto
- Update proto to use a single bytes field for optionally tokenized
fields.
- Start with a minimal set of fields, but document fields planned for
future expansion.
Change-Id: I5af8e65335001ffda03acc31e5077edbca9deff3
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/47780
Commit-Queue: Wyatt Hepler <hepler@google.com>
Reviewed-by: Keir Mierle <keir@google.com>
diff --git a/pw_env_setup/BUILD.gn b/pw_env_setup/BUILD.gn
index 48be4eb..afd46b7 100644
--- a/pw_env_setup/BUILD.gn
+++ b/pw_env_setup/BUILD.gn
@@ -35,6 +35,7 @@
"$dir_pw_doctor/py",
"$dir_pw_env_setup/py",
"$dir_pw_hdlc/py",
+ "$dir_pw_log:protos.python",
"$dir_pw_log_tokenized/py",
"$dir_pw_module/py",
"$dir_pw_package/py",
diff --git a/pw_log/BUILD.gn b/pw_log/BUILD.gn
index 2b73b97..25b5d0d 100644
--- a/pw_log/BUILD.gn
+++ b/pw_log/BUILD.gn
@@ -78,8 +78,13 @@
pw_proto_library("protos") {
sources = [ "log.proto" ]
prefix = "pw_log/proto"
+ deps = [ "$dir_pw_tokenizer:proto" ]
}
pw_doc_group("docs") {
- sources = [ "docs.rst" ]
+ sources = [
+ "docs.rst",
+ "protobuf.rst",
+ ]
+ inputs = [ "log.proto" ]
}
diff --git a/pw_log/docs.rst b/pw_log/docs.rst
index 8924fe1..9816681 100644
--- a/pw_log/docs.rst
+++ b/pw_log/docs.rst
@@ -10,6 +10,15 @@
1. The facade (this module) which is only a macro interface layer
2. The backend, provided elsewhere, that implements the low level logging
+``pw_log`` also defines a logging protobuf and RPC service for efficiently
+storing and transmitting log messages. See :ref:`module-pw_log-protobuf` for
+details.
+
+.. toctree::
+ :hidden:
+
+ protobuf
+
Usage examples
--------------
Here is a typical usage example, showing setting the module name, and using the
diff --git a/pw_log/log.proto b/pw_log/log.proto
index 2e38f42..5975ad2 100644
--- a/pw_log/log.proto
+++ b/pw_log/log.proto
@@ -1,4 +1,4 @@
-// Copyright 2020 The Pigweed Authors
+// Copyright 2021 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
@@ -12,36 +12,32 @@
// License for the specific language governing permissions and limitations under
// the License.
-syntax = "proto2";
+syntax = "proto3";
package pw.log;
-option java_package = "pw.rpc.proto";
+import "pw_tokenizer/proto/options.proto";
+
option java_outer_classname = "Log";
-// A log with a tokenized message, a string message, or dropped indicator. A
-// message can be one of three types:
+// A log message and metadata. Logs come in a few different forms:
//
// 1. A tokenized log message (recommended for production)
// 2. A non-tokenized log message (good for development)
// 3. A "log missed" tombstone, indicating that some logs were dropped
//
-// Size analysis:
+// Size analysis for tokenized log messages, including each field's proto tag:
//
-// For tokenized log messages in the common case; including the proto tag for
-// the field (so adding the fields gives the total proto message size):
+// - message - 6-12 bytes, depending on number and value of arguments
+// - line_level - 3 bytes; 4 bytes if line > 2048 (uncommon)
+// - timestamp - 3 bytes; assuming delta encoding
+// - thread_name - 6 bytes
//
-// - message_tokenized - 6-12 bytes, depending on # and value of arguments
-// - line_level - 3 bytes; 4 bytes if line > 2048 (uncommon)
-// - timestamp - 3 bytes; assuming delta encoding
-// - thread_tokenized - 3 bytes
-//
-// Total:
+// Adding the fields gives the total proto message size:
//
// 6-12 bytes - log
// 9-15 bytes - log + level + line
// 12-18 bytes - log + level + line + timestamp
-// 15-21 bytes - log + level + line + timestamp + task
//
// An analysis of a project's log token database revealed the following
// distribution of the number of arguments to log messages:
@@ -58,30 +54,35 @@
// enable storing more logs in limited memory. This is why, for example, there
// is no separate "DroppedLog" type, or a "TokenizedLog" and "StringLog", which
// would add at least 2 extra bytes per message
-// Note: Time-related fields will likely support specifying the time as a ratio
-// (period) and an absolute time separate from the current delta fields.
message LogEntry {
- // The tokenized log message. Internally, the format has a 32-bit token
- // followed by the arguments for that message. The unformatted log string
- // corresponding to the token in the token database must follow this format:
+ // The log message, which may be tokenized.
//
- // file|module|message
+ // If tokenized logging is used, implementations may encode metadata in the
+ // log message rather than as separate proto fields. This reduces the size of
+ // the protobuf with no overhead.
//
- // For example:
+ // The standard format for encoding metadata in the log message is defined by
+ // the pw_log_tokenized module. The message and metadata are encoded as
+ // key-value pairs using ■ and ♦ as delimiters. For example:
//
- // ../boot/bluetooth.cc|BOOT|Bluetooth is on the fritz; error code: %d
+ // ■msg♦This is the log message: %d■module♦wifi■file♦../path/to/file.cc
//
- // Note: The level and flags are not included since level and flags are
- // runtime values and so cannot be tokenized.
+ // See http://pigweed.dev/pw_log_tokenized for full details. When
+ // pw_log_tokenized is used, this metadata is automatically included as
+ // described.
//
- // Size analysis:
+ // The level and flags are not included since they may be runtime values and
+ // thus cannot always be tokenized. The line number is not included because
+ // line numbers change frequently and a new token is created for each line.
+ //
+ // Size analysis when tokenized:
//
// tag+wire = 1 byte
// size = 1 byte; payload will almost always be < 127 bytes
// payload = N bytes; typically 4-10 in practice
//
// Total: 2 + N ~= 6-12 bytes
- optional bytes message_tokenized = 1;
+ optional bytes message = 1 [(tokenizer.format) = TOKENIZATION_OPTIONAL];
// Packed log level and line number. Structure:
//
@@ -93,73 +94,87 @@
// omits the line number.
optional uint32 line_level = 2;
- // Some log messages have flags to indicate for example assert or PII. The
- // particular flags are product- and implementation-dependent. When no flags
- // are present, the field is omitted entirely.
+ // Some log messages have flags to indicate attributes such as whether they
+ // are from an assert or if they contain PII. The particular flags are
+ // product- and implementation-dependent.
optional uint32 flags = 3;
- // The task or thread that created the log message.
- //
- // In practice, the task token and tag should be just 3 bytes, since a 14 bit
- // token for the task name should be enough.
- optional uint32 thread_tokenized = 4;
+ // Timestamps are either specified with an absolute timestamp or relative to
+ // the previous log entry.
+ oneof time {
+ // The absolute timestamp in implementation-defined ticks. Applications
+ // determine how to interpret this on the receiving end. In the simplest
+ // case, these ticks might be milliseconds or microseconds since boot.
+ // Applications could also access clock information out-of-band with a
+ // ClockParameters protobuf.
+ int64 timestamp = 4;
- // Timestamp. Note: The units here are TBD and will likely require a separate
- // mechanism to indicate units. This field is likely to change as we figure
- // out the right strategy for timestamps in Pigweed. This is a variable-sized
- // integer to enable scaling this up to a uint64 later on without impacting
- // the wire format.
- optional int64 timestamp = 5;
+ // Time since the last entry in implementation-defined ticks, as for the
+ // timestamp field. This enables delta encoding when batching entries
+ // together.
+ //
+ // Size analysis for this field including tag and varint, assuming 1 kHz
+ // ticks:
+ //
+ // < 127 ms gap == 127 ms == 7 bits == 2 bytes
+ // < 16,000 ms gap == 16 seconds == 14 bits == 3 bytes
+ // < 2,000,000 ms gap == 35 minutes == 21 bits == 4 bytes
+ // < 300,000,000 ms gap == 74 hours == 28 bits == 5 bytes
+ //
+ // Log bursts will thus consume just 2 bytes (tag + up to 127ms delta) for
+ // the timestamp, which is a good improvement over an absolute timestamp.
+ int64 time_since_last_entry = 5;
+ }
- // Time since the last entry. Generally, one of timestamp or this field will
- // be specified. This enables delta encoding when batching entries together.
- //
- // Size analysis for this field including tag and varint:
- //
- // < 127 ms gap == 127 ms == 7 bits == 2 bytes
- // < 16,000 ms gap == 16 seconds == 14 bits == 3 bytes
- // < 2,000,000 ms gap == 35 minutes == 21 bits == 4 bytes
- // < 300,000,000 ms gap == 74 hours == 28 bits == 5 bytes
- //
- // Log bursts will thus consume just 2 bytes (tag + up to 127ms delta) for
- // the timestamp, which is a good improvement over timestamp in many cases.
- // Note: The units of this field are TBD and will likely require a separate
- // mechanism to indicate units. The calculations above assume milliseconds
- // and may change if the units differ.
- optional int64 elapsed_time_since_last_entry = 6;
-
- // Fully formatted textual log message.
- optional string message_string = 16;
-
- // For non-tokenized logging, the file name.
- optional string file = 17;
-
- // String representation of the task that created the log message.
- optional string thread_string = 18;
+ // The following fields are planned but will not be added until they are
+ // needed. Protobuf field numbers over 15 use an extra byte, so these fields
+ // are left out for now to avoid reserving field numbers unnecessarily.
// When the log buffers are full but more logs come in, the logs are counted
// and a special log message is omitted with only counts for the number of
// messages dropped. The timestamp indicates the time that the "missed logs"
// message was inserted into the queue.
//
- // Missed logs messages will only have one of the timestamp fields and these
- // counters specified.
- optional uint32 dropped = 19;
- optional uint32 dropped_warning_or_above = 20;
+ // As an alternative to these fields, implementations may simply send a
+ // message stating the drop count.
+ // optional uint32 dropped = ?;
+ // optional uint32 dropped_warning_or_above = ?;
+
+ // Represents the device from which the log originated. The meaning of this
+ // field is implementation defined
+ // optional uint32 source_id = ?;
+
+ // The task or thread name that created the log message. If the log was not
+ // created on a thread, it should use a name appropriate to that context.
+ // optional bytes thread_name = ?
+ // [(tokenizer.format) = TOKENIZATION_OPTIONAL];
+
+ // The file path where this log was created, if not encoded in the message.
+ // optional bytes file = ? [(tokenizer.format) = TOKENIZATION_OPTIONAL];
+
+ // The PW_LOG_MODULE_NAME for this log message, if it is not encoded in the
+ // message.
+ // optional bytes module_name = ?
+ // [(tokenizer.format) = TOKENIZATION_OPTIONAL];
// Some messages are associated with trace events, which may carry additional
// contextual data. This is a tuple of a data format string which could be
// used by the decoder to identify the data (e.g. printf-style tokens) and the
// data itself in bytes.
- optional string data_format_string = 21;
- optional bytes data = 22;
+ // optional bytes data_format = ?
+ // [(tokenizer.format) = TOKENIZATION_OPTIONAL];
+ // optional bytes data = ?;
}
-message LogRequest {}
+message LogRequest {
+ // This will include fields for configuring log filtering.
+}
+
message LogEntries {
repeated LogEntry entries = 1;
}
+// RPC service for accessing logs.
service Logs {
- rpc Get(LogRequest) returns (stream LogEntries) {}
+ rpc Get(LogRequest) returns (stream LogEntries);
}
diff --git a/pw_log/protobuf.rst b/pw_log/protobuf.rst
new file mode 100644
index 0000000..2ee7ea3
--- /dev/null
+++ b/pw_log/protobuf.rst
@@ -0,0 +1,52 @@
+.. _module-pw_log-protobuf:
+
+-------------------
+The pw_log protobuf
+-------------------
+``pw_log`` defines a protocol buffer for storing and transmitting log messages.
+The protobuf is optimized to be maximally efficient.
+
+Fields
+======
+The ``pw_log`` protobuf is defined in ``log.proto``.
+
+.. literalinclude:: log.proto
+ :language: protobuf
+ :lines: 14-
+
+Timestamps
+----------
+Timestamps are specified in implementation-defined ticks. Ticks could be
+milliseconds, microsends, or any arbitrary duration derived from the device’s
+clock.
+
+For many applications, the timestamp field can be interpreted based on the prior
+knowledge of the system. For example, ``timestamp`` might be known to be
+milliseconds since boot for one core or microseconds since boot for another.
+
+If desired, a project could collect information about clock parameters
+separately from ``pw_log``, and use that to interpret log timestamps. For
+example, they may call an RPC that returns a ``pw_chrono`` ``ClockParamters``
+protobuf. The values from that could be used to interpret timestamp from that
+device.
+
+The ``pw_log`` proto contains two timestamp fields, only one of which may be set
+at a time:
+
+- ``timestamp`` – Absolute time when message was logged.
+- ``time_since_last_entry`` – When the message was logged, relative
+ to the previously encoded log message. This is used when multiple log entries
+ are sent in a single ``LogEntries`` proto. The previous log entry must use the
+ same time source. If logs with multiple time sources are intermingled in a
+ single ``LogEntries`` proto, they must use an absolute timestamp each time the
+ time source changes.
+
+Optionally tokenized text fields
+--------------------------------
+Several fields in the ``pw_log`` proto store text. Examples include ``message``
+and ``thread_name``. These fields may contain either plain or tokenized text,
+either of which is represented as a single bytes field. These fields are marked
+with a protocol buffer option so the ``pw_tokenizer.proto`` module can detect
+and detokenize tokenized fields as appropriate.
+
+See :ref:`module-pw_tokenizer-proto` for details.
diff --git a/pw_log_multisink/log_queue.cc b/pw_log_multisink/log_queue.cc
index 0547449..d9a6242 100644
--- a/pw_log_multisink/log_queue.cc
+++ b/pw_log_multisink/log_queue.cc
@@ -34,24 +34,26 @@
uint32_t flags,
uint32_t level,
uint32_t line,
- uint32_t thread,
+ uint32_t /* thread */,
int64_t timestamp) {
pw::protobuf::NestedEncoder nested_encoder(encode_buffer_);
pw::log::LogEntry::Encoder encoder(&nested_encoder);
Status status;
- encoder.WriteMessageTokenized(message);
+ encoder.WriteMessage(message);
encoder.WriteLineLevel((level & PW_LOG_LEVEL_BITMASK) |
((line << PW_LOG_LEVEL_BITS) & ~PW_LOG_LEVEL_BITMASK));
encoder.WriteFlags(flags);
- encoder.WriteThreadTokenized(thread);
+ // TODO(prashanthsw): Update when the thread_name field is added.
+ // encoder.WriteThreadName(bytes::CopyInOrder(std::endian::little, &thread));
// TODO(prashanthsw): Add support for delta encoding of the timestamp.
encoder.WriteTimestamp(timestamp);
- if (dropped_entries_ > 0) {
- encoder.WriteDropped(dropped_entries_);
- }
+ // TODO(prashanthsw): Handle dropped messages.
+ // if (dropped_entries_ > 0) {
+ // encoder.WriteDropped(dropped_entries_);
+ // }
ConstByteSpan log_entry;
status = nested_encoder.Encode(&log_entry);
diff --git a/pw_log_multisink/log_queue_test.cc b/pw_log_multisink/log_queue_test.cc
index 3d77148..9708576 100644
--- a/pw_log_multisink/log_queue_test.cc
+++ b/pw_log_multisink/log_queue_test.cc
@@ -38,7 +38,7 @@
const uint32_t expected_flags,
const uint32_t expected_level,
const uint32_t expected_line,
- const uint32_t expected_tokenized_thread,
+ const uint32_t /* expected_tokenized_thread */,
const int64_t expected_timestamp) {
ConstByteSpan log_entry_message;
EXPECT_TRUE(log_decoder.Next().ok()); // preamble
@@ -68,15 +68,16 @@
EXPECT_TRUE(entry_decoder.ReadUint32(&flags).ok());
EXPECT_EQ(expected_flags, flags);
- uint32_t tokenized_thread;
- EXPECT_TRUE(entry_decoder.Next().ok()); // tokenized_thread
- EXPECT_EQ(4U, entry_decoder.FieldNumber());
- EXPECT_TRUE(entry_decoder.ReadUint32(&tokenized_thread).ok());
- EXPECT_EQ(expected_tokenized_thread, tokenized_thread);
+ // TODO(prashanthsw): Add thread name when supported.
+ // uint32_t tokenized_thread;
+ // EXPECT_TRUE(entry_decoder.Next().ok()); // tokenized_thread
+ // EXPECT_EQ(4U, entry_decoder.FieldNumber());
+ // EXPECT_TRUE(entry_decoder.ReadUint32(&tokenized_thread).ok());
+ // EXPECT_EQ(expected_tokenized_thread, tokenized_thread);
int64_t timestamp;
EXPECT_TRUE(entry_decoder.Next().ok()); // timestamp
- EXPECT_EQ(5U, entry_decoder.FieldNumber());
+ EXPECT_EQ(4U, entry_decoder.FieldNumber());
EXPECT_TRUE(entry_decoder.ReadInt64(×tamp).ok());
EXPECT_EQ(expected_timestamp, timestamp);
}
diff --git a/pw_log_rpc/logs_rpc.cc b/pw_log_rpc/logs_rpc.cc
index bf5f352..d548c84 100644
--- a/pw_log_rpc/logs_rpc.cc
+++ b/pw_log_rpc/logs_rpc.cc
@@ -21,13 +21,14 @@
namespace pw::log_rpc {
namespace {
-Result<ConstByteSpan> GenerateDroppedEntryMessage(ByteSpan encode_buffer,
- size_t dropped_entries) {
- pw::protobuf::NestedEncoder nested_encoder(encode_buffer);
- pw::log::LogEntry::Encoder encoder(&nested_encoder);
- encoder.WriteDropped(dropped_entries);
- return nested_encoder.Encode();
-}
+// TODO(prashanthsw): Handle dropped messages.
+// Result<ConstByteSpan> GenerateDroppedEntryMessage(ByteSpan encode_buffer,
+// size_t dropped_entries) {
+// pw::protobuf::NestedEncoder nested_encoder(encode_buffer);
+// pw::log::LogEntry::Encoder encoder(&nested_encoder);
+// encoder.WriteDropped(dropped_entries);
+// return nested_encoder.Encode();
+// }
} // namespace
@@ -44,13 +45,14 @@
// If previous calls to flush resulted in dropped entries, generate a
// dropped entry message and write it before further log messages.
- if (dropped_entries_ > 0) {
- ByteSpan payload = response_writer_.PayloadBuffer();
- Result dropped_log = GenerateDroppedEntryMessage(payload, dropped_entries_);
- PW_TRY(dropped_log.status());
- PW_TRY(response_writer_.Write(dropped_log.value()));
- dropped_entries_ = 0;
- }
+ // TODO(prashanthsw): Handle dropped messages.
+ // if (dropped_entries_ > 0) {
+ // ByteSpan payload = response_writer_.PayloadBuffer();
+ // Result dropped_log = GenerateDroppedEntryMessage(payload,
+ // dropped_entries_); PW_TRY(dropped_log.status());
+ // PW_TRY(response_writer_.Write(dropped_log.value()));
+ // dropped_entries_ = 0;
+ // }
// Write logs to the response writer. An important limitation of this
// implementation is that if this RPC call fails, the logs are lost -
diff --git a/pw_log_sink/log_sink.cc b/pw_log_sink/log_sink.cc
index b3ef96b..98bb086 100644
--- a/pw_log_sink/log_sink.cc
+++ b/pw_log_sink/log_sink.cc
@@ -79,8 +79,7 @@
va_start(args, message);
buffer.FormatVaList(message, args);
va_end(args);
- encoder.WriteMessageString(buffer.c_str());
- encoder.WriteThreadString("");
+ encoder.WriteMessage(std::as_bytes(std::span(buffer.view())));
ConstByteSpan log_entry;
Status status = nested_encoder.Encode(&log_entry);
diff --git a/pw_log_sink/log_sink_test.cc b/pw_log_sink/log_sink_test.cc
index f3432c1..2fb2982 100644
--- a/pw_log_sink/log_sink_test.cc
+++ b/pw_log_sink/log_sink_test.cc
@@ -35,8 +35,8 @@
EXPECT_TRUE(log_decoder.Next().ok()); // line_level - 2
EXPECT_TRUE(log_decoder.Next().ok()); // flags - 3
EXPECT_TRUE(log_decoder.Next().ok()); // timestamp - 5
- EXPECT_TRUE(log_decoder.Next().ok()); // message_string - 16
- EXPECT_EQ(16U, log_decoder.FieldNumber());
+ EXPECT_TRUE(log_decoder.Next().ok()); // message - 1
+ EXPECT_EQ(1u, log_decoder.FieldNumber());
EXPECT_TRUE(log_decoder.ReadString(&log_entry_message).ok());
return std::string(log_entry_message);