samples: add sample and test for pw_tokenizer

Include a sample that tokenizes a simple string to a buffer and then
detokenizes it using a token database generated from the ELF at build time.

Bug: b/236263182
Change-Id: If10be9360e7ef13e552b49e36853af634577dc42
Signed-off-by: Yuval Peress <peress@google.com>
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/zephyr-integration/+/109476
Reviewed-by: Wyatt Hepler <hepler@google.com>
diff --git a/samples/pw_tokenizer/CMakeLists.txt b/samples/pw_tokenizer/CMakeLists.txt
new file mode 100644
index 0000000..e042dfd
--- /dev/null
+++ b/samples/pw_tokenizer/CMakeLists.txt
@@ -0,0 +1,54 @@
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+cmake_minimum_required(VERSION 3.20)
+
+set(BOARD native_posix)
+set(NO_BUILD_TYPE_WARNING ON)
+set(CMAKE_VERBOSE_MAKEFILE ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Use this copy of Pigweed
+get_filename_component(PW_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../pigweed" ABSOLUTE)
+set(ENV{PW_ROOT} ${PW_ROOT})
+
+set(pw_third_party_nanopb_ADD_SUBDIRECTORY ON CACHE BOOL "" FORCE)
+
+list(APPEND ZEPHYR_EXTRA_MODULES ${PW_ROOT})
+
+find_package(Zephyr REQUIRED PATHS $ENV{ZEPHYR_BASE})
+project(tokenizer_demo)
+
+target_sources(app PRIVATE src/main.cc)
+
+# Add custom logic to generate database.bin
+# The application expects it to be in the binary dir.
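+# database.py scans the pre-link ELF (zephyr_pre0.elf) for the tokenized string
+# entries embedded by the build and writes them out as a binary token database.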
+add_custom_target(database_binary
+  DEPENDS
+    ${CMAKE_BINARY_DIR}/database.bin
+)
+add_custom_command(
+    OUTPUT ${CMAKE_BINARY_DIR}/database.bin
+    COMMAND
+      ${PYTHON_EXECUTABLE}
+      ${PW_ROOT}/pw_tokenizer/py/pw_tokenizer/database.py
+      create
+      --type binary
+      -d ${CMAKE_BINARY_DIR}/database.bin
+      ${CMAKE_BINARY_DIR}/zephyr/zephyr_pre0.elf
+    DEPENDS zephyr_pre0
+)
+add_dependencies(${logical_target_for_zephyr_elf} database_binary)
diff --git a/samples/pw_tokenizer/prj.conf b/samples/pw_tokenizer/prj.conf
new file mode 100644
index 0000000..fc095e2
--- /dev/null
+++ b/samples/pw_tokenizer/prj.conf
@@ -0,0 +1,25 @@
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+CONFIG_ASSERT=y
+
+CONFIG_PIGWEED_ASSERT=y
+CONFIG_PIGWEED_TOKENIZER=y
+CONFIG_PIGWEED_DETOKENIZER=y
+
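+# The detokenizer is C++17 code that uses the C++ standard library, so the
+# sample is built with full C++ support enabled.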
+CONFIG_CPLUSPLUS=y
+CONFIG_STD_CPP17=y
+CONFIG_LIB_CPLUSPLUS=y
diff --git a/samples/pw_tokenizer/sample.yaml b/samples/pw_tokenizer/sample.yaml
new file mode 100644
index 0000000..e4ecca8
--- /dev/null
+++ b/samples/pw_tokenizer/sample.yaml
@@ -0,0 +1,14 @@
+sample:
+  name: pw_tokenizer sample
+tests:
+  sample.pw_tokenizer:
+    tags: pw_tokenizer
+    platform_allow: native_posix
+    timeout: 10
+    harness: console
+    harness_config:
+      type: multi_line
+      regex:
+        - "token=\\d+"
+        - "tokenized buffer size is 9 bytes"
+        - "detokenized message size is 17 bytes"
diff --git a/samples/pw_tokenizer/src/main.cc b/samples/pw_tokenizer/src/main.cc
new file mode 100644
index 0000000..f3a61df
--- /dev/null
+++ b/samples/pw_tokenizer/src/main.cc
@@ -0,0 +1,90 @@
+// Copyright 2022 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <iterator>
+#include <pw_assert/assert.h>
+#include <pw_tokenizer/detokenize.h>
+#include <pw_tokenizer/tokenize.h>
+#include <string_view>
+#include <vector>
+#include <zephyr/kernel.h>
+
+static std::vector<uint8_t> ReadWholeFile(const char *path) {
+  // Open the file
+  std::ifstream file(path, std::ios::binary);
+
+  // Stop eating new lines in binary mode
+  file.unsetf(std::ios::skipws);
+
+  // Get the file size
+  std::streampos file_size;
+
+  file.seekg(0, std::ios::end);
+  file_size = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  // Reserve capacity
+  std::vector<uint8_t> data;
+  data.reserve(file_size);
+
+  // Read the data
+  data.insert(data.begin(), std::istream_iterator<uint8_t>(file),
+              std::istream_iterator<uint8_t>());
+
+  return data;
+}
+
+pw::tokenizer::Detokenizer OpenDatabase(const char *path) {
+  std::vector<uint8_t> data = ReadWholeFile(path);
+
+  pw::tokenizer::TokenDatabase database =
+      pw::tokenizer::TokenDatabase::Create(data);
+
+  // This checks if the file contained a valid database.
+  PW_ASSERT(database.ok());
+  return pw::tokenizer::Detokenizer(database);
+}
+
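+// PW_TOKENIZE_STRING hashes the string literal to a fixed 32-bit token at
+// compile time and records the literal in a dedicated ELF section, which is
+// what database.py (see CMakeLists.txt) extracts into database.bin.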
+constexpr uint32_t kHelloWorldToken = PW_TOKENIZE_STRING("Hello tokenized world!");
+
+void main(void) {
+  char expected_string[1024];
+  uint8_t buffer[1024];
+  size_t size_bytes = sizeof(buffer);
+
+  pw::tokenizer::Detokenizer detokenizer = OpenDatabase("database.bin");
+
+  sprintf(expected_string, "token=%u\n", kHelloWorldToken);
+  printk("%s", expected_string);
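+  // Encode the message to the buffer: the output is the 4-byte token for the
+  // format string followed by the varint-encoded argument (9 bytes in total).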
+  PW_TOKENIZE_TO_BUFFER(buffer, &size_bytes, "token=%u\n", kHelloWorldToken);
+
+  printk("tokenized buffer size is %u bytes\n", size_bytes);
+
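+  // Decode the buffer using the database loaded at runtime. BestString()
+  // returns the most likely match in case of token collisions.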
+  auto detokenized_string = detokenizer.Detokenize(buffer, size_bytes);
+  PW_ASSERT(detokenized_string.ok());
+  PW_ASSERT(strcmp(detokenized_string.BestString().c_str(), expected_string) ==
+            0);
+  printk("detokenized message size is %u bytes\n",
+         strlen(detokenized_string.BestString().c_str()));
+}