Add coverage symbolizer library #Centipede

PiperOrigin-RevId: 561764679
diff --git a/centipede/BUILD b/centipede/BUILD
index 52e507c..a955033 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -82,6 +82,20 @@
 #                             C++ libraries
 ################################################################################
 
+cc_library(
+    name = "coverage_symbolizer",  # Human-readable symbols for coverage features, per feature domain.
+    srcs = ["coverage_symbolizer.cc"],
+    hdrs = ["coverage_symbolizer.h"],
+    deps = [
+        ":coverage",  # NOTE(review): the sources include symbol_table.h but no coverage header; confirm this target provides it.
+        ":feature",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+    ],
+)
+
 # This lib must have zero non-trivial dependencies (other than libc).
 cc_library(
     name = "int_utils",
@@ -1287,6 +1301,18 @@
 )
 
 cc_test(
+    name = "coverage_symbolizer_test",  # Tests for DomainSymbolizer and CoverageSymbolizer.
+    srcs = ["coverage_symbolizer_test.cc"],
+    deps = [
+        ":coverage_symbolizer",
+        ":feature",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings",
+        "@com_google_googletest//:gtest_main",  # NOTE(review): the test uses ASSERT_OK, ASSERT_OK_AND_ASSIGN and ::testing::status::StatusIs; confirm a listed dep declares them.
+    ],
+)
+
+cc_test(
     name = "runner_cmp_trace_test",
     srcs = ["runner_cmp_trace_test.cc"],
     deps = [
diff --git a/centipede/coverage_symbolizer.cc b/centipede/coverage_symbolizer.cc
new file mode 100644
index 0000000..9bfc1d8
--- /dev/null
+++ b/centipede/coverage_symbolizer.cc
@@ -0,0 +1,120 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/coverage_symbolizer.h"
+
+#include <stddef.h>
+
+#include <functional>
+#include <string>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "./centipede/feature.h"
+#include "./centipede/symbol_table.h"
+
+namespace centipede {
+
+// Until an Initialize*() call succeeds, `func_` produces a placeholder that
+// names the domain ID and the requested index.
+DomainSymbolizer::DomainSymbolizer(size_t domain_id)
+    : domain_id_(domain_id), initialized_(false) {
+  func_ = [domain_id](size_t idx) -> std::string {
+    return absl::StrFormat("unknown symbol: domain_id=%d, idx=%d", domain_id,
+                           idx);
+  };
+}
+
+// Switches to table-backed symbolization and returns the table for the caller
+// to populate. Returns FailedPrecondition if already initialized.
+absl::StatusOr<SymbolTable *>
+DomainSymbolizer::InitializeByPopulatingSymbolTable() {
+  if (initialized_) {
+    return absl::FailedPreconditionError(absl::StrCat(
+        "Already initialized this domain symbolizer for domain_id=",
+        domain_id_));
+  }
+  initialized_ = true;
+  // NOTE: this lambda captures `this`. A copy or move of this object carries
+  // the same pointer along, i.e. it keeps reading the original object's
+  // `symbols_`.
+  func_ = [this](size_t idx) -> std::string {
+    return symbols_.full_description(idx);
+  };
+  return &symbols_;
+}
+
+// Installs `func` as the symbolization function. Returns FailedPrecondition if
+// already initialized, and InvalidArgument if `func` is empty: storing an
+// empty std::function would make GetSymbolForIndex() throw on first use.
+absl::Status DomainSymbolizer::InitializeWithSymbolizationFunction(
+    const std::function<std::string(size_t idx)> &func) {
+  if (initialized_) {
+    return absl::FailedPreconditionError(absl::StrCat(
+        "Already initialized this domain symbolizer for domain_id=",
+        domain_id_));
+  }
+  if (!func) {
+    return absl::InvalidArgumentError(absl::StrCat(
+        "Empty symbolization function for domain_id=", domain_id_));
+  }
+  initialized_ = true;
+  func_ = func;
+  return absl::OkStatus();
+}
+
+// Delegates to whichever function is currently installed in `func_`.
+std::string DomainSymbolizer::GetSymbolForIndex(size_t idx) const {
+  return func_(idx);
+}
+
+// Creates one uninitialized symbolizer per domain ID below `kLastDomain`'s, so
+// that `symbolizers_` can be indexed directly by domain ID.
+CoverageSymbolizer::CoverageSymbolizer() {
+  const size_t num_domains = feature_domains::kLastDomain.domain_id();
+  symbolizers_.reserve(num_domains);
+  for (size_t i = 0; i < num_domains; ++i) {
+    symbolizers_.emplace_back(/*domain_id=*/i);
+  }
+}
+
+// Returns the symbolizer stored for `domain`, or InvalidArgument if no
+// symbolizer exists for its ID.
+absl::StatusOr<DomainSymbolizer *> CoverageSymbolizer::GetSymbolizerForDomain(
+    feature_domains::Domain domain) {
+  const size_t domain_id = domain.domain_id();
+  if (domain_id >= symbolizers_.size()) {
+    return absl::InvalidArgumentError(
+        absl::StrCat("Provided invalid domain_id: ", domain_id));
+  }
+  return &symbolizers_[domain_id];
+}
+
+// Splits `feature` into a domain ID and an index within that domain, then asks
+// the domain's symbolizer for a description. If no symbolizer exists for the
+// domain ID, returns the placeholder of a fresh, uninitialized symbolizer
+// instead of indexing `symbolizers_` out of bounds.
+std::string CoverageSymbolizer::GetSymbolForFeature(feature_t feature) const {
+  const size_t domain_id = feature_domains::Domain::FeatureToDomainId(feature);
+  const size_t domain_index =
+      feature_domains::Domain::FeatureToDomainIndex(feature);
+  if (domain_id >= symbolizers_.size()) {
+    return DomainSymbolizer(domain_id).GetSymbolForIndex(domain_index);
+  }
+  return symbolizers_[domain_id].GetSymbolForIndex(domain_index);
+}
+
+}  // namespace centipede
diff --git a/centipede/coverage_symbolizer.h b/centipede/coverage_symbolizer.h
new file mode 100644
index 0000000..b109322
--- /dev/null
+++ b/centipede/coverage_symbolizer.h
@@ -0,0 +1,86 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZTEST_CENTIPEDE_COVERAGE_SYMBOLIZER_H_
+#define FUZZTEST_CENTIPEDE_COVERAGE_SYMBOLIZER_H_
+
+#include <stddef.h>
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "./centipede/feature.h"
+#include "./centipede/symbol_table.h"
+
+namespace centipede {
+
+// Symbolizes the coverage features that belong to a single feature domain.
+// Note: Not thread-safe.
+class DomainSymbolizer {
+ public:
+  // Creates an uninitialized symbolizer for the domain with ID `domain_id`.
+  explicit DomainSymbolizer(size_t domain_id);
+
+  // Marks this symbolizer initialized and returns its symbol table for the
+  // caller to populate. Returns FailedPrecondition if already initialized.
+  absl::StatusOr<SymbolTable *> InitializeByPopulatingSymbolTable();
+  // Registers `func` as the symbolization function: given an index into the
+  // domain, it should return the description of the feature at that index.
+  // Returns FailedPrecondition if already initialized.
+  absl::Status InitializeWithSymbolizationFunction(
+      const std::function<std::string(size_t idx)> &func);
+
+  // Returns a description of the feature at index `idx` in the domain. While
+  // uninitialized, returns "unknown symbol: domain_id=<id>, idx=<idx>".
+  std::string GetSymbolForIndex(size_t idx) const;
+
+ private:
+  // Table handed out by InitializeByPopulatingSymbolTable(). Left unpopulated
+  // when a symbolization function is registered instead.
+  SymbolTable symbols_;
+  // Symbolizes the feature at index `idx`. When initialized by populating
+  // `symbols_`, this is a lambda capturing `this`, so a copy (or move) of this
+  // object would keep reading the original object's `symbols_`.
+  std::function<std::string(size_t idx)> func_;
+  // ID of the domain this object symbolizes; appears in its messages.
+  size_t domain_id_;
+  // Set by the first successful Initialize*() call; makes later ones fail.
+  bool initialized_;
+};
+
+// Symbolizes features of any domain by delegating to per-domain symbolizers.
+// Note: Not thread-safe.
+class CoverageSymbolizer {
+ public:
+  CoverageSymbolizer();
+
+  // Returns the symbolizer for `domain`; InvalidArgument if its ID is invalid.
+  absl::StatusOr<DomainSymbolizer *> GetSymbolizerForDomain(
+      feature_domains::Domain domain);
+
+  // Returns the symbol for `feature`. If the symbolizer of its domain is
+  // uninitialized, this is "unknown symbol: domain_id=<id>, idx=<idx>".
+  std::string GetSymbolForFeature(feature_t feature) const;
+
+ private:
+  // One symbolizer per valid domain, indexed by domain ID.
+  std::vector<DomainSymbolizer> symbolizers_;
+};
+
+}  // namespace centipede
+
+#endif  // FUZZTEST_CENTIPEDE_COVERAGE_SYMBOLIZER_H_
diff --git a/centipede/coverage_symbolizer_test.cc b/centipede/coverage_symbolizer_test.cc
new file mode 100644
index 0000000..ddcc307
--- /dev/null
+++ b/centipede/coverage_symbolizer_test.cc
@@ -0,0 +1,139 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/coverage_symbolizer.h"
+
+#include <stddef.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "./centipede/feature.h"
+
+namespace centipede {
+namespace {
+
+constexpr size_t kUnusedDomainId = 13;
+
+TEST(DomainSymbolizerTest, InitializeByPopulatingSymbolTable) {
+  DomainSymbolizer under_test(kUnusedDomainId);
+  ASSERT_OK_AND_ASSIGN(auto *table,
+                       under_test.InitializeByPopulatingSymbolTable());
+  table->AddEntry("func_a", "file_line_col:1");
+  table->AddEntry("func_b", "file_line_col:2");
+  table->AddEntry("func_c", "file_line_col:3");
+  // Query the entries in a different order than they were added.
+  EXPECT_EQ(under_test.GetSymbolForIndex(0), "func_a file_line_col:1");
+  EXPECT_EQ(under_test.GetSymbolForIndex(2), "func_c file_line_col:3");
+  EXPECT_EQ(under_test.GetSymbolForIndex(1), "func_b file_line_col:2");
+}
+
+TEST(DomainSymbolizerTest, InitializeWithSymbolizationFunction) {
+  DomainSymbolizer under_test(kUnusedDomainId);
+  const auto add_llama = [](size_t i) { return absl::StrCat(i, "_llama"); };
+  ASSERT_OK(under_test.InitializeWithSymbolizationFunction(add_llama));
+  // Every lookup must now be answered by the registered function.
+  EXPECT_EQ(under_test.GetSymbolForIndex(0), "0_llama");
+  EXPECT_EQ(under_test.GetSymbolForIndex(2), "2_llama");
+  EXPECT_EQ(under_test.GetSymbolForIndex(1), "1_llama");
+}
+
+TEST(DomainSymbolizerTest, CannotDoubleInitialize) {
+  DomainSymbolizer table_then_func(kUnusedDomainId);
+  ASSERT_OK(table_then_func.InitializeByPopulatingSymbolTable());
+  EXPECT_THAT(
+      table_then_func.InitializeWithSymbolizationFunction(
+          [](size_t) { return "never_used"; }),
+      ::testing::status::StatusIs(absl::StatusCode::kFailedPrecondition));
+  // Same failure when the two initializers run in the opposite order.
+  DomainSymbolizer func_then_table(kUnusedDomainId);
+  ASSERT_OK(func_then_table.InitializeWithSymbolizationFunction(
+      [](size_t) { return "never_used"; }));
+  EXPECT_THAT(
+      func_then_table.InitializeByPopulatingSymbolTable(),
+      ::testing::status::StatusIs(absl::StatusCode::kFailedPrecondition));
+  // ... and when the same initializer is invoked twice.
+  DomainSymbolizer table_twice(kUnusedDomainId);
+  ASSERT_OK(table_twice.InitializeByPopulatingSymbolTable());
+  EXPECT_THAT(
+      table_twice.InitializeByPopulatingSymbolTable(),
+      ::testing::status::StatusIs(absl::StatusCode::kFailedPrecondition));
+}
+
+TEST(DomainSymbolizerTest, UnknownSymbolIfUninitialized) {
+  const DomainSymbolizer never_initialized(/*domain_id=*/12);
+  EXPECT_EQ(never_initialized.GetSymbolForIndex(10006),
+            "unknown symbol: domain_id=12, idx=10006");
+}
+
+TEST(CoverageSymbolizerTest, GetSymbolizerForDomain) {
+  CoverageSymbolizer coverage;
+  ASSERT_OK_AND_ASSIGN(auto *for_pcs,
+                       coverage.GetSymbolizerForDomain(feature_domains::kPCs));
+  ASSERT_OK_AND_ASSIGN(auto *for_bounded_path,
+                       coverage.GetSymbolizerForDomain(
+                           feature_domains::kBoundedPath));
+  EXPECT_NE(for_pcs, for_bounded_path);
+  // Asking for the same domain again must return the very same object.
+  ASSERT_OK_AND_ASSIGN(
+      auto *pcs_again,
+      coverage.GetSymbolizerForDomain(feature_domains::kPCs));
+  EXPECT_EQ(for_pcs, pcs_again);
+}
+
+TEST(CoverageSymbolizerTest, CannotGetSymbolizerForInvalidDomain) {
+  CoverageSymbolizer coverage;
+  EXPECT_THAT(coverage.GetSymbolizerForDomain(feature_domains::Domain(777)),
+              ::testing::status::StatusIs(absl::StatusCode::kInvalidArgument));
+  EXPECT_THAT(coverage.GetSymbolizerForDomain(feature_domains::kLastDomain),
+              ::testing::status::StatusIs(absl::StatusCode::kInvalidArgument));
+}
+
+TEST(CoverageSymbolizerTest, UnknownSymbolForUninitializedDomains) {
+  const CoverageSymbolizer coverage;  // No domain gets initialized.
+  const feature_t pc_feature = feature_domains::kPCs.ConvertToMe(7);
+  EXPECT_EQ(coverage.GetSymbolForFeature(pc_feature),
+            "unknown symbol: domain_id=1, idx=7");
+  // Same expectation for a feature from a second domain.
+  const feature_t counter_feature =
+      feature_domains::k8bitCounters.ConvertToMe(2);
+  EXPECT_EQ(coverage.GetSymbolForFeature(counter_feature),
+            "unknown symbol: domain_id=2, idx=2");
+}
+
+TEST(CoverageSymbolizerTest, GetSymbolForFeature) {
+  CoverageSymbolizer coverage;
+  ASSERT_OK_AND_ASSIGN(auto *for_pcs,
+                       coverage.GetSymbolizerForDomain(feature_domains::kPCs));
+  ASSERT_OK(for_pcs->InitializeWithSymbolizationFunction(
+      [](size_t i) { return absl::StrCat("pc_", i); }));
+  ASSERT_OK_AND_ASSIGN(auto *for_bounded_path,
+                       coverage.GetSymbolizerForDomain(
+                           feature_domains::kBoundedPath));
+  ASSERT_OK(for_bounded_path->InitializeWithSymbolizationFunction(
+      [](size_t i) { return absl::StrCat("bounded_path_", i); }));
+  // The same in-domain index must be routed to each domain's own function.
+  const feature_t pc_feature = feature_domains::kPCs.ConvertToMe(7);
+  EXPECT_EQ(coverage.GetSymbolForFeature(pc_feature), "pc_7");
+  const feature_t bounded_path_feature =
+      feature_domains::kBoundedPath.ConvertToMe(7);
+  EXPECT_EQ(coverage.GetSymbolForFeature(bounded_path_feature),
+            "bounded_path_7");
+}
+
+}  // namespace
+}  // namespace centipede
diff --git a/centipede/feature.h b/centipede/feature.h
index 7c61dec..10072df 100644
--- a/centipede/feature.h
+++ b/centipede/feature.h
@@ -94,6 +94,11 @@
     return feature / kDomainSize;
   }
 
+  // Returns `feature`'s index within its domain: the remainder mod kDomainSize.
+  static size_t FeatureToDomainIndex(feature_t feature) {
+    return feature % kDomainSize;
+  }
+
  private:
   const size_t domain_id_;
 };