Add support for split_coverage_post_processing (#2000)

This change introduces `experimental_use_coverage_metadata_files`
(https://github.com/bazelbuild/rules_rust/issues/2082) which is required
to support
[--experimental_split_coverage_postprocessing](https://bazel.build/reference/command-line-reference#flag--experimental_split_coverage_postprocessing).

Changes:
- Implemented coverage collection logic in Rust.
- Added a flag
`--@rules_rust//rust/settings:experimental_use_coverage_metadata_files`
to toggle the changes necessary for supporting
`--experimental_split_coverage_postprocessing`.
- Added regression testing in CI to test
`--experimental_split_coverage_postprocessing`.
diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml
index b3b549a..302944f 100644
--- a/.bazelci/presubmit.yml
+++ b/.bazelci/presubmit.yml
@@ -42,6 +42,10 @@
       ; 1>&2 cat bazel-out/_coverage/_coverage_report.dat \
       ; exit 1 \
       ; }
+split_coverage_postprocessing_shell_commands: &split_coverage_postprocessing_shell_commands
+  - echo "coverage --experimental_fetch_all_coverage_outputs" >> user.bazelrc
+  - echo "coverage --experimental_split_coverage_postprocessing" >> user.bazelrc
+  - echo "build --//rust/settings:experimental_use_coverage_metadata_files" >> user.bazelrc
 tasks:
   ubuntu2004:
     build_targets: *default_linux_targets
@@ -69,6 +73,18 @@
   windows:
     build_targets: *default_windows_targets
     test_targets: *default_windows_targets
+  ubuntu2004_split_coverage_postprocessing:
+    name: Split Coverage Postprocessing
+    platform: ubuntu2004
+    shell_commands: *split_coverage_postprocessing_shell_commands
+    coverage_targets: *default_linux_targets
+    post_shell_commands: *coverage_validation_post_shell_commands
+  macos_split_coverage_postprocessing:
+    name: Split Coverage Postprocessing
+    platform: macos
+    shell_commands: *split_coverage_postprocessing_shell_commands
+    coverage_targets: *default_macos_targets
+    post_shell_commands: *coverage_validation_post_shell_commands
   ubuntu2004_opt:
     name: Opt Mode
     platform: ubuntu2004
diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl
index c096139..1a3c4eb 100644
--- a/rust/private/rust.bzl
+++ b/rust/private/rust.bzl
@@ -480,13 +480,17 @@
         if not toolchain.llvm_profdata:
             fail("toolchain.llvm_profdata is required if toolchain.llvm_cov is set.")
 
-        llvm_cov_path = toolchain.llvm_cov.short_path
-        if llvm_cov_path.startswith("../"):
-            llvm_cov_path = llvm_cov_path[len("../"):]
+        if toolchain._experimental_use_coverage_metadata_files:
+            llvm_cov_path = toolchain.llvm_cov.path
+            llvm_profdata_path = toolchain.llvm_profdata.path
+        else:
+            llvm_cov_path = toolchain.llvm_cov.short_path
+            if llvm_cov_path.startswith("../"):
+                llvm_cov_path = llvm_cov_path[len("../"):]
 
-        llvm_profdata_path = toolchain.llvm_profdata.short_path
-        if llvm_profdata_path.startswith("../"):
-            llvm_profdata_path = llvm_profdata_path[len("../"):]
+            llvm_profdata_path = toolchain.llvm_profdata.short_path
+            if llvm_profdata_path.startswith("../"):
+                llvm_profdata_path = llvm_profdata_path[len("../"):]
 
         env["RUST_LLVM_COV"] = llvm_cov_path
         env["RUST_LLVM_PROFDATA"] = llvm_profdata_path
@@ -742,7 +746,7 @@
 
 _coverage_attrs = {
     "_collect_cc_coverage": attr.label(
-        default = Label("//util:collect_coverage"),
+        default = Label("//util/collect_coverage"),
         executable = True,
         cfg = "exec",
     ),
diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl
index 9f3b0c2..af958a4 100644
--- a/rust/private/rustc.bzl
+++ b/rust/private/rustc.bzl
@@ -997,6 +997,7 @@
         rustc_flags.add("proc_macro")
 
     if toolchain.llvm_cov and ctx.configuration.coverage_enabled:
+        # https://doc.rust-lang.org/rustc/instrument-coverage.html
         rustc_flags.add("--codegen=instrument-coverage")
 
     # Make bin crate data deps available to tests.
@@ -1364,8 +1365,10 @@
     if toolchain.llvm_cov and ctx.configuration.coverage_enabled and crate_info.is_test:
         coverage_runfiles = [toolchain.llvm_cov, toolchain.llvm_profdata]
 
+    experimental_use_coverage_metadata_files = toolchain._experimental_use_coverage_metadata_files
+
     runfiles = ctx.runfiles(
-        files = getattr(ctx.files, "data", []) + coverage_runfiles,
+        files = getattr(ctx.files, "data", []) + ([] if experimental_use_coverage_metadata_files else coverage_runfiles),
         collect_data = True,
     )
     if getattr(ctx.attr, "crate", None):
@@ -1376,18 +1379,29 @@
     # https://github.com/bazelbuild/rules_rust/issues/771
     out_binary = getattr(attr, "out_binary", False)
 
+    executable = crate_info.output if crate_info.type == "bin" or crate_info.is_test or out_binary else None
+
+    instrumented_files_kwargs = {
+        "dependency_attributes": ["deps", "crate"],
+        "extensions": ["rs"],
+        "source_attributes": ["srcs"],
+    }
+
+    if experimental_use_coverage_metadata_files:
+        instrumented_files_kwargs.update({
+            "metadata_files": coverage_runfiles + [executable] if executable else [],
+        })
+
     providers = [
         DefaultInfo(
             # nb. This field is required for cc_library to depend on our output.
             files = depset(outputs),
             runfiles = runfiles,
-            executable = crate_info.output if crate_info.type == "bin" or crate_info.is_test or out_binary else None,
+            executable = executable,
         ),
         coverage_common.instrumented_files_info(
             ctx,
-            dependency_attributes = ["deps", "crate"],
-            extensions = ["rs"],
-            source_attributes = ["srcs"],
+            **instrumented_files_kwargs
         ),
     ]
 
diff --git a/rust/repositories.bzl b/rust/repositories.bzl
index 07be2bd..3306ed0 100644
--- a/rust/repositories.bzl
+++ b/rust/repositories.bzl
@@ -67,11 +67,11 @@
     maybe(
         http_archive,
         name = "bazel_skylib",
+        sha256 = "66ffd9315665bfaafc96b52278f57c7e2dd09f5ede279ea6d39b2be471e7e3aa",
         urls = [
-            "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.0/bazel-skylib-1.2.0.tar.gz",
-            "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.0/bazel-skylib-1.2.0.tar.gz",
+            "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.4.2/bazel-skylib-1.4.2.tar.gz",
+            "https://github.com/bazelbuild/bazel-skylib/releases/download/1.4.2/bazel-skylib-1.4.2.tar.gz",
         ],
-        sha256 = "af87959afe497dc8dfd4c6cb66e1279cb98ccc84284619ebfec27d9c09a903de",
     )
 
     # Make the iOS simulator constraint available, which is referenced in abi_to_constraints()
diff --git a/rust/settings/BUILD.bazel b/rust/settings/BUILD.bazel
index bdb43ec..0cf1253 100644
--- a/rust/settings/BUILD.bazel
+++ b/rust/settings/BUILD.bazel
@@ -52,6 +52,13 @@
     build_setting_default = False,
 )
 
+# A flag to have coverage tooling added as `coverage_common.instrumented_files_info.metadata_files` instead of
+# reporting tools like `llvm-cov` and `llvm-profdata` as runfiles to each test.
+bool_flag(
+    name = "experimental_use_coverage_metadata_files",
+    build_setting_default = False,
+)
+
 bzl_library(
     name = "bzl_lib",
     srcs = glob(["**/*.bzl"]),
diff --git a/rust/toolchain.bzl b/rust/toolchain.bzl
index 6214d67..af34e8f 100644
--- a/rust/toolchain.bzl
+++ b/rust/toolchain.bzl
@@ -640,6 +640,7 @@
         _pipelined_compilation = pipelined_compilation,
         _experimental_use_cc_common_link = experimental_use_cc_common_link,
         _experimental_use_global_allocator = experimental_use_global_allocator,
+        _experimental_use_coverage_metadata_files = ctx.attr._experimental_use_coverage_metadata_files[BuildSettingInfo].value,
         _no_std = no_std,
     )
     return [
@@ -784,6 +785,9 @@
         "_cc_toolchain": attr.label(
             default = Label("@bazel_tools//tools/cpp:current_cc_toolchain"),
         ),
+        "_experimental_use_coverage_metadata_files": attr.label(
+            default = Label("//rust/settings:experimental_use_coverage_metadata_files"),
+        ),
         "_experimental_use_global_allocator": attr.label(
             default = Label("//rust/settings:experimental_use_global_allocator"),
             doc = (
diff --git a/util/BUILD.bazel b/util/BUILD.bazel
index 8502870..1070f05 100644
--- a/util/BUILD.bazel
+++ b/util/BUILD.bazel
@@ -4,8 +4,8 @@
     tags = ["manual"],
 )
 
-filegroup(
+alias(
     name = "collect_coverage",
-    srcs = ["collect_coverage.sh"],
+    actual = "//util/collect_coverage",
     visibility = ["//visibility:public"],
 )
diff --git a/util/collect_coverage.sh b/util/collect_coverage.sh
deleted file mode 100755
index 648de5e..0000000
--- a/util/collect_coverage.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-
-set -euo pipefail
-
-if [[ -n "${VERBOSE_COVERAGE:-}" ]]; then
-  set -x
-fi
-
-if [[ "${RUNFILES_DIR:0:1}" != "/" ]]; then
-  if [[ -n "${ROOT}" ]]; then
-    RUNFILES_DIR="${ROOT}/${RUNFILES_DIR}"
-  fi
-fi
-
-readonly profdata_file=$COVERAGE_DIR/coverage.profdata
-
-"$RUNFILES_DIR/$RUST_LLVM_PROFDATA" \
-  merge \
-  --sparse "$COVERAGE_DIR"/*.profraw \
-  -output "$profdata_file"
-
-"$RUNFILES_DIR/$RUST_LLVM_COV" \
-  export \
-  -format=lcov \
-  -instr-profile "$profdata_file" \
-  -ignore-filename-regex='.*external/.+' \
-  -ignore-filename-regex='/tmp/.+' \
-  -path-equivalence=.,"$ROOT" \
-  "$RUNFILES_DIR/$TEST_WORKSPACE/$TEST_BINARY" \
-  @"$COVERAGE_MANIFEST" \
-  | sed 's#/proc/self/cwd/##' > "$COVERAGE_DIR/rust_coverage.dat"
-
-# Bazel doesn't support LLVM profdata coverage amongst other coverage formats.
-rm "$profdata_file"
diff --git a/util/collect_coverage/BUILD.bazel b/util/collect_coverage/BUILD.bazel
new file mode 100644
index 0000000..c3aa8e1
--- /dev/null
+++ b/util/collect_coverage/BUILD.bazel
@@ -0,0 +1,8 @@
+load("//rust:defs.bzl", "rust_binary")
+
+rust_binary(
+    name = "collect_coverage",
+    srcs = ["collect_coverage.rs"],
+    edition = "2018",
+    visibility = ["//visibility:public"],
+)
diff --git a/util/collect_coverage/collect_coverage.rs b/util/collect_coverage/collect_coverage.rs
new file mode 100644
index 0000000..fcd3c0a
--- /dev/null
+++ b/util/collect_coverage/collect_coverage.rs
@@ -0,0 +1,199 @@
+//! Collects code coverage data for Rust sources after the tests have run.
+//!
+//! By taking advantage of Bazel C++ code coverage collection, this binary is
+//! executed by the existing coverage collection mechanics.
+//!
+//! The raw LLVM profiles (`*.profraw`) found in `$COVERAGE_DIR` are merged with
+//! `llvm-profdata` and exported in lcov format with `llvm-cov`. The resulting
+//! report is written to `$COVERAGE_DIR/coverage.dat`.
+//!
+//! The following environment variables are read:
+//! - COVERAGE_DIR            Directory containing the `*.profraw` files; the
+//!                           report is written here as well.
+//! - ROOT                    Location from where the code coverage collection
+//!                           was invoked.
+//! - RUNFILES_DIR            Location of the test's runfiles. A relative path
+//!                           is resolved against `ROOT`.
+//! - RUST_LLVM_COV           Path to `llvm-cov`, relative to `ROOT` or
+//!                           `RUNFILES_DIR`.
+//! - RUST_LLVM_PROFDATA      Path to `llvm-profdata`, relative to `ROOT` or
+//!                           `RUNFILES_DIR`.
+//! - TEST_WORKSPACE          Joined with `TEST_BINARY` onto `RUNFILES_DIR` to
+//!                           locate the test binary.
+//! - TEST_BINARY             Path of the test binary.
+//! - VERBOSE_COVERAGE        Print debug info from the coverage scripts
+
+use std::env;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process;
+
+/// Prints to stderr, but only when `VERBOSE_COVERAGE` is set.
+macro_rules! log {
+    ($($arg:tt)*) => {
+        if env::var("VERBOSE_COVERAGE").is_ok() {
+            eprintln!($($arg)*);
+        }
+    };
+}
+
+/// Reads the environment variable `name`, panicking with its name on failure.
+fn required_env(name: &str) -> String {
+    env::var(name).unwrap_or_else(|err| panic!("Failed to read {}: {}", name, err))
+}
+
+/// Resolves `path` against `execroot` if the file exists there, otherwise
+/// against `runfiles_dir` (without checking that it exists).
+fn find_metadata_file(execroot: &Path, runfiles_dir: &Path, path: &str) -> PathBuf {
+    let in_execroot = execroot.join(path);
+    if in_execroot.exists() {
+        return in_execroot;
+    }
+
+    log!(
+        "File does not exist in execroot, falling back to runfiles: {}",
+        path
+    );
+
+    runfiles_dir.join(path)
+}
+
+/// Locates the test binary named by `TEST_WORKSPACE` and `TEST_BINARY`.
+///
+/// The runfiles location is preferred. If the binary is not there, it is
+/// looked up under `execroot` instead, below the first three components of
+/// `runfiles_dir` relative to `execroot` (`bazel-out/<configuration>/bin`).
+///
+/// # Panics
+///
+/// Panics if either variable is unset, or if the fallback is needed and
+/// `runfiles_dir` is not inside `execroot`.
+fn find_test_binary(execroot: &Path, runfiles_dir: &Path) -> PathBuf {
+    let test_workspace = required_env("TEST_WORKSPACE");
+    let test_binary = required_env("TEST_BINARY");
+
+    let in_runfiles = runfiles_dir.join(test_workspace).join(&test_binary);
+    if in_runfiles.exists() {
+        return in_runfiles;
+    }
+
+    // Keep only `bazel-out/<configuration>/bin`
+    let bin_dir: PathBuf = runfiles_dir
+        .strip_prefix(execroot)
+        .expect("RUNFILES_DIR should be relative to ROOT")
+        .components()
+        .take(3)
+        .collect();
+
+    let in_bin_dir = execroot.join(bin_dir).join(test_binary);
+
+    log!(
+        "TEST_BINARY is not found in runfiles. Falling back to: {}",
+        in_bin_dir.display()
+    );
+
+    in_bin_dir
+}
+
+/// Merges the `*.profraw` files in `COVERAGE_DIR` and writes an lcov report to
+/// `COVERAGE_DIR/coverage.dat`.
+///
+/// Exits with the tool's exit code (or 1) if `llvm-profdata` or `llvm-cov`
+/// fails.
+fn main() {
+    let coverage_dir = PathBuf::from(required_env("COVERAGE_DIR"));
+    let execroot = PathBuf::from(required_env("ROOT"));
+    let mut runfiles_dir = PathBuf::from(required_env("RUNFILES_DIR"));
+
+    if !runfiles_dir.is_absolute() {
+        runfiles_dir = execroot.join(runfiles_dir);
+    }
+
+    log!("ROOT: {}", execroot.display());
+    log!("RUNFILES_DIR: {}", runfiles_dir.display());
+
+    let coverage_output_file = coverage_dir.join("coverage.dat");
+    let profdata_file = coverage_dir.join("coverage.profdata");
+    let llvm_cov = find_metadata_file(
+        &execroot,
+        &runfiles_dir,
+        &required_env("RUST_LLVM_COV"),
+    );
+    let llvm_profdata = find_metadata_file(
+        &execroot,
+        &runfiles_dir,
+        &required_env("RUST_LLVM_PROFDATA"),
+    );
+    let test_binary = find_test_binary(&execroot, &runfiles_dir);
+    let profraw_files: Vec<PathBuf> = fs::read_dir(&coverage_dir)
+        .expect("Failed to read COVERAGE_DIR")
+        .flatten()
+        .map(|entry| entry.path())
+        .filter(|path| path.extension().map_or(false, |ext| ext == "profraw"))
+        .collect();
+
+    let mut llvm_profdata_cmd = process::Command::new(llvm_profdata);
+    llvm_profdata_cmd
+        .arg("merge")
+        .arg("--sparse")
+        .args(profraw_files)
+        .arg("--output")
+        .arg(&profdata_file);
+
+    log!("Spawning {:#?}", llvm_profdata_cmd);
+    let status = llvm_profdata_cmd
+        .status()
+        .expect("Failed to spawn llvm-profdata process");
+
+    if !status.success() {
+        process::exit(status.code().unwrap_or(1));
+    }
+
+    // `Command` hands each argument to the child verbatim. No shell strips
+    // quotes, so the regexes and the execroot path must not be quoted.
+    let mut llvm_cov_cmd = process::Command::new(llvm_cov);
+    llvm_cov_cmd
+        .arg("export")
+        .arg("-format=lcov")
+        .arg("-instr-profile")
+        .arg(&profdata_file)
+        .arg("-ignore-filename-regex=.*external/.+")
+        .arg("-ignore-filename-regex=/tmp/.+")
+        .arg(format!("-path-equivalence=.,{}", execroot.display()))
+        .arg(test_binary)
+        .stdout(process::Stdio::piped());
+
+    log!("Spawning {:#?}", llvm_cov_cmd);
+    let child = llvm_cov_cmd
+        .spawn()
+        .expect("Failed to spawn llvm-cov process");
+
+    let output = child.wait_with_output().expect("llvm-cov process failed");
+
+    // A failed export must not be written out as if it were a valid report.
+    if !output.status.success() {
+        process::exit(output.status.code().unwrap_or(1));
+    }
+
+    // Parse the child process's stdout to a string now that it's complete.
+    log!("Parsing llvm-cov output");
+    let report_str = std::str::from_utf8(&output.stdout).expect("Failed to parse llvm-cov output");
+
+    // Remove every occurrence of `/proc/self/cwd/` and of the execroot path
+    // from the report.
+    log!("Writing output to {}", coverage_output_file.display());
+    fs::write(
+        coverage_output_file,
+        report_str
+            .replace("/proc/self/cwd/", "")
+            .replace(&execroot.display().to_string(), ""),
+    )
+    .expect("Failed to write the coverage report");
+
+    // Destroy the intermediate binary file so lcov_merger doesn't parse it twice.
+    log!("Cleaning up {}", profdata_file.display());
+    fs::remove_file(profdata_file).expect("Failed to remove the intermediate profdata file");
+
+    log!("Success!");
+}