pw_bloat: Fix broken size reports

Source filter argument from GN template was getting passed even if
empty, causing Bloaty to return an empty size report. Using a json
file to hold all the arguments from the GN build will make it
easier to store and parse arguments in bloat.py compared to
reading over command line arguments.

Change-Id: I67cf1691d304236ac8de78fc607370e80ac0c4ca
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/108220
Reviewed-by: Alexei Frolov <frolv@google.com>
Commit-Queue: Brandon Vu <brandonvu@google.com>
diff --git a/pw_bloat/bloat.gni b/pw_bloat/bloat.gni
index 4eb1d75..dbf6aae 100644
--- a/pw_bloat/bloat.gni
+++ b/pw_bloat/bloat.gni
@@ -52,7 +52,7 @@
 # Example:
 #   pw_size_report("foo_bloat") {
 #      target = ":foo_static"
-#      datasources = [ "symbols" ]
+#      datasources = "symbols,segment_names"
 #      source_filter = "foo"
 #   }
 #
@@ -61,31 +61,48 @@
     assert(defined(invoker.target),
            "Size report must defined a 'target' variable")
     _all_target_dependencies = [ invoker.target ]
-
-    _bloat_script_args = [
-      "--bloaty-config",
-      rebase_path(pw_bloat_BLOATY_CONFIG, root_build_dir),
-      "--out-dir",
-      rebase_path(target_gen_dir, root_build_dir),
-      "--target",
-      target_name,
-      "--single-target",
-      "<TARGET_FILE(${invoker.target})>",
-    ]
+    _binary_args = []
 
     if (defined(invoker.source_filter)) {
-      _bloat_script_args += [
-        "--source-filter",
-        invoker.source_filter,
-      ]
+      curr_source_filter = invoker.source_filter
+    } else {
+      curr_source_filter = ""
     }
 
     if (defined(invoker.data_sources)) {
-      _bloat_script_args += [
-        "--data-sources",
-        string_join(";", invoker.data_sources),
-      ]
+      curr_data_sources = string_split(invoker.data_sources, ",")
+    } else {
+      curr_data_sources = ""
     }
+    _binary_args = [
+      {
+        bloaty_config = rebase_path(pw_bloat_BLOATY_CONFIG, root_build_dir)
+        out_dir = rebase_path(target_gen_dir, root_build_dir)
+        target = "<TARGET_FILE(${invoker.target})>"
+        source_filter = curr_source_filter
+        data_sources = curr_data_sources
+      },
+    ]
+
+    _file_name = "${target_name}_single_binary.json"
+    _args_path = rebase_path("$target_gen_dir/${_file_name}")
+    write_file(_args_path,
+               {
+                 binaries = _binary_args
+                 target_name = target_name
+                 out_dir = rebase_path(target_gen_dir, root_build_dir)
+                 root = rebase_path("//", root_build_dir)
+                 toolchain = current_toolchain
+                 default_toolchain = default_toolchain
+                 cwd = rebase_path(".", root_build_dir)
+               },
+               "json")
+
+    _bloat_script_args = [
+      "--gn-arg-path",
+      _args_path,
+      "--single-report",
+    ]
 
     _doc_rst_output = "$target_gen_dir/${target_name}"
 
@@ -142,6 +159,10 @@
 #   base: The default base executable target to run the diff against. May be
 #     omitted if all binaries provide their own base.
 #   source_filter: Optional global regex to filter data source names in Bloaty.
+#   data_sources: List of datasources from bloaty config file
+#     or built-in datasources. Order of sources determines hierarchical
+#     output. Optional.
+#     github.com/google/bloaty/blob/a1bbc93f5f6f969242046dffd9deb379f6735020/doc/using.md
 #   binaries: List of executables to compare in the diff.
 #     Each binary in the list is a scope containing up to three variables:
 #       label: Descriptive name for the executable. Required.
@@ -149,15 +170,14 @@
 #       base: Optional base diff target. Overrides global base argument.
 #       source_filter: Optional regex to filter data source names.
 #         Overrides global source_filter argument.
-#   data_sources: List of datasources from bloaty config file
-#     or built-in datasources. Order of sources determines hierarchical
-#     output. Optional.
-#     github.com/google/bloaty/blob/a1bbc93f5f6f969242046dffd9deb379f6735020/doc/using.md
-#   title: Optional title string to display with the size report.
+#       data_sources: Optional List of datasources from bloaty config file
+#         Overrides global data_sources argument.
+
 #
 # Example:
 #   pw_size_diff("foo_bloat") {
 #     base = ":foo_base"
+#     data_sources = "segment,symbols"
 #     binaries = [
 #       {
 #         target = ":foo_static"
@@ -166,9 +186,9 @@
 #       {
 #         target = ":foo_dynamic"
 #         label = "Dynamic"
+#         data_sources = "segment_names"
 #       },
 #     ]
-#     title = "static vs. dynamic foo"
 #   }
 #
 template("pw_size_diff") {
@@ -185,10 +205,13 @@
       _all_target_dependencies += [ _global_source_filter ]
     }
 
+    if (defined(invoker.data_sources)) {
+      _global_data_sources = string_split(invoker.data_sources, ",")
+    }
+
+    # TODO(brandonvu): Remove once all downstream projects are updated
     if (defined(invoker.title)) {
-      _title = invoker.title
-    } else {
-      _title = target_name
+      not_needed(invoker, [ "title" ])
     }
 
     # This template creates an action which invokes a Python script to run a
@@ -197,20 +220,16 @@
     # anything is changed. Most of the code below builds the command-line
     # arguments to pass each of the targets into the script.
 
-    _binary_paths = []
-    _binary_labels = []
+    # Process each of the binaries, creating an object and storing all the
+    # needed variables into a json. Json is parsed in bloat.py
+    _binaries_args = []
     _bloaty_configs = []
-    _binary_source_filters = []
 
-    # Process each of the binaries, resolving their full output paths and
-    # building them into a list of command-line arguments to the bloat script.
     foreach(binary, invoker.binaries) {
       assert(defined(binary.label) && defined(binary.target),
              "Size report binaries must define 'label' and 'target' variables")
       _all_target_dependencies += [ binary.target ]
 
-      _binary_path = "<TARGET_FILE(${binary.target})>"
-
       # If the binary defines its own base, use that instead of the global base.
       if (defined(binary.base)) {
         _binary_base = binary.base
@@ -222,48 +241,61 @@
       }
 
       if (defined(binary.source_filter)) {
-        _binary_source_filters += [ binary.source_filter ]
+        _binary_source_filter = binary.source_filter
       } else if (defined(_global_source_filter)) {
-        _binary_source_filters += [ _global_source_filter ]
+        _binary_source_filter = _global_source_filter
       } else {
-        _binary_source_filters += [ "" ]
+        _binary_source_filter = ""
+      }
+
+      if (defined(binary.data_sources)) {
+        _binary_data_sources = string_split(binary.data_sources, ",")
+      } else if (defined(_global_data_sources)) {
+        _binary_data_sources = _global_data_sources
+      } else {
+        _binary_data_sources = ""
       }
 
       # Allow each binary to override the global bloaty config.
       if (defined(binary.bloaty_config)) {
+        _binary_bloaty_config = binary.bloaty_config
         _bloaty_configs += [ binary.bloaty_config ]
       } else {
+        _binary_bloaty_config = pw_bloat_BLOATY_CONFIG
         _bloaty_configs += [ pw_bloat_BLOATY_CONFIG ]
       }
 
-      _binary_path += ";" + "<TARGET_FILE($_binary_base)>"
-
-      _binary_paths += [ _binary_path ]
-      _binary_labels += [ binary.label ]
-    }
-
-    _bloat_script_args = [
-      "--bloaty-config",
-      string_join(";", rebase_path(_bloaty_configs, root_build_dir)),
-      "--out-dir",
-      rebase_path(target_gen_dir, root_build_dir),
-      "--target",
-      target_name,
-      "--title",
-      _title,
-      "--labels",
-      string_join(";", _binary_labels),
-      "--source-filter",
-      string_join(";", _binary_source_filters),
-    ]
-
-    if (defined(invoker.data_sources)) {
-      _bloat_script_args += [
-        "--data-sources",
-        string_join(";", invoker.data_sources),
+      _binaries_args += [
+        {
+          bloaty_config = rebase_path(_binary_bloaty_config, root_build_dir)
+          target = "<TARGET_FILE(${binary.target})>"
+          base = "<TARGET_FILE($_binary_base)>"
+          source_filter = _binary_source_filter
+          label = binary.label
+          data_sources = _binary_data_sources
+        },
       ]
     }
 
+    _file_name = "${target_name}_binaries.json"
+    _diff_path = rebase_path("$target_gen_dir/${_file_name}")
+    write_file(_diff_path,
+               {
+                 binaries = _binaries_args
+                 target_name = target_name
+                 out_dir = rebase_path(target_gen_dir, root_build_dir)
+                 root = rebase_path("//", root_build_dir)
+                 toolchain = current_toolchain
+                 default_toolchain = default_toolchain
+                 cwd = rebase_path(".", root_build_dir)
+               },
+               "json")
+
+    _bloat_script_args = [
+      "--gn-arg-path",
+      _diff_path,
+    ]
+
     # TODO(brandonvu): Remove once all downstream projects are updated
     if (defined(invoker.full_report)) {
       not_needed(invoker, [ "full_report" ])
@@ -305,7 +337,7 @@
           _doc_rst_output,
         ]
         deps = _all_target_dependencies
-        args = _bloat_script_args + _binary_paths
+        args = _bloat_script_args
 
         # Print size reports to stdout when they are generated, if requested.
         capture_output = !pw_bloat_SHOW_SIZE_REPORTS
diff --git a/pw_bloat/docs.rst b/pw_bloat/docs.rst
index 1899d8b..9150c97 100644
--- a/pw_bloat/docs.rst
+++ b/pw_bloat/docs.rst
@@ -26,12 +26,13 @@
 
 **Arguments**
 
-* ``title``: Title for the report card.
 * ``base``: Optional default base target for all listed binaries.
+* ``source_filter``: Optional global regex to filter labels in the diff output.
+* ``data_sources``: Optional global list of datasources from bloaty config file
 * ``binaries``: List of binaries to size diff. Each binary specifies a target,
-  a label for the diff, and optionally a base target that overrides the default
-  base.
-* ``source_filter``: Optional regex to filter labels in the diff output.
+  a label for the diff, and optionally a base target, source filter, and data
+  sources that override the global ones (if specified).
+
 
 .. code::
 
@@ -52,6 +53,7 @@
   pw_size_diff("my_size_report") {
     title = "Hello world program using printf vs. iostream"
     base = ":empty_base"
+    data_sources = "symbols,segments"
     binaries = [
       {
         target = ":hello_world_printf"
@@ -60,6 +62,7 @@
       {
         target = ":hello_world_iostream"
         label = "Hello world using iostream"
+        data_sources = "symbols"
       },
     ]
   }
@@ -90,6 +93,8 @@
 
   pw_size_report("hello_world_iostream_size_report") {
     target = ":hello_iostream"
+    data_sources = "segments,symbols"
+    source_filter = "pw::hello"
   }
 
 Sample Single Binary ASCII Table Generated
diff --git a/pw_bloat/py/BUILD.gn b/pw_bloat/py/BUILD.gn
index 22b6479..eaa035a 100644
--- a/pw_bloat/py/BUILD.gn
+++ b/pw_bloat/py/BUILD.gn
@@ -36,5 +36,8 @@
     "label_test.py",
   ]
   pylintrc = "$dir_pigweed/.pylintrc"
-  python_deps = [ "$dir_pw_cli/py" ]
+  python_deps = [
+    "$dir_pw_build/py",
+    "$dir_pw_cli/py",
+  ]
 }
diff --git a/pw_bloat/py/pw_bloat/bloat.py b/pw_bloat/py/pw_bloat/bloat.py
index 56f7b7c..b37fff5 100755
--- a/pw_bloat/py/pw_bloat/bloat.py
+++ b/pw_bloat/py/pw_bloat/bloat.py
@@ -1,4 +1,4 @@
-# Copyright 2019 The Pigweed Authors
+# Copyright 2022 The Pigweed Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 # use this file except in compliance with the License. You may obtain a copy of
@@ -20,10 +20,13 @@
 import os
 import subprocess
 import sys
-from typing import List, Iterable, Optional
+import json
+from typing import Iterable, Optional
+from pathlib import Path
 
 import pw_cli.log
 
+from pw_build.python_runner import expand_expressions, GnPaths
 from pw_bloat.label import from_bloaty_tsv
 from pw_bloat.label_output import (BloatTableOutput, LineCharset, RstOutput,
                                    AsciiCharset)
@@ -35,58 +38,16 @@
 
 def parse_args() -> argparse.Namespace:
     """Parses the script's arguments."""
-    def delimited_list(delimiter: str, items: Optional[int] = None):
-        def _parser(arg: str):
-            args = arg.split(delimiter)
-
-            if items and len(args) != items:
-                raise argparse.ArgumentTypeError(
-                    'Argument must be a '
-                    f'{delimiter}-delimited list with {items} items: "{arg}"')
-
-            return args
-
-        return _parser
 
     parser = argparse.ArgumentParser(
         'Generate a size report card for binaries')
-    parser.add_argument('--bloaty-config',
-                        type=delimited_list(';'),
-                        required=True,
-                        help='Data source configuration for Bloaty')
-    parser.add_argument('--full',
-                        action='store_true',
-                        help='Display full bloat breakdown by symbol')
-    parser.add_argument('--data-sources',
-                        type=delimited_list(';'),
-                        help='List of sources to scan for')
-    parser.add_argument('--labels',
-                        type=delimited_list(';'),
-                        default='',
-                        help='Labels for output binaries')
-    parser.add_argument('--out-dir',
+    parser.add_argument('--gn-arg-path',
                         type=str,
                         required=True,
-                        help='Directory in which to write output files')
-    parser.add_argument('--target',
-                        type=str,
-                        required=True,
-                        help='Build target name')
-    parser.add_argument('--title',
-                        type=str,
-                        default='pw_bloat',
-                        help='Report title')
-    parser.add_argument('--source-filter',
-                        type=delimited_list(';'),
-                        help='Bloaty data source filter')
-    parser.add_argument('--single-target',
-                        type=str,
-                        help='Single executable target')
-    parser.add_argument('diff_targets',
-                        type=delimited_list(';', 2),
-                        nargs='*',
-                        metavar='DIFF_TARGET',
-                        help='Binary;base pairs to process')
+                        help='File path to json of binaries')
+    parser.add_argument('--single-report',
+                        action="store_true",
+                        help='Determine if calling single size report')
 
     return parser.parse_args()
 
@@ -144,12 +105,13 @@
 
 
 def single_target_output(target: str, bloaty_config: str, target_out_file: str,
-                         out_dir: str, extra_args: Iterable[str]) -> int:
+                         out_dir: str, data_sources: Iterable[str],
+                         extra_args: Iterable[str]) -> int:
 
     try:
         single_output = run_bloaty(target,
                                    bloaty_config,
-                                   data_sources=['segment_names', 'symbols'],
+                                   data_sources=data_sources,
                                    extra_args=extra_args)
 
     except subprocess.CalledProcessError:
@@ -172,61 +134,91 @@
     return 0
 
 
+# TODO(frolv) Copied from python_runner.py
+def _abspath(path: Path) -> Path:
+    """Turns a path into an absolute path, not resolving symlinks."""
+    return Path(os.path.abspath(path))
+
+
+def _translate_file_paths(gn_arg_dict: dict, single_report: bool) -> dict:
+    tool = gn_arg_dict['toolchain'] if gn_arg_dict['toolchain'] != gn_arg_dict[
+        'default_toolchain'] else ''
+    paths = GnPaths(root=_abspath(gn_arg_dict['root']),
+                    build=_abspath(Path.cwd()),
+                    toolchain=tool,
+                    cwd=_abspath(gn_arg_dict['cwd']))
+    for curr_arg in gn_arg_dict['binaries']:
+        curr_arg['target'] = list(expand_expressions(paths,
+                                                     curr_arg['target']))[0]
+        if not single_report:
+            curr_arg['base'] = list(expand_expressions(paths,
+                                                       curr_arg['base']))[0]
+    return gn_arg_dict
+
+
 def main() -> int:
     """Program entry point."""
 
     args = parse_args()
     extra_args = ['--tsv']
+    data_sources = ['segment_names', 'symbols']
+    gn_arg_dict = {}
+    json_file = open(args.gn_arg_path)
+    gn_arg_dict = json.load(json_file)
 
-    if args.single_target is not None:
-        if args.source_filter:
-            extra_args.extend(['--source-filter', args.source_filter])
-        return single_target_output(args.single_target, args.bloaty_config[0],
-                                    args.target, args.out_dir, extra_args)
+    gn_arg_dict = _translate_file_paths(gn_arg_dict, args.single_report)
 
-    base_binaries: List[str] = []
-    diff_binaries: List[str] = []
+    if args.single_report:
+        single_binary_args = gn_arg_dict['binaries'][0]
+        if single_binary_args['source_filter']:
+            extra_args.extend(
+                ['--source-filter', single_binary_args['source_filter']])
+        if single_binary_args['data_sources']:
+            data_sources = single_binary_args['data_sources']
 
-    try:
-        for binary, base in args.diff_targets:
-            diff_binaries.append(binary)
-            base_binaries.append(base)
+        return single_target_output(single_binary_args['target'],
+                                    single_binary_args['bloaty_config'],
+                                    gn_arg_dict['target_name'],
+                                    gn_arg_dict['out_dir'], data_sources,
+                                    extra_args)
 
-    except RuntimeError as err:
-        _LOG.error('%s: %s', sys.argv[0], err)
-        return 1
-
-    data_sources = args.data_sources
-
-    if args.data_sources is None:
-        data_sources = ['segment_names', 'symbols']
+    default_data_sources = ['segment_names', 'symbols']
 
     diff_report = ''
     rst_diff_report = ''
+    for curr_diff_binary in gn_arg_dict['binaries']:
 
-    for i, binary in enumerate(diff_binaries):
-        if args.source_filter is not None and args.source_filter[i]:
-            extra_args.extend(['--source-filter', args.source_filter[i]])
+        curr_extra_args = extra_args.copy()
+        data_sources = default_data_sources
+
+        if curr_diff_binary['source_filter']:
+            curr_extra_args.extend(
+                ['--source-filter', curr_diff_binary['source_filter']])
+
+        if curr_diff_binary['data_sources']:
+            data_sources = curr_diff_binary['data_sources']
+
         try:
-            single_output_base = run_bloaty(base_binaries[i],
-                                            args.bloaty_config[i],
+            single_output_base = run_bloaty(curr_diff_binary["base"],
+                                            curr_diff_binary['bloaty_config'],
                                             data_sources=data_sources,
-                                            extra_args=extra_args)
+                                            extra_args=curr_extra_args)
 
         except subprocess.CalledProcessError:
             _LOG.error('%s: failed to run base size report on %s', sys.argv[0],
-                       base_binaries[i])
+                       curr_diff_binary["base"])
             return 1
 
         try:
-            single_output_target = run_bloaty(binary,
-                                              args.bloaty_config[i],
-                                              data_sources=data_sources,
-                                              extra_args=extra_args)
+            single_output_target = run_bloaty(
+                curr_diff_binary["target"],
+                curr_diff_binary['bloaty_config'],
+                data_sources=data_sources,
+                extra_args=curr_extra_args)
 
         except subprocess.CalledProcessError:
             _LOG.error('%s: failed to run target size report on %s',
-                       sys.argv[0], binary)
+                       sys.argv[0], curr_diff_binary["target"])
             return 1
 
         if not single_output_target or not single_output_base:
@@ -241,15 +233,16 @@
                                         LineCharset).create_table()
 
         print(diff_report)
-        curr_rst_report = RstOutput(diff_dsm, args.labels[i])
-        if i == 0:
+        curr_rst_report = RstOutput(diff_dsm, curr_diff_binary['label'])
+        if rst_diff_report == '':
             rst_diff_report = curr_rst_report.create_table()
         else:
             rst_diff_report += f"{curr_rst_report.add_report_row()}\n"
-        extra_args = ['--tsv']
 
-    write_file(args.target, rst_diff_report, args.out_dir)
-    write_file(f'{args.target}.txt', diff_report, args.out_dir)
+    write_file(gn_arg_dict['target_name'], rst_diff_report,
+               gn_arg_dict['out_dir'])
+    write_file(f"{gn_arg_dict['target_name']}.txt", diff_report,
+               gn_arg_dict['out_dir'])
 
     return 0
 
diff --git a/pw_bloat/py/pw_bloat/label.py b/pw_bloat/py/pw_bloat/label.py
index 60481b5..26653f0 100644
--- a/pw_bloat/py/pw_bloat/label.py
+++ b/pw_bloat/py/pw_bloat/label.py
@@ -251,7 +251,7 @@
 
 
 def from_bloaty_tsv(raw_tsv: Iterable[str]) -> DataSourceMap:
-    """Read in Bloaty CSV output and store in DataSourceMap."""
+    """Read in Bloaty TSV output and store in DataSourceMap."""
     reader = csv.reader(raw_tsv, delimiter='\t')
     top_row = next(reader)
     vmsize_index = top_row.index('vmsize')