blob: 6b992c8f52b0940f8604883f56c6d377f5805533 [file] [log] [blame] [edit]
#!/usr/bin/env python3
"""Calculate average sizes from bloaty CSV or TinyUSB metrics JSON outputs."""
import argparse
import csv
import glob
import io
import json
import os
import sys
from collections import defaultdict
def expand_files(file_patterns):
    """Expand file patterns (globs) to list of files.

    Args:
        file_patterns: List of file paths or glob patterns

    Returns:
        List of expanded file paths. Plain paths are passed through unchanged
        (their existence is not checked here). Glob matches are returned in
        sorted order so the result does not depend on directory listing order.
        A ``*``/``?`` pattern that matches nothing contributes no entries.
    """
    expanded = []
    for pattern in file_patterns:
        if '*' in pattern or '?' in pattern:
            # glob.glob() returns matches in arbitrary order; sort for
            # reproducible output.
            expanded.extend(sorted(glob.glob(pattern)))
        elif '[' in pattern and not os.path.exists(pattern):
            # Character-class pattern such as "build_[ab].csv". When nothing
            # matches, keep the literal so the caller can still report it.
            expanded.extend(sorted(glob.glob(pattern)) or [pattern])
        else:
            expanded.append(pattern)
    return expanded
def parse_bloaty_csv(csv_text, filters=None):
    """Parse bloaty CSV text and return normalized JSON data structure.

    Args:
        csv_text: CSV text with a header row. The compile unit is read from the
            "compileunits", "compileunit" or "path" column, the size from
            "vmsize", and optionally the symbol from "symbols" and the section
            from "sections"/"section".
        filters: Optional list of substrings; when non-empty, only compile
            units containing at least one of them are kept.

    Returns:
        Dict with "files" (one entry per compile unit holding "file", "path",
        "size", "symbols" sorted by descending size, and "sections") and
        "TOTAL" (sum of all kept sizes).
    """
    filters = filters or []
    reader = csv.DictReader(io.StringIO(csv_text))
    size_by_unit = defaultdict(int)
    symbols_by_unit = defaultdict(lambda: defaultdict(int))
    sections_by_unit = defaultdict(lambda: defaultdict(int))

    for row in reader:
        compile_unit = row.get("compileunits") or row.get("compileunit") or row.get("path")
        if compile_unit is None:
            continue
        # A "TOTAL" summary row is not a real compile unit.
        if str(compile_unit).upper() == "TOTAL":
            continue
        if filters and not any(filt in compile_unit for filt in filters):
            continue
        try:
            vmsize = int(row.get("vmsize", 0))
        except (TypeError, ValueError):
            # ValueError: non-numeric text. TypeError: DictReader fills the
            # missing columns of a short row with None.
            continue

        size_by_unit[compile_unit] += vmsize
        symbol_name = row.get("symbols", "")
        if symbol_name:
            symbols_by_unit[compile_unit][symbol_name] += vmsize
        section_name = row.get("sections") or row.get("section")
        if section_name and vmsize:
            sections_by_unit[compile_unit][section_name] += vmsize

    files = []
    for unit_path, total_size in size_by_unit.items():
        symbols = [
            {"name": sym, "size": sz}
            for sym, sz in sorted(symbols_by_unit[unit_path].items(), key=lambda x: x[1], reverse=True)
        ]
        sections = {sec: sz for sec, sz in sections_by_unit[unit_path].items() if sz}
        files.append(
            {
                "file": os.path.basename(unit_path) or unit_path,
                "path": unit_path,
                "size": total_size,
                "symbols": symbols,
                "sections": sections,
            }
        )

    total_all = sum(size_by_unit.values())
    return {"files": files, "TOTAL": total_all}
def combine_files(input_files, filters=None):
    """Load several metrics inputs (bloaty CSV or metrics JSON) into one data set.

    Args:
        input_files: Iterable of input paths. Missing paths, unsupported
            extensions and inputs that fail to load are reported on stderr
            and skipped.
        filters: Optional list of path substrings; when non-empty, only file
            entries whose path contains one of them are kept.

    Returns:
        Dict with "file_list" (paths that were loaded) and "data" (the loaded
        structure for each of those paths, in the same order).
    """
    filters = filters or []
    combined = {"file_list": [], "data": []}

    for fin in input_files:
        if not os.path.exists(fin):
            print(f"Warning: {fin} not found, skipping", file=sys.stderr)
            continue

        try:
            if fin.endswith(".json"):
                with open(fin, "r", encoding="utf-8") as handle:
                    json_data = json.load(handle)
                if filters:
                    kept = []
                    for entry in json_data.get("files", []):
                        entry_path = entry.get("path")
                        if entry_path and any(filt in entry_path for filt in filters):
                            kept.append(entry)
                    json_data["files"] = kept
            elif fin.endswith(".csv"):
                with open(fin, "r", encoding="utf-8") as handle:
                    csv_text = handle.read()
                json_data = parse_bloaty_csv(csv_text, filters)
            else:
                if fin.endswith(".elf"):
                    print(f"Warning: {fin} is an ELF; please run bloaty with --csv output first. Skipping.",
                          file=sys.stderr)
                else:
                    print(f"Warning: {fin} is not a supported CSV or JSON metrics input. Skipping.",
                          file=sys.stderr)
                continue

            # A summary row named TOTAL is not a real file entry; remove it.
            real_entries = []
            for entry in json_data.get("files", []):
                if str(entry.get("file", "")).upper() != "TOTAL":
                    real_entries.append(entry)
            json_data["files"] = real_entries

            combined["file_list"].append(fin)
            combined["data"].append(json_data)
        except Exception as e:  # pragma: no cover - defensive
            print(f"Warning: Failed to analyze {fin}: {e}", file=sys.stderr)
            continue

    return combined
def compute_avg(all_json_data):
    """Average per-file, per-symbol and per-section sizes over all loaded inputs.

    Entries are merged by their "file" value. Each average is taken over the
    inputs in which that file (or symbol, or section) appears, then rounded.

    Args:
        all_json_data: Dictionary with file_list and data from combine_files()

    Returns:
        None when there is no data; otherwise a dictionary with "file_list"
        and "files", where every file entry carries "file", "path" (from the
        first occurrence), "size", "symbols" (descending size, each with
        "percent" of the file size), "sections" and "percent" of the total.
    """
    datasets = all_json_data["data"]
    if not datasets:
        return None

    def _mean(values):
        return round(sum(values) / len(values))

    # file name -> collected sizes, first seen path, per-symbol and per-section sizes
    collected = {}
    for dataset in datasets:
        for entry in dataset.get("files", []):
            fname = entry["file"]
            bucket = collected.get(fname)
            if bucket is None:
                bucket = {
                    "sizes": [],
                    "path": entry.get("path"),
                    "symbols": defaultdict(list),
                    "sections": defaultdict(list),
                }
                collected[fname] = bucket
            bucket["sizes"].append(entry.get("size", 0))
            for sym in entry.get("symbols", []):
                sym_name = sym.get("name")
                if sym_name is not None:
                    bucket["symbols"][sym_name].append(sym.get("size", 0))
            for sec_name, sec_size in (entry.get("sections") or {}).items():
                bucket["sections"][sec_name].append(sec_size)

    files_average = []
    for fname, bucket in collected.items():
        symbols_avg = [
            {"name": sym_name, "size": _mean(sizes)}
            for sym_name, sizes in bucket["symbols"].items()
            if sizes
        ]
        symbols_avg.sort(key=lambda item: item["size"], reverse=True)

        sections_avg = {}
        for sec_name, sizes in bucket["sections"].items():
            if sizes:
                sections_avg[sec_name] = _mean(sizes)

        files_average.append(
            {
                "file": fname,
                "path": bucket["path"],
                "size": _mean(bucket["sizes"]) if bucket["sizes"] else 0,
                "symbols": symbols_avg,
                "sections": sections_avg,
            }
        )

    # Fall back to 1 so an all-zero data set does not divide by zero.
    total_size = sum(item["size"] for item in files_average) or 1
    for item in files_average:
        item["percent"] = (item["size"] / total_size) * 100
        file_size = item["size"]
        for sym in item["symbols"]:
            if file_size:
                sym["percent"] = (sym["size"] / file_size) * 100
            else:
                sym["percent"] = 0

    return {
        "file_list": all_json_data["file_list"],
        "files": files_average,
    }
def compare_files(base_file, new_file, filters=None):
    """Compare two CSV or JSON inputs and generate difference report.

    Args:
        base_file: Path of the baseline metrics input.
        new_file: Path of the new metrics input.
        filters: Optional list of path substrings forwarded to combine_files().

    Returns:
        None when either input yields no data. Otherwise a dict with
        "base_file", "new_file", "total" ({"base", "new", "diff"}) and "files":
        a list ordered by file name whose entries hold "file", "size",
        "symbols" (largest absolute diff first, ties in name order) and
        "sections", each expressed as base/new/diff values.
    """
    filters = filters or []
    base_avg = compute_avg(combine_files([base_file], filters))
    new_avg = compute_avg(combine_files([new_file], filters))
    if not base_avg or not new_avg:
        return None

    base_files = {f["file"]: f for f in base_avg["files"]}
    new_files = {f["file"]: f for f in new_avg["files"]}

    comparison_files = []
    for fname in sorted(set(base_files) | set(new_files)):
        # A file present on only one side compares against an empty entry.
        b = base_files.get(fname, {})
        n = new_files.get(fname, {})
        b_size = b.get("size", 0)
        n_size = n.get("size", 0)
        base_sections = b.get("sections") or {}
        new_sections = n.get("sections") or {}

        # Symbol diffs
        b_syms = {s["name"]: s for s in b.get("symbols", [])}
        n_syms = {s["name"]: s for s in n.get("symbols", [])}
        symbols = []
        # Walk the names in a fixed order: set iteration order varies between
        # runs, and the stable sort below would otherwise leave symbols with
        # equal diffs in a different order each time.
        for sym in sorted(set(b_syms) | set(n_syms), key=str):
            sb = b_syms.get(sym, {}).get("size", 0)
            sn = n_syms.get(sym, {}).get("size", 0)
            symbols.append({"name": sym, "base": sb, "new": sn, "diff": sn - sb})
        symbols.sort(key=lambda x: abs(x["diff"]), reverse=True)

        comparison_files.append({
            "file": fname,
            "size": {"base": b_size, "new": n_size, "diff": n_size - b_size},
            "symbols": symbols,
            "sections": {
                name: {
                    "base": base_sections.get(name, 0),
                    "new": new_sections.get(name, 0),
                    "diff": new_sections.get(name, 0) - base_sections.get(name, 0),
                }
                for name in sorted(set(base_sections) | set(new_sections))
            },
        })

    base_total = sum(f["size"] for f in base_avg["files"])
    new_total = sum(f["size"] for f in new_avg["files"])
    total = {
        "base": base_total,
        "new": new_total,
        "diff": new_total - base_total,
    }

    return {
        "base_file": base_file,
        "new_file": new_file,
        "total": total,
        "files": comparison_files,
    }
def get_sort_key(sort_order):
    """Get sort key function based on sort order.

    Args:
        sort_order: One of 'size', 'size-', 'size+', 'name', 'name-', 'name+'.
            'size' is an alias of 'size-' (descending size) and 'name' of
            'name+' (ascending name). Any other value is treated as 'name+'.

    Returns:
        Tuple of (key_func, reverse)
    """
    def _size_val(entry):
        return entry.get('size', 0)

    def _name_val(entry):
        return entry.get('file', '')

    # Bare 'size' used to fall through to the name branch.
    if sort_order in ('size', 'size-'):
        return _size_val, True
    if sort_order == 'size+':
        return _size_val, False
    if sort_order == 'name-':
        return _name_val, True
    return _name_val, False  # 'name', 'name+' and anything unrecognised
def format_diff(base, new, diff):
    """Render a base/new pair as text, adding the delta and percentage when meaningful.

    Args:
        base: Baseline value.
        new: New value.
        diff: Difference between the two (new - base at the call site).

    Returns:
        Only the new value when nothing changed; "base ➙ new" when either side
        is zero (no percentage is computed); otherwise "base ➙ new" followed by
        the signed difference and signed percentage relative to base.
    """
    if diff == 0:
        return f"{new}"

    transition = f"{base} ➙ {new}"
    if base == 0 or new == 0:
        return transition

    sign = ""
    if diff > 0:
        sign = "+"
    percent = (diff / base) * 100
    return f"{transition} ({sign}{diff}, {sign}{percent:.1f}%)"
def write_json_output(json_data, path):
    """Serialize json_data to path as UTF-8 JSON with a two-space indent.

    Args:
        json_data: JSON-serializable object to write.
        path: Destination file path; an existing file is overwritten.
    """
    handle = open(path, "w", encoding="utf-8")
    with handle:
        json.dump(json_data, handle, indent=2)
def render_combine_table(json_data, sort_order='name+'):
    """Render averaged sizes as markdown table lines (no title).

    Args:
        json_data: Dict with a "files" list; each entry may provide "file",
            "size", "percent" and "sections". An optional truthy "TOTAL"
            overrides the summed size shown in the TOTAL row.
        sort_order: Sort order passed to get_sort_key().

    Returns:
        List of lines: header, separator, one row per file and a TOTAL row,
        or ["No entries."] when there are no files. One column per section
        name is inserted between the file and size columns.
    """
    files = json_data.get("files", [])
    if not files:
        return ["No entries."]

    key_func, reverse = get_sort_key(sort_order)
    files_sorted = sorted(files, key=key_func, reverse=reverse)
    # Normalise once so a missing or None "sections" value is handled the
    # same way everywhere below.
    sections_per_file = [f.get("sections") or {} for f in files_sorted]

    total_size = json_data.get("TOTAL") or sum(f.get("size", 0) for f in files_sorted)

    def pct_text(f):
        # Prefer the precomputed share; otherwise derive it from the total.
        pct = f.get("percent")
        if pct is None:
            pct = f.get("size", 0) / total_size * 100 if total_size else 0
        return f"{pct:.1f}%"

    pct_width = 6
    size_width = max(len("size"), *(len(str(f.get("size", 0))) for f in files_sorted), len(str(total_size)))
    file_width = max(len("File"), *(len(f.get("file", "")) for f in files_sorted), len("TOTAL"))

    # Build section totals on the fly from file data
    sections_global = defaultdict(int)
    for sections_map in sections_per_file:
        for name, size in sections_map.items():
            sections_global[name] += size

    # Display sections in reverse alphabetical order for stable column layout
    section_names = sorted(sections_global.keys(), reverse=True)
    section_widths = {}
    for name in section_names:
        max_val = max((sections_map.get(name, 0) for sections_map in sections_per_file), default=0)
        # Include the TOTAL row's value so its cell fits the column too.
        section_widths[name] = max(len(name), len(str(max_val)), len(str(sections_global[name])), 1)

    # With no section columns the loops below add nothing, giving the plain
    # three-column layout.
    header_parts = [f"| {'File':<{file_width}} |"]
    sep_parts = [f"| :{'-' * (file_width - 1)} |"]
    for name in section_names:
        header_parts.append(f" {name:>{section_widths[name]}} |")
        sep_parts.append(f" {'-' * (section_widths[name] - 1)}: |")
    header_parts.append(f" {'size':>{size_width}} | {'%':>{pct_width}} |")
    sep_parts.append(f" {'-' * (size_width - 1)}: | {'-' * (pct_width - 1)}: |")

    lines = ["".join(header_parts), "".join(sep_parts)]

    for f, sections_map in zip(files_sorted, sections_per_file):
        parts = [f"| {f.get('file', ''):<{file_width}} |"]
        for name in section_names:
            parts.append(f" {sections_map.get(name, 0):>{section_widths[name]}} |")
        parts.append(f" {f.get('size', 0):>{size_width}} | {pct_text(f):>{pct_width}} |")
        lines.append("".join(parts))

    total_parts = [f"| {'TOTAL':<{file_width}} |"]
    for name in section_names:
        total_parts.append(f" {sections_global.get(name, 0):>{section_widths[name]}} |")
    total_parts.append(f" {total_size:>{size_width}} | {'100.0%':>{pct_width}} |")
    lines.append("".join(total_parts))
    return lines
def write_combine_markdown(json_data, path, sort_order='name+', title="TinyUSB Average Code Size Metrics"):
    """Write the averaged size table to a markdown file.

    Args:
        json_data: Averaged data as accepted by render_combine_table(); a
            non-empty "file_list" is appended as a collapsible list.
        path: Destination markdown file path.
        sort_order: Sort order forwarded to render_combine_table().
        title: Text of the top-level heading.
    """
    md_lines = [f"# {title}", "", *render_combine_table(json_data, sort_order), ""]

    input_files = json_data.get("file_list")
    if input_files:
        md_lines += ["<details>", "<summary>Input files</summary>", ""]
        md_lines += [f"- {mf}" for mf in input_files]
        md_lines += ["", "</details>", ""]

    document = "\n".join(md_lines)
    with open(path, "w", encoding="utf-8") as out:
        out.write(document)
def write_compare_markdown(comparison, path, sort_order='size'):
    """Write the comparison report to a markdown file.

    The report has three tables: changes above 1%, changes below 1%, and
    unchanged files (the last one inside a collapsed <details> element).

    Args:
        comparison: Dict whose "files" list holds the per-file diff entries.
        path: Destination markdown file path.
        sort_order: Sort order used for splitting and for each table.
    """
    def section_lines(title, rows, collapsed=False):
        # Heading (or collapsible summary), then the table, then a blank line.
        table = render_compare_table(_build_rows(rows, sort_order), include_sum=True)
        if collapsed:
            return [f"<details><summary>{title}</summary>", "", *table, "", "</details>", ""]
        return [f"## {title}", *table, ""]

    significant, minor, unchanged = _split_by_significance(comparison["files"], sort_order)

    md_lines = [
        "# Size Difference Report",
        "",
        "Because TinyUSB code size varies by port and configuration, the metrics below represent the averaged totals across all example builds.",
        "",
        "Note: If there is no change, only one value is shown.",
        "",
    ]
    md_lines += section_lines("Changes >1% in size", significant)
    md_lines += section_lines("Changes <1% in size", minor)
    md_lines += section_lines("No changes", unchanged, collapsed=True)

    with open(path, "w", encoding="utf-8") as out:
        out.write("\n".join(md_lines))
def print_compare_summary(comparison, sort_order='name+'):
    """Print the comparison table, including the TOTAL row, to stdout.

    Args:
        comparison: Dict whose "files" list holds the per-file diff entries.
        sort_order: Sort order forwarded to _build_rows().
    """
    table_rows = _build_rows(comparison["files"], sort_order)
    for text in render_compare_table(table_rows, include_sum=True):
        print(text)
def _build_rows(files, sort_order):
"""Sort files and prepare printable fields."""
def sort_key(file_row):
if sort_order == 'size-':
return abs(file_row["size"]["diff"])
if sort_order in ('size', 'size+'):
return abs(file_row["size"]["diff"])
if sort_order == 'name-':
return file_row['file']
return file_row['file']
reverse = sort_order in ('size-', 'name-')
files_sorted = sorted(files, key=sort_key, reverse=reverse)
rows = []
for f in files_sorted:
sd = f["size"]
diff_val = sd['new'] - sd['base']
if sd['base'] == 0:
pct_str = "n/a"
else:
pct_val = (diff_val / sd['base']) * 100
pct_str = f"{pct_val:+.1f}%"
rows.append({
"file": f['file'],
"base": sd['base'],
"new": sd['new'],
"diff": diff_val,
"pct": pct_str,
"sections": f.get("sections", {}),
})
return rows
def _split_by_significance(files, sort_order):
"""Split files into >1% changes, <1% changes, and no changes."""
def is_significant(file_row):
base = file_row["size"]["base"]
diff = abs(file_row["size"]["diff"])
if base == 0:
return diff != 0
return (diff / base) * 100 > 1.0
rows_sorted = sorted(
files,
key=lambda f: abs(f["size"]["diff"]) if sort_order.startswith("size") else f["file"],
reverse=sort_order in ('size-', 'name-'),
)
significant = []
minor = []
unchanged = []
for f in rows_sorted:
if f["size"]["diff"] == 0:
unchanged.append(f)
else:
(significant if is_significant(f) else minor).append(f)
return significant, minor, unchanged
def render_compare_table(rows, include_sum):
"""Return markdown table lines for given rows."""
if not rows:
return ["No entries.", ""]
# collect section columns (reverse alpha)
section_names = sorted(
{name for r in rows for name in (r.get("sections") or {})},
reverse=True,
)
def fmt_abs(val_old, val_new):
diff = val_new - val_old
if diff == 0:
return f"{val_new}"
sign = "+" if diff > 0 else ""
return f"{val_old} ➙ {val_new} ({sign}{diff})"
sum_base = sum(r["base"] for r in rows)
sum_new = sum(r["new"] for r in rows)
total_diff = sum_new - sum_base
total_pct = "n/a" if sum_base == 0 else f"{(total_diff / sum_base) * 100:+.1f}%"
file_width = max(len("file"), *(len(r["file"]) for r in rows), len("TOTAL"))
size_width = max(
len("size"),
*(len(fmt_abs(r["base"], r["new"])) for r in rows),
len(fmt_abs(sum_base, sum_new)),
)
pct_width = max(len("% diff"), *(len(r["pct"]) for r in rows), len(total_pct))
section_widths = {}
for name in section_names:
max_val_len = 0
for r in rows:
sec_entry = (r.get("sections") or {}).get(name, {"base": 0, "new": 0})
max_val_len = max(max_val_len, len(fmt_abs(sec_entry.get("base", 0), sec_entry.get("new", 0))))
section_widths[name] = max(len(name), max_val_len, 1)
header_parts = [f"| {'file':<{file_width}} |"]
sep_parts = [f"| :{'-' * (file_width - 1)} |"]
for name in section_names:
header_parts.append(f" {name:>{section_widths[name]}} |")
sep_parts.append(f" {'-' * (section_widths[name] - 1)}: |")
header_parts.append(f" {'size':>{size_width}} | {'% diff':>{pct_width}} |")
sep_parts.append(f" {'-' * (size_width - 1)}: | {'-' * (pct_width - 1)}: |")
header = "".join(header_parts)
separator = "".join(sep_parts)
lines = [header, separator]
for r in rows:
parts = [f"| {r['file']:<{file_width}} |"]
sections_map = r.get("sections") or {}
for name in section_names:
sec_entry = sections_map.get(name, {"base": 0, "new": 0})
parts.append(f" {fmt_abs(sec_entry.get('base', 0), sec_entry.get('new', 0)):>{section_widths[name]}} |")
parts.append(f" {fmt_abs(r['base'], r['new']):>{size_width}} | {r['pct']:>{pct_width}} |")
lines.append("".join(parts))
if include_sum:
total_parts = [f"| {'TOTAL':<{file_width}} |"]
for name in section_names:
total_base = sum((r.get("sections") or {}).get(name, {}).get("base", 0) for r in rows)
total_new = sum((r.get("sections") or {}).get(name, {}).get("new", 0) for r in rows)
total_parts.append(f" {fmt_abs(total_base, total_new):>{section_widths[name]}} |")
total_parts.append(f" {fmt_abs(sum_base, sum_new):>{size_width}} | {total_pct:>{pct_width}} |")
lines.append("".join(total_parts))
return lines
def cmd_combine(args):
    """Run the combine subcommand: average the inputs and emit the requested outputs.

    Exits with status 1 when no input produced any data.

    Args:
        args: Parsed arguments providing files, filters, quiet, sort,
            json_out, markdown_out and out.
    """
    json_average = compute_avg(combine_files(expand_files(args.files), args.filters))
    if json_average is None:
        print("No valid map files found", file=sys.stderr)
        sys.exit(1)

    if not args.quiet:
        table = render_combine_table(json_average, sort_order=args.sort)
        for text in table:
            print(text)

    if args.json_out:
        write_json_output(json_average, args.out + '.json')

    if args.markdown_out:
        write_combine_markdown(
            json_average,
            args.out + '.md',
            sort_order=args.sort,
            title="TinyUSB Average Code Size Metrics",
        )
def cmd_compare(args):
    """Run the compare subcommand: diff two inputs and emit the requested outputs.

    Exits with status 1 when the comparison could not be produced.

    Args:
        args: Parsed arguments providing base, new, filters, quiet, sort,
            markdown_out and out.
    """
    comparison = compare_files(args.base, args.new, args.filters)
    if comparison is None:
        print("Failed to compare files", file=sys.stderr)
        sys.exit(1)

    verbose = not args.quiet
    if verbose:
        print_compare_summary(comparison, args.sort)

    if not args.markdown_out:
        return

    write_compare_markdown(comparison, args.out + '.md', args.sort)
    if verbose:
        print(f"Comparison written to {args.out}.md")
def main(argv=None):
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Argument list handed to the parser; None is passed through to
            argparse unchanged.
    """
    sort_choices = ['size', 'size-', 'size+', 'name', 'name-', 'name+']

    parser = argparse.ArgumentParser(description='Code size metrics tool')
    subparsers = parser.add_subparsers(dest='command', required=True, help='Available commands')

    # "combine": average several inputs.
    combine = subparsers.add_parser(
        'combine',
        help='Combine and average bloaty CSV outputs or metrics JSON files',
    )
    combine.add_argument(
        'files', nargs='+',
        help='Path to bloaty CSV output or TinyUSB metrics JSON file(s) (including linkermap-generated) or glob pattern(s)',
    )
    combine.add_argument(
        '-f', '--filter', dest='filters', action='append', default=[],
        help='Only include compile units whose path contains this substring (can be repeated)',
    )
    combine.add_argument(
        '-o', '--out', dest='out', default='metrics',
        help='Output path basename for JSON and Markdown files (default: metrics)',
    )
    combine.add_argument(
        '-j', '--json', dest='json_out', action='store_true',
        help='Write JSON output file',
    )
    combine.add_argument(
        '-m', '--markdown', dest='markdown_out', action='store_true',
        help='Write Markdown output file',
    )
    combine.add_argument(
        '-q', '--quiet', dest='quiet', action='store_true',
        help='Suppress summary output',
    )
    combine.add_argument(
        '-S', '--sort', dest='sort', default='size-', choices=sort_choices,
        help='Sort order: size/size- (descending), size+ (ascending), name/name+ (ascending), name- (descending). Default: size-',
    )

    # "compare": diff a base input against a new one.
    compare = subparsers.add_parser(
        'compare',
        help='Compare two metrics inputs (bloaty CSV or metrics JSON)',
    )
    compare.add_argument('base', help='Base CSV/metrics JSON file')
    compare.add_argument('new', help='New CSV/metrics JSON file')
    compare.add_argument(
        '-f', '--filter', dest='filters', action='append', default=[],
        help='Only include compile units whose path contains this substring (can be repeated)',
    )
    compare.add_argument(
        '-o', '--out', dest='out', default='metrics_compare',
        help='Output path basename for Markdown/JSON files (default: metrics_compare)',
    )
    compare.add_argument(
        '-m', '--markdown', dest='markdown_out', action='store_true',
        help='Write Markdown output file',
    )
    compare.add_argument(
        '-S', '--sort', dest='sort', default='name+', choices=sort_choices,
        help='Sort order: size/size- (descending), size+ (ascending), name/name+ (ascending), name- (descending). Default: name+',
    )
    compare.add_argument(
        '-q', '--quiet', dest='quiet', action='store_true',
        help='Suppress stdout summary output',
    )

    args = parser.parse_args(argv)
    if args.command == 'combine':
        cmd_combine(args)
    elif args.command == 'compare':
        cmd_compare(args)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()