| # |
| # Copyright (c) 2021 Project CHIP Authors |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| """Sqlite3 database of binary sizes over time.""" |
| |
| import collections |
| import json |
| import logging |
| import sqlite3 |
| import zipfile |
| |
| from pathlib import Path |
| from typing import Dict, IO, Iterable, List, Optional, Union |
| |
| import memdf.util.sqlite |
| |
| ChangeInfo = collections.namedtuple('ChangeInfo', [ |
| 'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts', |
| 'stale_artifacts' |
| ]) |
| |
| |
| class SizeDatabase(memdf.util.sqlite.Database): |
| """A database for recording and comparing size reports.""" |
| on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"] |
| on_writable = [ |
| """ |
| -- A ‘thing’ identifies the kind of built object. |
| -- Builds of the same thing are comparable. |
| CREATE TABLE IF NOT EXISTS thing ( |
| id INTEGER PRIMARY KEY, |
| platform TEXT NOT NULL, -- Build platform |
| config TEXT NOT NULL, -- Build configuration discriminator |
| target TEXT NOT NULL, -- Build target |
| UNIQUE(platform, config, target) |
| ) |
| """, """ |
| -- A ‘build’ identifies a built instance of a thing at some point. |
| CREATE TABLE IF NOT EXISTS build ( |
| id INTEGER PRIMARY KEY, |
| thing_id INTEGER REFERENCES thing(id), |
| hash TEXT NOT NULL, -- Commit hash |
| parent TEXT NOT NULL, -- Parent commit hash |
| pr INTEGER DEFAULT 0, -- Github PR number |
| time INTEGER NOT NULL, -- Unix-epoch timestamp |
| artifact INTEGER DEFAULT 0, -- Github artifact ID |
| commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment |
| ref TEXT, -- Target git ref |
| event TEXT, -- Github build trigger event |
| UNIQUE(thing_id, hash, parent, pr, time, artifact) |
| ) |
| """, """ |
| -- A ‘size’ entry gives the size of a section for a particular build. |
| CREATE TABLE IF NOT EXISTS size ( |
| build_id INTEGER REFERENCES build(id), |
| name TEXT NOT NULL, -- Section name |
| size INTEGER NOT NULL, -- Section size in bytes |
| PRIMARY KEY (build_id, name) |
| ) |
| """ |
| ] |
| |
| def __init__(self, filename: str, writable: bool = True): |
| super().__init__(filename, writable) |
| |
| def add_sizes(self, **kwargs): |
| """ |
| Add a size report to the database. |
| |
| The incoming arguments must contain the required non-ID column names |
| from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a |
| sequence of mappings containing 'name' and 'size'. |
| """ |
| td = {k: kwargs[k] for k in ('platform', 'config', 'target')} |
| thing = self.store_and_return_id('thing', **td) |
| bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')} |
| if 'ref' in kwargs: |
| bd['ref'] = kwargs['ref'] |
| cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')} |
| build = self.store_and_return_id('build', thing_id=thing, **bd, **cd) |
| if build is None: |
| logging.error('Failed to store %s %s %s', thing, bd, cd) |
| else: |
| for d in kwargs['sizes']: |
| self.store('size', build_id=build, **d) |
| |
| def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict): |
| """Add sizes from a JSON size report.""" |
| r = origin.copy() |
| r.update(json.loads(s)) |
| r['sizes'] = [] |
| # Add section sizes. |
| for i in r['frames'].get('section', []): |
| r['sizes'].append({'name': i['section'], 'size': i['size']}) |
| # Add segment sizes. |
| for i in r['frames'].get('wr', []): |
| r['sizes'].append({ |
| 'name': ('(read only)', '(read/write)')[int(i['wr'])], |
| 'size': |
| i['size'] |
| }) |
| self.add_sizes(**r) |
| |
| def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict): |
| """Add size reports from a zip.""" |
| with zipfile.ZipFile(f, 'r') as zip_file: |
| for i in zip_file.namelist(): |
| if i.endswith('-sizes.json'): |
| origin['member'] = i |
| with zip_file.open(i) as member: |
| self.add_sizes_from_json(member.read(), origin) |
| |
| def add_sizes_from_file(self, filename: str): |
| """Add size reports from a file.""" |
| origin = {'file': filename} |
| path = Path(filename) |
| if path.suffix == '.json': |
| logging.info('ASJ: reading JSON %s', path) |
| with open(path, encoding='utf-8') as f: |
| self.add_sizes_from_json(f.read(), origin) |
| elif path.suffix == '.zip': |
| logging.info('ASZ: reading ZIP %s', path) |
| self.add_sizes_from_zipfile(path, origin) |
| else: |
| logging.warning('Unknown file type "%s" ignored', filename) |
| |
| def select_thing_id(self, platform: str, config: str, |
| target: str) -> Optional[str]: |
| cur = self.execute( |
| 'SELECT id FROM thing WHERE platform=? AND config=? AND target=?', |
| (platform, config, target)) |
| row = cur.fetchone() |
| return row[0] if row else None |
| |
| def select_sections_for_thing(self, thing: str) -> List[str]: |
| cur = self.execute( |
| ''' |
| SELECT DISTINCT name FROM size WHERE build_id = ( |
| SELECT DISTINCT id FROM build WHERE thing_id == ?) |
| ORDER BY name |
| ''', (thing,)) |
| return [row[0] for row in cur.fetchall()] |
| |
| def select_matching_commits(self): |
| """Find matching builds, where one's commit is the other's parent.""" |
| return self.execute(''' |
| SELECT DISTINCT |
| c.event as event, |
| c.pr AS pr, |
| c.hash AS hash, |
| p.hash AS parent |
| FROM build c |
| INNER JOIN build p ON p.hash = c.parent |
| WHERE c.commented = 0 |
| ORDER BY c.time DESC, c.pr, c.hash, p.hash |
| ''') |
| |
| def select_changes(self, parent: str, commit: str) -> ChangeInfo: |
| """Returns size changes between the given commits.""" |
| cur = self.execute( |
| ''' |
| SELECT DISTINCT |
| t.id AS thing, |
| cb.artifact AS artifact, |
| pb.id AS parent_build, |
| cb.id AS commit_build, |
| t.platform, t.config, t.target, |
| cs.name AS name, |
| ps.size AS parent_size, |
| cs.size AS commit_size, |
| cb.time AS time |
| FROM thing t |
| INNER JOIN build cb ON cb.thing_id = t.id |
| INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent |
| INNER JOIN size cs ON cs.build_id = cb.id |
| INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name |
| WHERE cb.hash = ? AND pb.hash = ? |
| ORDER BY t.platform, t.config, t.target, |
| cs.name, cb.time DESC, pb.time DESC |
| ''', (commit, parent)) |
| |
| keep = ('platform', 'target', 'config', 'name', 'parent_size', |
| 'commit_size') |
| things: set[int] = set() |
| artifacts: set[int] = set() |
| builds: set[int] = set() |
| stale_builds: set[int] = set() |
| stale_artifacts: set[int] = set() |
| previous: Optional[sqlite3.Row] = None |
| rows = [] |
| |
| for row in cur.fetchall(): |
| row = sqlite3.Row(cur, row) |
| things.add(row['thing']) |
| if (previous is not None and row['thing'] == previous['thing'] |
| and row['name'] == previous['name']): |
| # This is duplicate build, older because we sort descending, |
| # presumably from a partial workflow re-run. |
| if row['parent_build'] != previous['parent_build']: |
| stale_builds.add(row['parent_build']) |
| if row['commit_build'] != previous['commit_build']: |
| stale_builds.add(row['commit_build']) |
| stale_artifacts.add(row['artifact']) |
| else: |
| previous = row |
| new = [row[k] for k in keep] |
| parent_size = row['parent_size'] |
| commit_size = row['commit_size'] |
| new.append(commit_size - parent_size) |
| new.append(self.percent_change(parent_size, commit_size)) |
| rows.append(new) |
| artifacts.add(row['artifact']) |
| builds.add(row['commit_build']) |
| |
| return ChangeInfo(('platform', 'target', 'config', 'section', |
| parent[:8], commit[:8], 'change', '% change'), rows, |
| things, builds, stale_builds, artifacts, |
| stale_artifacts) |
| |
| def set_commented(self, build_ids: Iterable[int]): |
| """Set the commented flag for the given builds.""" |
| if not build_ids: |
| return |
| for build_id in build_ids: |
| self.execute('UPDATE build SET commented = 1 WHERE id = ?', |
| (build_id, )) |
| self.commit() |
| |
| def delete_builds(self, build_ids: Iterable[int]): |
| """Delete the given builds.""" |
| if not build_ids: |
| return |
| for build_id in build_ids: |
| self.execute('DELETE FROM size WHERE build_id = ?', (build_id, )) |
| self.execute('DELETE FROM build WHERE id = ?', (build_id, )) |
| self.commit() |
| |
| @staticmethod |
| def percent_change(a: int, b: int) -> float: |
| if a == 0: |
| return 0.0 if b == 0 else float('inf') |
| return 100. * (b - a) / a |