blob: 7e59caedd042e66f7ef161323489aec97cb8c3d9 [file] [log] [blame]
#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Sqlite3 database of binary sizes over time."""
import collections
import json
import logging
import sqlite3
import zipfile
from pathlib import Path
from typing import Dict, IO, Iterable, List, Optional, Union
import memdf.util.sqlite
ChangeInfo = collections.namedtuple('ChangeInfo', [
'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts',
'stale_artifacts'
])
class SizeDatabase(memdf.util.sqlite.Database):
"""A database for recording and comparing size reports."""
on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"]
on_writable = [
"""
-- A ‘thing’ identifies the kind of built object.
-- Builds of the same thing are comparable.
CREATE TABLE IF NOT EXISTS thing (
id INTEGER PRIMARY KEY,
platform TEXT NOT NULL, -- Build platform
config TEXT NOT NULL, -- Build configuration discriminator
target TEXT NOT NULL, -- Build target
UNIQUE(platform, config, target)
)
""", """
-- A ‘build’ identifies a built instance of a thing at some point.
CREATE TABLE IF NOT EXISTS build (
id INTEGER PRIMARY KEY,
thing_id INTEGER REFERENCES thing(id),
hash TEXT NOT NULL, -- Commit hash
parent TEXT NOT NULL, -- Parent commit hash
pr INTEGER DEFAULT 0, -- Github PR number
time INTEGER NOT NULL, -- Unix-epoch timestamp
artifact INTEGER DEFAULT 0, -- Github artifact ID
commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment
ref TEXT, -- Target git ref
event TEXT, -- Github build trigger event
UNIQUE(thing_id, hash, parent, pr, time, artifact)
)
""", """
-- A ‘size’ entry gives the size of a section for a particular build.
CREATE TABLE IF NOT EXISTS size (
build_id INTEGER REFERENCES build(id),
name TEXT NOT NULL, -- Section name
size INTEGER NOT NULL, -- Section size in bytes
PRIMARY KEY (build_id, name)
)
"""
]
def __init__(self, filename: str, writable: bool = True):
super().__init__(filename, writable)
def add_sizes(self, **kwargs):
"""
Add a size report to the database.
The incoming arguments must contain the required non-ID column names
from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
sequence of mappings containing 'name' and 'size'.
"""
td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
thing = self.store_and_return_id('thing', **td)
bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
if 'ref' in kwargs:
bd['ref'] = kwargs['ref']
cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
if build is None:
logging.error('Failed to store %s %s %s', thing, bd, cd)
else:
for d in kwargs['sizes']:
self.store('size', build_id=build, **d)
def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
"""Add sizes from a JSON size report."""
r = origin.copy()
r.update(json.loads(s))
r['sizes'] = []
# Add section sizes.
for i in r['frames'].get('section', []):
r['sizes'].append({'name': i['section'], 'size': i['size']})
# Add segment sizes.
for i in r['frames'].get('wr', []):
r['sizes'].append({
'name': ('(read only)', '(read/write)')[int(i['wr'])],
'size':
i['size']
})
self.add_sizes(**r)
def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict):
"""Add size reports from a zip."""
with zipfile.ZipFile(f, 'r') as zip_file:
for i in zip_file.namelist():
if i.endswith('-sizes.json'):
origin['member'] = i
with zip_file.open(i) as member:
self.add_sizes_from_json(member.read(), origin)
def add_sizes_from_file(self, filename: str):
"""Add size reports from a file."""
origin = {'file': filename}
path = Path(filename)
if path.suffix == '.json':
logging.info('ASJ: reading JSON %s', path)
with open(path, encoding='utf-8') as f:
self.add_sizes_from_json(f.read(), origin)
elif path.suffix == '.zip':
logging.info('ASZ: reading ZIP %s', path)
self.add_sizes_from_zipfile(path, origin)
else:
logging.warning('Unknown file type "%s" ignored', filename)
def select_thing_id(self, platform: str, config: str,
target: str) -> Optional[str]:
cur = self.execute(
'SELECT id FROM thing WHERE platform=? AND config=? AND target=?',
(platform, config, target))
row = cur.fetchone()
return row[0] if row else None
def select_sections_for_thing(self, thing: str) -> List[str]:
cur = self.execute(
'''
SELECT DISTINCT name FROM size WHERE build_id = (
SELECT DISTINCT id FROM build WHERE thing_id == ?)
ORDER BY name
''', (thing,))
return [row[0] for row in cur.fetchall()]
def select_matching_commits(self):
"""Find matching builds, where one's commit is the other's parent."""
return self.execute('''
SELECT DISTINCT
c.event as event,
c.pr AS pr,
c.hash AS hash,
p.hash AS parent
FROM build c
INNER JOIN build p ON p.hash = c.parent
WHERE c.commented = 0
ORDER BY c.time DESC, c.pr, c.hash, p.hash
''')
def select_changes(self, parent: str, commit: str) -> ChangeInfo:
"""Returns size changes between the given commits."""
cur = self.execute(
'''
SELECT DISTINCT
t.id AS thing,
cb.artifact AS artifact,
pb.id AS parent_build,
cb.id AS commit_build,
t.platform, t.config, t.target,
cs.name AS name,
ps.size AS parent_size,
cs.size AS commit_size,
cb.time AS time
FROM thing t
INNER JOIN build cb ON cb.thing_id = t.id
INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent
INNER JOIN size cs ON cs.build_id = cb.id
INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name
WHERE cb.hash = ? AND pb.hash = ?
ORDER BY t.platform, t.config, t.target,
cs.name, cb.time DESC, pb.time DESC
''', (commit, parent))
keep = ('platform', 'target', 'config', 'name', 'parent_size',
'commit_size')
things: set[int] = set()
artifacts: set[int] = set()
builds: set[int] = set()
stale_builds: set[int] = set()
stale_artifacts: set[int] = set()
previous: Optional[sqlite3.Row] = None
rows = []
for row in cur.fetchall():
row = sqlite3.Row(cur, row)
things.add(row['thing'])
if (previous is not None and row['thing'] == previous['thing']
and row['name'] == previous['name']):
# This is duplicate build, older because we sort descending,
# presumably from a partial workflow re-run.
if row['parent_build'] != previous['parent_build']:
stale_builds.add(row['parent_build'])
if row['commit_build'] != previous['commit_build']:
stale_builds.add(row['commit_build'])
stale_artifacts.add(row['artifact'])
else:
previous = row
new = [row[k] for k in keep]
parent_size = row['parent_size']
commit_size = row['commit_size']
new.append(commit_size - parent_size)
new.append(self.percent_change(parent_size, commit_size))
rows.append(new)
artifacts.add(row['artifact'])
builds.add(row['commit_build'])
return ChangeInfo(('platform', 'target', 'config', 'section',
parent[:8], commit[:8], 'change', '% change'), rows,
things, builds, stale_builds, artifacts,
stale_artifacts)
def set_commented(self, build_ids: Iterable[int]):
"""Set the commented flag for the given builds."""
if not build_ids:
return
for build_id in build_ids:
self.execute('UPDATE build SET commented = 1 WHERE id = ?',
(build_id, ))
self.commit()
def delete_builds(self, build_ids: Iterable[int]):
"""Delete the given builds."""
if not build_ids:
return
for build_id in build_ids:
self.execute('DELETE FROM size WHERE build_id = ?', (build_id, ))
self.execute('DELETE FROM build WHERE id = ?', (build_id, ))
self.commit()
@staticmethod
def percent_change(a: int, b: int) -> float:
if a == 0:
return 0.0 if b == 0 else float('inf')
return 100. * (b - a) / a