blob: e42262e567ba7f565287451954163508b01e7635 [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright (c) 2022 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Common queries on a size database."""
import datetime
import logging
import sys
from typing import cast, Dict, List, Mapping, Optional, Tuple
import pandas as pd # type: ignore
import memdf.report
import memdf.util.config
import memdf.util.sqlite
from memdf.sizedb import SizeDatabase
from memdf import Config
QUERY_CONFIG = {
Config.group_map('query'): {
'group': 'output'
},
'report.increases': {
'help': 'Highlight large increases',
'metavar': 'PERCENT',
'default': 0.0,
'argparse': {
'alias': ['--threshold'],
'type': float,
},
},
'query.where': {
'help': 'SQL filter',
'metavar': 'SQL-EXPR',
'default': '',
'argparse': {
'alias': ['--where'],
},
},
'query.order': {
'help': 'sort order',
'metavar': 'COLUMN[,COLUMN]*',
'default': '',
'argparse': {
'alias': ['--order'],
},
},
'query.limit': {
'help': 'limit result size',
'metavar': 'ROWS',
'default': 0,
'argparse': {
'alias': ['--limit'],
},
},
}
def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]:
"""Given comma-separated metavar and values, match them up."""
values = tuple(value.split(','))
names = metavar.split(',')
if len(names) < len(values):
logging.error('Too many values for %s', metavar)
return (None, {})
if len(names) > len(values):
logging.error('Missing %s for %s', ','.join(names[len(values):]),
metavar)
return (None, {})
return (values, dict(zip(names, values)))
def postprocess_canned_sql_option(config: Config, key: str,
info: Mapping) -> None:
"""Record information from simple SQL query options in one place."""
value = config[key]
if not value:
return
title = info['sql']['title']
if isinstance(value, str):
metavar = info.get('metavar', 'VALUE')
if ',' in metavar:
values, args = argsplit(metavar, value)
if not values:
return
else:
values = (value,)
args = {metavar: value}
title = title.format(**args)
else:
values = tuple()
if config['queries'] is None:
config['queries'] = []
cast(list, config['queries']).append((title, key, values, info))
def make_query(config: Config, info: Mapping) -> str:
"""Construct an SQL query string for a simple SQL query option."""
args = {'where': '', 'order': '', 'limit': ''}
if where := config.get('query.where'):
if kw := info['sql'].get('where'):
args['where'] = f'{kw} {where}'
if order := (config.get('query.order') or info['sql'].get('order')):
args['order'] = f'ORDER BY {order}'
if limit := config.get('query.limit'):
args['limit'] = f'LIMIT {limit}'
return info['sql']['query'].format(**args)
def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
"""Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t)
.isoformat())
return df
def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
"""Given ‘parent_size’and ‘commit_size’ columns, add change columns."""
df['change'] = df.apply(lambda row: row.commit_size - row.parent_size,
axis=1)
df['% change'] = df.apply(lambda row: SizeDatabase.percent_change(
row.parent_size, row.commit_size),
axis=1)
if threshold := config['report.increases']:
df = df[df['% change'] > threshold]
return df
QUERY_CONFIG |= {
'query.platforms': {
'help': 'List known platforms',
'default': False,
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Platforms',
'query': '''
SELECT DISTINCT platform FROM thing {where} {order} {limit}
''',
'where': 'WHERE',
'order': 'platform',
},
'argparse': {
'alias': ['--platforms'],
},
},
'query.platform-targets': {
'help': 'List known targets for the given platform',
'metavar': 'PLATFORM',
'default': '',
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Platform Targets',
'query': '''
SELECT DISTINCT platform, config, target
FROM thing
WHERE platform=? {where}
{order} {limit}
''',
'where': 'AND',
'order': 'platform, config, target',
},
'argparse': {
'alias': ['--platform-targets'],
},
},
'query.platform-sections': {
'help': 'List known sections for the given platform',
'metavar': 'PLATFORM',
'default': '',
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Platform Sections',
'query': '''
SELECT DISTINCT platform, s.name AS section
FROM thing t
INNER JOIN build b ON t.id == b.thing_id
INNER JOIN size s ON b.id == s.build_id
WHERE platform=? {where}
{order} {limit}
''',
'where': 'AND',
'order': 'platform, section',
},
'argparse': {
'alias': ['--platform-sections'],
},
},
'query.section-sizes': {
'help': 'List size data for a given build section',
'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
'default': '',
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
'query': '''
SELECT DISTINCT time, hash, pr, size
FROM build b
INNER JOIN size s ON b.id == s.build_id
WHERE b.thing_id == (SELECT id FROM thing
WHERE platform == ?
AND config == ?
AND target == ?)
AND name == ?
{where}
{order} {limit}
''',
'where': 'AND',
'order': 'time',
'postprocess': [postprocess_df_time],
},
},
'query.section-changes': {
'help': 'List size changes for a given build section',
'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
'default': '',
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
'query': '''
WITH builds (bid, pid, time, pr, hash) AS (
SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash
FROM build b
INNER JOIN build p
ON p.hash = b.parent AND p.thing_id == b.thing_id
WHERE b.thing_id == (SELECT id FROM thing
WHERE platform == ?
AND config == ?
AND target == ?)
)
SELECT DISTINCT
time, hash, pr,
ps.size as parent_size,
bs.size as commit_size
FROM builds
INNER JOIN size bs ON builds.bid == bs.build_id
INNER JOIN size ps ON builds.pid == ps.build_id
WHERE bs.name == ? AND ps.name == bs.name
{where}
{order} {limit}
''',
'where': 'AND',
'order': 'time',
'postprocess': [postprocess_df_time, postprocess_df_changes],
},
},
'query.all-changes': {
'help': 'List all size changes',
'default': False,
'postprocess': postprocess_canned_sql_option,
'sql': {
'title': 'Size Changes',
'query': '''
WITH
builds (bid, pid, time, pr, hash, thing_id) AS (
SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id
FROM build b
INNER JOIN build p
ON p.hash = b.parent AND p.thing_id == b.thing_id
),
changes (bid, tid, name, parent_size, commit_size, change) AS (
SELECT DISTINCT
bs.build_id,
thing_id,
bs.name,
ps.size as parent_size,
bs.size as commit_size,
bs.size - ps.size as change
FROM builds
INNER JOIN size bs ON builds.bid == bs.build_id
INNER JOIN size ps ON builds.pid == ps.build_id
WHERE bs.name == ps.name
)
SELECT
time, hash,
platform, config, target, name,
parent_size, commit_size, change
FROM changes
INNER JOIN build ON bid == build.id
INNER JOIN thing ON tid == thing.id
{where} {order} {limit}
''',
'where': 'AND',
'order': 'time',
'postprocess': [postprocess_df_time, postprocess_df_changes],
},
},
'query.build-sizes': {
# SQLite doesn't have PIVOT so we have to script this.
'help': 'List size changes for a given build',
'metavar': 'PLATFORM,CONFIG,TARGET',
'default': '',
},
}
def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]:
"""Split a build arg and get its thing_id and sections."""
values, args = argsplit('PLATFORM,CONFIG,TARGET', build)
if not values:
return None
platform = args['PLATFORM']
pconfig = args['CONFIG']
ptarget = args['TARGET']
thing_id = db.select_thing_id(platform, pconfig, ptarget)
if not thing_id:
logging.error('No match for %s,%s,%s', platform, pconfig, ptarget)
return None
sections = db.select_sections_for_thing(thing_id)
if not sections:
logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget)
return None
return (platform, pconfig, ptarget, thing_id, sections)
def make_build_sizes_query(config: Config, thing_id: str,
sections: List[str]) -> Tuple[List[str], str]:
"""Construct and SQL query for all section sizes for a given thing."""
# SQLite doesn't have PIVOT so we need to construct a query with
# a column for each section.
columns = ['time', 'hash', 'pr']
cols = ', '.join(columns)
joins = ''
where = f' WHERE b.thing_id == {thing_id}'
for i, s in enumerate(sections):
columns.append(s)
cols += f', s{i}.size AS s{i}z'
joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id'
where += f' AND s{i}.name == "{s}"'
if qw := config['query.where']:
where += f' AND {qw}'
query = f'''SELECT {cols}
FROM build b
{joins}
{where}
ORDER BY {config.get('query.order') or 'time'}'''
if limit := config['query.limit']:
query += f' LIMIT {limit}'
return (columns, query)
def query_build_sizes(config: Config, db: SizeDatabase,
build: str) -> Optional[pd.DataFrame]:
"""Get all sizes for the given build."""
t = get_build_sections(db, build)
if not t:
return None
platform, pconfig, ptarget, thing_id, sections = t
columns, query = make_build_sizes_query(config, thing_id, sections)
logging.debug('Query: %s', query)
cur = db.execute(query)
rows = cur.fetchall()
if rows:
df = pd.DataFrame(rows, columns=columns)
df.attrs = {
'name': f'qbs-{build}',
'title': f'Sizes for {platform} {pconfig} {ptarget}',
}
return postprocess_df_time(config, df)
return None
def main(argv):
status = 0
try:
cfg = {
**memdf.util.config.CONFIG,
**memdf.util.sqlite.CONFIG,
**memdf.report.OUTPUT_CONFIG,
**QUERY_CONFIG,
}
cfg['database.file']['argparse']['required'] = True
config = Config().init(cfg)
config.parse(argv)
db = SizeDatabase(config['database.file'], writable=False)
db.open()
dfs = {}
q = 0
for title, key, values, info in config.get('queries', []):
q += 1
query = make_query(config, info)
logging.debug('Option: %s', key)
logging.debug('Title: %s', title)
logging.debug('Query: %s', query.strip())
logging.debug('With: %s', values)
cur = db.execute(query, values)
columns = [i[0] for i in cur.description]
rows = cur.fetchall()
if rows:
df = pd.DataFrame(rows, columns=columns)
df.attrs = {'name': f'query{q}', 'title': title}
for f in info['sql'].get('postprocess', []):
df = f(config, df)
dfs[df.attrs['name']] = df
if build := config['query.build-sizes']:
q += 1
if (df := query_build_sizes(config, db, build)) is not None:
dfs[df.attrs['name']] = df
if q == 0:
config.argparse.print_help()
return 1
memdf.report.write_dfs(config,
dfs,
hierify=config['hierify'],
title=True,
floatfmt='5.1f')
except Exception as exception:
raise exception
return status
if __name__ == '__main__':
sys.exit(main(sys.argv))