changeset 84:75dafcb54f2c
Anton Shestakov <av6@dwimlabs.net>, Fri, 21 Oct 2016 18:58:17 +0800
viewer: lighter color for hovered row

bench.py

Permissions: -rwxr-xr-x

#!/usr/bin/env python
import atexit
import errno
import logging
import os
import shutil
import sqlite3
import time
from argparse import ArgumentParser
from collections import OrderedDict
from subprocess import check_output, CalledProcessError, STDOUT
from settings import DBPATH, HG, LOCKFILE, TESTHGREPO, TESTREPO, rel
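# Note (not in the original source): this appears to be the widely
# circulated "wip" revset alias -- drafts and their parents, the working
# directory parent, and open default-branch heads, excluding obsolete and
# closed changesets.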
REVSETWIP = '(parents(not public()) or not public() or . or (head() and branch(default))) and (not obsolete() or unstable()^) and not closed()'
MARKS = OrderedDict((
    ('blame', 'hg blame README'),
    ('grepall', 'hg grep "version" --all README'),
    ('grepallf', 'hg grep "version" --all --follow README'),
    ('diff', 'hg diff -r "tip~100:tip" README'),
    ('diffg', 'hg diff -r "tip~100:tip" --git README'),
    ('stcp', 'hg status --copies README'),
    ('logfile', 'hg log README'),
    ('logfilecp', 'hg log --copies README'),
    ('log1', 'hg log -l1'),
    ('log1000', 'hg log -l1000'),
    ('revsetor', 'hg log -r "0|1|2|3|4|5|6|7|8|9"'),
    ('revsetwip', 'hg log -r "' + REVSETWIP + '"'),
))
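# MARKS maps a short benchmark name to the hg command it stands for; the
# strings here are display labels only -- the real command line is built
# case by case in test() below.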
parser = ArgumentParser(description='Benchmark revisions and put results in the db.')
group = parser.add_mutually_exclusive_group()
group.add_argument(
    'revsets', metavar='REVSET', nargs='*', default=('last(all(), 120)',),
    help='update these revisions (default: last 120)')
group.add_argument(
    '--auto', metavar='MAXREVS', type=int,
    help='guess revisions, up to MAXREVS')
parser.add_argument(
    '--marks', metavar='MARKS', default='all',
    help='test only these commands (comma-separated, default: all)')
parser.add_argument(
    '--retry', action='store_true',
    help='try and reduce existing timings')
parser.add_argument(
    '--mintime', metavar='N', type=float, default=1.0,
    help='run each command for a total of at least N seconds (default: 1.0)')
parser.add_argument(
    '--mintries', metavar='N', type=int, default=3,
    help='run each command at least N times (default: 3)')
def test(mark, mintime, mintries, dropcache=True):
    results = []
    cmd = [rel(TESTHGREPO, 'hg'), '-R', TESTREPO]
    filename = rel(TESTREPO, 'README')
    if mark == 'blame':
        cmd += ['blame', filename]
    elif mark == 'grepall':
        cmd += ['grep', '--all', 'version', filename]
    elif mark == 'grepallf':
        cmd += ['grep', '--all', '--follow', 'version', filename]
    elif mark == 'stcp':
        cmd += ['status', '--copies', filename]
    elif mark == 'diff':
        cmd += ['diff', '-r', 'tip~100:tip', filename]
    elif mark == 'diffg':
        cmd += ['diff', '-r', 'tip~100:tip', '--git', filename]
    elif mark == 'logfile':
        cmd += ['log', filename]
    elif mark == 'logfilecp':
        cmd += ['log', '--copies', filename]
    elif mark == 'log1':
        cmd += ['log', '-l1']
    elif mark == 'log1000':
        cmd += ['log', '-l1000']
    elif mark == 'revsetor':
        cmd += ['log', '-r', '0|1|2|3|4|5|6|7|8|9']
    elif mark == 'revsetwip':
        cmd += ['log', '-r', REVSETWIP]
    while sum(results) < mintime or len(results) < mintries:
        if dropcache:
            shutil.rmtree(rel(TESTREPO, '.hg', 'cache'), ignore_errors=True)
        start = time.time()
        try:
            check_output(cmd)
        except CalledProcessError:
            return None
        results.append(time.time() - start)
    return min(results)
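# Resolve the given revsets to full 40-character node hashes, using the
# system hg (HG from settings) on the hg source repo being benchmarked.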
def getnodes(revsets):
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n']
    for revset in revsets:
        cmd += ['-r', revset]
    output = check_output(cmd)
    return output.split()

def guessnew(maxrevs, marks):
    """ Pick one continuous span of nodes that still need testing. """
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n', '-r', 'sort(all(), rev)']
    output = check_output(cmd)
    nodes = output.split()
    picking = False
    todo = []
    conn = sqlite3.connect(DBPATH)
    # Walk from the newest node backwards (nodes are sorted oldest-first,
    # so pop() yields the newest); also stop when the list is exhausted,
    # otherwise pop() would raise IndexError.
    while nodes and len(todo) < maxrevs:
        node = nodes.pop()
        count = conn.execute(
            'SELECT COUNT(*) FROM results'
            ' WHERE node = ?',
            (node,)).fetchone()[0]
        # A node is fully tested once it has a row for every mark, both
        # with and without cache.
        if count < len(marks) * len(('without cache', 'with cache')):
            todo.append(node)
            picking = True
        elif picking:
            # First fully-tested node after the span started: stop here.
            break
    conn.close()
    return todo
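# Heuristic behind --auto --retry: find timings that look like one-off
# measurement spikes worth re-timing. For each node (between two
# neighbors in revision order), a result is suspicious when it exceeds
# its neighbor's by more than 10x the spread between the two neighbors
# and by more than 10% of that mark's full observed range. `limits` holds
# (min, max) without cache followed by (min, max) with cache per mark.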
def guessspikes(maxrevs, marks):
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n', '-r', 'sort(all(), -rev)']
    output = check_output(cmd)
    nodes = output.split()
    todo = []
    conn = sqlite3.connect(DBPATH)
    limits = {
        mark:
            conn.execute(
                'SELECT MIN(time), MAX(time) FROM results'
                ' WHERE mark = ? AND cache = ?',
                (mark, False)).fetchone()
            +
            conn.execute(
                'SELECT MIN(time), MAX(time) FROM results'
                ' WHERE mark = ? AND cache = ?',
                (mark, True)).fetchone()
        for mark in marks
    }
    results = {}
    for node in nodes:
        resultsq = conn.execute(
            'SELECT mark, time, cache FROM results WHERE node = ?',
            (node,))
        for mark, t, cache in resultsq:
            results.setdefault(node, {}).setdefault(mark, [None, None])
            if not cache:
                results[node][mark][0] = t
            else:
                results[node][mark][1] = t
    conn.close()
    for i in range(1, len(nodes) - 1):
        if len(todo) >= maxrevs:
            break
        node1 = nodes[i - 1]
        node2 = nodes[i]
        node3 = nodes[i + 1]
        for mark in marks:
            for cache in (False, True):
                try:
                    eps = abs(results[node1][mark][cache] - results[node3][mark][cache])
                    delta = results[node2][mark][cache] - results[node1][mark][cache]
                    l = limits[mark][2:4] if cache else limits[mark][0:2]
                    if delta > eps * 10 and delta > (l[1] - l[0]) * 0.1:
                        if node2 not in todo:
                            todo.append(node2)
                except (KeyError, TypeError):
                    continue
    return todo
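# Build helpers: makeclean() runs `make clean` in the hg source repo,
# makelocal() updates that repo to the given node and runs `make local`,
# so rel(TESTHGREPO, 'hg') becomes the hg binary for exactly that revision.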
def makeclean():
    check_output(['make', '--directory', TESTHGREPO, 'clean'], stderr=STDOUT)
def makelocal(node):
    check_output([HG, 'update', '-R', TESTHGREPO, '--clean', node], stderr=STDOUT)
    check_output(['make', '--directory', TESTHGREPO, 'local'], stderr=STDOUT)
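# The results table holds one row per (node, mark, cache) with the best
# time observed so far. A hypothetical query against this schema, for
# illustration only:
#   SELECT node, time FROM results
#   WHERE mark = 'log1000' AND cache = 0
#   ORDER BY time DESC LIMIT 10;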
def dbinit():
    conn = sqlite3.connect(DBPATH)
    conn.execute(
        'CREATE TABLE IF NOT EXISTS results ('
        ' node CHAR(40) NOT NULL,'
        ' mark VARCHAR(40) NOT NULL,'
        ' time FLOAT NOT NULL,'
        ' cache BOOL NOT NULL'
        ')')
    conn.execute(
        'CREATE INDEX IF NOT EXISTS idx_results_node'
        ' ON results (node)')
    conn.execute(
        'CREATE INDEX IF NOT EXISTS idx_results_limits'
        ' ON results (mark, cache, node, time ASC)')
    conn.commit()
    conn.close()
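# For every requested node, mark and cache state, run the benchmark and
# store the result. Existing timings are skipped unless retry is set, and
# a new measurement only replaces a row when it is faster: the database
# keeps the minimum time ever observed.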
def dbupdate(revsets, marks, mintime, mintries, retry=False):
    conn = sqlite3.connect(DBPATH)
    makeclean()
    nodes = getnodes(revsets)
    numwidth = len(str(len(nodes)))
    markwidth = max(len(mark) for mark in marks)
    for i, node in enumerate(nodes, 1):
        madelocal = False
        for mark in marks:
            for cache in (False, True):
                old = conn.execute(
                    'SELECT time FROM results'
                    ' WHERE node = ? AND mark = ? AND cache = ?',
                    (node, mark, cache)).fetchall()
                oldtime = old[0][0] if old else None
                if oldtime is not None and not retry:
                    continue
                if not madelocal:
                    # Build hg for this node only once, and only when at
                    # least one mark actually needs (re)timing.
                    makelocal(node)
                    madelocal = True
                # Named newtime rather than time to avoid shadowing the
                # time module.
                newtime = test(mark, mintime, mintries, dropcache=not cache)
                if newtime is None:
                    status = 'noop'
                elif oldtime is None:
                    status = 'new'
                elif newtime < oldtime:
                    status = 'better'
                else:
                    status = 'noop'
                logging.info(
                    '%0*d/%0*d %s %-*s %s (%s)',
                    numwidth, i, numwidth, len(nodes), node, markwidth, mark,
                    newtime, status)
                if newtime is None:
                    continue
                if oldtime is None:
                    conn.execute(
                        'INSERT INTO results (node, mark, time, cache)'
                        ' VALUES (?, ?, ?, ?)',
                        (node, mark, newtime, cache))
                elif newtime < oldtime:
                    conn.execute(
                        'UPDATE results SET time = ?'
                        ' WHERE node = ? AND mark = ? AND cache = ?',
                        (newtime, node, mark, cache))
        # Commit after each node so results survive an interrupted run.
        conn.commit()
    conn.execute('VACUUM')
    conn.execute('ANALYZE')
    conn.close()
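# Simple pidfile locking: O_CREAT | O_EXCL makes os.open() fail with
# EEXIST when the lock file already exists, so only one bench run can
# touch the data directory at a time. (The script is written for
# Python 2; on Python 3, os.write() would need bytes here.)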
def lock(path):
    try:
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
        os.write(fd, '%d' % os.getpid())
        return fd
    except OSError as e:
        if e.errno == errno.EEXIST:
            logging.error('cannot lock data directory')
        raise
def unlock(fd, path):
    try:
        os.close(fd)
        os.remove(path)
    except OSError as e:
        if e.errno == errno.ENOENT:
            logging.info("lock file %r doesn't exist", path)
        else:
            raise
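# Entry point: filter MARKS down to the requested subset, take the lock
# for the lifetime of the process, make sure the database exists, then
# either benchmark the given revsets or guess what to test when --auto
# is used (new nodes by default, suspected spikes with --retry).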
def main(args):
    if args.marks == 'all':
        marks = MARKS
    else:
        names = args.marks.split(',')
        marks = OrderedDict((k, v) for k, v in MARKS.items() if k in names)
    lockfd = lock(LOCKFILE)
    atexit.register(unlock, lockfd, LOCKFILE)
    dbinit()
    if args.auto:
        if args.retry:
            args.revsets = guessspikes(args.auto, marks)
        else:
            args.revsets = guessnew(args.auto, marks)
    dbupdate(args.revsets, marks, args.mintime, args.mintries, args.retry)
if __name__ == '__main__':
    logging.basicConfig(format='%(levelname).1s %(asctime)s %(message)s', level=logging.INFO)
    args = parser.parse_args()
    main(args)
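# Illustrative invocations (flags as defined by the parser above; DBPATH,
# HG, LOCKFILE, TESTHGREPO and TESTREPO come from a local settings.py, and
# the revsets shown are examples only):
#   ./bench.py                            # time the last 120 revisions
#   ./bench.py --auto 20                  # guess up to 20 untested revisions
#   ./bench.py --auto 20 --retry          # re-time up to 20 suspected spikes
#   ./bench.py --marks log1,log1000 'tip~10:tip'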