changeset 37:beb7b8bbf3bc
Anton Shestakov <av6@dwimlabs.net>, Mon, 24 Aug 2015 01:21:36 +0800
bench: revsetor and revsetwip marks

next change 38:c3db41612b8c
previous change 32:1a20eb26c5df

bench.py

Permissions: -rwxr-xr-x

#!/usr/bin/env python
import errno
import logging
import os
import shutil
import sqlite3
import subprocess
import time
from argparse import ArgumentParser
from collections import OrderedDict
from settings import DBPATH, HG, LOCKFILE, TESTHGREPO, TESTREPO, rel
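
# The revset below approximates the popular "work in progress" alias:
# draft changesets and their parents, the working copy parent, and heads of
# the default branch, minus obsolete leftovers and closed heads.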
REVSETWIP = '(parents(not public()) or not public() or . or (head() and branch(default))) and (not obsolete() or unstable()^) and not closed()'

MARKS = OrderedDict((
    ('blame', 'hg blame README'),
    ('grepall', 'hg grep "version" --all README'),
    ('grepallf', 'hg grep "version" --all --follow README'),
    ('diff', 'hg diff -r "tip~100:tip" README'),
    ('diffg', 'hg diff -r "tip~100:tip" --git README'),
    ('stcp', 'hg status --copies README'),
    ('logfile', 'hg log README'),
    ('logfilecp', 'hg log --copies README'),
    ('log1', 'hg log -l1'),
    ('log1000', 'hg log -l1000'),
    ('revsetor', 'hg log -r "0|1|2|3|4|5|6|7|8|9"'),
    ('revsetwip', 'hg log -r "' + REVSETWIP + '"'),
))
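
# Each mark is timed twice per node: once with TESTREPO's .hg/cache removed
# before every run (cold) and once with it kept (warm); the results table
# stores these as cache = False/True rows.
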
parser = ArgumentParser(description='Benchmark revisions and put results in the db.')
group = parser.add_mutually_exclusive_group()
group.add_argument('revsets', metavar='REVSET', default=('last(all(), 120)',), nargs='*', help='update these revisions')
group.add_argument('--auto', metavar='MAXREVS', type=int, help='guess revisions, up to MAXREVS')
parser.add_argument('--retry', action='store_true', help='try and reduce existing timings')
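
# Illustrative invocations (revsets here are examples only):
#   ./bench.py                      # benchmark the default last(all(), 120)
#   ./bench.py 'tip~50::tip'        # benchmark an explicit revset
#   ./bench.py --auto 20            # pick up to 20 not-yet-tested revisions
#   ./bench.py --auto 20 --retry    # re-test up to 20 suspicious spikes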

def test(mark, mintime=1.0, mintries=3, dropcache=True):
    """ Time the command for mark against TESTREPO.

    Repeat until at least mintime seconds and mintries runs have
    accumulated; return the best (minimum) timing, or None if the
    command fails.
    """
    results = []
    cmd = [rel(TESTHGREPO, 'hg'), '-R', TESTREPO]
    if mark == 'blame':
        cmd += ['blame', rel(TESTREPO, 'README')]
    elif mark == 'grepall':
        cmd += ['grep', '--all', 'version', rel(TESTREPO, 'README')]
    elif mark == 'grepallf':
        cmd += ['grep', '--all', '--follow', 'version', rel(TESTREPO, 'README')]
    elif mark == 'stcp':
        cmd += ['status', '--copies', rel(TESTREPO, 'README')]
    elif mark == 'diff':
        cmd += ['diff', '-r', 'tip~100:tip', rel(TESTREPO, 'README')]
    elif mark == 'diffg':
        cmd += ['diff', '-r', 'tip~100:tip', '--git', rel(TESTREPO, 'README')]
    elif mark == 'logfile':
        cmd += ['log', rel(TESTREPO, 'README')]
    elif mark == 'logfilecp':
        cmd += ['log', '--copies', rel(TESTREPO, 'README')]
    elif mark == 'log1':
        cmd += ['log', '-l1']
    elif mark == 'log1000':
        cmd += ['log', '-l1000']
    elif mark == 'revsetor':
        cmd += ['log', '-r', '0|1|2|3|4|5|6|7|8|9']
    elif mark == 'revsetwip':
        cmd += ['log', '-r', REVSETWIP]
    while sum(results) < mintime or len(results) < mintries:
        if dropcache:
            shutil.rmtree(rel(TESTREPO, '.hg', 'cache'), ignore_errors=True)
        start = time.time()
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError:
            return None
        results.append(time.time() - start)
    return min(results)

def getnodes(revsets):
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n']
    for revset in revsets:
        cmd += ['-r', revset]
    output = subprocess.check_output(cmd)
    return output.split()

def guessnew(maxrevs):
    """ Pick one continuous span of nodes that still need testing. """
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n', '-r', 'sort(all(), rev)']
    output = subprocess.check_output(cmd)
    nodes = output.split()
    picking = False
    todo = []
    conn = sqlite3.connect(DBPATH)
    # Walk from the newest node down; stop at the first fully-tested node
    # after the span has started, or when the nodes run out.
    while nodes and len(todo) < maxrevs:
        node = nodes.pop()
        count = conn.execute('SELECT COUNT(*) FROM results WHERE node = ?', (node,)).fetchone()[0]
        if count < len(MARKS) * len(('without cache', 'with cache')):
            todo.append(node)
            picking = True
        elif picking:
            break
    conn.close()
    return todo
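
# Spike detection: a node is re-tested when its timing exceeds a neighbour's
# by more than 10x the spread between its two neighbours and by more than
# 10% of that mark's overall min-max range.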
def guessspikes(maxrevs):
    """ Pick already-tested nodes whose timings look like outliers. """
    cmd = [HG, 'log', '-R', TESTHGREPO, '-T', '{node}\n', '-r', 'sort(all(), -rev)']
    output = subprocess.check_output(cmd)
    nodes = output.split()
    todo = []
    conn = sqlite3.connect(DBPATH)
    # limits[mark] is (min, max) without cache followed by (min, max) with cache.
    limits = {
        mark:
        conn.execute(
            'SELECT MIN(time), MAX(time) FROM results WHERE mark = ? AND cache = ?',
            (mark, False)).fetchone()
        +
        conn.execute(
            'SELECT MIN(time), MAX(time) FROM results WHERE mark = ? AND cache = ?',
            (mark, True)).fetchone()
        for mark in MARKS
    }
    results = {}
    for node in nodes:
        resultsq = conn.execute(
            'SELECT mark, time, cache FROM results WHERE node = ?',
            (node,))
        for mark, t, cache in resultsq:
            results.setdefault(node, {}).setdefault(mark, [None, None])
            if not cache:
                results[node][mark][0] = t
            else:
                results[node][mark][1] = t
    conn.close()
    for i in range(1, len(nodes) - 1):
        if len(todo) >= maxrevs:
            break
        node1 = nodes[i - 1]
        node2 = nodes[i]
        node3 = nodes[i + 1]
        for mark in MARKS:
            for cache in (False, True):
                try:
                    eps = abs(results[node1][mark][cache] - results[node3][mark][cache])
                    delta = results[node2][mark][cache] - results[node1][mark][cache]
                    l = limits[mark][2:4] if cache else limits[mark][0:2]
                    if delta > eps * 10 and delta > (l[1] - l[0]) * 0.1:
                        if node2 not in todo:
                            todo.append(node2)
                except (KeyError, TypeError):
                    continue
    return todo
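
# Build helpers: TESTHGREPO is a Mercurial source checkout, so "make clean"
# and "make local" (its stock Makefile targets) rebuild the hg binary in
# place at the revision being benchmarked.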
def makeclean():
    subprocess.check_output(['make', '--directory', TESTHGREPO, 'clean'], stderr=subprocess.STDOUT)

def makelocal(node):
    subprocess.check_output([HG, 'update', '-R', TESTHGREPO, '--clean', node], stderr=subprocess.STDOUT)
    subprocess.check_output(['make', '--directory', TESTHGREPO, 'local'], stderr=subprocess.STDOUT)
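
# Schema: one row per (node, mark, cache) holding the best time observed.
# There is no uniqueness constraint; dbupdate() keeps at most one row per key.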
def dbinit():
    conn = sqlite3.connect(DBPATH)
    conn.execute(
        'CREATE TABLE IF NOT EXISTS results ('
        ' node CHAR(40) NOT NULL,'
        ' mark VARCHAR(40) NOT NULL,'
        ' time FLOAT NOT NULL,'
        ' cache BOOL NOT NULL'
        ')')
    conn.execute(
        'CREATE INDEX IF NOT EXISTS idx_results_node ON results (node)')
    conn.execute(
        'CREATE INDEX IF NOT EXISTS idx_results_limits ON results (mark, cache, node, time ASC)')
    conn.commit()
    conn.close()

def dbupdate(revsets, retry=False):
    conn = sqlite3.connect(DBPATH)
    makeclean()
    nodes = getnodes(revsets)
    numwidth = len(str(len(nodes)))
    for i, node in enumerate(nodes, 1):
        madelocal = False
        for mark in MARKS:
            for cache in (False, True):
                old = conn.execute(
                    'SELECT time FROM results WHERE node = ? AND mark = ? AND cache = ?',
                    (node, mark, cache)).fetchall()
                oldtime = old[0][0] if old else None
                if oldtime is not None and not retry:
                    continue
                if not madelocal:
                    makelocal(node)
                    madelocal = True
                t = test(mark, dropcache=not cache)
                logging.info('%0*d/%0*d %s %s %s', numwidth, i, numwidth, len(nodes), node, mark, t)
                if t is None:
                    continue
                if oldtime is None:
                    conn.execute(
                        'INSERT INTO results (node, mark, time, cache) VALUES (?, ?, ?, ?)',
                        (node, mark, t, cache))
                elif t < oldtime:
                    conn.execute(
                        'UPDATE results SET time = ? WHERE node = ? AND mark = ? AND cache = ?',
                        (t, node, mark, cache))
    conn.commit()
    conn.execute('VACUUM')
    conn.execute('ANALYZE')
    conn.close()
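
# A simple O_EXCL lockfile guards the data directory against concurrent runs;
# after a hard crash a stale lockfile has to be removed by hand.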
def lock():
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    try:
        return os.fdopen(os.open(LOCKFILE, flags), 'w')
    except OSError as e:
        if e.errno == errno.EEXIST:
            logging.error('cannot lock data directory')
        raise

def unlock():
    os.remove(LOCKFILE)

def main(args):
    lock()
    try:
        dbinit()
        if args.auto:
            if args.retry:
                args.revsets = guessspikes(args.auto)
            else:
                args.revsets = guessnew(args.auto)
        dbupdate(args.revsets, args.retry)
    finally:
        unlock()

if __name__ == '__main__':
    logging.basicConfig(format='%(levelname).1s %(asctime)s %(message)s', level=logging.INFO)
    args = parser.parse_args()
    main(args)
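
# For reference, a minimal settings.py sketch that would satisfy the imports
# at the top of this script. All names and paths here are assumptions for
# illustration, not the values used by the actual deployment:
#
#     import os
#
#     ROOT = os.path.dirname(os.path.abspath(__file__))
#
#     def rel(*parts):
#         """ Join path components into one path, as bench.py expects. """
#         return os.path.join(*parts)
#
#     HG = 'hg'                                  # system hg for repo queries
#     DBPATH = os.path.join(ROOT, 'data', 'bench.db')
#     LOCKFILE = os.path.join(ROOT, 'data', 'lock')
#     TESTHGREPO = os.path.join(ROOT, 'hg')      # Mercurial source checkout
#     TESTREPO = os.path.join(ROOT, 'testrepo')  # repository being measured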