diff --git a/benchmark/scripts/Benchmark_DTrace.in b/benchmark/scripts/Benchmark_DTrace.in
index 300291813b96d..fdc98f6a8aba6 100644
--- a/benchmark/scripts/Benchmark_DTrace.in
+++ b/benchmark/scripts/Benchmark_DTrace.in
@@ -103,6 +103,7 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             stdout=subprocess.PIPE,
             stderr=open("/dev/null", "w"),
             env=e,
+            universal_newlines=True,
         )
         results = [x for x in p.communicate()[0].split("\n") if len(x) > 0]
         return [
@@ -136,7 +137,9 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             results.append(result_3)
             results.append(single_iter)
 
-        return DTraceResult(test_name, int(not foundInstability), results)
+        return DTraceResult(
+            test_name, int(not foundInstability), results, self.csv_output
+        )
 
 
 SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index 1e84738562bfe..708e3d6ffdd0d 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -26,6 +26,7 @@ class `BenchmarkDoctor` analyzes performance tests, implements `check` COMMAND.
 """
 
 import argparse
+import functools
 import glob
 import logging
 import math
@@ -64,7 +65,9 @@ class BenchmarkDriver(object):
         os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"
 
     def _invoke(self, cmd):
-        return self._subprocess.check_output(cmd, stderr=self._subprocess.STDOUT)
+        return self._subprocess.check_output(
+            cmd, stderr=self._subprocess.STDOUT, universal_newlines=True
+        )
 
     @property
     def test_harness(self):
@@ -165,7 +168,7 @@ class BenchmarkDriver(object):
         )
         output = self._invoke(cmd)
         results = self.parser.results_from_string(output)
-        return results.items()[0][1] if test else results
+        return list(results.items())[0][1] if test else results
 
     def _cmd_run(
         self,
@@ -207,7 +210,7 @@ class BenchmarkDriver(object):
             a.merge(b)
             return a
 
-        return reduce(
+        return functools.reduce(
             merge_results,
             [
                 self.run(test, measure_memory=True, num_iters=1, quantile=20)
@@ -249,19 +252,21 @@ class BenchmarkDriver(object):
             print(format(values))
 
         def result_values(r):
-            return map(
-                str,
-                [
-                    r.test_num,
-                    r.name,
-                    r.num_samples,
-                    r.min,
-                    r.samples.q1,
-                    r.median,
-                    r.samples.q3,
-                    r.max,
-                    r.max_rss,
-                ],
+            return list(
+                map(
+                    str,
+                    [
+                        r.test_num,
+                        r.name,
+                        r.num_samples,
+                        r.min,
+                        r.samples.q1,
+                        r.median,
+                        r.samples.q3,
+                        r.max,
+                        r.max_rss,
+                    ],
+                )
             )
 
         header = [
@@ -370,7 +375,12 @@ class MarkdownReportHandler(logging.StreamHandler):
         msg = self.format(record)
         stream = self.stream
         try:
-            if isinstance(msg, unicode) and getattr(stream, "encoding", None):
+            # In Python 2 Unicode strings have a special type
+            unicode_type = unicode
+        except NameError:
+            unicode_type = str
+        try:
+            if isinstance(msg, unicode_type) and getattr(stream, "encoding", None):
                 stream.write(msg.encode(stream.encoding))
             else:
                 stream.write(msg)
@@ -487,16 +497,14 @@ class BenchmarkDoctor(object):
         name = measurements["name"]
         setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
         setup = 0 if ratio < 0.05 else setup
-        runtime = min(
-            [
-                (result.samples.min - correction)
-                for i_series in [
-                    BenchmarkDoctor._select(measurements, num_iters=i)
-                    for correction in [(setup / i) for i in [1, 2]]
-                ]
-                for result in i_series
-            ]
-        )
+
+        runtimes = []
+        for i in range(1, 3):
+            correction = setup / i
+            i_series = BenchmarkDoctor._select(measurements, num_iters=i)
+            for result in i_series:
+                runtimes.append(result.samples.min - correction)
+        runtime = min(runtimes)
 
         threshold = 1000
         if threshold < runtime:
@@ -572,7 +580,9 @@ class BenchmarkDoctor(object):
 
     @staticmethod
     def _reasonable_setup_time(measurements):
-        setup = min([result.setup for result in BenchmarkDoctor._select(measurements)])
+        setup = min(
+            [result.setup or 0 for result in BenchmarkDoctor._select(measurements)]
+        )
         if 200000 < setup:  # 200 ms
             BenchmarkDoctor.log_runtime.error(
                 "'%s' setup took at least %d μs.", measurements["name"], setup
@@ -857,6 +867,7 @@ def parse_args(args):
         help="See COMMAND -h for additional arguments",
         metavar="COMMAND",
     )
+    subparsers.required = True
 
     shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
diff --git a/benchmark/scripts/Benchmark_QuickCheck.in b/benchmark/scripts/Benchmark_QuickCheck.in
index a2cc257476240..4f78101bac076 100644
--- a/benchmark/scripts/Benchmark_QuickCheck.in
+++ b/benchmark/scripts/Benchmark_QuickCheck.in
@@ -63,6 +63,7 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             ],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
         error_out = p.communicate()[1].split("\n")
         result = p.returncode
@@ -76,7 +77,7 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver):
         try:
             args = [data, num_iters]
             perf_test_driver.run_with_timeout(self.run_test_inner, args)
-        except Exception, e:
+        except Exception as e:
             sys.stderr.write(
                 "Child Process Failed! (%s,%s). Error: %s\n"
                 % (data["path"], data["test_name"], e)
diff --git a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
index 756af2348c6b5..aab05c9821194 100644
--- a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
+++ b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
@@ -89,6 +89,7 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             ],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
         error_out = p.communicate()[1].split("\n")
         result = p.returncode
@@ -102,7 +103,7 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver):
         try:
             args = [data, num_iters]
             result = perf_test_driver.run_with_timeout(self.run_test_inner, args)
-        except Exception, e:
+        except Exception as e:
             sys.stderr.write(
                 "Child Process Failed! (%s,%s). Error: %s\n"
                 % (data["path"], data["test_name"], e)
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index 69450cb4b97b5..ef461dbde2aa6 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -30,6 +30,7 @@ class `ReportFormatter` creates the test comparison report in specified format.
 from __future__ import print_function
 
 import argparse
+import functools
 import re
 import sys
 from bisect import bisect, bisect_left, bisect_right
@@ -142,7 +143,7 @@ def num_samples(self):
     @property
     def all_samples(self):
         """List of all samples in ascending order."""
-        return sorted(self.samples + self.outliers, key=lambda s: s.i)
+        return sorted(self.samples + self.outliers, key=lambda s: s.i or -1)
 
     @property
     def min(self):
@@ -189,13 +190,16 @@ def sd(self):
         return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1))
 
     @staticmethod
-    def running_mean_variance((k, M_, S_), x):
+    def running_mean_variance(stats, x):
         """Compute running variance, B. P. Welford's method.
 
         See Knuth TAOCP vol 2, 3rd edition, page 232, or
         https://www.johndcook.com/blog/standard_deviation/
         M is mean, Standard Deviation is defined as sqrt(S/k-1)
         """
+
+        (k, M_, S_) = stats
+
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
@@ -247,7 +251,7 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
         runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
         if delta:
             runtimes = [int(x) if x else 0 for x in runtimes]
-            runtimes = reduce(
+            runtimes = functools.reduce(
                 lambda l, x: l.append(l[-1] + x) or l if l else [x],  # runnin
                 runtimes,
                 None,
@@ -315,7 +319,8 @@ def merge(self, r):
         """
         # Statistics
         if self.samples and r.samples:
-            map(self.samples.add, r.samples.samples)
+            for sample in r.samples.samples:
+                self.samples.add(sample)
             sams = self.samples
             self.num_samples = sams.num_samples
             self.min, self.max, self.median, self.mean, self.sd = (
@@ -490,7 +495,7 @@ def add_or_merge(names, r):
                 names[r.name].merge(r)
             return names
 
-        return reduce(add_or_merge, tests, dict())
+        return functools.reduce(add_or_merge, tests, dict())
 
     @staticmethod
     def results_from_string(log_contents):
@@ -544,10 +549,12 @@ def __init__(self, old_results, new_results, delta_threshold):
         def compare(name):
             return ResultComparison(old_results[name], new_results[name])
 
-        comparisons = map(compare, comparable_tests)
+        comparisons = list(map(compare, comparable_tests))
 
         def partition(l, p):
-            return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
+            return functools.reduce(
+                lambda x, y: x[not p(y)].append(y) or x, l, ([], [])
+            )
 
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold)
@@ -668,7 +675,7 @@ def _column_widths(self):
         def max_widths(maximum, widths):
             return map(max, zip(maximum, widths))
 
-        return reduce(max_widths, widths, [0] * 5)
+        return list(functools.reduce(max_widths, widths, [0] * 5))
 
     def _formatted_text(
         self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION
diff --git a/benchmark/scripts/perf_test_driver/perf_test_driver.py b/benchmark/scripts/perf_test_driver/perf_test_driver.py
index 7f8929f771764..ef8ffd2b600cd 100644
--- a/benchmark/scripts/perf_test_driver/perf_test_driver.py
+++ b/benchmark/scripts/perf_test_driver/perf_test_driver.py
@@ -111,7 +111,8 @@ def process_input(self, data):
     def run_for_opt_level(self, binary, opt_level, test_filter):
         print("testing driver at path: %s" % binary)
         names = []
-        for l in subprocess.check_output([binary, "--list"]).split("\n")[1:]:
+        output = subprocess.check_output([binary, "--list"], universal_newlines=True)
+        for l in output.split("\n")[1:]:
             m = BENCHMARK_OUTPUT_RE.match(l)
             if m is None:
                 continue
diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py
index 570fee82f2f8b..62d93d7f93cc1 100644
--- a/benchmark/scripts/test_Benchmark_Driver.py
+++ b/benchmark/scripts/test_Benchmark_Driver.py
@@ -15,15 +15,33 @@
 import logging
 import os
+import sys
 import time
 import unittest
-from StringIO import StringIO
-from imp import load_source
+
+try:
+    # for Python 2
+    from StringIO import StringIO
+except ImportError:
+    # for Python 3
+    from io import StringIO
 
 from compare_perf_tests import PerformanceTestResult
 from test_utils import Mock, MockLoggingHandler, Stub, captured_output
 
+
+# imp.load_source is deprecated in Python 3.4
+if sys.version_info < (3, 4):
+    from imp import load_source
+else:
+
+    def load_source(name, path):
+        from importlib.machinery import SourceFileLoader
+
+        return SourceFileLoader(name, path).load_module()
+
+
 # import Benchmark_Driver  # doesn't work because it misses '.py' extension
 Benchmark_Driver = load_source(
     "Benchmark_Driver",
@@ -46,7 +64,17 @@ def assert_contains(self, texts, output):
     def test_requires_command_argument(self):
         with captured_output() as (_, err):
             self.assertRaises(SystemExit, parse_args, [])
-        self.assert_contains(["usage:", "COMMAND", "too few arguments"], err.getvalue())
+
+        if sys.version_info < (3, 3):
+            self.assert_contains(
+                ["usage:", "COMMAND", "too few arguments"], err.getvalue()
+            )
+        else:
+            # The error message has changed in Python 3.3
+            self.assert_contains(
+                ["usage:", "COMMAND", "the following arguments are required"],
+                err.getvalue(),
+            )
 
     def test_command_help_lists_commands(self):
         with captured_output() as (out, _):
@@ -151,7 +179,14 @@ class SubprocessMock(Mock):
     def __init__(self, responses=None):
         super(SubprocessMock, self).__init__(responses)
 
-        def _check_output(args, stdin=None, stdout=None, stderr=None, shell=False):
+        def _check_output(
+            args,
+            stdin=None,
+            stdout=None,
+            stderr=None,
+            shell=False,
+            universal_newlines=False,
+        ):
             return self.record_and_respond(args, stdin, stdout, stderr, shell)
 
         self.check_output = _check_output
@@ -190,8 +225,8 @@ def test_gets_list_of_precommit_benchmarks(self):
         self.subprocess_mock.assert_called_all_expected()
         self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"])
         self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"])
-        self.assertEquals(driver.test_number["Benchmark1"], "1")
-        self.assertEquals(driver.test_number["Benchmark2"], "2")
+        self.assertEqual(driver.test_number["Benchmark1"], "1")
+        self.assertEqual(driver.test_number["Benchmark2"], "2")
 
     list_all_tests = (
         "/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "),
@@ -330,10 +365,10 @@ def test_parse_results_from_running_benchmarks(self):
         """
         r = self.driver.run("b")
         self.assertTrue(self.parser_stub.results_from_string_called)
-        self.assertEquals(r.name, "b1")  # non-matching name, just 1st result
+        self.assertEqual(r.name, "b1")  # non-matching name, just 1st result
         r = self.driver.run()
         self.assertTrue(isinstance(r, dict))
-        self.assertEquals(r["b1"].name, "b1")
+        self.assertEqual(r["b1"].name, "b1")
 
     def test_measure_memory(self):
         self.driver.run("b", measure_memory=True)
@@ -412,7 +447,11 @@ def test_log_results(self):
 
         def assert_log_written(out, log_file, content):
             self.assertEqual(out.getvalue(), "Logging results to: " + log_file + "\n")
-            with open(log_file, "rU") as f:
+            if sys.version_info < (3, 0):
+                openmode = "rU"
+            else:
+                openmode = "r"  # 'U' mode is deprecated in Python 3
+            with open(log_file, openmode) as f:
                 text = f.read()
             self.assertEqual(text, "formatted output")
diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
index 2053e93c0b42b..469c591afb3e2 100644
--- a/benchmark/scripts/test_compare_perf_tests.py
+++ b/benchmark/scripts/test_compare_perf_tests.py
@@ -100,28 +100,28 @@ def test_computes_inter_quartile_range(self):
         self.samples.add(Sample(5, 1, 1100))
         self.assertEqual(self.samples.iqr, 50)
 
-    def assertEqualtats(self, stats, expected_stats):
+    def assertEqualStats(self, stats, expected_stats):
         for actual, expected in zip(stats, expected_stats):
-            self.assertAlmostEquals(actual, expected, places=2)
+            self.assertAlmostEqual(actual, expected, places=2)
 
     def test_computes_mean_sd_cv(self):
         ss = self.samples
-        self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
+        self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
         self.samples.add(Sample(2, 1, 1100))
-        self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
+        self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
 
     def test_computes_range_spread(self):
         ss = self.samples
-        self.assertEqualtats((ss.range, ss.spread), (0, 0))
+        self.assertEqualStats((ss.range, ss.spread), (0, 0))
         self.samples.add(Sample(2, 1, 1100))
-        self.assertEqualtats((ss.range, ss.spread), (100, 10.0 / 100))
+        self.assertEqualStats((ss.range, ss.spread), (100, 10.0 / 100))
 
     def test_init_with_samples(self):
         self.samples = PerformanceTestSamples(
             "B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)]
         )
         self.assertEqual(self.samples.count, 2)
-        self.assertEqualtats(
+        self.assertEqualStats(
             (
                 self.samples.mean,
                 self.samples.sd,
@@ -135,7 +135,7 @@ def test_can_handle_zero_runtime(self):
         # guard against dividing by 0
         self.samples = PerformanceTestSamples("Zero")
         self.samples.add(Sample(0, 1, 0))
-        self.assertEqualtats(
+        self.assertEqualStats(
            (
                 self.samples.mean,
                 self.samples.sd,
@@ -155,14 +155,14 @@ def test_excludes_outliers(self):
         ]
         self.samples = PerformanceTestSamples("Outliers", ss)
         self.assertEqual(self.samples.count, 13)
-        self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 52.36))
+        self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 52.36))
 
         self.samples.exclude_outliers()
 
         self.assertEqual(self.samples.count, 11)
         self.assertEqual(self.samples.outliers, ss[11:])
         self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
-        self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 35.36))
+        self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 35.36))
 
     def test_excludes_outliers_zero_IQR(self):
         self.samples = PerformanceTestSamples("Tight")
@@ -175,7 +175,7 @@ def test_excludes_outliers_zero_IQR(self):
         self.samples.exclude_outliers()
 
         self.assertEqual(self.samples.count, 3)
-        self.assertEqualtats((self.samples.min, self.samples.max), (18, 18))
+        self.assertEqualStats((self.samples.min, self.samples.max), (18, 18))
 
     def test_excludes_outliers_top_only(self):
         ss = [
@@ -189,7 +189,7 @@ def test_excludes_outliers_top_only(self):
         self.samples.exclude_outliers(top_only=True)
 
         self.assertEqual(self.samples.count, 4)
-        self.assertEqualtats((self.samples.min, self.samples.max), (1, 2))
+        self.assertEqualStats((self.samples.min, self.samples.max), (1, 2))
 
 
 class TestPerformanceTestResult(unittest.TestCase):
@@ -217,8 +217,8 @@ def test_init_quantiles(self):
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601)
         )
-        self.assertAlmostEquals(r.mean, 54498.67, places=2)
-        self.assertAlmostEquals(r.sd, 109.61, places=2)
+        self.assertAlmostEqual(r.mean, 54498.67, places=2)
+        self.assertAlmostEqual(r.sd, 109.61, places=2)
         self.assertEqual(r.samples.count, 3)
         self.assertEqual(r.samples.num_samples, 3)
         self.assertEqual(
@@ -357,7 +357,7 @@ def test_init_meta(self):
         self.assertEqual(
             (r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259)
         )
-        self.assertEquals(r.max_rss, 32768)
+        self.assertEqual(r.max_rss, 32768)
         self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15))
 
     def test_repr(self):
@@ -379,7 +379,7 @@ def test_merge(self):
         )[
             1:
         ]
-        results = map(PerformanceTestResult, [line.split(",") for line in tests])
+        results = list(map(PerformanceTestResult, [line.split(",") for line in tests]))
         results[2].setup = 9
         results[3].setup = 7
 
@@ -432,20 +432,20 @@ def setUp(self):
     def test_init(self):
         rc = ResultComparison(self.r1, self.r2)
         self.assertEqual(rc.name, "AngryPhonebook")
-        self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0)
-        self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), places=3)
+        self.assertAlmostEqual(rc.ratio, 12325.0 / 11616.0)
+        self.assertAlmostEqual(rc.delta, (((11616.0 / 12325.0) - 1) * 100), places=3)
         # handle test results that sometimes change to zero, when compiler
         # optimizes out the body of the incorrectly written test
         rc = ResultComparison(self.r0, self.r0)
         self.assertEqual(rc.name, "GlobalClass")
-        self.assertAlmostEquals(rc.ratio, 1)
-        self.assertAlmostEquals(rc.delta, 0, places=3)
+        self.assertAlmostEqual(rc.ratio, 1)
+        self.assertAlmostEqual(rc.delta, 0, places=3)
         rc = ResultComparison(self.r0, self.r01)
-        self.assertAlmostEquals(rc.ratio, 0, places=3)
-        self.assertAlmostEquals(rc.delta, 2000000, places=3)
+        self.assertAlmostEqual(rc.ratio, 0, places=3)
+        self.assertAlmostEqual(rc.delta, 2000000, places=3)
         rc = ResultComparison(self.r01, self.r0)
-        self.assertAlmostEquals(rc.ratio, 20001)
-        self.assertAlmostEquals(rc.delta, -99.995, places=3)
+        self.assertAlmostEqual(rc.ratio, 20001)
+        self.assertAlmostEqual(rc.delta, -99.995, places=3)
         # disallow comparison of different test results
         self.assertRaises(AssertionError, ResultComparison, self.r0, self.r1)
@@ -528,9 +528,9 @@ def test_parse_results_csv(self):
         parser = LogParser()
         results = parser.parse_results(log.splitlines())
         self.assertTrue(isinstance(results[0], PerformanceTestResult))
-        self.assertEquals(results[0].name, "Array.append.Array.Int?")
-        self.assertEquals(results[1].name, "Bridging.NSArray.as!.Array.NSString")
-        self.assertEquals(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve")
+        self.assertEqual(results[0].name, "Array.append.Array.Int?")
+        self.assertEqual(results[1].name, "Bridging.NSArray.as!.Array.NSString")
+        self.assertEqual(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve")
 
     def test_parse_results_tab_delimited(self):
         log = "34\tBitCount\t20\t3\t4\t4\t0\t4"
@@ -706,7 +706,7 @@ def test_results_from_merge(self):
         concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990
 4,ArrayAppend,1,20000,20000,20000,0,20000"""
         results = LogParser.results_from_string(concatenated_logs)
-        self.assertEqual(results.keys(), ["ArrayAppend"])
+        self.assertEqual(list(results.keys()), ["ArrayAppend"])
         result = results["ArrayAppend"]
         self.assertTrue(isinstance(result, PerformanceTestResult))
         self.assertEqual(result.min, 20000)
@@ -728,14 +728,14 @@ def test_results_from_merge_verbose(self):
 Sample 3,364245
 3,Array2D,4,363094,376131,368159,5931,369169"""
         results = LogParser.results_from_string(concatenated_logs)
-        self.assertEqual(results.keys(), ["Array2D"])
+        self.assertEqual(list(results.keys()), ["Array2D"])
         result = results["Array2D"]
         self.assertTrue(isinstance(result, PerformanceTestResult))
         self.assertEqual(result.min, 350815)
         self.assertEqual(result.max, 376131)
         self.assertEqual(result.median, 358817)
-        self.assertAlmostEquals(result.sd, 8443.37, places=2)
-        self.assertAlmostEquals(result.mean, 361463.25, places=2)
+        self.assertAlmostEqual(result.sd, 8443.37, places=2)
+        self.assertAlmostEqual(result.mean, 361463.25, places=2)
         self.assertEqual(result.num_samples, 8)
         samples = result.samples
         self.assertTrue(isinstance(samples, PerformanceTestSamples))
diff --git a/benchmark/scripts/test_utils.py b/benchmark/scripts/test_utils.py
index 4b675d9d82582..70465f0b83d52 100644
--- a/benchmark/scripts/test_utils.py
+++ b/benchmark/scripts/test_utils.py
@@ -24,7 +24,10 @@
 import logging
 import sys
 
-from StringIO import StringIO
+try:
+    from StringIO import StringIO  # for Python 2
+except ImportError:
+    from io import StringIO  # for Python 3
 
 from contextlib import contextmanager
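
Note (illustrative, not part of the patch): the `running_mean_variance` hunk in compare_perf_tests.py only replaces the Python 2 tuple-parameter signature; Welford's update itself is unchanged. A minimal standalone sketch of that update, with hypothetical names, in case the formula in the docstring is hard to read:

    import functools

    def welford_update(stats, x):
        # stats is (k, M, S): sample count, running mean, and accumulated sum of
        # squared deviations; the sample standard deviation is sqrt(S / (k - 1)).
        (k, M, S) = stats
        k = float(k + 1)
        M_new = M + (x - M) / k
        S_new = S + (x - M) * (x - M_new)
        return (k, M_new, S_new)

    # Fold samples through the update, starting from (0, 0.0, 0.0).
    k, mean, S = functools.reduce(welford_update, [1000, 1100, 1050], (0, 0.0, 0.0))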