diff --git a/benchmark/scripts/Benchmark_DTrace.in b/benchmark/scripts/Benchmark_DTrace.in
index 300291813b96d..fdc98f6a8aba6 100644
--- a/benchmark/scripts/Benchmark_DTrace.in
+++ b/benchmark/scripts/Benchmark_DTrace.in
@@ -103,6 +103,7 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             stdout=subprocess.PIPE,
             stderr=open("/dev/null", "w"),
             env=e,
+            universal_newlines=True,
         )
         results = [x for x in p.communicate()[0].split("\n") if len(x) > 0]
         return [
@@ -136,7 +137,9 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             results.append(result_3)
             results.append(single_iter)
 
-        return DTraceResult(test_name, int(not foundInstability), results)
+        return DTraceResult(
+            test_name, int(not foundInstability), results, self.csv_output
+        )
 
 
 SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver
index 1e84738562bfe..708e3d6ffdd0d 100755
--- a/benchmark/scripts/Benchmark_Driver
+++ b/benchmark/scripts/Benchmark_Driver
@@ -26,6 +26,7 @@ class `BenchmarkDoctor` analyzes performance tests, implements `check` COMMAND.
 """
 
 import argparse
+import functools
 import glob
 import logging
 import math
@@ -64,7 +65,9 @@ class BenchmarkDriver(object):
         os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"
 
     def _invoke(self, cmd):
-        return self._subprocess.check_output(cmd, stderr=self._subprocess.STDOUT)
+        return self._subprocess.check_output(
+            cmd, stderr=self._subprocess.STDOUT, universal_newlines=True
+        )
 
     @property
     def test_harness(self):
@@ -165,7 +168,7 @@ class BenchmarkDriver(object):
         )
         output = self._invoke(cmd)
         results = self.parser.results_from_string(output)
-        return results.items()[0][1] if test else results
+        return list(results.items())[0][1] if test else results
 
     def _cmd_run(
         self,
@@ -207,7 +210,7 @@ class BenchmarkDriver(object):
             a.merge(b)
             return a
 
-        return reduce(
+        return functools.reduce(
             merge_results,
             [
                 self.run(test, measure_memory=True, num_iters=1, quantile=20)
@@ -249,19 +252,21 @@ class BenchmarkDriver(object):
             print(format(values))
 
         def result_values(r):
-            return map(
-                str,
-                [
-                    r.test_num,
-                    r.name,
-                    r.num_samples,
-                    r.min,
-                    r.samples.q1,
-                    r.median,
-                    r.samples.q3,
-                    r.max,
-                    r.max_rss,
-                ],
+            return list(
+                map(
+                    str,
+                    [
+                        r.test_num,
+                        r.name,
+                        r.num_samples,
+                        r.min,
+                        r.samples.q1,
+                        r.median,
+                        r.samples.q3,
+                        r.max,
+                        r.max_rss,
+                    ],
+                )
             )
 
         header = [
@@ -370,7 +375,12 @@ class MarkdownReportHandler(logging.StreamHandler):
         msg = self.format(record)
         stream = self.stream
         try:
-            if isinstance(msg, unicode) and getattr(stream, "encoding", None):
+            # In Python 2 Unicode strings have a special type
+            unicode_type = unicode
+        except NameError:
+            unicode_type = str
+        try:
+            if isinstance(msg, unicode_type) and getattr(stream, "encoding", None):
                 stream.write(msg.encode(stream.encoding))
             else:
                 stream.write(msg)
@@ -487,16 +497,14 @@ class BenchmarkDoctor(object):
         name = measurements["name"]
         setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
         setup = 0 if ratio < 0.05 else setup
-        runtime = min(
-            [
-                (result.samples.min - correction)
-                for i_series in [
-                    BenchmarkDoctor._select(measurements, num_iters=i)
-                    for correction in [(setup / i) for i in [1, 2]]
-                ]
-                for result in i_series
-            ]
-        )
+
+        runtimes = []
+        for i in range(1, 3):
+            correction = setup / i
+            i_series = BenchmarkDoctor._select(measurements, num_iters=i)
+            for result in i_series:
+                runtimes.append(result.samples.min - correction)
+        runtime = min(runtimes)
 
         threshold = 1000
         if threshold < runtime:
@@ -572,7 +580,9 @@ class BenchmarkDoctor(object):
 
     @staticmethod
     def _reasonable_setup_time(measurements):
-        setup = min([result.setup for result in BenchmarkDoctor._select(measurements)])
+        setup = min(
+            [result.setup or 0 for result in BenchmarkDoctor._select(measurements)]
+        )
         if 200000 < setup:  # 200 ms
             BenchmarkDoctor.log_runtime.error(
                 "'%s' setup took at least %d μs.", measurements["name"], setup
@@ -857,6 +867,7 @@ def parse_args(args):
         help="See COMMAND -h for additional arguments",
         metavar="COMMAND",
     )
+    subparsers.required = True
 
     shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
diff --git a/benchmark/scripts/Benchmark_QuickCheck.in b/benchmark/scripts/Benchmark_QuickCheck.in
index a2cc257476240..4f78101bac076 100644
--- a/benchmark/scripts/Benchmark_QuickCheck.in
+++ b/benchmark/scripts/Benchmark_QuickCheck.in
@@ -63,6 +63,7 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             ],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
         error_out = p.communicate()[1].split("\n")
         result = p.returncode
@@ -76,7 +77,7 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver):
         try:
             args = [data, num_iters]
             perf_test_driver.run_with_timeout(self.run_test_inner, args)
-        except Exception, e:
+        except Exception as e:
             sys.stderr.write(
                 "Child Process Failed! (%s,%s). Error: %s\n"
                 % (data["path"], data["test_name"], e)
diff --git a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
index 756af2348c6b5..aab05c9821194 100644
--- a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
+++ b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in
@@ -89,6 +89,7 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver):
             ],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
         error_out = p.communicate()[1].split("\n")
         result = p.returncode
@@ -102,7 +103,7 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver):
         try:
             args = [data, num_iters]
             result = perf_test_driver.run_with_timeout(self.run_test_inner, args)
-        except Exception, e:
+        except Exception as e:
             sys.stderr.write(
                 "Child Process Failed! (%s,%s). Error: %s\n"
                 % (data["path"], data["test_name"], e)
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index 69450cb4b97b5..ef461dbde2aa6 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -30,6 +30,7 @@ class `ReportFormatter` creates the test comparison report in specified format.
 from __future__ import print_function
 
 import argparse
+import functools
 import re
 import sys
 from bisect import bisect, bisect_left, bisect_right
@@ -142,7 +143,7 @@ def num_samples(self):
     @property
     def all_samples(self):
         """List of all samples in ascending order."""
-        return sorted(self.samples + self.outliers, key=lambda s: s.i)
+        return sorted(self.samples + self.outliers, key=lambda s: s.i or -1)
 
     @property
     def min(self):
@@ -189,13 +190,16 @@ def sd(self):
         return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1))
 
     @staticmethod
-    def running_mean_variance((k, M_, S_), x):
+    def running_mean_variance(stats, x):
         """Compute running variance, B. P. Welford's method.
 
         See Knuth TAOCP vol 2, 3rd edition, page 232, or
         https://www.johndcook.com/blog/standard_deviation/
         M is mean, Standard Deviation is defined as sqrt(S/k-1)
         """
+
+        (k, M_, S_) = stats
+
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
@@ -247,7 +251,7 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal
         runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
         if delta:
             runtimes = [int(x) if x else 0 for x in runtimes]
-            runtimes = reduce(
+            runtimes = functools.reduce(
                 lambda l, x: l.append(l[-1] + x) or l if l else [x],  # runnin
                 runtimes,
                 None,
@@ -315,7 +319,8 @@ def merge(self, r):
         """
         # Statistics
         if self.samples and r.samples:
-            map(self.samples.add, r.samples.samples)
+            for sample in r.samples.samples:
+                self.samples.add(sample)
             sams = self.samples
             self.num_samples = sams.num_samples
             self.min, self.max, self.median, self.mean, self.sd = (
@@ -490,7 +495,7 @@ def add_or_merge(names, r):
                 names[r.name].merge(r)
             return names
 
-        return reduce(add_or_merge, tests, dict())
+        return functools.reduce(add_or_merge, tests, dict())
 
     @staticmethod
     def results_from_string(log_contents):
@@ -544,10 +549,12 @@ def __init__(self, old_results, new_results, delta_threshold):
         def compare(name):
             return ResultComparison(old_results[name], new_results[name])
 
-        comparisons = map(compare, comparable_tests)
+        comparisons = list(map(compare, comparable_tests))
 
         def partition(l, p):
-            return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
+            return functools.reduce(
+                lambda x, y: x[not p(y)].append(y) or x, l, ([], [])
+            )
 
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold)
@@ -668,7 +675,7 @@ def _column_widths(self):
         def max_widths(maximum, widths):
             return map(max, zip(maximum, widths))
 
-        return reduce(max_widths, widths, [0] * 5)
+        return list(functools.reduce(max_widths, widths, [0] * 5))
 
     def _formatted_text(
         self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION
diff --git a/benchmark/scripts/perf_test_driver/perf_test_driver.py b/benchmark/scripts/perf_test_driver/perf_test_driver.py
index 7f8929f771764..ef8ffd2b600cd 100644
--- a/benchmark/scripts/perf_test_driver/perf_test_driver.py
+++ b/benchmark/scripts/perf_test_driver/perf_test_driver.py
@@ -111,7 +111,8 @@ def process_input(self, data):
     def run_for_opt_level(self, binary, opt_level, test_filter):
         print("testing driver at path: %s" % binary)
         names = []
-        for l in subprocess.check_output([binary, "--list"]).split("\n")[1:]:
+        output = subprocess.check_output([binary, "--list"], universal_newlines=True)
+        for l in output.split("\n")[1:]:
             m = BENCHMARK_OUTPUT_RE.match(l)
             if m is None:
                 continue
diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py
index 570fee82f2f8b..62d93d7f93cc1 100644
--- a/benchmark/scripts/test_Benchmark_Driver.py
+++ b/benchmark/scripts/test_Benchmark_Driver.py
@@ -15,15 +15,33 @@
 import logging
 import os
+import sys
 import time
 import unittest
-from StringIO import StringIO
-from imp import load_source
+
+try:
+    # for Python 2
+    from StringIO import StringIO
+except ImportError:
+    # for Python 3
+    from io import StringIO
 
 from compare_perf_tests import PerformanceTestResult
 from test_utils import Mock, MockLoggingHandler, Stub, captured_output
 
+
+# imp.load_source is deprecated in Python 3.4
+if sys.version_info < (3, 4):
+    from imp import load_source
+else:
+
+    def load_source(name, path):
+        from importlib.machinery import SourceFileLoader
+
+        return SourceFileLoader(name, path).load_module()
+
+
 # import Benchmark_Driver  # doesn't work because it misses '.py' extension
 Benchmark_Driver = load_source(
     "Benchmark_Driver",
@@ -46,7 +64,17 @@ def assert_contains(self, texts, output):
     def test_requires_command_argument(self):
         with captured_output() as (_, err):
             self.assertRaises(SystemExit, parse_args, [])
-        self.assert_contains(["usage:", "COMMAND", "too few arguments"], err.getvalue())
+
+        if sys.version_info < (3, 3):
+            self.assert_contains(
+                ["usage:", "COMMAND", "too few arguments"], err.getvalue()
+            )
+        else:
+            # The error message has changed in Python 3.3
+            self.assert_contains(
+                ["usage:", "COMMAND", "the following arguments are required"],
+                err.getvalue(),
+            )
 
     def test_command_help_lists_commands(self):
         with captured_output() as (out, _):
@@ -151,7 +179,14 @@ class SubprocessMock(Mock):
     def __init__(self, responses=None):
         super(SubprocessMock, self).__init__(responses)
 
-        def _check_output(args, stdin=None, stdout=None, stderr=None, shell=False):
+        def _check_output(
+            args,
+            stdin=None,
+            stdout=None,
+            stderr=None,
+            shell=False,
+            universal_newlines=False,
+        ):
             return self.record_and_respond(args, stdin, stdout, stderr, shell)
 
         self.check_output = _check_output
@@ -190,8 +225,8 @@ def test_gets_list_of_precommit_benchmarks(self):
         self.subprocess_mock.assert_called_all_expected()
         self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"])
         self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"])
-        self.assertEquals(driver.test_number["Benchmark1"], "1")
-        self.assertEquals(driver.test_number["Benchmark2"], "2")
+        self.assertEqual(driver.test_number["Benchmark1"], "1")
+        self.assertEqual(driver.test_number["Benchmark2"], "2")
 
     list_all_tests = (
         "/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "),
@@ -330,10 +365,10 @@ def test_parse_results_from_running_benchmarks(self):
         """
         r = self.driver.run("b")
         self.assertTrue(self.parser_stub.results_from_string_called)
-        self.assertEquals(r.name, "b1")  # non-matching name, just 1st result
+        self.assertEqual(r.name, "b1")  # non-matching name, just 1st result
         r = self.driver.run()
         self.assertTrue(isinstance(r, dict))
-        self.assertEquals(r["b1"].name, "b1")
+        self.assertEqual(r["b1"].name, "b1")
 
     def test_measure_memory(self):
         self.driver.run("b", measure_memory=True)
@@ -412,7 +447,11 @@ def test_log_results(self):
 
         def assert_log_written(out, log_file, content):
             self.assertEqual(out.getvalue(), "Logging results to: " + log_file + "\n")
-            with open(log_file, "rU") as f:
+            if sys.version_info < (3, 0):
+                openmode = "rU"
+            else:
+                openmode = "r"  # 'U' mode is deprecated in Python 3
+            with open(log_file, openmode) as f:
                 text = f.read()
             self.assertEqual(text, "formatted output")
diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py
index 2053e93c0b42b..469c591afb3e2 100644
--- a/benchmark/scripts/test_compare_perf_tests.py
+++ b/benchmark/scripts/test_compare_perf_tests.py
@@ -100,28 +100,28 @@ def test_computes_inter_quartile_range(self):
         self.samples.add(Sample(5, 1, 1100))
         self.assertEqual(self.samples.iqr, 50)
 
-    def assertEqualtats(self, stats, expected_stats):
+    def assertEqualStats(self, stats, expected_stats):
         for actual, expected in zip(stats, expected_stats):
-            self.assertAlmostEquals(actual, expected, places=2)
+            self.assertAlmostEqual(actual, expected, places=2)
 
     def test_computes_mean_sd_cv(self):
         ss = self.samples
-        self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
+        self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
         self.samples.add(Sample(2, 1, 1100))
-        self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
+        self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
 
     def test_computes_range_spread(self):
         ss = self.samples
-        self.assertEqualtats((ss.range, ss.spread), (0, 0))
+        self.assertEqualStats((ss.range, ss.spread), (0, 0))
         self.samples.add(Sample(2, 1, 1100))
-        self.assertEqualtats((ss.range, ss.spread), (100, 10.0 / 100))
+        self.assertEqualStats((ss.range, ss.spread), (100, 10.0 / 100))
 
     def test_init_with_samples(self):
         self.samples = PerformanceTestSamples(
             "B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)]
         )
         self.assertEqual(self.samples.count, 2)
-        self.assertEqualtats(
+        self.assertEqualStats(
             (
                 self.samples.mean,
                 self.samples.sd,
@@ -135,7 +135,7 @@ def test_can_handle_zero_runtime(self):
         # guard against dividing by 0
         self.samples = PerformanceTestSamples("Zero")
         self.samples.add(Sample(0, 1, 0))
-        self.assertEqualtats(
+        self.assertEqualStats(
            (
                 self.samples.mean,
                 self.samples.sd,
@@ -155,14 +155,14 @@ def test_excludes_outliers(self):
         ]
         self.samples = PerformanceTestSamples("Outliers", ss)
         self.assertEqual(self.samples.count, 13)
-        self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 52.36))
+        self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 52.36))
 
         self.samples.exclude_outliers()
 
         self.assertEqual(self.samples.count, 11)
         self.assertEqual(self.samples.outliers, ss[11:])
         self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
-        self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 35.36))
+        self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 35.36))
 
     def test_excludes_outliers_zero_IQR(self):
         self.samples = PerformanceTestSamples("Tight")
@@ -175,7 +175,7 @@ def test_excludes_outliers_zero_IQR(self):
         self.samples.exclude_outliers()
 
         self.assertEqual(self.samples.count, 3)
-        self.assertEqualtats((self.samples.min, self.samples.max), (18, 18))
+        self.assertEqualStats((self.samples.min, self.samples.max), (18, 18))
 
     def test_excludes_outliers_top_only(self):
         ss = [
@@ -189,7 +189,7 @@ def test_excludes_outliers_top_only(self):
         self.samples.exclude_outliers(top_only=True)
 
         self.assertEqual(self.samples.count, 4)
-        self.assertEqualtats((self.samples.min, self.samples.max), (1, 2))
+        self.assertEqualStats((self.samples.min, self.samples.max), (1, 2))
 
 
 class TestPerformanceTestResult(unittest.TestCase):
@@ -217,8 +217,8 @@ def test_init_quantiles(self):
         self.assertEqual(
             (r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601)
         )
-        self.assertAlmostEquals(r.mean, 54498.67, places=2)
-        self.assertAlmostEquals(r.sd, 109.61, places=2)
+        self.assertAlmostEqual(r.mean, 54498.67, places=2)
+        self.assertAlmostEqual(r.sd, 109.61, places=2)
         self.assertEqual(r.samples.count, 3)
         self.assertEqual(r.samples.num_samples, 3)
         self.assertEqual(
@@ -357,7 +357,7 @@ def test_init_meta(self):
         self.assertEqual(
             (r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259)
         )
-        self.assertEquals(r.max_rss, 32768)
+        self.assertEqual(r.max_rss, 32768)
         self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15))
 
     def test_repr(self):
@@ -379,7 +379,7 @@ def test_merge(self):
         )[
             1:
         ]
-        results = map(PerformanceTestResult, [line.split(",") for line in tests])
+        results = list(map(PerformanceTestResult, [line.split(",") for line in tests]))
         results[2].setup = 9
         results[3].setup = 7
 
@@ -432,20 +432,20 @@ def setUp(self):
     def test_init(self):
         rc = ResultComparison(self.r1, self.r2)
         self.assertEqual(rc.name, "AngryPhonebook")
-        self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0)
-        self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), places=3)
+        self.assertAlmostEqual(rc.ratio, 12325.0 / 11616.0)
+        self.assertAlmostEqual(rc.delta, (((11616.0 / 12325.0) - 1) * 100), places=3)
         # handle test results that sometimes change to zero, when compiler
         # optimizes out the body of the incorrectly written test
         rc = ResultComparison(self.r0, self.r0)
         self.assertEqual(rc.name, "GlobalClass")
-        self.assertAlmostEquals(rc.ratio, 1)
-        self.assertAlmostEquals(rc.delta, 0, places=3)
+        self.assertAlmostEqual(rc.ratio, 1)
+        self.assertAlmostEqual(rc.delta, 0, places=3)
         rc = ResultComparison(self.r0, self.r01)
-        self.assertAlmostEquals(rc.ratio, 0, places=3)
-        self.assertAlmostEquals(rc.delta, 2000000, places=3)
+        self.assertAlmostEqual(rc.ratio, 0, places=3)
+        self.assertAlmostEqual(rc.delta, 2000000, places=3)
         rc = ResultComparison(self.r01, self.r0)
-        self.assertAlmostEquals(rc.ratio, 20001)
-        self.assertAlmostEquals(rc.delta, -99.995, places=3)
+        self.assertAlmostEqual(rc.ratio, 20001)
+        self.assertAlmostEqual(rc.delta, -99.995, places=3)
         # disallow comparison of different test results
         self.assertRaises(AssertionError, ResultComparison, self.r0, self.r1)
@@ -528,9 +528,9 @@ def test_parse_results_csv(self):
         parser = LogParser()
         results = parser.parse_results(log.splitlines())
         self.assertTrue(isinstance(results[0], PerformanceTestResult))
-        self.assertEquals(results[0].name, "Array.append.Array.Int?")
-        self.assertEquals(results[1].name, "Bridging.NSArray.as!.Array.NSString")
-        self.assertEquals(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve")
+        self.assertEqual(results[0].name, "Array.append.Array.Int?")
+        self.assertEqual(results[1].name, "Bridging.NSArray.as!.Array.NSString")
+        self.assertEqual(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve")
 
     def test_parse_results_tab_delimited(self):
         log = "34\tBitCount\t20\t3\t4\t4\t0\t4"
@@ -706,7 +706,7 @@ def test_results_from_merge(self):
         concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990
 4,ArrayAppend,1,20000,20000,20000,0,20000"""
         results = LogParser.results_from_string(concatenated_logs)
-        self.assertEqual(results.keys(), ["ArrayAppend"])
+        self.assertEqual(list(results.keys()), ["ArrayAppend"])
         result = results["ArrayAppend"]
         self.assertTrue(isinstance(result, PerformanceTestResult))
         self.assertEqual(result.min, 20000)
@@ -728,14 +728,14 @@ def test_results_from_merge_verbose(self):
 Sample 3,364245
 3,Array2D,4,363094,376131,368159,5931,369169"""
         results = LogParser.results_from_string(concatenated_logs)
-        self.assertEqual(results.keys(), ["Array2D"])
+        self.assertEqual(list(results.keys()), ["Array2D"])
         result = results["Array2D"]
         self.assertTrue(isinstance(result, PerformanceTestResult))
         self.assertEqual(result.min, 350815)
         self.assertEqual(result.max, 376131)
         self.assertEqual(result.median, 358817)
-        self.assertAlmostEquals(result.sd, 8443.37, places=2)
-        self.assertAlmostEquals(result.mean, 361463.25, places=2)
+        self.assertAlmostEqual(result.sd, 8443.37, places=2)
+        self.assertAlmostEqual(result.mean, 361463.25, places=2)
         self.assertEqual(result.num_samples, 8)
         samples = result.samples
         self.assertTrue(isinstance(samples, PerformanceTestSamples))
diff --git a/benchmark/scripts/test_utils.py b/benchmark/scripts/test_utils.py
index 4b675d9d82582..70465f0b83d52 100644
--- a/benchmark/scripts/test_utils.py
+++ b/benchmark/scripts/test_utils.py
@@ -24,7 +24,10 @@
 import logging
 import sys
 
-from StringIO import StringIO
+try:
+    from StringIO import StringIO  # for Python 2
+except ImportError:
+    from io import StringIO  # for Python 3
 
 from contextlib import contextmanager
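
Note (illustrative, not part of the patch): the `running_mean_variance` hunk in compare_perf_tests.py only replaces the Python 2 tuple-parameter signature; Welford's update itself is unchanged. A minimal standalone sketch of that update, with hypothetical names, in case the formula in the docstring is hard to read:

    import functools

    def welford_update(stats, x):
        # stats is (k, M, S): sample count, running mean, and accumulated sum of
        # squared deviations; the sample standard deviation is sqrt(S / (k - 1)).
        (k, M, S) = stats
        k = float(k + 1)
        M_new = M + (x - M) / k
        S_new = S + (x - M) * (x - M_new)
        return (k, M_new, S_new)

    # Fold samples through the update, starting from (0, 0.0, 0.0).
    k, mean, S = functools.reduce(welford_update, [1000, 1100, 1050], (0, 0.0, 0.0))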