From 7c3465f757be1e053008482feacb65120dbf2505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Wed, 29 Apr 2020 17:43:45 +0200 Subject: [PATCH 1/3] Add decoding benchmark --- examples/91-benchmark-count.php | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 examples/91-benchmark-count.php diff --git a/examples/91-benchmark-count.php b/examples/91-benchmark-count.php new file mode 100644 index 0000000..45a17a4 --- /dev/null +++ b/examples/91-benchmark-count.php @@ -0,0 +1,43 @@ +on('data', function () use (&$count) { + ++$count; +}); + +$start = microtime(true); +$report = $loop->addPeriodicTimer(0.05, function () use (&$count, $start) { + printf("\r%d records in %0.3fs...", $count, microtime(true) - $start); +}); + +$decoder->on('close', function () use (&$count, $report, $loop, $start) { + $now = microtime(true); + $loop->cancelTimer($report); + + printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start)); +}); + +$loop->run(); From e829f5e1e368c582430f92a7653630b42a060ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Fri, 1 May 2020 19:30:27 +0200 Subject: [PATCH 2/3] Add decoding benchmark for GZIP-compressed CSV files --- composer.json | 5 +-- examples/92-benchmark-count-gzip.php | 51 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 examples/92-benchmark-count-gzip.php diff --git a/composer.json b/composer.json index 97d9541..dc5c767 100644 --- a/composer.json +++ b/composer.json @@ -18,7 +18,8 @@ "react/stream": "^1.0 || ^0.7 || ^0.6" }, "require-dev": { - "react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3", - "phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35" + "clue/zlib-react": "^1.0 || ^0.2.2", + "phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35", + "react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3" } } diff --git a/examples/92-benchmark-count-gzip.php b/examples/92-benchmark-count-gzip.php new file mode 100644 index 0000000..8c43c32 --- /dev/null +++ b/examples/92-benchmark-count-gzip.php @@ -0,0 +1,51 @@ + IRAhandle_tweets_1.csv.gz +// +// 3) pipe compressed CSV into benchmark script: +// $ php examples/92-benchmark-count-gzip.php < IRAhandle_tweets_1.csv.gz + +use Clue\React\Csv\AssocDecoder; +use Clue\React\Zlib\Decompressor; +use React\EventLoop\Factory; +use React\Stream\ReadableResourceStream; + +require __DIR__ . '/../vendor/autoload.php'; + +if (extension_loaded('xdebug')) { + echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.' . PHP_EOL; +} + +$loop = Factory::create(); +$input = new ReadableResourceStream(STDIN, $loop); +$decompressor = new Decompressor(ZLIB_ENCODING_GZIP); +$input->pipe($decompressor); +$decoder = new AssocDecoder($decompressor); + +$decompressor->on('error', function (Exception $e) { + printf("\nDecompression error: " . $e->getMessage() . "\n"); +}); + +$count = 0; +$decoder->on('data', function () use (&$count) { + ++$count; +}); + +$start = microtime(true); +$report = $loop->addPeriodicTimer(0.05, function () use (&$count, $start) { + printf("\r%d records in %0.3fs...", $count, microtime(true) - $start); +}); + +$decoder->on('close', function () use (&$count, $report, $loop, $start) { + $now = microtime(true); + $loop->cancelTimer($report); + + printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start)); +}); + +$loop->run(); From 9e366832d4ae28c5adca503dcbf1009050db4747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Sat, 2 May 2020 20:21:41 +0200 Subject: [PATCH 3/3] Use parallel gunzip process for faster gzip decompression --- composer.json | 2 +- examples/92-benchmark-count-gzip.php | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/composer.json b/composer.json index dc5c767..82d1c2d 100644 --- a/composer.json +++ b/composer.json @@ -18,8 +18,8 @@ "react/stream": "^1.0 || ^0.7 || ^0.6" }, "require-dev": { - "clue/zlib-react": "^1.0 || ^0.2.2", "phpunit/phpunit": "^7.0 || ^6.0 || ^5.7 || ^4.8.35", + "react/child-process": "^0.6", "react/event-loop": "^1.0 || ^0.5 || ^0.4 || ^0.3" } } diff --git a/examples/92-benchmark-count-gzip.php b/examples/92-benchmark-count-gzip.php index 8c43c32..ae89625 100644 --- a/examples/92-benchmark-count-gzip.php +++ b/examples/92-benchmark-count-gzip.php @@ -11,9 +11,8 @@ // $ php examples/92-benchmark-count-gzip.php < IRAhandle_tweets_1.csv.gz use Clue\React\Csv\AssocDecoder; -use Clue\React\Zlib\Decompressor; +use React\ChildProcess\Process; use React\EventLoop\Factory; -use React\Stream\ReadableResourceStream; require __DIR__ . '/../vendor/autoload.php'; @@ -22,14 +21,19 @@ } $loop = Factory::create(); -$input = new ReadableResourceStream(STDIN, $loop); -$decompressor = new Decompressor(ZLIB_ENCODING_GZIP); -$input->pipe($decompressor); -$decoder = new AssocDecoder($decompressor); -$decompressor->on('error', function (Exception $e) { - printf("\nDecompression error: " . $e->getMessage() . "\n"); -}); +// This benchmark example spawns the decompressor in a child `gunzip` process +// because parsing CSV files is already mostly CPU-bound and multi-processing +// is preferred here. If the input source is slower (such as an HTTP download) +// or if `gunzip` is not available (Windows), using a built-in decompressor +// such as https://github.com/clue/reactphp-zlib would be preferable. +$process = new Process('exec gunzip', null, null, array( + 0 => STDIN, + 1 => array('pipe', 'w'), + STDERR +)); +$process->start($loop); +$decoder = new AssocDecoder($process->stdout); $count = 0; $decoder->on('data', function () use (&$count) {