Skip to content

Commit d5b2f86

Browse files
authored
Merge pull request #78 from pycompression/buffercompressionwriter
Buffer compression writers
2 parents 3a0a481 + ad773ca commit d5b2f86

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

src/xopen/__init__.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -732,10 +732,20 @@ def xopen(
732732
detected_format = _detect_format_from_content(filename)
733733

734734
if detected_format == "gz":
735-
return _open_gz(filename, mode, compresslevel, threads)
735+
opened_file = _open_gz(filename, mode, compresslevel, threads)
736736
elif detected_format == "xz":
737-
return _open_xz(filename, mode)
737+
opened_file = _open_xz(filename, mode)
738738
elif detected_format == "bz2":
739-
return _open_bz2(filename, mode, threads)
739+
opened_file = _open_bz2(filename, mode, threads)
740740
else:
741-
return open(filename, mode)
741+
opened_file = open(filename, mode)
742+
743+
# The "write" method for GzipFile is very costly. Lots of Python calls are
744+
# made. To a lesser extent this is true for LZMAFile and BZ2File. By
745+
# putting a buffer in between, the expensive write method is called much
746+
# less. The effect is very noticeable when writing small units such as
747+
# lines or FASTQ records.
748+
if (isinstance(opened_file, (gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile))
749+
and "w" in mode):
750+
opened_file = io.BufferedWriter(opened_file) # type: ignore
751+
return opened_file

tests/test_xopen.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -532,14 +532,16 @@ def test_write_no_threads(tmpdir, ext):
532532
klass = klasses[ext]
533533
path = str(tmpdir.join(f"out.{ext}"))
534534
with xopen(path, "wb", threads=0) as f:
535-
assert isinstance(f, klass), f
535+
assert isinstance(f, io.BufferedWriter)
536+
if ext:
537+
assert isinstance(f.raw, klass), f
536538

537539

538540
def test_write_gzip_no_threads_no_isal(tmpdir, xopen_without_igzip):
539541
import gzip
540542
path = str(tmpdir.join("out.gz"))
541543
with xopen_without_igzip(path, "wb", threads=0) as f:
542-
assert isinstance(f, gzip.GzipFile), f
544+
assert isinstance(f.raw, gzip.GzipFile), f
543545

544546

545547
def test_write_stdout():

0 commit comments

Comments
 (0)