@@ -732,10 +732,20 @@ def xopen(
732
732
detected_format = _detect_format_from_content (filename )
733
733
734
734
if detected_format == "gz" :
735
- return _open_gz (filename , mode , compresslevel , threads )
735
+ opened_file = _open_gz (filename , mode , compresslevel , threads )
736
736
elif detected_format == "xz" :
737
- return _open_xz (filename , mode )
737
+ opened_file = _open_xz (filename , mode )
738
738
elif detected_format == "bz2" :
739
- return _open_bz2 (filename , mode , threads )
739
+ opened_file = _open_bz2 (filename , mode , threads )
740
740
else :
741
- return open (filename , mode )
741
+ opened_file = open (filename , mode )
742
+
743
+ # The "write" method for GzipFile is very costly. Lots of Python calls are
744
+ # made. To a lesser extent this is true for LZMAFile and BZ2File. By
745
+ # putting a buffer in between, the expensive write method is called much
746
+ # less. The effect is very noticeable when writing small units such as
747
+ # lines or FASTQ records.
748
+ if (isinstance (opened_file , (gzip .GzipFile , bz2 .BZ2File , lzma .LZMAFile ))
749
+ and "w" in mode ):
750
+ opened_file = io .BufferedWriter (opened_file ) # type: ignore
751
+ return opened_file
0 commit comments