22Download mode implementation.
33
44"""
5+
56import mimetypes
67import os
78import re
1213
1314import requests
1415
16+ from .context import Environment
1517from .models import HTTPResponse , OutputOptions
1618from .output .streams import RawStream
17- from .context import Environment
18-
1919
2020PARTIAL_CONTENT = 206
2121
@@ -37,24 +37,23 @@ def parse_content_range(content_range: str, resumed_from: int) -> int:
3737
3838 """
3939 if content_range is None :
40- raise ContentRangeError (' Missing Content-Range' )
40+ raise ContentRangeError (" Missing Content-Range" )
4141
4242 pattern = (
43- r' ^bytes (?P<first_byte_pos>\d+)-(?P<last_byte_pos>\d+)'
44- r' /(\*|(?P<instance_length>\d+))$'
43+ r" ^bytes (?P<first_byte_pos>\d+)-(?P<last_byte_pos>\d+)"
44+ r" /(\*|(?P<instance_length>\d+))$"
4545 )
4646 match = re .match (pattern , content_range )
4747
4848 if not match :
49- raise ContentRangeError (
50- f'Invalid Content-Range format { content_range !r} ' )
49+ raise ContentRangeError (f"Invalid Content-Range format { content_range !r} " )
5150
5251 content_range_dict = match .groupdict ()
53- first_byte_pos = int (content_range_dict [' first_byte_pos' ])
54- last_byte_pos = int (content_range_dict [' last_byte_pos' ])
52+ first_byte_pos = int (content_range_dict [" first_byte_pos" ])
53+ last_byte_pos = int (content_range_dict [" last_byte_pos" ])
5554 instance_length = (
56- int (content_range_dict [' instance_length' ])
57- if content_range_dict [' instance_length' ]
55+ int (content_range_dict [" instance_length" ])
56+ if content_range_dict [" instance_length" ]
5857 else None
5958 )
6059
@@ -64,27 +63,24 @@ def parse_content_range(content_range: str, resumed_from: int) -> int:
6463 # last-byte-pos value, is invalid. The recipient of an invalid
6564 # byte-content-range- spec MUST ignore it and any content
6665 # transferred along with it."
67- if (first_byte_pos > last_byte_pos
68- or (instance_length is not None
69- and instance_length <= last_byte_pos )):
70- raise ContentRangeError (
71- f'Invalid Content-Range returned: { content_range !r} ' )
66+ if first_byte_pos > last_byte_pos or (
67+ instance_length is not None and instance_length <= last_byte_pos
68+ ):
69+ raise ContentRangeError (f"Invalid Content-Range returned: { content_range !r} " )
7270
73- if ( first_byte_pos != resumed_from
74- or ( instance_length is not None
75- and last_byte_pos + 1 != instance_length ) ):
71+ if first_byte_pos != resumed_from or (
72+ instance_length is not None and last_byte_pos + 1 != instance_length
73+ ):
7674 # Not what we asked for.
7775 raise ContentRangeError (
78- f' Unexpected Content-Range returned ({ content_range !r} )'
76+ f" Unexpected Content-Range returned ({ content_range !r} )"
7977 f' for the requested Range ("bytes={ resumed_from } -")'
8078 )
8179
8280 return last_byte_pos + 1
8381
8482
def filename_from_content_disposition(content_disposition: str) -> Optional[str]:
    """
    Extract and validate filename from a Content-Disposition header.

    The header value is parsed via ``Message`` and the filename parameter,
    if any, is reduced to its base name with leading dots and surrounding
    whitespace removed.

    :param content_disposition: Content-Disposition value
    :return: the sanitized filename, or `None` when no filename is present
             or nothing is left after sanitation

    """
    # e.g. attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz
    header = Message(f"Content-Disposition: {content_disposition}")
    raw_name = header.get_filename()
    if not raw_name:
        return None

    # Basic sanitation: drop any directory part, leading dots and
    # surrounding whitespace.
    cleaned = os.path.basename(raw_name).lstrip(".").strip()
    return cleaned or None
104100
105101
106102def filename_from_url (url : str , content_type : Optional [str ]) -> str :
107- fn = urlsplit (url ).path .rstrip ('/' )
108- fn = os .path .basename (fn ) if fn else ' index'
109- if '.' not in fn and content_type :
110- content_type = content_type .split (';' )[0 ]
111- if content_type == ' text/plain' :
103+ fn = urlsplit (url ).path .rstrip ("/" )
104+ fn = os .path .basename (fn ) if fn else " index"
105+ if "." not in fn and content_type :
106+ content_type = content_type .split (";" )[0 ]
107+ if content_type == " text/plain" :
112108 # mimetypes returns '.ksh'
113- ext = ' .txt'
109+ ext = " .txt"
114110 else :
115111 ext = mimetypes .guess_extension (content_type )
116112
117- if ext == ' .htm' :
118- ext = ' .html'
113+ if ext == " .htm" :
114+ ext = " .html"
119115
120116 if ext :
121117 fn += ext
@@ -136,12 +132,12 @@ def trim_filename(filename: str, max_len: int) -> str:
136132
def get_filename_max_length(directory: str) -> int:
    """
    Return the maximum file name length to use for `directory`.

    When ``os.pathconf`` exists and ``PC_NAME_MAX`` is one of its known
    names, the value reported for `directory` is returned; otherwise the
    result is 255.

    :param directory: path passed to ``os.pathconf``
    :return: maximum file name length
    """
    pathconf_usable = hasattr(os, "pathconf") and "PC_NAME_MAX" in os.pathconf_names
    if not pathconf_usable:
        return 255
    return os.pathconf(directory, "PC_NAME_MAX")
142138
143139
144- def trim_filename_if_needed (filename : str , directory = '.' , extra = 0 ) -> str :
140+ def trim_filename_if_needed (filename : str , directory = "." , extra = 0 ) -> str :
145141 max_len = get_filename_max_length (directory ) - extra
146142 if len (filename ) > max_len :
147143 filename = trim_filename (filename , max_len )
@@ -151,7 +147,7 @@ def trim_filename_if_needed(filename: str, directory='.', extra=0) -> str:
151147def get_unique_filename (filename : str , exists = os .path .exists ) -> str :
152148 attempt = 0
153149 while True :
154- suffix = f' -{ attempt } ' if attempt > 0 else ''
150+ suffix = f" -{ attempt } " if attempt > 0 else ""
155151 try_filename = trim_filename_if_needed (filename , extra = len (suffix ))
156152 try_filename += suffix
157153 if not exists (try_filename ):
@@ -161,12 +157,7 @@ def get_unique_filename(filename: str, exists=os.path.exists) -> str:
161157
162158class Downloader :
163159
164- def __init__ (
165- self ,
166- env : Environment ,
167- output_file : IO = None ,
168- resume : bool = False
169- ):
160+ def __init__ (self , env : Environment , output_file : IO = None , resume : bool = False ):
170161 """
171162 :param resume: Should the download resume if partial download
172163 already exists.
@@ -190,19 +181,17 @@ def pre_request(self, request_headers: dict):
190181
191182 """
192183 # Ask the server not to encode the content so that we can resume, etc.
193- request_headers [' Accept-Encoding' ] = ' identity'
184+ request_headers [" Accept-Encoding" ] = " identity"
194185 if self ._resume :
195186 bytes_have = os .path .getsize (self ._output_file .name )
196187 if bytes_have :
197188 # Set ``Range`` header to resume the download
198189 # TODO: Use "If-Range: mtime" to make sure it's fresh?
199- request_headers [' Range' ] = f' bytes={ bytes_have } -'
190+ request_headers [" Range" ] = f" bytes={ bytes_have } -"
200191 self ._resumed_from = bytes_have
201192
202193 def start (
203- self ,
204- initial_url : str ,
205- final_response : requests .Response
194+ self , initial_url : str , final_response : requests .Response
206195 ) -> Tuple [RawStream , IO ]:
207196 """
208197 Initiate and return a stream for `response` body with progress
@@ -216,13 +205,27 @@ def start(
216205 """
217206 assert not self .status .time_started
218207
219- # FIXME: some servers still might sent Content-Encoding: gzip
220- # <https://github.com/httpie/cli/issues/423>
208+ # Some servers may still send a compressed body even though
209+ # we ask for identity encoding. In that case, ``Content-Length``
210+ # refers to the encoded size (RFC 9110 § 8.6), so we disable
211+ # automatic decoding to make our byte tracking match.
221212 try :
222- total_size = int (final_response .headers [' Content-Length' ])
213+ total_size = int (final_response .headers [" Content-Length" ])
223214 except (KeyError , ValueError , TypeError ):
224215 total_size = None
225216
217+ content_encoding = final_response .headers .get ("Content-Encoding" )
218+ if content_encoding :
219+ final_response .raw .decode_content = False
220+
221+ class EncodedHTTPResponse (HTTPResponse ):
222+ def iter_body (self , chunk_size = 1 ): # type: ignore[override]
223+ return final_response .raw .stream (chunk_size , decode_content = False )
224+
225+ response_msg = EncodedHTTPResponse (final_response )
226+ else :
227+ response_msg = HTTPResponse (final_response )
228+
226229 if not self ._output_file :
227230 self ._output_file = self ._get_output_file_from_response (
228231 initial_url = initial_url ,
@@ -232,8 +235,7 @@ def start(
232235 # `--output, -o` provided
233236 if self ._resume and final_response .status_code == PARTIAL_CONTENT :
234237 total_size = parse_content_range (
235- final_response .headers .get ('Content-Range' ),
236- self ._resumed_from
238+ final_response .headers .get ("Content-Range" ), self ._resumed_from
237239 )
238240
239241 else :
@@ -244,17 +246,19 @@ def start(
244246 except OSError :
245247 pass # stdout
246248
247- output_options = OutputOptions .from_message (final_response , headers = False , body = True )
249+ output_options = OutputOptions .from_message (
250+ final_response , headers = False , body = True
251+ )
248252 stream = RawStream (
249- msg = HTTPResponse ( final_response ) ,
253+ msg = response_msg ,
250254 output_options = output_options ,
251255 on_body_chunk_downloaded = self .chunk_downloaded ,
252256 )
253257
254258 self .status .started (
255259 output_file = self ._output_file ,
256260 resumed_from = self ._resumed_from ,
257- total_size = total_size
261+ total_size = total_size ,
258262 )
259263
260264 return stream , self ._output_file
@@ -292,16 +296,17 @@ def _get_output_file_from_response(
292296 ) -> IO :
293297 # Output file not specified. Pick a name that doesn't exist yet.
294298 filename = None
295- if ' Content-Disposition' in final_response .headers :
299+ if " Content-Disposition" in final_response .headers :
296300 filename = filename_from_content_disposition (
297- final_response .headers ['Content-Disposition' ])
301+ final_response .headers ["Content-Disposition" ]
302+ )
298303 if not filename :
299304 filename = filename_from_url (
300305 url = initial_url ,
301- content_type = final_response .headers .get (' Content-Type' ),
306+ content_type = final_response .headers .get (" Content-Type" ),
302307 )
303308 unique_filename = get_unique_filename (filename )
304- return open (unique_filename , buffering = 0 , mode = ' a+b' )
309+ return open (unique_filename , buffering = 0 , mode = " a+b" )
305310
306311
307312class DownloadStatus :
@@ -325,11 +330,11 @@ def started(self, output_file, resumed_from=0, total_size=None):
325330 def start_display (self , output_file ):
326331 from httpie .output .ui .rich_progress import (
327332 DummyDisplay ,
333+ ProgressDisplay ,
328334 StatusDisplay ,
329- ProgressDisplay
330335 )
331336
332- message = f' Downloading to { output_file .name } '
337+ message = f" Downloading to { output_file .name } "
333338 if self .env .show_displays :
334339 if self .total_size is None :
335340 # Rich does not support progress bars without a total
@@ -341,9 +346,7 @@ def start_display(self, output_file):
341346 self .display = DummyDisplay (self .env )
342347
343348 self .display .start (
344- total = self .total_size ,
345- at = self .downloaded ,
346- description = message
349+ total = self .total_size , at = self .downloaded , description = message
347350 )
348351
349352 def chunk_downloaded (self , size ):
@@ -357,10 +360,7 @@ def has_finished(self):
357360
358361 @property
359362 def time_spent (self ):
360- if (
361- self .time_started is not None
362- and self .time_finished is not None
363- ):
363+ if self .time_started is not None and self .time_finished is not None :
364364 return self .time_finished - self .time_started
365365 else :
366366 return None
@@ -369,9 +369,9 @@ def finished(self):
369369 assert self .time_started is not None
370370 assert self .time_finished is None
371371 self .time_finished = monotonic ()
372- if hasattr (self , ' display' ):
372+ if hasattr (self , " display" ):
373373 self .display .stop (self .time_spent )
374374
375375 def terminate (self ):
376- if hasattr (self , ' display' ):
376+ if hasattr (self , " display" ):
377377 self .display .stop (self .time_spent )
0 commit comments