From c076854c295d4f107ada78ad4d2df027988e9b77 Mon Sep 17 00:00:00 2001
From: 4n4l1st-ch4rl3s <63286909+4n4l1st-ch4rl3s@users.noreply.github.com>
Date: Tue, 25 Feb 2025 12:13:07 +0300
Subject: [PATCH 1/3] Fix "[!] WARNING: thirdparty.urllib3.packages.six.moves"
 error (raised even after installing six and urllib3)
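
The root cause appears to be that the bundled urllib3 resolves
`thirdparty.urllib3.packages.six.moves` through the tool's own `thirdparty`
package, so installing six or urllib3 into site-packages can never satisfy
that import; the fix vendors six.py at every path the bundled code imports
it from (see the file list below).

For review, here is a minimal sketch of the mechanism the vendored six.py
relies on. This is not code from the patch; the names `mini_six`,
`TinyMovesImporter`, `ALIASES`, and `PACKAGES` are invented for illustration.
It shows a meta path finder that serves "virtual" modules from an in-memory
table, which is how `six.moves.http_client` can resolve to `http.client`
without any file on disk:

    import sys
    from importlib.util import spec_from_loader

    # Table of virtual modules this finder serves. six keeps an
    # equivalent table in _SixMetaPathImporter.known_modules.
    ALIASES = {"mini_six.moves.http_client": "http.client"}
    PACKAGES = {"mini_six", "mini_six.moves"}

    class TinyMovesImporter:
        def find_spec(self, fullname, path=None, target=None):
            if fullname in ALIASES or fullname in PACKAGES:
                return spec_from_loader(fullname, self,
                                        is_package=fullname in PACKAGES)
            return None  # not ours; let the next finder try

        def create_module(self, spec):
            if spec.name in ALIASES:
                real = ALIASES[spec.name]
                __import__(real)
                return sys.modules[real]  # hand back the real stdlib module
            return None  # default empty module for the package shells

        def exec_module(self, module):
            pass  # nothing to execute; aliases are already initialized

    sys.meta_path.append(TinyMovesImporter())

    from mini_six.moves import http_client  # resolves to http.client
    print(http_client.HTTPConnection)

The vendored six.py below implements the same idea in `_SixMetaPathImporter`,
and additionally keeps the legacy `find_module`/`load_module` protocol so the
same file works on interpreters that predate `find_spec`.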
---
 thirdparty/{six => .six}/__init__.py          |    0
 thirdparty/.six/moves/six.py                  | 1003 +++++++++++++++
 thirdparty/.six/six.py                        | 1003 +++++++++++++++
 thirdparty/six.py                             | 1003 +++++++++++++++
 thirdparty/urllib3/__init__.py                |  164 ++-
 thirdparty/urllib3/_base_connection.py        |  165 +++
 thirdparty/urllib3/_collections.py            |  429 ++++---
 thirdparty/urllib3/_request_methods.py        |  278 ++++
 thirdparty/urllib3/_version.py                |   18 +-
 thirdparty/urllib3/connection.py              | 1088 ++++++++++++----
 thirdparty/urllib3/connectionpool.py          |  835 +++++++-----
 .../urllib3/contrib/emscripten/__init__.py    |   16 +
 .../urllib3/contrib/emscripten/connection.py  |  255 ++++
 .../emscripten/emscripten_fetch_worker.js     |  110 ++
 .../urllib3/contrib/emscripten/fetch.py       |  708 +++++++++++
 .../urllib3/contrib/emscripten/request.py     |   22 +
 .../urllib3/contrib/emscripten/response.py    |  285 +++++
 thirdparty/urllib3/contrib/pyopenssl.py       |  409 +++---
 thirdparty/urllib3/contrib/socks.py           |  102 +-
 thirdparty/urllib3/exceptions.py              |  281 +++--
 thirdparty/urllib3/fields.py                  |  262 ++--
 thirdparty/urllib3/filepost.py                |   67 +-
 thirdparty/urllib3/http2/__init__.py          |   53 +
 thirdparty/urllib3/http2/connection.py        |  356 ++++++
 thirdparty/urllib3/http2/probe.py             |   87 ++
 thirdparty/urllib3/packages/six/six.py        | 1003 +++++++++++++++
 thirdparty/urllib3/poolmanager.py             |  403 ++++--
 thirdparty/urllib3/py.typed                   |    2 +
 thirdparty/urllib3/response.py                | 1114 ++++++++++++-----
 thirdparty/urllib3/util/__init__.py           |   30 +-
 thirdparty/urllib3/util/connection.py         |   87 +-
 thirdparty/urllib3/util/proxy.py              |   43 +
 thirdparty/urllib3/util/request.py            |  181 ++-
 thirdparty/urllib3/util/response.py           |   71 +-
 thirdparty/urllib3/util/retry.py              |  289 +++--
 thirdparty/urllib3/util/ssl_.py               |  563 +++++----
 thirdparty/urllib3/util/ssl_match_hostname.py |  159 +++
 thirdparty/urllib3/util/ssltransport.py       |  271 ++++
 thirdparty/urllib3/util/timeout.py            |  136 +-
 thirdparty/urllib3/util/url.py                |  413 +++---
 thirdparty/urllib3/util/util.py               |   42 +
 thirdparty/urllib3/util/wait.py               |   93 +-
 42 files changed, 11453 insertions(+), 2446 deletions(-)
 rename thirdparty/{six => .six}/__init__.py (100%)
 create mode 100644 thirdparty/.six/moves/six.py
 create mode 100644 thirdparty/.six/six.py
 create mode 100644 thirdparty/six.py
 create mode 100644 thirdparty/urllib3/_base_connection.py
 create mode 100644 thirdparty/urllib3/_request_methods.py
 create mode 100644 thirdparty/urllib3/contrib/emscripten/__init__.py
 create mode 100644 thirdparty/urllib3/contrib/emscripten/connection.py
 create mode 100644 thirdparty/urllib3/contrib/emscripten/emscripten_fetch_worker.js
 create mode 100644 thirdparty/urllib3/contrib/emscripten/fetch.py
 create mode 100644 thirdparty/urllib3/contrib/emscripten/request.py
 create mode 100644 thirdparty/urllib3/contrib/emscripten/response.py
 create mode 100644 thirdparty/urllib3/http2/__init__.py
 create mode 100644 thirdparty/urllib3/http2/connection.py
 create mode 100644 thirdparty/urllib3/http2/probe.py
 create mode 100644 thirdparty/urllib3/packages/six/six.py
 create mode 100644 thirdparty/urllib3/py.typed
 create mode 100644 thirdparty/urllib3/util/proxy.py
 create mode 100644 thirdparty/urllib3/util/ssl_match_hostname.py
 create mode 100644 thirdparty/urllib3/util/ssltransport.py
 create mode 100644 thirdparty/urllib3/util/util.py

diff --git a/thirdparty/six/__init__.py b/thirdparty/.six/__init__.py
similarity index 100%
rename from thirdparty/six/__init__.py
rename to thirdparty/.six/__init__.py
diff --git a/thirdparty/.six/moves/six.py b/thirdparty/.six/moves/six.py
new file mode 100644
index 0000000..3de5969
--- /dev/null
+++ b/thirdparty/.six/moves/six.py
@@ -0,0 +1,1003 @@
+# Copyright (c) 2010-2024 Benjamin Peterson
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Utilities for writing code that runs on Python 2 and 3"""
+
+from __future__ import absolute_import
+
+import functools
+import itertools
+import operator
+import sys
+import types
+
+__author__ = "Benjamin Peterson <benjamin@python.org>"
+__version__ = "1.17.0"
+
+
+# Useful for very coarse version differentiation.
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+PY34 = sys.version_info[0:2] >= (3, 4)
+
+if PY3:
+    string_types = str,
+    integer_types = int,
+    class_types = type,
+    text_type = str
+    binary_type = bytes
+
+    MAXSIZE = sys.maxsize
+else:
+    string_types = basestring,
+    integer_types = (int, long)
+    class_types = (type, types.ClassType)
+    text_type = unicode
+    binary_type = str
+
+    if sys.platform.startswith("java"):
+        # Jython always uses 32 bits.
+        MAXSIZE = int((1 << 31) - 1)
+    else:
+        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
+        class X(object):
+
+            def __len__(self):
+                return 1 << 31
+        try:
+            len(X())
+        except OverflowError:
+            # 32-bit
+            MAXSIZE = int((1 << 31) - 1)
+        else:
+            # 64-bit
+            MAXSIZE = int((1 << 63) - 1)
+        del X
+
+if PY34:
+    from importlib.util import spec_from_loader
+else:
+    spec_from_loader = None
+
+
+def _add_doc(func, doc):
+    """Add documentation to a function."""
+    func.__doc__ = doc
+
+
+def _import_module(name):
+    """Import module, returning the module after the last dot."""
+    __import__(name)
+    return sys.modules[name]
+
+
+class _LazyDescr(object):
+
+    def __init__(self, name):
+        self.name = name
+
+    def __get__(self, obj, tp):
+        result = self._resolve()
+        setattr(obj, self.name, result)  # Invokes __set__.
+        try:
+            # This is a bit ugly, but it avoids running this again by
+            # removing this descriptor.
+ delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." + fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def find_spec(self, fullname, path, target=None): + if fullname in self.known_modules: + return spec_from_loader(fullname, self) + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. 
+ + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", 
"thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." + attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + 
MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), +] +if sys.version_info[:2] < (3, 14): + _urllib_request_moved_attributes.extend( + [ + MovedAttribute("URLopener", "urllib", 
"urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + ] + ) +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + 
create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + del io + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" + _assertNotRegex = "assertNotRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, 
_assertRegex)(*args, **kwargs) + + +def assertNotRegex(self, *args, **kwargs): + return getattr(self, _assertNotRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + try: + raise tp, value, tb + finally: + tb = None +""") + + +if sys.version_info[:2] > (3,): + exec_("""def raise_from(value, from_value): + try: + raise value from from_value + finally: + value = None +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. + if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + # This does exactly the same what the :func:`py3:functools.update_wrapper` + # function does on Python versions after 3.2. It sets the ``__wrapped__`` + # attribute on ``wrapper`` object and it doesn't raise an error if any of + # the attributes mentioned in ``assigned`` and ``updated`` are missing on + # ``wrapped`` object. 
+ def _update_wrapper(wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + continue + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + wrapper.__wrapped__ = wrapped + return wrapper + _update_wrapper.__doc__ = functools.update_wrapper.__doc__ + + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + return functools.partial(_update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) + wraps.__doc__ = functools.wraps.__doc__ + +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + + def __new__(cls, name, this_bases, d): + if sys.version_info[:2] >= (3, 7): + # This version introduced PEP 560 that requires a bit + # of extra care (we mimic what is done by __build_class__). + resolved_bases = types.resolve_bases(bases) + if resolved_bases is not bases: + d['__orig_bases__'] = bases + else: + resolved_bases = bases + return meta(name, resolved_bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + if hasattr(cls, '__qualname__'): + orig_vars['__qualname__'] = cls.__qualname__ + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def ensure_binary(s, encoding='utf-8', errors='strict'): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, binary_type): + return s + if isinstance(s, text_type): + return s.encode(encoding, errors) + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding='utf-8', errors='strict'): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + # Optimization: Fast return for the common case. + if type(s) is str: + return s + if PY2 and isinstance(s, text_type): + return s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + return s.decode(encoding, errors) + elif not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + return s + + +def ensure_text(s, encoding='utf-8', errors='strict'): + """Coerce *s* to six.text_type. 
+ + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def python_2_unicode_compatible(klass): + """ + A class decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/thirdparty/.six/six.py b/thirdparty/.six/six.py new file mode 100644 index 0000000..3de5969 --- /dev/null +++ b/thirdparty/.six/six.py @@ -0,0 +1,1003 @@ +# Copyright (c) 2010-2024 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +"""Utilities for writing code that runs on Python 2 and 3""" + +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.17.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + +if PY34: + from importlib.util import spec_from_loader +else: + spec_from_loader = None + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. + delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." 
+ fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." + fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def find_spec(self, fullname, path, target=None): + if fullname in self.known_modules: + return spec_from_loader(fullname, self) + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", 
"htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." 
+ attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", 
"urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), +] +if sys.version_info[:2] < (3, 14): + _urllib_request_moved_attributes.extend( + [ + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + ] + ) +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + 
".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, 
"Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + del io + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" + _assertNotRegex = "assertNotRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +def assertNotRegex(self, *args, **kwargs): + return getattr(self, _assertNotRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + try: + raise tp, value, tb + finally: + tb = None +""") + + +if sys.version_info[:2] > (3,): + exec_("""def raise_from(value, from_value): + try: + raise value from from_value + finally: + value = None +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. 
+ if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + # This does exactly the same what the :func:`py3:functools.update_wrapper` + # function does on Python versions after 3.2. It sets the ``__wrapped__`` + # attribute on ``wrapper`` object and it doesn't raise an error if any of + # the attributes mentioned in ``assigned`` and ``updated`` are missing on + # ``wrapped`` object. + def _update_wrapper(wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + continue + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + wrapper.__wrapped__ = wrapped + return wrapper + _update_wrapper.__doc__ = functools.update_wrapper.__doc__ + + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + return functools.partial(_update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) + wraps.__doc__ = functools.wraps.__doc__ + +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + + def __new__(cls, name, this_bases, d): + if sys.version_info[:2] >= (3, 7): + # This version introduced PEP 560 that requires a bit + # of extra care (we mimic what is done by __build_class__). 
+ resolved_bases = types.resolve_bases(bases) + if resolved_bases is not bases: + d['__orig_bases__'] = bases + else: + resolved_bases = bases + return meta(name, resolved_bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + if hasattr(cls, '__qualname__'): + orig_vars['__qualname__'] = cls.__qualname__ + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def ensure_binary(s, encoding='utf-8', errors='strict'): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, binary_type): + return s + if isinstance(s, text_type): + return s.encode(encoding, errors) + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding='utf-8', errors='strict'): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + # Optimization: Fast return for the common case. + if type(s) is str: + return s + if PY2 and isinstance(s, text_type): + return s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + return s.decode(encoding, errors) + elif not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + return s + + +def ensure_text(s, encoding='utf-8', errors='strict'): + """Coerce *s* to six.text_type. + + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def python_2_unicode_compatible(klass): + """ + A class decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) 
+if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/thirdparty/six.py b/thirdparty/six.py new file mode 100644 index 0000000..3de5969 --- /dev/null +++ b/thirdparty/six.py @@ -0,0 +1,1003 @@ +# Copyright (c) 2010-2024 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Utilities for writing code that runs on Python 2 and 3""" + +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.17.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + +if PY34: + from importlib.util import spec_from_loader +else: + spec_from_loader = None + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. 
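+            # (Hedged note: setattr() above caches the resolved object on the
+            # instance, so once this descriptor removes itself from the class,
+            # later lookups return the cached attribute with no import cost.)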
+ delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." + fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def find_spec(self, fullname, path, target=None): + if fullname in self.known_modules: + return spec_from_loader(fullname, self) + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. 
+ + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", 
"thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." + attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + 
MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), +] +if sys.version_info[:2] < (3, 14): + _urllib_request_moved_attributes.extend( + [ + MovedAttribute("URLopener", "urllib", 
"urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + ] + ) +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + 
create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + del io + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" + _assertNotRegex = "assertNotRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, 
_assertRegex)(*args, **kwargs) + + +def assertNotRegex(self, *args, **kwargs): + return getattr(self, _assertNotRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + try: + raise tp, value, tb + finally: + tb = None +""") + + +if sys.version_info[:2] > (3,): + exec_("""def raise_from(value, from_value): + try: + raise value from from_value + finally: + value = None +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. + if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + # This does exactly the same what the :func:`py3:functools.update_wrapper` + # function does on Python versions after 3.2. It sets the ``__wrapped__`` + # attribute on ``wrapper`` object and it doesn't raise an error if any of + # the attributes mentioned in ``assigned`` and ``updated`` are missing on + # ``wrapped`` object. 
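+    # A hedged usage sketch (``some_func`` is illustrative): with this
+    # backport,
+    #
+    #     @wraps(some_func)
+    #     def wrapper(*args, **kwargs):
+    #         return some_func(*args, **kwargs)
+    #
+    # copies ``some_func``'s metadata onto ``wrapper`` and sets
+    # ``wrapper.__wrapped__``, without raising when ``some_func`` is missing
+    # one of the ``assigned`` attributes.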
+ def _update_wrapper(wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + continue + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + wrapper.__wrapped__ = wrapped + return wrapper + _update_wrapper.__doc__ = functools.update_wrapper.__doc__ + + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + return functools.partial(_update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) + wraps.__doc__ = functools.wraps.__doc__ + +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + + def __new__(cls, name, this_bases, d): + if sys.version_info[:2] >= (3, 7): + # This version introduced PEP 560 that requires a bit + # of extra care (we mimic what is done by __build_class__). + resolved_bases = types.resolve_bases(bases) + if resolved_bases is not bases: + d['__orig_bases__'] = bases + else: + resolved_bases = bases + return meta(name, resolved_bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + if hasattr(cls, '__qualname__'): + orig_vars['__qualname__'] = cls.__qualname__ + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def ensure_binary(s, encoding='utf-8', errors='strict'): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, binary_type): + return s + if isinstance(s, text_type): + return s.encode(encoding, errors) + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding='utf-8', errors='strict'): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + # Optimization: Fast return for the common case. + if type(s) is str: + return s + if PY2 and isinstance(s, text_type): + return s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + return s.decode(encoding, errors) + elif not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + return s + + +def ensure_text(s, encoding='utf-8', errors='strict'): + """Coerce *s* to six.text_type. 
+ + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def python_2_unicode_compatible(klass): + """ + A class decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/thirdparty/urllib3/__init__.py b/thirdparty/urllib3/__init__.py index 09024d4..3fe782c 100644 --- a/thirdparty/urllib3/__init__.py +++ b/thirdparty/urllib3/__init__.py @@ -1,25 +1,49 @@ """ -urllib3 - Thread-safe connection pooling and re-using. +Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more """ -from __future__ import absolute_import -import warnings -from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url +from __future__ import annotations + +# Set default logging handler to avoid "No handler found" warnings. +import logging +import sys +import typing +import warnings +from logging import NullHandler from . import exceptions -from .filepost import encode_multipart_formdata +from ._base_connection import _TYPE_BODY +from ._collections import HTTPHeaderDict +from ._version import __version__ +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url +from .filepost import _TYPE_FIELDS, encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url -from .response import HTTPResponse +from .response import BaseHTTPResponse, HTTPResponse from .util.request import make_headers -from .util.url import get_host -from .util.timeout import Timeout from .util.retry import Retry -from ._version import __version__ +from .util.timeout import Timeout - -# Set default logging handler to avoid "No handler found" warnings. 
-import logging -from logging import NullHandler +# Ensure that Python is compiled with OpenSSL 1.1.1+ +# If the 'ssl' module isn't available at all that's +# fine, we only care if the module is available. +try: + import ssl +except ImportError: + pass +else: + if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive: + warnings.warn( + "urllib3 v2 only supports OpenSSL 1.1.1+, currently " + f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. " + "See: https://github.com/urllib3/urllib3/issues/3020", + exceptions.NotOpenSSLWarning, + ) + elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive: + raise ImportError( + "urllib3 v2 only supports OpenSSL 1.1.1+, currently " + f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. " + "See: https://github.com/urllib3/urllib3/issues/2168" + ) __author__ = "Andrey Petrov (andrey.petrov@shazow.net)" __license__ = "MIT" @@ -27,6 +51,7 @@ __all__ = ( "HTTPConnectionPool", + "HTTPHeaderDict", "HTTPSConnectionPool", "PoolManager", "ProxyManager", @@ -37,15 +62,18 @@ "connection_from_url", "disable_warnings", "encode_multipart_formdata", - "get_host", "make_headers", "proxy_from_url", + "request", + "BaseHTTPResponse", ) logging.getLogger(__name__).addHandler(NullHandler()) -def add_stderr_logger(level=logging.DEBUG): +def add_stderr_logger( + level: int = logging.DEBUG, +) -> logging.StreamHandler[typing.TextIO]: """ Helper for quickly adding a StreamHandler to the logger. Useful for debugging. @@ -72,16 +100,112 @@ def add_stderr_logger(level=logging.DEBUG): # mechanisms to silence them. # SecurityWarning's always go off by default. warnings.simplefilter("always", exceptions.SecurityWarning, append=True) -# SubjectAltNameWarning's should go off once per host -warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True) # InsecurePlatformWarning's don't vary between requests, so we keep it default. warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True) -# SNIMissingWarnings should go off only once. -warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True) -def disable_warnings(category=exceptions.HTTPWarning): +def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None: """ Helper for quickly disabling all urllib3 warnings. """ warnings.simplefilter("ignore", category) + + +_DEFAULT_POOL = PoolManager() + + +def request( + method: str, + url: str, + *, + body: _TYPE_BODY | None = None, + fields: _TYPE_FIELDS | None = None, + headers: typing.Mapping[str, str] | None = None, + preload_content: bool | None = True, + decode_content: bool | None = True, + redirect: bool | None = True, + retries: Retry | bool | int | None = None, + timeout: Timeout | float | int | None = 3, + json: typing.Any | None = None, +) -> BaseHTTPResponse: + """ + A convenience, top-level request method. It uses a module-global ``PoolManager`` instance. + Therefore, its side effects could be shared across dependencies relying on it. + To avoid side effects create a new ``PoolManager`` instance and use it instead. + The method does not accept low-level ``**urlopen_kw`` keyword arguments. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param fields: + Data to encode and send in the request body. 
+ + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. + + :param bool preload_content: + If True, the response's body will be preloaded into memory. + + :param bool decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param json: + Data to encode and send as JSON with UTF-encoded in the request body. + The ``"Content-Type"`` header will be set to ``"application/json"`` + unless specified otherwise. + """ + + return _DEFAULT_POOL.request( + method, + url, + body=body, + fields=fields, + headers=headers, + preload_content=preload_content, + decode_content=decode_content, + redirect=redirect, + retries=retries, + timeout=timeout, + json=json, + ) + + +if sys.platform == "emscripten": + from .contrib.emscripten import inject_into_urllib3 # noqa: 401 + + inject_into_urllib3() diff --git a/thirdparty/urllib3/_base_connection.py b/thirdparty/urllib3/_base_connection.py new file mode 100644 index 0000000..dc0f318 --- /dev/null +++ b/thirdparty/urllib3/_base_connection.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +import typing + +from .util.connection import _TYPE_SOCKET_OPTIONS +from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT +from .util.url import Url + +_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str] + + +class ProxyConfig(typing.NamedTuple): + ssl_context: ssl.SSLContext | None + use_forwarding_for_https: bool + assert_hostname: None | str | typing.Literal[False] + assert_fingerprint: str | None + + +class _ResponseOptions(typing.NamedTuple): + # TODO: Remove this in favor of a better + # HTTP request/response lifecycle tracking. + request_method: str + request_url: str + preload_content: bool + decode_content: bool + enforce_content_length: bool + + +if typing.TYPE_CHECKING: + import ssl + from typing import Protocol + + from .response import BaseHTTPResponse + + class BaseHTTPConnection(Protocol): + default_port: typing.ClassVar[int] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + host: str + port: int + timeout: None | ( + float + ) # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved. 
+ blocksize: int + source_address: tuple[str, int] | None + socket_options: _TYPE_SOCKET_OPTIONS | None + + proxy: Url | None + proxy_config: ProxyConfig | None + + is_verified: bool + proxy_is_verified: bool | None + + def __init__( + self, + host: str, + port: int | None = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 8192, + socket_options: _TYPE_SOCKET_OPTIONS | None = ..., + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + ) -> None: ... + + def set_tunnel( + self, + host: str, + port: int | None = None, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: ... + + def connect(self) -> None: ... + + def request( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + # We know *at least* botocore is depending on the order of the + # first 3 parameters so to be safe we only mark the later ones + # as keyword-only to ensure we have space to extend. + *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: ... + + def getresponse(self) -> BaseHTTPResponse: ... + + def close(self) -> None: ... + + @property + def is_closed(self) -> bool: + """Whether the connection either is brand new or has been previously closed. + If this property is True then both ``is_connected`` and ``has_connected_to_proxy`` + properties must be False. + """ + + @property + def is_connected(self) -> bool: + """Whether the connection is actively connected to any origin (proxy or target)""" + + @property + def has_connected_to_proxy(self) -> bool: + """Whether the connection has successfully connected to its proxy. + This returns False if no proxy is in use. Used to determine whether + errors are coming from the proxy layer or from tunnelling to the target origin. + """ + + class BaseHTTPSConnection(BaseHTTPConnection, Protocol): + default_port: typing.ClassVar[int] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + # Certificate verification methods + cert_reqs: int | str | None + assert_hostname: None | str | typing.Literal[False] + assert_fingerprint: str | None + ssl_context: ssl.SSLContext | None + + # Trusted CAs + ca_certs: str | None + ca_cert_dir: str | None + ca_cert_data: None | str | bytes + + # TLS version + ssl_minimum_version: int | None + ssl_maximum_version: int | None + ssl_version: int | str | None # Deprecated + + # Client certificates + cert_file: str | None + key_file: str | None + key_password: str | None + + def __init__( + self, + host: str, + port: int | None = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: _TYPE_SOCKET_OPTIONS | None = ..., + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + cert_reqs: int | str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + server_hostname: str | None = None, + ssl_context: ssl.SSLContext | None = None, + ca_certs: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, + ssl_version: int | str | None = None, # Deprecated + cert_file: str | None = None, + key_file: str | None = None, + key_password: str | None = None, + ) -> None: ... 
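The protocol classes in `_base_connection.py` above exist purely for type checkers; the user-facing addition in this patch is the module-level `request()` helper in `urllib3/__init__.py`, backed by the shared `_DEFAULT_POOL` PoolManager. A minimal usage sketch, hedged: the URL is a placeholder, and in this tree the vendored package may import as `thirdparty.urllib3` rather than `urllib3`:

    # Sketch of the new top-level helper; assumes the vendored package is
    # importable as `urllib3`. The URL below is a placeholder.
    import urllib3

    resp = urllib3.request("GET", "https://example.com", timeout=3, retries=2)
    print(resp.status)                         # HTTP status code, e.g. 200
    print(resp.headers.get("Content-Type"))    # headers is an HTTPHeaderDict
    body = resp.data                           # preloaded; preload_content=True

Because the helper shares one PoolManager across the whole process, code that needs isolated connection pools should construct its own PoolManager instead, as the docstring above notes.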
diff --git a/thirdparty/urllib3/_collections.py b/thirdparty/urllib3/_collections.py index 2bae904..1b6c136 100644 --- a/thirdparty/urllib3/_collections.py +++ b/thirdparty/urllib3/_collections.py @@ -1,33 +1,66 @@ -from __future__ import absolute_import - -try: - from collections.abc import Mapping, MutableMapping -except ImportError: - from collections import Mapping, MutableMapping -try: - from threading import RLock -except ImportError: # Platform-specific: No threads available - - class RLock: - def __enter__(self): - pass +from __future__ import annotations - def __exit__(self, exc_type, exc_value, traceback): - pass +import typing +from collections import OrderedDict +from enum import Enum, auto +from threading import RLock +if typing.TYPE_CHECKING: + # We can only import Protocol if TYPE_CHECKING because it's a development + # dependency, and is not available at runtime. + from typing import Protocol -from collections import OrderedDict -from .exceptions import InvalidHeader -from .packages.six import iterkeys, itervalues, PY3 + from typing_extensions import Self + class HasGettableStringKeys(Protocol): + def keys(self) -> typing.Iterator[str]: ... -__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"] + def __getitem__(self, key: str) -> str: ... -_Null = object() +__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"] -class RecentlyUsedContainer(MutableMapping): +# Key type +_KT = typing.TypeVar("_KT") +# Value type +_VT = typing.TypeVar("_VT") +# Default type +_DT = typing.TypeVar("_DT") + +ValidHTTPHeaderSource = typing.Union[ + "HTTPHeaderDict", + typing.Mapping[str, str], + typing.Iterable[tuple[str, str]], + "HasGettableStringKeys", +] + + +class _Sentinel(Enum): + not_passed = auto() + + +def ensure_can_construct_http_header_dict( + potential: object, +) -> ValidHTTPHeaderSource | None: + if isinstance(potential, HTTPHeaderDict): + return potential + elif isinstance(potential, typing.Mapping): + # Full runtime checking of the contents of a Mapping is expensive, so for the + # purposes of typechecking, we assume that any Mapping is the right shape. + return typing.cast(typing.Mapping[str, str], potential) + elif isinstance(potential, typing.Iterable): + # Similarly to Mapping, full runtime checking of the contents of an Iterable is + # expensive, so for the purposes of typechecking, we assume that any Iterable + # is the right shape. + return typing.cast(typing.Iterable[tuple[str, str]], potential) + elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"): + return typing.cast("HasGettableStringKeys", potential) + else: + return None + + +class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]): """ Provides a thread-safe dict-like container which maintains up to ``maxsize`` keys while throwing away the least-recently-used keys beyond @@ -41,69 +74,134 @@ class RecentlyUsedContainer(MutableMapping): ``dispose_func(value)`` is called. 
Callback which will get called """ - ContainerCls = OrderedDict - - def __init__(self, maxsize=10, dispose_func=None): + _container: typing.OrderedDict[_KT, _VT] + _maxsize: int + dispose_func: typing.Callable[[_VT], None] | None + lock: RLock + + def __init__( + self, + maxsize: int = 10, + dispose_func: typing.Callable[[_VT], None] | None = None, + ) -> None: + super().__init__() self._maxsize = maxsize self.dispose_func = dispose_func - - self._container = self.ContainerCls() + self._container = OrderedDict() self.lock = RLock() - def __getitem__(self, key): + def __getitem__(self, key: _KT) -> _VT: # Re-insert the item, moving it to the end of the eviction line. with self.lock: item = self._container.pop(key) self._container[key] = item return item - def __setitem__(self, key, value): - evicted_value = _Null + def __setitem__(self, key: _KT, value: _VT) -> None: + evicted_item = None with self.lock: # Possibly evict the existing value of 'key' - evicted_value = self._container.get(key, _Null) - self._container[key] = value - - # If we didn't evict an existing value, we might have to evict the - # least recently used item from the beginning of the container. - if len(self._container) > self._maxsize: - _key, evicted_value = self._container.popitem(last=False) - - if self.dispose_func and evicted_value is not _Null: + try: + # If the key exists, we'll overwrite it, which won't change the + # size of the pool. Because accessing a key should move it to + # the end of the eviction line, we pop it out first. + evicted_item = key, self._container.pop(key) + self._container[key] = value + except KeyError: + # When the key does not exist, we insert the value first so that + # evicting works in all cases, including when self._maxsize is 0 + self._container[key] = value + if len(self._container) > self._maxsize: + # If we didn't evict an existing value, and we've hit our maximum + # size, then we have to evict the least recently used item from + # the beginning of the container. + evicted_item = self._container.popitem(last=False) + + # After releasing the lock on the pool, dispose of any evicted value. + if evicted_item is not None and self.dispose_func: + _, evicted_value = evicted_item self.dispose_func(evicted_value) - def __delitem__(self, key): + def __delitem__(self, key: _KT) -> None: with self.lock: value = self._container.pop(key) if self.dispose_func: self.dispose_func(value) - def __len__(self): + def __len__(self) -> int: with self.lock: return len(self._container) - def __iter__(self): + def __iter__(self) -> typing.NoReturn: raise NotImplementedError( "Iteration over this class is unlikely to be threadsafe." ) - def clear(self): + def clear(self) -> None: with self.lock: # Copy pointers to all values, then wipe the mapping - values = list(itervalues(self._container)) + values = list(self._container.values()) self._container.clear() if self.dispose_func: for value in values: self.dispose_func(value) - def keys(self): + def keys(self) -> set[_KT]: # type: ignore[override] with self.lock: - return list(iterkeys(self._container)) + return set(self._container.keys()) -class HTTPHeaderDict(MutableMapping): +class HTTPHeaderDictItemView(set[tuple[str, str]]): + """ + HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of + address. 
+ + If we directly try to get an item with a particular name, we will get a string + back that is the concatenated version of all the values: + + >>> d['X-Header-Name'] + 'Value1, Value2, Value3' + + However, if we iterate over an HTTPHeaderDict's items, we will optionally combine + these values based on whether combine=True was called when building up the dictionary + + >>> d = HTTPHeaderDict({"A": "1", "B": "foo"}) + >>> d.add("A", "2", combine=True) + >>> d.add("B", "bar") + >>> list(d.items()) + [ + ('A', '1, 2'), + ('B', 'foo'), + ('B', 'bar'), + ] + + This class conforms to the interface required by the MutableMapping ABC while + also giving us the nonstandard iteration behavior we want; items with duplicate + keys, ordered by time of first insertion. + """ + + _headers: HTTPHeaderDict + + def __init__(self, headers: HTTPHeaderDict) -> None: + self._headers = headers + + def __len__(self) -> int: + return len(list(self._headers.iteritems())) + + def __iter__(self) -> typing.Iterator[tuple[str, str]]: + return self._headers.iteritems() + + def __contains__(self, item: object) -> bool: + if isinstance(item, tuple) and len(item) == 2: + passed_key, passed_val = item + if isinstance(passed_key, str) and isinstance(passed_val, str): + return self._headers._has_value_for_header(passed_key, passed_val) + return False + + +class HTTPHeaderDict(typing.MutableMapping[str, str]): """ :param headers: An iterable of field-value pairs. Must not contain multiple field names @@ -137,9 +235,11 @@ class HTTPHeaderDict(MutableMapping): '7' """ - def __init__(self, headers=None, **kwargs): - super(HTTPHeaderDict, self).__init__() - self._container = OrderedDict() + _container: typing.MutableMapping[str, list[str]] + + def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str): + super().__init__() + self._container = {} # 'dict' is insert-ordered if headers is not None: if isinstance(headers, HTTPHeaderDict): self._copy_from(headers) @@ -148,125 +248,165 @@ def __init__(self, headers=None, **kwargs): if kwargs: self.extend(kwargs) - def __setitem__(self, key, val): + def __setitem__(self, key: str, val: str) -> None: + # avoid a bytes/str comparison by decoding before httplib + if isinstance(key, bytes): + key = key.decode("latin-1") self._container[key.lower()] = [key, val] - return self._container[key.lower()] - def __getitem__(self, key): + def __getitem__(self, key: str) -> str: val = self._container[key.lower()] return ", ".join(val[1:]) - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: del self._container[key.lower()] - def __contains__(self, key): - return key.lower() in self._container + def __contains__(self, key: object) -> bool: + if isinstance(key, str): + return key.lower() in self._container + return False - def __eq__(self, other): - if not isinstance(other, Mapping) and not hasattr(other, "keys"): - return False - if not isinstance(other, type(self)): - other = type(self)(other) - return dict((k.lower(), v) for k, v in self.itermerged()) == dict( - (k.lower(), v) for k, v in other.itermerged() - ) + def setdefault(self, key: str, default: str = "") -> str: + return super().setdefault(key, default) - def __ne__(self, other): - return not self.__eq__(other) + def __eq__(self, other: object) -> bool: + maybe_constructable = ensure_can_construct_http_header_dict(other) + if maybe_constructable is None: + return False + else: + other_as_http_header_dict = type(self)(maybe_constructable) - if not PY3: # Python 2 - iterkeys = MutableMapping.iterkeys - 
itervalues = MutableMapping.itervalues + return {k.lower(): v for k, v in self.itermerged()} == { + k.lower(): v for k, v in other_as_http_header_dict.itermerged() + } - __marker = object() + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) - def __len__(self): + def __len__(self) -> int: return len(self._container) - def __iter__(self): + def __iter__(self) -> typing.Iterator[str]: # Only provide the originally cased names for vals in self._container.values(): yield vals[0] - def pop(self, key, default=__marker): - """D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - """ - # Using the MutableMapping function directly fails due to the private marker. - # Using ordinary dict.pop would expose the internal structures. - # So let's reinvent the wheel. - try: - value = self[key] - except KeyError: - if default is self.__marker: - raise - return default - else: - del self[key] - return value - - def discard(self, key): + def discard(self, key: str) -> None: try: del self[key] except KeyError: pass - def add(self, key, val): + def add(self, key: str, val: str, *, combine: bool = False) -> None: """Adds a (name, value) pair, doesn't overwrite the value if it already exists. + If this is called with combine=True, instead of adding a new header value + as a distinct item during iteration, this will instead append the value to + any existing header value with a comma. If no existing header value exists + for the key, then the value will simply be added, ignoring the combine parameter. + >>> headers = HTTPHeaderDict(foo='bar') >>> headers.add('Foo', 'baz') >>> headers['foo'] 'bar, baz' + >>> list(headers.items()) + [('foo', 'bar'), ('foo', 'baz')] + >>> headers.add('foo', 'quz', combine=True) + >>> list(headers.items()) + [('foo', 'bar, baz, quz')] """ + # avoid a bytes/str comparison by decoding before httplib + if isinstance(key, bytes): + key = key.decode("latin-1") key_lower = key.lower() new_vals = [key, val] # Keep the common case aka no item present as fast as possible vals = self._container.setdefault(key_lower, new_vals) if new_vals is not vals: - vals.append(val) + # if there are values here, then there is at least the initial + # key/value pair + assert len(vals) >= 2 + if combine: + vals[-1] = vals[-1] + ", " + val + else: + vals.append(val) - def extend(self, *args, **kwargs): + def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None: """Generic import function for any type of header-like object. 
Adapted version of MutableMapping.update in order to insert items with self.add instead of self.__setitem__ """ if len(args) > 1: raise TypeError( - "extend() takes at most 1 positional " - "arguments ({0} given)".format(len(args)) + f"extend() takes at most 1 positional arguments ({len(args)} given)" ) other = args[0] if len(args) >= 1 else () if isinstance(other, HTTPHeaderDict): for key, val in other.iteritems(): self.add(key, val) - elif isinstance(other, Mapping): - for key in other: - self.add(key, other[key]) - elif hasattr(other, "keys"): - for key in other.keys(): - self.add(key, other[key]) - else: + elif isinstance(other, typing.Mapping): + for key, val in other.items(): + self.add(key, val) + elif isinstance(other, typing.Iterable): + other = typing.cast(typing.Iterable[tuple[str, str]], other) for key, value in other: self.add(key, value) + elif hasattr(other, "keys") and hasattr(other, "__getitem__"): + # THIS IS NOT A TYPESAFE BRANCH + # In this branch, the object has a `keys` attr but is not a Mapping or any of + # the other types indicated in the method signature. We do some stuff with + # it as though it partially implements the Mapping interface, but we're not + # doing that stuff safely AT ALL. + for key in other.keys(): + self.add(key, other[key]) for key, value in kwargs.items(): self.add(key, value) - def getlist(self, key, default=__marker): + @typing.overload + def getlist(self, key: str) -> list[str]: ... + + @typing.overload + def getlist(self, key: str, default: _DT) -> list[str] | _DT: ... + + def getlist( + self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed + ) -> list[str] | _DT: """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" try: vals = self._container[key.lower()] except KeyError: - if default is self.__marker: + if default is _Sentinel.not_passed: + # _DT is unbound; empty list is instance of List[str] return [] + # _DT is bound; default is instance of _DT return default else: + # _DT may or may not be bound; vals[1:] is instance of List[str], which + # meets our external interface requirement of `Union[List[str], _DT]`. return vals[1:] + def _prepare_for_method_change(self) -> Self: + """ + Remove content-specific header fields before changing the request + method to GET or HEAD according to RFC 9110, Section 15.4. 
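In practice the combine flag documented above behaves as follows. A minimal sketch, again assuming the vendored urllib3 v2 is importable:

from urllib3 import HTTPHeaderDict

h = HTTPHeaderDict()
h.add("Accept", "text/html")
h.add("Accept", "application/json")          # kept as a distinct item
h.add("Accept", "text/plain", combine=True)  # folded into the previous value

# Indexing always returns the comma-joined form.
assert h["accept"] == "text/html, application/json, text/plain"

# Iteration keeps distinct items; only the combined value stays merged.
assert list(h.items()) == [
    ("Accept", "text/html"),
    ("Accept", "application/json, text/plain"),
]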
+ """ + content_specific_headers = [ + "Content-Encoding", + "Content-Language", + "Content-Location", + "Content-Type", + "Content-Length", + "Digest", + "Last-Modified", + ] + for header in content_specific_headers: + self.discard(header) + return self + # Backwards compatibility for httplib getheaders = getlist getallmatchingheaders = getlist @@ -275,62 +415,65 @@ def getlist(self, key, default=__marker): # Backwards compatibility for http.cookiejar get_all = getlist - def __repr__(self): - return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + def __repr__(self) -> str: + return f"{type(self).__name__}({dict(self.itermerged())})" - def _copy_from(self, other): + def _copy_from(self, other: HTTPHeaderDict) -> None: for key in other: val = other.getlist(key) - if isinstance(val, list): - # Don't need to convert tuples - val = list(val) - self._container[key.lower()] = [key] + val + self._container[key.lower()] = [key, *val] - def copy(self): + def copy(self) -> Self: clone = type(self)() clone._copy_from(self) return clone - def iteritems(self): + def iteritems(self) -> typing.Iterator[tuple[str, str]]: """Iterate over all header lines, including duplicate ones.""" for key in self: vals = self._container[key.lower()] for val in vals[1:]: yield vals[0], val - def itermerged(self): + def itermerged(self) -> typing.Iterator[tuple[str, str]]: """Iterate over all headers, merging duplicate ones together.""" for key in self: val = self._container[key.lower()] yield val[0], ", ".join(val[1:]) - def items(self): - return list(self.iteritems()) - - @classmethod - def from_httplib(cls, message): # Python 2 - """Read headers from a Python 2 httplib message object.""" - # python2.7 does not expose a proper API for exporting multiheaders - # efficiently. This function re-reads raw lines from the message - # object and extracts the multiheaders properly. - obs_fold_continued_leaders = (" ", "\t") - headers = [] - - for line in message.headers: - if line.startswith(obs_fold_continued_leaders): - if not headers: - # We received a header line that starts with OWS as described - # in RFC-7230 S3.2.4. This indicates a multiline header, but - # there exists no previous header to which we can attach it. 
- raise InvalidHeader( - "Header continuation with no previous header: %s" % line - ) - else: - key, value = headers[-1] - headers[-1] = (key, value + " " + line.strip()) - continue - - key, value = line.split(":", 1) - headers.append((key, value.strip())) - - return cls(headers) + def items(self) -> HTTPHeaderDictItemView: # type: ignore[override] + return HTTPHeaderDictItemView(self) + + def _has_value_for_header(self, header_name: str, potential_value: str) -> bool: + if header_name in self: + return potential_value in self._container[header_name.lower()][1:] + return False + + def __ior__(self, other: object) -> HTTPHeaderDict: + # Supports extending a header dict in-place using operator |= + # combining items with add instead of __setitem__ + maybe_constructable = ensure_can_construct_http_header_dict(other) + if maybe_constructable is None: + return NotImplemented + self.extend(maybe_constructable) + return self + + def __or__(self, other: object) -> Self: + # Supports merging header dicts using operator | + # combining items with add instead of __setitem__ + maybe_constructable = ensure_can_construct_http_header_dict(other) + if maybe_constructable is None: + return NotImplemented + result = self.copy() + result.extend(maybe_constructable) + return result + + def __ror__(self, other: object) -> Self: + # Supports merging header dicts using operator | when other is on left side + # combining items with add instead of __setitem__ + maybe_constructable = ensure_can_construct_http_header_dict(other) + if maybe_constructable is None: + return NotImplemented + result = type(self)(maybe_constructable) + result.extend(self) + return result diff --git a/thirdparty/urllib3/_request_methods.py b/thirdparty/urllib3/_request_methods.py new file mode 100644 index 0000000..297c271 --- /dev/null +++ b/thirdparty/urllib3/_request_methods.py @@ -0,0 +1,278 @@ +from __future__ import annotations + +import json as _json +import typing +from urllib.parse import urlencode + +from ._base_connection import _TYPE_BODY +from ._collections import HTTPHeaderDict +from .filepost import _TYPE_FIELDS, encode_multipart_formdata +from .response import BaseHTTPResponse + +__all__ = ["RequestMethods"] + +_TYPE_ENCODE_URL_FIELDS = typing.Union[ + typing.Sequence[tuple[str, typing.Union[str, bytes]]], + typing.Mapping[str, typing.Union[str, bytes]], +] + + +class RequestMethods: + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`urllib3.HTTPConnectionPool` and + :class:`urllib3.PoolManager`. + + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. + + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-form-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. 
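The __ior__/__or__/__ror__ overloads added above give header dicts dict-style merge operators while keeping duplicate-name semantics, since they funnel through extend()/add() rather than __setitem__. A quick sketch:

from urllib3 import HTTPHeaderDict

base = HTTPHeaderDict({"Accept": "text/html"})
extra = {"User-Agent": "demo/1.0", "Accept": "application/json"}

merged = base | extra  # copy, then extend(): duplicate names are appended
base |= extra          # in-place variant of the same merge

assert merged == base
assert merged["Accept"] == "text/html, application/json"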
+ """ + + _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"} + + def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None: + self.headers = headers or {} + + def urlopen( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + encode_multipart: bool = True, + multipart_boundary: str | None = None, + **kw: typing.Any, + ) -> BaseHTTPResponse: # Abstract + raise NotImplementedError( + "Classes extending RequestMethods must implement " + "their own ``urlopen`` method." + ) + + def request( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + fields: _TYPE_FIELDS | None = None, + headers: typing.Mapping[str, str] | None = None, + json: typing.Any | None = None, + **urlopen_kw: typing.Any, + ) -> BaseHTTPResponse: + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param fields: + Data to encode and send in the URL or request body, depending on ``method``. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param json: + Data to encode and send as JSON with UTF-encoded in the request body. + The ``"Content-Type"`` header will be set to ``"application/json"`` + unless specified otherwise. + """ + method = method.upper() + + if json is not None and body is not None: + raise TypeError( + "request got values for both 'body' and 'json' parameters which are mutually exclusive" + ) + + if json is not None: + if headers is None: + headers = self.headers + + if not ("content-type" in map(str.lower, headers.keys())): + headers = HTTPHeaderDict(headers) + headers["Content-Type"] = "application/json" + + body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode( + "utf-8" + ) + + if body is not None: + urlopen_kw["body"] = body + + if method in self._encode_url_methods: + return self.request_encode_url( + method, + url, + fields=fields, # type: ignore[arg-type] + headers=headers, + **urlopen_kw, + ) + else: + return self.request_encode_body( + method, url, fields=fields, headers=headers, **urlopen_kw + ) + + def request_encode_url( + self, + method: str, + url: str, + fields: _TYPE_ENCODE_URL_FIELDS | None = None, + headers: typing.Mapping[str, str] | None = None, + **urlopen_kw: str, + ) -> BaseHTTPResponse: + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param fields: + Data to encode and send in the URL. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. 
If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + """ + if headers is None: + headers = self.headers + + extra_kw: dict[str, typing.Any] = {"headers": headers} + extra_kw.update(urlopen_kw) + + if fields: + url += "?" + urlencode(fields) + + return self.urlopen(method, url, **extra_kw) + + def request_encode_body( + self, + method: str, + url: str, + fields: _TYPE_FIELDS | None = None, + headers: typing.Mapping[str, str] | None = None, + encode_multipart: bool = True, + multipart_boundary: str | None = None, + **urlopen_kw: str, + ) -> BaseHTTPResponse: + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :func:`urllib3.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise + :func:`urllib.parse.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request + signing, such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example:: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimic behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param fields: + Data to encode and send in the request body. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param encode_multipart: + If True, encode the ``fields`` using the multipart/form-data MIME + format. + + :param multipart_boundary: + If not specified, then a random boundary will be generated using + :func:`urllib3.filepost.choose_boundary`. + """ + if headers is None: + headers = self.headers + + extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)} + body: bytes | str + + if fields: + if "body" in urlopen_kw: + raise TypeError( + "request got values for both 'fields' and 'body', can only specify one." 
+ ) + + if encode_multipart: + body, content_type = encode_multipart_formdata( + fields, boundary=multipart_boundary + ) + else: + body, content_type = ( + urlencode(fields), # type: ignore[arg-type] + "application/x-www-form-urlencoded", + ) + + extra_kw["body"] = body + extra_kw["headers"].setdefault("Content-Type", content_type) + + extra_kw.update(urlopen_kw) + + return self.urlopen(method, url, **extra_kw) diff --git a/thirdparty/urllib3/_version.py b/thirdparty/urllib3/_version.py index 5a0ec53..af6144b 100644 --- a/thirdparty/urllib3/_version.py +++ b/thirdparty/urllib3/_version.py @@ -1,2 +1,16 @@ -# This file is protected via CODEOWNERS -__version__ = "1.25.11" +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '2.3.0' +__version_tuple__ = version_tuple = (2, 3, 0) diff --git a/thirdparty/urllib3/connection.py b/thirdparty/urllib3/connection.py index 80f638c..591ac40 100644 --- a/thirdparty/urllib3/connection.py +++ b/thirdparty/urllib3/connection.py @@ -1,55 +1,72 @@ -from __future__ import absolute_import -import re +from __future__ import annotations + import datetime +import http.client import logging import os +import re import socket -from socket import error as SocketError, timeout as SocketTimeout +import sys +import threading +import typing import warnings -from .packages import six -from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection -from .packages.six.moves.http_client import HTTPException # noqa: F401 +from http.client import HTTPConnection as _HTTPConnection +from http.client import HTTPException as HTTPException # noqa: F401 +from http.client import ResponseNotReady +from socket import timeout as SocketTimeout + +if typing.TYPE_CHECKING: + from .response import HTTPResponse + from .util.ssl_ import _TYPE_PEER_CERT_RET_DICT + from .util.ssltransport import SSLTransport + +from ._collections import HTTPHeaderDict +from .http2 import probe as http2_probe +from .util.response import assert_header_parsing +from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT, Timeout +from .util.util import to_str +from .util.wait import wait_for_read try: # Compiled with SSL? import ssl BaseSSLError = ssl.SSLError -except (ImportError, AttributeError): # Platform-specific: No SSL. - ssl = None +except (ImportError, AttributeError): + ssl = None # type: ignore[assignment] - class BaseSSLError(BaseException): - pass - - -try: - # Python 3: not a no-op, we're adding this to the namespace so it can be imported. 
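The new json parameter handled in request() above removes the manual dumps()-plus-Content-Type dance for callers. A usage sketch via PoolManager, which mixes in RequestMethods; the URL is a placeholder and the call needs a reachable server:

import urllib3

http = urllib3.PoolManager()

# json= serializes the payload compactly and sets
# Content-Type: application/json unless one was already supplied;
# passing both json= and body= raises TypeError.
resp = http.request("POST", "https://example.com/api", json={"ok": True})
print(resp.status)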
- ConnectionError = ConnectionError -except NameError: - # Python 2 - class ConnectionError(Exception): + class BaseSSLError(BaseException): # type: ignore[no-redef] pass +from ._base_connection import _TYPE_BODY +from ._base_connection import ProxyConfig as ProxyConfig +from ._base_connection import _ResponseOptions as _ResponseOptions +from ._version import __version__ from .exceptions import ( - NewConnectionError, ConnectTimeoutError, - SubjectAltNameWarning, + HeaderParsingError, + NameResolutionError, + NewConnectionError, + ProxyError, SystemTimeWarning, ) -from .packages.ssl_match_hostname import match_hostname, CertificateError - +from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection, ssl_ +from .util.request import body_to_chunks +from .util.ssl_ import assert_fingerprint as _assert_fingerprint from .util.ssl_ import ( + create_urllib3_context, + is_ipaddress, resolve_cert_reqs, resolve_ssl_version, - assert_fingerprint, - create_urllib3_context, ssl_wrap_socket, ) +from .util.ssl_match_hostname import CertificateError, match_hostname +from .util.url import Url +# Not a no-op, we're adding this to the namespace so it can be imported. +ConnectionError = ConnectionError +BrokenPipeError = BrokenPipeError -from .util import connection - -from ._collections import HTTPHeaderDict log = logging.getLogger(__name__) @@ -57,65 +74,94 @@ class ConnectionError(Exception): # When it comes time to update this value as a part of regular maintenance # (ie test_recent_date is failing) update it to ~6 months before the current date. -RECENT_DATE = datetime.date(2019, 1, 1) +RECENT_DATE = datetime.date(2023, 6, 1) _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") -class DummyConnection(object): - """Used to detect a failed ConnectionCls import.""" - - pass - - -class HTTPConnection(_HTTPConnection, object): +class HTTPConnection(_HTTPConnection): """ - Based on httplib.HTTPConnection but provides an extra constructor + Based on :class:`http.client.HTTPConnection` but provides an extra constructor backwards-compatibility layer between older and newer Pythons. Additional keyword parameters are used to configure attributes of the connection. Accepted parameters include: - - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` - - ``source_address``: Set the source address for the current connection. - - ``socket_options``: Set specific options on the underlying socket. If not specified, then - defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling - Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + - ``source_address``: Set the source address for the current connection. + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass: - For example, if you wish to enable TCP Keep Alive in addition to the defaults, - you might pass:: + .. code-block:: python - HTTPConnection.default_socket_options + [ - (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), - ] + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] - Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). 
+ Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ - default_port = port_by_scheme["http"] + default_port: typing.ClassVar[int] = port_by_scheme["http"] # type: ignore[misc] #: Disable Nagle's algorithm by default. #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` - default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + default_socket_options: typing.ClassVar[connection._TYPE_SOCKET_OPTIONS] = [ + (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + ] #: Whether this connection verifies the host's certificate. - is_verified = False + is_verified: bool = False + + #: Whether this proxy connection verified the proxy host's certificate. + # If no proxy is currently connected to the value will be ``None``. + proxy_is_verified: bool | None = None - def __init__(self, *args, **kw): - if not six.PY2: - kw.pop("strict", None) + blocksize: int + source_address: tuple[str, int] | None + socket_options: connection._TYPE_SOCKET_OPTIONS | None - # Pre-set source_address. - self.source_address = kw.get("source_address") + _has_connected_to_proxy: bool + _response_options: _ResponseOptions | None + _tunnel_host: str | None + _tunnel_port: int | None + _tunnel_scheme: str | None - #: The socket options provided by the user. If no options are - #: provided, we use the default options. - self.socket_options = kw.pop("socket_options", self.default_socket_options) + def __init__( + self, + host: str, + port: int | None = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: None | ( + connection._TYPE_SOCKET_OPTIONS + ) = default_socket_options, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + ) -> None: + super().__init__( + host=host, + port=port, + timeout=Timeout.resolve_default_timeout(timeout), + source_address=source_address, + blocksize=blocksize, + ) + self.socket_options = socket_options + self.proxy = proxy + self.proxy_config = proxy_config - _HTTPConnection.__init__(self, *args, **kw) + self._has_connected_to_proxy = False + self._response_options = None + self._tunnel_host: str | None = None + self._tunnel_port: int | None = None + self._tunnel_scheme: str | None = None @property - def host(self): + def host(self) -> str: """ Getter method to remove any trailing dots that indicate the hostname is an FQDN. @@ -134,7 +180,7 @@ def host(self): return self._dns_host.rstrip(".") @host.setter - def host(self, value): + def host(self, value: str) -> None: """ Setter for the `host` property. @@ -143,149 +189,465 @@ def host(self, value): """ self._dns_host = value - def _new_conn(self): + def _new_conn(self) -> socket.socket: """Establish a socket connection and set nodelay settings on it. :return: New socket connection. """ - extra_kw = {} - if self.source_address: - extra_kw["source_address"] = self.source_address - - if self.socket_options: - extra_kw["socket_options"] = self.socket_options - try: - conn = connection.create_connection( - (self._dns_host, self.port), self.timeout, **extra_kw + sock = connection.create_connection( + (self._dns_host, self.port), + self.timeout, + source_address=self.source_address, + socket_options=self.socket_options, ) - - except SocketTimeout: + except socket.gaierror as e: + raise NameResolutionError(self.host, self, e) from e + except SocketTimeout as e: raise ConnectTimeoutError( self, - "Connection to %s timed out. 
(connect timeout=%s)" - % (self.host, self.timeout), - ) + f"Connection to {self.host} timed out. (connect timeout={self.timeout})", + ) from e - except SocketError as e: + except OSError as e: raise NewConnectionError( - self, "Failed to establish a new connection: %s" % e + self, f"Failed to establish a new connection: {e}" + ) from e + + sys.audit("http.client.connect", self, self.host, self.port) + + return sock + + def set_tunnel( + self, + host: str, + port: int | None = None, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: + if scheme not in ("http", "https"): + raise ValueError( + f"Invalid proxy scheme for tunneling: {scheme!r}, must be either 'http' or 'https'" ) + super().set_tunnel(host, port=port, headers=headers) + self._tunnel_scheme = scheme - return conn + if sys.version_info < (3, 11, 4): + + def _tunnel(self) -> None: + _MAXLINE = http.client._MAXLINE # type: ignore[attr-defined] + connect = b"CONNECT %s:%d HTTP/1.0\r\n" % ( # type: ignore[str-format] + self._tunnel_host.encode("ascii"), # type: ignore[union-attr] + self._tunnel_port, + ) + headers = [connect] + for header, value in self._tunnel_headers.items(): # type: ignore[attr-defined] + headers.append(f"{header}: {value}\r\n".encode("latin-1")) + headers.append(b"\r\n") + # Making a single send() call instead of one per line encourages + # the host OS to use a more optimal packet size instead of + # potentially emitting a series of small packets. + self.send(b"".join(headers)) + del headers + + response = self.response_class(self.sock, method=self._method) # type: ignore[attr-defined] + try: + (version, code, message) = response._read_status() # type: ignore[attr-defined] + + if code != http.HTTPStatus.OK: + self.close() + raise OSError(f"Tunnel connection failed: {code} {message.strip()}") + while True: + line = response.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise http.client.LineTooLong("header line") + if not line: + # for sites which EOF without sending a trailer + break + if line in (b"\r\n", b"\n", b""): + break + + if self.debuglevel > 0: + print("header:", line.decode()) + finally: + response.close() + + def connect(self) -> None: + self.sock = self._new_conn() + if self._tunnel_host: + # If we're tunneling it means we're connected to our proxy. + self._has_connected_to_proxy = True - def _prepare_conn(self, conn): - self.sock = conn - # Google App Engine's httplib does not define _tunnel_host - if getattr(self, "_tunnel_host", None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() - # Mark this connection as not reusable - self.auto_open = 0 - def connect(self): - conn = self._new_conn() - self._prepare_conn(conn) + # If there's a proxy to be connected to we are fully connected. + # This is set twice (once above and here) due to forwarding proxies + # not using tunnelling. 
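As the constructor docstring above suggests, the defaults in default_socket_options can be extended rather than replaced. A sketch enabling TCP keep-alive through the public pool API, with a placeholder host:

import socket

import urllib3
from urllib3.connection import HTTPConnection

# The defaults already disable Nagle's algorithm; append keep-alive to them.
opts = HTTPConnection.default_socket_options + [
    (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
]

pool = urllib3.HTTPConnectionPool("example.com", socket_options=opts)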
+ self._has_connected_to_proxy = bool(self.proxy) - def putrequest(self, method, url, *args, **kwargs): - """Send a request to the server""" + if self._has_connected_to_proxy: + self.proxy_is_verified = False + + @property + def is_closed(self) -> bool: + return self.sock is None + + @property + def is_connected(self) -> bool: + if self.sock is None: + return False + return not wait_for_read(self.sock, timeout=0.0) + + @property + def has_connected_to_proxy(self) -> bool: + return self._has_connected_to_proxy + + @property + def proxy_is_forwarding(self) -> bool: + """ + Return True if a forwarding proxy is configured, else return False + """ + return bool(self.proxy) and self._tunnel_host is None + + @property + def proxy_is_tunneling(self) -> bool: + """ + Return True if a tunneling proxy is configured, else return False + """ + return self._tunnel_host is not None + + def close(self) -> None: + try: + super().close() + finally: + # Reset all stateful properties so connection + # can be re-used without leaking prior configs. + self.sock = None + self.is_verified = False + self.proxy_is_verified = None + self._has_connected_to_proxy = False + self._response_options = None + self._tunnel_host = None + self._tunnel_port = None + self._tunnel_scheme = None + + def putrequest( + self, + method: str, + url: str, + skip_host: bool = False, + skip_accept_encoding: bool = False, + ) -> None: + """""" + # Empty docstring because the indentation of CPython's implementation + # is broken but we don't want this method in our documentation. match = _CONTAINS_CONTROL_CHAR_RE.search(method) if match: raise ValueError( - "Method cannot contain non-token characters %r (found at least %r)" - % (method, match.group()) + f"Method cannot contain non-token characters {method!r} (found at least {match.group()!r})" ) - return _HTTPConnection.putrequest(self, method, url, *args, **kwargs) + return super().putrequest( + method, url, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding + ) - def request_chunked(self, method, url, body=None, headers=None): - """ - Alternative to the common request method, which sends the - body with chunked encoding and not as one block - """ - headers = HTTPHeaderDict(headers if headers is not None else {}) - skip_accept_encoding = "accept-encoding" in headers - skip_host = "host" in headers + def putheader(self, header: str, *values: str) -> None: # type: ignore[override] + """""" + if not any(isinstance(v, str) and v == SKIP_HEADER for v in values): + super().putheader(header, *values) + elif to_str(header.lower()) not in SKIPPABLE_HEADERS: + skippable_headers = "', '".join( + [str.title(header) for header in sorted(SKIPPABLE_HEADERS)] + ) + raise ValueError( + f"urllib3.util.SKIP_HEADER only supports '{skippable_headers}'" + ) + + # `request` method's signature intentionally violates LSP. + # urllib3's API is different from `http.client.HTTPConnection` and the subclassing is only incidental. + def request( # type: ignore[override] + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: + # Update the inner socket's timeout value to send the request. + # This only triggers if the connection is re-used. + if self.sock is not None: + self.sock.settimeout(self.timeout) + + # Store these values to be fed into the HTTPResponse + # object later. 
TODO: Remove this in favor of a real + # HTTP lifecycle mechanism. + + # We have to store these before we call .request() + # because sometimes we can still salvage a response + # off the wire even if we aren't able to completely + # send the request body. + self._response_options = _ResponseOptions( + request_method=method, + request_url=url, + preload_content=preload_content, + decode_content=decode_content, + enforce_content_length=enforce_content_length, + ) + + if headers is None: + headers = {} + header_keys = frozenset(to_str(k.lower()) for k in headers) + skip_accept_encoding = "accept-encoding" in header_keys + skip_host = "host" in header_keys self.putrequest( method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host ) + + # Transform the body into an iterable of sendall()-able chunks + # and detect if an explicit Content-Length is doable. + chunks_and_cl = body_to_chunks(body, method=method, blocksize=self.blocksize) + chunks = chunks_and_cl.chunks + content_length = chunks_and_cl.content_length + + # When chunked is explicit set to 'True' we respect that. + if chunked: + if "transfer-encoding" not in header_keys: + self.putheader("Transfer-Encoding", "chunked") + else: + # Detect whether a framing mechanism is already in use. If so + # we respect that value, otherwise we pick chunked vs content-length + # depending on the type of 'body'. + if "content-length" in header_keys: + chunked = False + elif "transfer-encoding" in header_keys: + chunked = True + + # Otherwise we go off the recommendation of 'body_to_chunks()'. + else: + chunked = False + if content_length is None: + if chunks is not None: + chunked = True + self.putheader("Transfer-Encoding", "chunked") + else: + self.putheader("Content-Length", str(content_length)) + + # Now that framing headers are out of the way we send all the other headers. + if "user-agent" not in header_keys: + self.putheader("User-Agent", _get_default_user_agent()) for header, value in headers.items(): self.putheader(header, value) - if "transfer-encoding" not in headers: - self.putheader("Transfer-Encoding", "chunked") self.endheaders() - if body is not None: - stringish_types = six.string_types + (bytes,) - if isinstance(body, stringish_types): - body = (body,) - for chunk in body: + # If we're given a body we start sending that in chunks. + if chunks is not None: + for chunk in chunks: + # Sending empty chunks isn't allowed for TE: chunked + # as it indicates the end of the body. if not chunk: continue - if not isinstance(chunk, bytes): - chunk = chunk.encode("utf8") - len_str = hex(len(chunk))[2:] - to_send = bytearray(len_str.encode()) - to_send += b"\r\n" - to_send += chunk - to_send += b"\r\n" - self.send(to_send) + if isinstance(chunk, str): + chunk = chunk.encode("utf-8") + if chunked: + self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk)) + else: + self.send(chunk) + + # Regardless of whether we have a body or not, if we're in + # chunked mode we want to send an explicit empty chunk. + if chunked: + self.send(b"0\r\n\r\n") + + def request_chunked( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + ) -> None: + """ + Alternative to the common request method, which sends the + body with chunked encoding and not as one block + """ + warnings.warn( + "HTTPConnection.request_chunked() is deprecated and will be removed " + "in urllib3 v2.1.0. 
Instead use HTTPConnection.request(..., chunked=True).", + category=DeprecationWarning, + stacklevel=2, + ) + self.request(method, url, body=body, headers=headers, chunked=True) - # After the if clause, to always have a closed body - self.send(b"0\r\n\r\n") + def getresponse( # type: ignore[override] + self, + ) -> HTTPResponse: + """ + Get the response from the server. + + If the HTTPConnection is in the correct state, returns an instance of HTTPResponse or of whatever object is returned by the response_class variable. + + If a request has not been sent or if a previous response has not be handled, ResponseNotReady is raised. If the HTTP response indicates that the connection should be closed, then it will be closed before the response is returned. When the connection is closed, the underlying socket is closed. + """ + # Raise the same error as http.client.HTTPConnection + if self._response_options is None: + raise ResponseNotReady() + + # Reset this attribute for being used again. + resp_options = self._response_options + self._response_options = None + + # Since the connection's timeout value may have been updated + # we need to set the timeout on the socket. + self.sock.settimeout(self.timeout) + + # This is needed here to avoid circular import errors + from .response import HTTPResponse + + # Save a reference to the shutdown function before ownership is passed + # to httplib_response + # TODO should we implement it everywhere? + _shutdown = getattr(self.sock, "shutdown", None) + + # Get the response from http.client.HTTPConnection + httplib_response = super().getresponse() + + try: + assert_header_parsing(httplib_response.msg) + except (HeaderParsingError, TypeError) as hpe: + log.warning( + "Failed to parse headers (url=%s): %s", + _url_from_connection(self, resp_options.request_url), + hpe, + exc_info=True, + ) + + headers = HTTPHeaderDict(httplib_response.msg.items()) + + response = HTTPResponse( + body=httplib_response, + headers=headers, + status=httplib_response.status, + version=httplib_response.version, + version_string=getattr(self, "_http_vsn_str", "HTTP/?"), + reason=httplib_response.reason, + preload_content=resp_options.preload_content, + decode_content=resp_options.decode_content, + original_response=httplib_response, + enforce_content_length=resp_options.enforce_content_length, + request_method=resp_options.request_method, + request_url=resp_options.request_url, + sock_shutdown=_shutdown, + ) + return response class HTTPSConnection(HTTPConnection): - default_port = port_by_scheme["https"] + """ + Many of the parameters to this constructor are passed to the underlying SSL + socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. 
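Migrating off the deprecated helper is mechanical, per the warning text above. An illustrative sketch with a placeholder host; any iterable of bytes/str works as the body:

from urllib3.connection import HTTPConnection

conn = HTTPConnection("example.com")

def body_chunks():
    # Empty chunks are skipped by request(): an empty chunk would
    # terminate the chunked body early.
    yield b"part one,"
    yield b"part two"

# Replacement for the deprecated request_chunked(); urllib3 frames each
# chunk with its hex length and sends the final 0-length chunk itself.
conn.request("POST", "/upload", body=body_chunks(), chunked=True)
resp = conn.getresponse()
print(resp.status)
conn.close()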
+ """ - cert_reqs = None - ca_certs = None - ca_cert_dir = None - ca_cert_data = None - ssl_version = None - assert_fingerprint = None + default_port = port_by_scheme["https"] # type: ignore[misc] + + cert_reqs: int | str | None = None + ca_certs: str | None = None + ca_cert_dir: str | None = None + ca_cert_data: None | str | bytes = None + ssl_version: int | str | None = None + ssl_minimum_version: int | None = None + ssl_maximum_version: int | None = None + assert_fingerprint: str | None = None + _connect_callback: typing.Callable[..., None] | None = None def __init__( self, - host, - port=None, - key_file=None, - cert_file=None, - key_password=None, - strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - ssl_context=None, - server_hostname=None, - **kw - ): - - HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw) + host: str, + port: int | None = None, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: None | ( + connection._TYPE_SOCKET_OPTIONS + ) = HTTPConnection.default_socket_options, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + cert_reqs: int | str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + server_hostname: str | None = None, + ssl_context: ssl.SSLContext | None = None, + ca_certs: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, + ssl_version: int | str | None = None, # Deprecated + cert_file: str | None = None, + key_file: str | None = None, + key_password: str | None = None, + ) -> None: + super().__init__( + host, + port=port, + timeout=timeout, + source_address=source_address, + blocksize=blocksize, + socket_options=socket_options, + proxy=proxy, + proxy_config=proxy_config, + ) self.key_file = key_file self.cert_file = cert_file self.key_password = key_password self.ssl_context = ssl_context self.server_hostname = server_hostname + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data - # Required property for Google AppEngine 1.9.0 which otherwise causes - # HTTPS requests to go out as HTTP. (See Issue #356) - self._protocol = "https" + # cert_reqs depends on ssl_context so calculate last. 
+ if cert_reqs is None: + if self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + else: + cert_reqs = resolve_cert_reqs(None) + self.cert_reqs = cert_reqs + self._connect_callback = None def set_cert( self, - key_file=None, - cert_file=None, - cert_reqs=None, - key_password=None, - ca_certs=None, - assert_hostname=None, - assert_fingerprint=None, - ca_cert_dir=None, - ca_cert_data=None, - ): + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ) -> None: """ This method should only be called once, before the connection is used. """ + warnings.warn( + "HTTPSConnection.set_cert() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead provide the parameters to the " + "HTTPSConnection constructor.", + category=DeprecationWarning, + stacklevel=2, + ) + # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also # have an SSLContext object in which case we'll use its verify_mode. if cert_reqs is None: @@ -304,107 +666,322 @@ def set_cert( self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) self.ca_cert_data = ca_cert_data - def connect(self): - # Add certificate verification - conn = self._new_conn() - hostname = self.host - - # Google App Engine's httplib does not define _tunnel_host - if getattr(self, "_tunnel_host", None): - self.sock = conn - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - # Mark this connection as not reusable - self.auto_open = 0 - - # Override the host with the one we're requesting data from. - hostname = self._tunnel_host - - server_hostname = hostname - if self.server_hostname is not None: - server_hostname = self.server_hostname - - is_time_off = datetime.date.today() < RECENT_DATE - if is_time_off: - warnings.warn( - ( - "System time is way off (before {0}). This will probably " - "lead to SSL verification errors" - ).format(RECENT_DATE), - SystemTimeWarning, + def connect(self) -> None: + # Today we don't need to be doing this step before the /actual/ socket + # connection, however in the future we'll need to decide whether to + # create a new socket or re-use an existing "shared" socket as a part + # of the HTTP/2 handshake dance. + if self._tunnel_host is not None and self._tunnel_port is not None: + probe_http2_host = self._tunnel_host + probe_http2_port = self._tunnel_port + else: + probe_http2_host = self.host + probe_http2_port = self.port + + # Check if the target origin supports HTTP/2. + # If the value comes back as 'None' it means that the current thread + # is probing for HTTP/2 support. Otherwise, we're waiting for another + # probe to complete, or we get a value right away. + target_supports_http2: bool | None + if "h2" in ssl_.ALPN_PROTOCOLS: + target_supports_http2 = http2_probe.acquire_and_get( + host=probe_http2_host, port=probe_http2_port ) + else: + # If HTTP/2 isn't going to be offered it doesn't matter if + # the target supports HTTP/2. Don't want to make a probe. 
+ target_supports_http2 = False + + if self._connect_callback is not None: + self._connect_callback( + "before connect", + thread_id=threading.get_ident(), + target_supports_http2=target_supports_http2, + ) + + try: + sock: socket.socket | ssl.SSLSocket + self.sock = sock = self._new_conn() + server_hostname: str = self.host + tls_in_tls = False + + # Do we need to establish a tunnel? + if self.proxy_is_tunneling: + # We're tunneling to an HTTPS origin so need to do TLS-in-TLS. + if self._tunnel_scheme == "https": + # _connect_tls_proxy will verify and assign proxy_is_verified + self.sock = sock = self._connect_tls_proxy(self.host, sock) + tls_in_tls = True + elif self._tunnel_scheme == "http": + self.proxy_is_verified = False + + # If we're tunneling it means we're connected to our proxy. + self._has_connected_to_proxy = True + + self._tunnel() + # Override the host with the one we're requesting data from. + server_hostname = typing.cast(str, self._tunnel_host) + + if self.server_hostname is not None: + server_hostname = self.server_hostname + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn( + ( + f"System time is way off (before {RECENT_DATE}). This will probably " + "lead to SSL verification errors" + ), + SystemTimeWarning, + ) - # Wrap socket using verification with the root certs in - # trusted_root_certs - default_ssl_context = False - if self.ssl_context is None: - default_ssl_context = True - self.ssl_context = create_urllib3_context( - ssl_version=resolve_ssl_version(self.ssl_version), - cert_reqs=resolve_cert_reqs(self.cert_reqs), + # Remove trailing '.' from fqdn hostnames to allow certificate validation + server_hostname_rm_dot = server_hostname.rstrip(".") + + sock_and_verified = _ssl_wrap_socket_and_match_hostname( + sock=sock, + cert_reqs=self.cert_reqs, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + ca_cert_data=self.ca_cert_data, + cert_file=self.cert_file, + key_file=self.key_file, + key_password=self.key_password, + server_hostname=server_hostname_rm_dot, + ssl_context=self.ssl_context, + tls_in_tls=tls_in_tls, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, ) + self.sock = sock_and_verified.socket + + # If an error occurs during connection/handshake we may need to release + # our lock so another connection can probe the origin. + except BaseException: + if self._connect_callback is not None: + self._connect_callback( + "after connect failure", + thread_id=threading.get_ident(), + target_supports_http2=target_supports_http2, + ) - context = self.ssl_context - context.verify_mode = resolve_cert_reqs(self.cert_reqs) - - # Try to load OS default certs if none are given. - # Works well on Windows (requires Python3.4+) - if ( - not self.ca_certs - and not self.ca_cert_dir - and not self.ca_cert_data - and default_ssl_context - and hasattr(context, "load_default_certs") - ): - context.load_default_certs() + if target_supports_http2 is None: + http2_probe.set_and_release( + host=probe_http2_host, port=probe_http2_port, supports_http2=None + ) + raise + + # If this connection doesn't know if the origin supports HTTP/2 + # we report back to the HTTP/2 probe our result. 
+ if target_supports_http2 is None: + supports_http2 = sock_and_verified.socket.selected_alpn_protocol() == "h2" + http2_probe.set_and_release( + host=probe_http2_host, + port=probe_http2_port, + supports_http2=supports_http2, + ) - self.sock = ssl_wrap_socket( - sock=conn, - keyfile=self.key_file, - certfile=self.cert_file, - key_password=self.key_password, + # Forwarding proxies can never have a verified target since + # the proxy is the one doing the verification. Should instead + # use a CONNECT tunnel in order to verify the target. + # See: https://github.com/urllib3/urllib3/issues/3267. + if self.proxy_is_forwarding: + self.is_verified = False + else: + self.is_verified = sock_and_verified.is_verified + + # If there's a proxy to be connected to we are fully connected. + # This is set twice (once above and here) due to forwarding proxies + # not using tunnelling. + self._has_connected_to_proxy = bool(self.proxy) + + # Set `self.proxy_is_verified` unless it's already set while + # establishing a tunnel. + if self._has_connected_to_proxy and self.proxy_is_verified is None: + self.proxy_is_verified = sock_and_verified.is_verified + + def _connect_tls_proxy(self, hostname: str, sock: socket.socket) -> ssl.SSLSocket: + """ + Establish a TLS connection to the proxy using the provided SSL context. + """ + # `_connect_tls_proxy` is called when self._tunnel_host is truthy. + proxy_config = typing.cast(ProxyConfig, self.proxy_config) + ssl_context = proxy_config.ssl_context + sock_and_verified = _ssl_wrap_socket_and_match_hostname( + sock, + cert_reqs=self.cert_reqs, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, ca_certs=self.ca_certs, ca_cert_dir=self.ca_cert_dir, ca_cert_data=self.ca_cert_data, - server_hostname=server_hostname, - ssl_context=context, + server_hostname=hostname, + ssl_context=ssl_context, + assert_hostname=proxy_config.assert_hostname, + assert_fingerprint=proxy_config.assert_fingerprint, + # Features that aren't implemented for proxies yet: + cert_file=None, + key_file=None, + key_password=None, + tls_in_tls=False, ) + self.proxy_is_verified = sock_and_verified.is_verified + return sock_and_verified.socket # type: ignore[return-value] + + +class _WrappedAndVerifiedSocket(typing.NamedTuple): + """ + Wrapped socket and whether the connection is + verified after the TLS handshake + """ - if self.assert_fingerprint: - assert_fingerprint( - self.sock.getpeercert(binary_form=True), self.assert_fingerprint + socket: ssl.SSLSocket | SSLTransport + is_verified: bool + + +def _ssl_wrap_socket_and_match_hostname( + sock: socket.socket, + *, + cert_reqs: None | str | int, + ssl_version: None | str | int, + ssl_minimum_version: int | None, + ssl_maximum_version: int | None, + cert_file: str | None, + key_file: str | None, + key_password: str | None, + ca_certs: str | None, + ca_cert_dir: str | None, + ca_cert_data: None | str | bytes, + assert_hostname: None | str | typing.Literal[False], + assert_fingerprint: str | None, + server_hostname: str | None, + ssl_context: ssl.SSLContext | None, + tls_in_tls: bool = False, +) -> _WrappedAndVerifiedSocket: + """Logic for constructing an SSLContext from all TLS parameters, passing + that down into ssl_wrap_socket, and then doing certificate verification + either via hostname or fingerprint. This function exists to guarantee + that both proxies and targets have the same behavior when connecting via TLS. 
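Every TLS parameter funneled through this helper is reachable from the public API. For example, pinning a minimum TLS version, which supersedes the deprecated ssl_version knob, might look like this (sketch, placeholder host):

import ssl

import urllib3

# ssl_minimum_version / ssl_maximum_version replace ssl_version in v2.
https = urllib3.PoolManager(ssl_minimum_version=ssl.TLSVersion.TLSv1_2)
resp = https.request("GET", "https://example.com/")
print(resp.status)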
+ """ + default_ssl_context = False + if ssl_context is None: + default_ssl_context = True + context = create_urllib3_context( + ssl_version=resolve_ssl_version(ssl_version), + ssl_minimum_version=ssl_minimum_version, + ssl_maximum_version=ssl_maximum_version, + cert_reqs=resolve_cert_reqs(cert_reqs), + ) + else: + context = ssl_context + + context.verify_mode = resolve_cert_reqs(cert_reqs) + + # In some cases, we want to verify hostnames ourselves + if ( + # `ssl` can't verify fingerprints or alternate hostnames + assert_fingerprint + or assert_hostname + # assert_hostname can be set to False to disable hostname checking + or assert_hostname is False + # We still support OpenSSL 1.0.2, which prevents us from verifying + # hostnames easily: https://github.com/pyca/pyopenssl/pull/933 + or ssl_.IS_PYOPENSSL + or not ssl_.HAS_NEVER_CHECK_COMMON_NAME + ): + context.check_hostname = False + + # Try to load OS default certs if none are given. We need to do the hasattr() check + # for custom pyOpenSSL SSLContext objects because they don't support + # load_default_certs(). + if ( + not ca_certs + and not ca_cert_dir + and not ca_cert_data + and default_ssl_context + and hasattr(context, "load_default_certs") + ): + context.load_default_certs() + + # Ensure that IPv6 addresses are in the proper format and don't have a + # scope ID. Python's SSL module fails to recognize scoped IPv6 addresses + # and interprets them as DNS hostnames. + if server_hostname is not None: + normalized = server_hostname.strip("[]") + if "%" in normalized: + normalized = normalized[: normalized.rfind("%")] + if is_ipaddress(normalized): + server_hostname = normalized + + ssl_sock = ssl_wrap_socket( + sock=sock, + keyfile=key_file, + certfile=cert_file, + key_password=key_password, + ca_certs=ca_certs, + ca_cert_dir=ca_cert_dir, + ca_cert_data=ca_cert_data, + server_hostname=server_hostname, + ssl_context=context, + tls_in_tls=tls_in_tls, + ) + + try: + if assert_fingerprint: + _assert_fingerprint( + ssl_sock.getpeercert(binary_form=True), assert_fingerprint ) elif ( context.verify_mode != ssl.CERT_NONE - and not getattr(context, "check_hostname", False) - and self.assert_hostname is not False + and not context.check_hostname + and assert_hostname is not False ): - # While urllib3 attempts to always turn off hostname matching from - # the TLS library, this cannot always be done. So we check whether - # the TLS Library still thinks it's matching hostnames. - cert = self.sock.getpeercert() - if not cert.get("subjectAltName", ()): - warnings.warn( - ( - "Certificate for {0} has no `subjectAltName`, falling back to check for a " - "`commonName` for now. This feature is being removed by major browsers and " - "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 " - "for details.)".format(hostname) - ), - SubjectAltNameWarning, + cert: _TYPE_PEER_CERT_RET_DICT = ssl_sock.getpeercert() # type: ignore[assignment] + + # Need to signal to our match_hostname whether to use 'commonName' or not. + # If we're using our own constructed SSLContext we explicitly set 'False' + # because PyPy hard-codes 'True' from SSLContext.hostname_checks_common_name. 
+ if default_ssl_context: + hostname_checks_common_name = False + else: + hostname_checks_common_name = ( + getattr(context, "hostname_checks_common_name", False) or False ) - _match_hostname(cert, self.assert_hostname or server_hostname) - self.is_verified = ( - context.verify_mode == ssl.CERT_REQUIRED - or self.assert_fingerprint is not None + _match_hostname( + cert, + assert_hostname or server_hostname, # type: ignore[arg-type] + hostname_checks_common_name, + ) + + return _WrappedAndVerifiedSocket( + socket=ssl_sock, + is_verified=context.verify_mode == ssl.CERT_REQUIRED + or bool(assert_fingerprint), ) + except BaseException: + ssl_sock.close() + raise + +def _match_hostname( + cert: _TYPE_PEER_CERT_RET_DICT | None, + asserted_hostname: str, + hostname_checks_common_name: bool = False, +) -> None: + # Our upstream implementation of ssl.match_hostname() + # only applies this normalization to IP addresses so it doesn't + # match DNS SANs so we do the same thing! + stripped_hostname = asserted_hostname.strip("[]") + if is_ipaddress(stripped_hostname): + asserted_hostname = stripped_hostname -def _match_hostname(cert, asserted_hostname): try: - match_hostname(cert, asserted_hostname) + match_hostname(cert, asserted_hostname, hostname_checks_common_name) except CertificateError as e: log.warning( "Certificate did not match expected hostname: %s. Certificate: %s", @@ -413,12 +990,55 @@ def _match_hostname(cert, asserted_hostname): ) # Add cert to exception and reraise so client code can inspect # the cert when catching the exception, if they want to - e._peer_cert = cert + e._peer_cert = cert # type: ignore[attr-defined] raise +def _wrap_proxy_error(err: Exception, proxy_scheme: str | None) -> ProxyError: + # Look for the phrase 'wrong version number', if found + # then we should warn the user that we're very sure that + # this proxy is HTTP-only and they have a configuration issue. + error_normalized = " ".join(re.split("[^a-z]", str(err).lower())) + is_likely_http_proxy = ( + "wrong version number" in error_normalized + or "unknown protocol" in error_normalized + or "record layer failure" in error_normalized + ) + http_proxy_warning = ( + ". Your proxy appears to only use HTTP and not HTTPS, " + "try changing your proxy URL to be HTTP. See: " + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" + "#https-proxy-error-http-proxy" + ) + new_err = ProxyError( + f"Unable to connect to proxy" + f"{http_proxy_warning if is_likely_http_proxy and proxy_scheme == 'https' else ''}", + err, + ) + new_err.__cause__ = err + return new_err + + +def _get_default_user_agent() -> str: + return f"python-urllib3/{__version__}" + + +class DummyConnection: + """Used to detect a failed ConnectionCls import.""" + + if not ssl: - HTTPSConnection = DummyConnection # noqa: F811 + HTTPSConnection = DummyConnection # type: ignore[misc, assignment] # noqa: F811 VerifiedHTTPSConnection = HTTPSConnection + + +def _url_from_connection( + conn: HTTPConnection | HTTPSConnection, path: str | None = None +) -> str: + """Returns the URL from a given connection. 
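Worth pausing on the normalization inside `_wrap_proxy_error` above: the error text is lowercased and every non-letter collapses into a space, so variants like `WRONG_VERSION_NUMBER` and `wrong version number (_ssl.c:1007)` all expose the same tell-tale phrase. For example:

    import re

    raw = "[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1007)"
    normalized = " ".join(re.split("[^a-z]", raw.lower()))
    assert "wrong version number" in normalized  # triggers the HTTPS-proxy hint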
This is mainly used for testing and logging.""" + + scheme = "https" if isinstance(conn, HTTPSConnection) else "http" + + return Url(scheme=scheme, host=conn.host, port=conn.port, path=path).url diff --git a/thirdparty/urllib3/connectionpool.py b/thirdparty/urllib3/connectionpool.py index 174fe6c..3a0685b 100644 --- a/thirdparty/urllib3/connectionpool.py +++ b/thirdparty/urllib3/connectionpool.py @@ -1,67 +1,70 @@ -from __future__ import absolute_import +from __future__ import annotations + import errno import logging +import queue import sys +import typing import warnings +import weakref +from socket import timeout as SocketTimeout +from types import TracebackType -from socket import error as SocketError, timeout as SocketTimeout -import socket - - +from ._base_connection import _TYPE_BODY +from ._collections import HTTPHeaderDict +from ._request_methods import RequestMethods +from .connection import ( + BaseSSLError, + BrokenPipeError, + DummyConnection, + HTTPConnection, + HTTPException, + HTTPSConnection, + ProxyConfig, + _wrap_proxy_error, +) +from .connection import port_by_scheme as port_by_scheme from .exceptions import ( ClosedPoolError, - ProtocolError, EmptyPoolError, - HeaderParsingError, + FullPoolError, HostChangedError, + InsecureRequestWarning, LocationValueError, MaxRetryError, + NewConnectionError, + ProtocolError, ProxyError, ReadTimeoutError, SSLError, TimeoutError, - InsecureRequestWarning, - NewConnectionError, -) -from .packages.ssl_match_hostname import CertificateError -from .packages import six -from .packages.six.moves import queue -from .connection import ( - port_by_scheme, - DummyConnection, - HTTPConnection, - HTTPSConnection, - VerifiedHTTPSConnection, - HTTPException, - BaseSSLError, ) -from .request import RequestMethods -from .response import HTTPResponse - +from .response import BaseHTTPResponse from .util.connection import is_connection_dropped -from .util.request import set_file_position -from .util.response import assert_header_parsing +from .util.proxy import connection_requires_http_tunnel +from .util.request import _TYPE_BODY_POSITION, set_file_position from .util.retry import Retry -from .util.timeout import Timeout -from .util.url import ( - get_host, - parse_url, - Url, - _normalize_host as normalize_host, - _encode_target, -) -from .util.queue import LifoQueue +from .util.ssl_match_hostname import CertificateError +from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_DEFAULT, Timeout +from .util.url import Url, _encode_target +from .util.url import _normalize_host as normalize_host +from .util.url import parse_url +from .util.util import to_str + +if typing.TYPE_CHECKING: + import ssl + from typing_extensions import Self -xrange = six.moves.xrange + from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection log = logging.getLogger(__name__) -_Default = object() +_TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None] # Pool objects -class ConnectionPool(object): +class ConnectionPool: """ Base class for all connection pools, such as :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. @@ -72,33 +75,42 @@ class ConnectionPool(object): target URIs. 
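Note how thoroughly the import block above strips `six` out of this module: `six.moves.queue` becomes the stdlib `queue`, `xrange` becomes `range`, and `six.ensure_str` gives way to a `to_str` helper from `util.util`. A rough equivalent of that helper, sketched here since its exact signature isn't shown in this hunk:

    def to_str(x: str | bytes, encoding: str = "utf-8") -> str:
        # str passes through unchanged; bytes are decoded.
        return x if isinstance(x, str) else x.decode(encoding)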
""" - scheme = None - QueueCls = LifoQueue + scheme: str | None = None + QueueCls = queue.LifoQueue - def __init__(self, host, port=None): + def __init__(self, host: str, port: int | None = None) -> None: if not host: raise LocationValueError("No host specified.") self.host = _normalize_host(host, scheme=self.scheme) - self._proxy_host = host.lower() self.port = port - def __str__(self): - return "%s(host=%r, port=%r)" % (type(self).__name__, self.host, self.port) + # This property uses 'normalize_host()' (not '_normalize_host()') + # to avoid removing square braces around IPv6 addresses. + # This value is sent to `HTTPConnection.set_tunnel()` if called + # because square braces are required for HTTP CONNECT tunneling. + self._tunnel_host = normalize_host(host, scheme=self.scheme).lower() + + def __str__(self) -> str: + return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})" - def __enter__(self): + def __enter__(self) -> Self: return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> typing.Literal[False]: self.close() # Return False to re-raise any potential exceptions return False - def close(self): + def close(self) -> None: """ Close all pooled connections and disable the pool. """ - pass # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 @@ -111,19 +123,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param host: Host used for this HTTP Connection (e.g. "localhost"), passed into - :class:`httplib.HTTPConnection`. + :class:`http.client.HTTPConnection`. :param port: Port used for this HTTP Connection (None is equivalent to 80), passed - into :class:`httplib.HTTPConnection`. - - :param strict: - Causes BadStatusLine to be raised if the status line can't be parsed - as a valid HTTP/1.0 or 1.1 status line, passed into - :class:`httplib.HTTPConnection`. - - .. note:: - Only works in Python 2. This parameter is ignored in Python 3. + into :class:`http.client.HTTPConnection`. :param timeout: Socket timeout in seconds for each individual connection. 
This can @@ -154,11 +158,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy: Parsed proxy URL, should not be used directly, instead, see - :class:`urllib3.connectionpool.ProxyManager`" + :class:`urllib3.ProxyManager` :param _proxy_headers: A dictionary with proxy headers, should not be used directly, - instead, see :class:`urllib3.connectionpool.ProxyManager`" + instead, see :class:`urllib3.ProxyManager` :param \\**conn_kw: Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, @@ -166,28 +170,25 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = "http" - ConnectionCls = HTTPConnection - ResponseCls = HTTPResponse + ConnectionCls: type[BaseHTTPConnection] | type[BaseHTTPSConnection] = HTTPConnection def __init__( self, - host, - port=None, - strict=False, - timeout=Timeout.DEFAULT_TIMEOUT, - maxsize=1, - block=False, - headers=None, - retries=None, - _proxy=None, - _proxy_headers=None, - **conn_kw + host: str, + port: int | None = None, + timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT, + maxsize: int = 1, + block: bool = False, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + _proxy: Url | None = None, + _proxy_headers: typing.Mapping[str, str] | None = None, + _proxy_config: ProxyConfig | None = None, + **conn_kw: typing.Any, ): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) - self.strict = strict - if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) @@ -197,14 +198,15 @@ def __init__( self.timeout = timeout self.retries = retries - self.pool = self.QueueCls(maxsize) + self.pool: queue.LifoQueue[typing.Any] | None = self.QueueCls(maxsize) self.block = block self.proxy = _proxy self.proxy_headers = _proxy_headers or {} + self.proxy_config = _proxy_config # Fill the queue up so that doing get() on it will block properly - for _ in xrange(maxsize): + for _ in range(maxsize): self.pool.put(None) # These are mostly for testing and debugging purposes. @@ -218,7 +220,20 @@ def __init__( # list. self.conn_kw.setdefault("socket_options", []) - def _new_conn(self): + self.conn_kw["proxy"] = self.proxy + self.conn_kw["proxy_config"] = self.proxy_config + + # Do not pass 'self' as callback to 'finalize'. + # Then the 'finalize' would keep an endless living (leak) to self. + # By just passing a reference to the pool allows the garbage collector + # to free self if nobody else has a reference to it. + pool = self.pool + + # Close all the HTTPConnections in the pool before the + # HTTPConnectionPool object is garbage collected. + weakref.finalize(self, _close_pool_connections, pool) + + def _new_conn(self) -> BaseHTTPConnection: """ Return a fresh :class:`HTTPConnection`. """ @@ -234,12 +249,11 @@ def _new_conn(self): host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - strict=self.strict, - **self.conn_kw + **self.conn_kw, ) return conn - def _get_conn(self, timeout=None): + def _get_conn(self, timeout: float | None = None) -> BaseHTTPConnection: """ Get a connection. Will return a pooled connection if one is available. @@ -252,33 +266,32 @@ def _get_conn(self, timeout=None): :prop:`.block` is ``True``. 
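The `self.pool.put(None)` loop above is the heart of the pool's bookkeeping: the LIFO queue is pre-filled with `maxsize` placeholders, and each slot is thereafter either a pooled connection or a `None` telling `_get_conn` it may open a fresh one. The mechanics in isolation:

    import queue

    maxsize = 2
    pool: queue.LifoQueue = queue.LifoQueue(maxsize)
    for _ in range(maxsize):
        pool.put(None)  # placeholder: "you may open a connection"

    slot = pool.get(block=False)   # None, so the caller builds a new conn
    assert slot is None
    pool.put("conn", block=False)  # done with it: return the live conn
    assert pool.get(block=False) == "conn"  # LIFO: hottest connection first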
""" conn = None + + if self.pool is None: + raise ClosedPoolError(self, "Pool is closed.") + try: conn = self.pool.get(block=self.block, timeout=timeout) except AttributeError: # self.pool is None - raise ClosedPoolError(self, "Pool is closed.") + raise ClosedPoolError(self, "Pool is closed.") from None # Defensive: except queue.Empty: if self.block: raise EmptyPoolError( self, - "Pool reached maximum size and no more connections are allowed.", - ) + "Pool is empty and a new connection can't be opened due to blocking mode.", + ) from None pass # Oh well, we'll create a new connection then # If this is a persistent connection, check if it got disconnected if conn and is_connection_dropped(conn): log.debug("Resetting dropped connection: %s", self.host) conn.close() - if getattr(conn, "auto_open", 1) == 0: - # This is a proxied connection that has been mutated by - # httplib._tunnel() and cannot be reused (since it would - # attempt to bypass the proxy) - conn = None return conn or self._new_conn() - def _put_conn(self, conn): + def _put_conn(self, conn: BaseHTTPConnection | None) -> None: """ Put a connection back into the pool. @@ -292,33 +305,47 @@ def _put_conn(self, conn): If the pool is closed, then the connection will be closed and discarded. """ - try: - self.pool.put(conn, block=False) - return # Everything is dandy, done. - except AttributeError: - # self.pool is None. - pass - except queue.Full: - # This should never happen if self.block == True - log.warning("Connection pool is full, discarding connection: %s", self.host) + if self.pool is not None: + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + if self.block: + # This should never happen if you got the conn from self._get_conn + raise FullPoolError( + self, + "Pool reached maximum size and no more connections are allowed.", + ) from None + + log.warning( + "Connection pool is full, discarding connection: %s. Connection pool size: %s", + self.host, + self.pool.qsize(), + ) # Connection never got put back into the pool, close it. if conn: conn.close() - def _validate_conn(self, conn): + def _validate_conn(self, conn: BaseHTTPConnection) -> None: """ Called right before a request is made, after the socket is created. """ - pass - def _prepare_proxy(self, conn): + def _prepare_proxy(self, conn: BaseHTTPConnection) -> None: # Nothing to do for HTTP connections. pass - def _get_timeout(self, timeout): - """ Helper that always returns a :class:`urllib3.util.Timeout` """ - if timeout is _Default: + def _get_timeout(self, timeout: _TYPE_TIMEOUT) -> Timeout: + """Helper that always returns a :class:`urllib3.util.Timeout`""" + if timeout is _DEFAULT_TIMEOUT: return self.timeout.clone() if isinstance(timeout, Timeout): @@ -328,34 +355,40 @@ def _get_timeout(self, timeout): # can be removed later return Timeout.from_float(timeout) - def _raise_timeout(self, err, url, timeout_value): + def _raise_timeout( + self, + err: BaseSSLError | OSError | SocketTimeout, + url: str, + timeout_value: _TYPE_TIMEOUT | None, + ) -> None: """Is the error actually a timeout? Will raise a ReadTimeout or pass""" if isinstance(err, SocketTimeout): raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % timeout_value - ) + self, url, f"Read timed out. (read timeout={timeout_value})" + ) from err - # See the above comment about EAGAIN in Python 3. 
In Python 2 we have - # to specifically catch it and throw the timeout error + # See the above comment about EAGAIN in Python 3. if hasattr(err, "errno") and err.errno in _blocking_errnos: raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % timeout_value - ) - - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. We need to do this because of: - # http://bugs.python.org/issue10272 - if "timed out" in str(err) or "did not complete (read)" in str( - err - ): # Python < 2.7.4 - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % timeout_value - ) + self, url, f"Read timed out. (read timeout={timeout_value})" + ) from err def _make_request( - self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw - ): + self, + conn: BaseHTTPConnection, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | None = None, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + chunked: bool = False, + response_conn: BaseHTTPConnection | None = None, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> BaseHTTPResponse: """ Perform a request on a given urllib connection object taken from our pool. @@ -363,39 +396,128 @@ def _make_request( :param conn: a connection from one of our connection pools + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + :param timeout: - Socket timeout in seconds for the request. This can be a - float or integer, which will set the same timeout value for - the socket connect and the socket read, or an instance of - :class:`urllib3.util.Timeout`, which gives you more fine-grained - control over your timeouts. + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param response_conn: + Set this to ``None`` if you will handle releasing the connection or + set the connection to have the response release it. + + :param preload_content: + If True, the response's body will be preloaded during construction. 
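Together with `decode_content` described next, this flag is the switch the public streaming API rides on: pass `preload_content=False` and the body stays on the socket until you read it. From the caller's side (host and path illustrative):

    import urllib3

    with urllib3.HTTPSConnectionPool("example.com") as pool:
        resp = pool.urlopen("GET", "/big-file", preload_content=False)
        for chunk in resp.stream(1024):
            pass  # consume the body incrementally
        resp.release_conn()  # hand the connection back to the pool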
+ + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. """ self.num_requests += 1 timeout_obj = self._get_timeout(timeout) timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout + conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout) - # Trigger any extra validation we need to do. try: - self._validate_conn(conn) - except (SocketTimeout, BaseSSLError) as e: - # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. - self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) - raise + # Trigger any extra validation we need to do. + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise - # conn.request() calls httplib.*.request, not the method in + # _validate_conn() starts the connection to an HTTPS proxy + # so we need to wrap errors with 'ProxyError' here too. + except ( + OSError, + NewConnectionError, + TimeoutError, + BaseSSLError, + CertificateError, + SSLError, + ) as e: + new_e: Exception = e + if isinstance(e, (BaseSSLError, CertificateError)): + new_e = SSLError(e) + # If the connection didn't successfully connect to it's proxy + # then there + if isinstance( + new_e, (OSError, NewConnectionError, TimeoutError, SSLError) + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): + new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) + raise new_e + + # conn.request() calls http.client.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. - if chunked: - conn.request_chunked(method, url, **httplib_request_kw) - else: - conn.request(method, url, **httplib_request_kw) + try: + conn.request( + method, + url, + body=body, + headers=headers, + chunked=chunked, + preload_content=preload_content, + decode_content=decode_content, + enforce_content_length=enforce_content_length, + ) + + # We are swallowing BrokenPipeError (errno.EPIPE) since the server is + # legitimately able to close the connection after sending a valid response. + # With this behaviour, the received response is still readable. + except BrokenPipeError: + pass + except OSError as e: + # MacOS/Linux + # EPROTOTYPE and ECONNRESET are needed on macOS + # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ + # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE. + if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET: + raise # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - # App Engine doesn't have a sock attr - if getattr(conn, "sock", None): + if not conn.is_closed: # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -403,33 +525,22 @@ def _make_request( # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout + self, url, f"Read timed out. 
(read timeout={read_timeout})" ) - if read_timeout is Timeout.DEFAULT_TIMEOUT: - conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value - conn.sock.settimeout(read_timeout) + conn.timeout = read_timeout # Receive the response from the server try: - try: - # Python 2.7, use buffering of HTTP responses - httplib_response = conn.getresponse(buffering=True) - except TypeError: - # Python 3 - try: - httplib_response = conn.getresponse() - except BaseException as e: - # Remove the TypeError from the exception chain in - # Python 3 (including for exceptions like SystemExit). - # Otherwise it looks like a bug in the code. - six.raise_from(e, None) - except (SocketTimeout, BaseSSLError, SocketError) as e: + response = conn.getresponse() + except (BaseSSLError, OSError) as e: self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise - # AppEngine doesn't have a version attr. - http_version = getattr(conn, "_http_vsn_str", "HTTP/?") + # Set properties that are used by the pooling layer. + response.retries = retries + response._connection = response_conn # type: ignore[attr-defined] + response._pool = self # type: ignore[attr-defined] + log.debug( '%s://%s:%s "%s %s %s" %s %s', self.scheme, @@ -437,27 +548,14 @@ def _make_request( self.port, method, url, - http_version, - httplib_response.status, - httplib_response.length, + response.version_string, + response.status, + response.length_remaining, ) - try: - assert_header_parsing(httplib_response.msg) - except (HeaderParsingError, TypeError) as hpe: # Platform-specific: Python 3 - log.warning( - "Failed to parse headers (url=%s): %s", - self._absolute_url(url), - hpe, - exc_info=True, - ) - - return httplib_response - - def _absolute_url(self, path): - return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + return response - def close(self): + def close(self) -> None: """ Close all pooled connections and disable the pool. """ @@ -466,16 +564,10 @@ def close(self): # Disable access to the pool old_pool, self.pool = self.pool, None - try: - while True: - conn = old_pool.get(block=False) - if conn: - conn.close() + # Close all the HTTPConnections in the pool. + _close_pool_connections(old_pool) - except queue.Empty: - pass # Done. - - def is_same_host(self, url): + def is_same_host(self, url: str) -> bool: """ Check if the given ``url`` is a member of the same host as this connection pool. @@ -484,7 +576,8 @@ def is_same_host(self, url): return True # TODO: Add optional support for socket.gethostbyname checking. 
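The `is_same_host` rewrite just below swaps the removed `get_host()` for `parse_url()`, which returns a `Url` namedtuple of `(scheme, auth, host, port, path, query, fragment)`; that is what the `_` and `*_` placeholders skip over:

    >>> from urllib3.util.url import parse_url
    >>> u = parse_url("https://example.com:8443/a?b=1")
    >>> scheme, _, host, port, *_ = u
    >>> scheme, host, port
    ('https', 'example.com', 8443)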
- scheme, host, port = get_host(url) + scheme, _, host, port, *_ = parse_url(url) + scheme = scheme or "http" if host is not None: host = _normalize_host(host, scheme=scheme) @@ -496,22 +589,24 @@ def is_same_host(self, url): return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen( + def urlopen( # type: ignore[override] self, - method, - url, - body=None, - headers=None, - retries=None, - redirect=True, - assert_same_host=True, - timeout=_Default, - pool_timeout=None, - release_conn=None, - chunked=False, - body_pos=None, - **response_kw - ): + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + redirect: bool = True, + assert_same_host: bool = True, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + pool_timeout: int | None = None, + release_conn: bool | None = None, + chunked: bool = False, + body_pos: _TYPE_BODY_POSITION | None = None, + preload_content: bool = True, + decode_content: bool = True, + **response_kw: typing.Any, + ) -> BaseHTTPResponse: """ Get a connection from the pool and perform an HTTP request. This is the lowest level call for making a request, so you'll need to specify all @@ -519,8 +614,8 @@ def urlopen( .. note:: - More commonly, it's appropriate to use a convenience method provided - by :class:`.RequestMethods`, such as :meth:`request`. + More commonly, it's appropriate to use a convenience method + such as :meth:`request`. .. note:: @@ -532,10 +627,12 @@ def urlopen( :param method: HTTP request method (such as GET, POST, PUT, etc.) + :param url: + The URL to perform the request on. + :param body: - Data to send in the request body (useful for creating - POST requests, see HTTPConnectionPool.post_url for - more convenience). + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. :param headers: Dictionary of custom headers to send, such as User-Agent, @@ -546,7 +643,7 @@ def urlopen( Configure the number of retries to allow before raising a :class:`~urllib3.exceptions.MaxRetryError` exception. - Pass ``None`` to retry until you receive a response. Pass a + If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a :class:`~urllib3.util.retry.Retry` object for fine-grained control over different types of retries. Pass an integer number to retry connection errors that many times, @@ -565,7 +662,7 @@ def urlopen( :param assert_same_host: If ``True``, will make sure that the host of the pool requests is - consistent else will raise HostChangedError. When False, you can + consistent else will raise HostChangedError. When ``False``, you can use the pool on an HTTP proxy and request foreign hosts. :param timeout: @@ -578,6 +675,13 @@ def urlopen( block for ``pool_timeout`` seconds and raise EmptyPoolError if no connection is available within the time period. + :param bool preload_content: + If True, the response's body will be preloaded into memory. + + :param bool decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + :param release_conn: If False, then the urlopen call will not release the connection back into the pool once a response is received (but will release if @@ -585,10 +689,10 @@ def urlopen( `preload_content=True`). This is useful if you're not preloading the response's content immediately. 
You will need to call ``r.release_conn()`` on the response ``r`` to return the connection - back into the pool. If None, it takes the value of - ``response_kw.get('preload_content', True)``. + back into the pool. If None, it takes the value of ``preload_content`` + which defaults to ``True``. - :param chunked: + :param bool chunked: If True, urllib3 will send the body using chunked transfer encoding. Otherwise, urllib3 will send the body using the standard content-length form. Defaults to False. @@ -597,11 +701,10 @@ def urlopen( Position to seek to in file-like body in the event of a retry or redirect. Typically this won't need to be set because urllib3 will auto-populate the value when needed. - - :param \\**response_kw: - Additional parameters are passed to - :meth:`urllib3.response.HTTPResponse.from_httplib` """ + parsed_url = parse_url(url) + destination_scheme = parsed_url.scheme + if headers is None: headers = self.headers @@ -609,7 +712,7 @@ def urlopen( retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: - release_conn = response_kw.get("preload_content", True) + release_conn = preload_content # Check host if assert_same_host and not self.is_same_host(url): @@ -617,9 +720,9 @@ def urlopen( # Ensure that the URL we're connecting to is properly encoded if url.startswith("/"): - url = six.ensure_str(_encode_target(url)) + url = to_str(_encode_target(url)) else: - url = six.ensure_str(parse_url(url).url) + url = to_str(parsed_url.url) conn = None @@ -634,12 +737,16 @@ def urlopen( # [1] release_this_conn = release_conn - # Merge the proxy headers. Only do this in HTTP. We have to copy the - # headers dict so we can safely change it without those changes being - # reflected in anyone else's copy. - if self.scheme == "http": - headers = headers.copy() - headers.update(self.proxy_headers) + http_tunnel_required = connection_requires_http_tunnel( + self.proxy, self.proxy_config, destination_scheme + ) + + # Merge the proxy headers. Only done when not using HTTP CONNECT. We + # have to copy the headers dict so we can safely change it without those + # changes being reflected in anyone else's copy. + if not http_tunnel_required: + headers = headers.copy() # type: ignore[attr-defined] + headers.update(self.proxy_headers) # type: ignore[union-attr] # Must keep the exception bound to a separate variable or else Python 3 # complains about UnboundLocalError. @@ -658,16 +765,26 @@ def urlopen( timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) - conn.timeout = timeout_obj.connect_timeout + conn.timeout = timeout_obj.connect_timeout # type: ignore[assignment] - is_new_proxy_conn = self.proxy is not None and not getattr( - conn, "sock", None - ) - if is_new_proxy_conn: - self._prepare_proxy(conn) + # Is this a closed/new connection that requires CONNECT tunnelling? + if self.proxy is not None and http_tunnel_required and conn.is_closed: + try: + self._prepare_proxy(conn) + except (BaseSSLError, OSError, SocketTimeout) as e: + self._raise_timeout( + err=e, url=self.proxy.url, timeout_value=conn.timeout + ) + raise + + # If we're going to release the connection in ``finally:``, then + # the response doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = conn if not release_conn else None - # Make the request on the httplib connection object. 
- httplib_response = self._make_request( + # Make the request on the HTTPConnection object + response = self._make_request( conn, method, url, @@ -675,24 +792,11 @@ def urlopen( body=body, headers=headers, chunked=chunked, - ) - - # If we're going to release the connection in ``finally:``, then - # the response doesn't need to know about the connection. Otherwise - # it will also try to release it and we'll have a double-release - # mess. - response_conn = conn if not release_conn else None - - # Pass method to Response for length checking - response_kw["request_method"] = method - - # Import httplib's response into our own wrapper object - response = self.ResponseCls.from_httplib( - httplib_response, - pool=self, - connection=response_conn, retries=retries, - **response_kw + response_conn=response_conn, + preload_content=preload_content, + decode_content=decode_content, + **response_kw, ) # Everything went great! @@ -707,24 +811,35 @@ def urlopen( except ( TimeoutError, HTTPException, - SocketError, + OSError, ProtocolError, BaseSSLError, SSLError, CertificateError, + ProxyError, ) as e: # Discard the connection for these exceptions. It will be # replaced during the next _get_conn() call. clean_exit = False + new_e: Exception = e if isinstance(e, (BaseSSLError, CertificateError)): - e = SSLError(e) - elif isinstance(e, (SocketError, NewConnectionError)) and self.proxy: - e = ProxyError("Cannot connect to proxy.", e) - elif isinstance(e, (SocketError, HTTPException)): - e = ProtocolError("Connection aborted.", e) + new_e = SSLError(e) + if isinstance( + new_e, + ( + OSError, + NewConnectionError, + TimeoutError, + SSLError, + HTTPException, + ), + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): + new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) + elif isinstance(new_e, (OSError, HTTPException)): + new_e = ProtocolError("Connection aborted.", new_e) retries = retries.increment( - method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] + method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2] ) retries.sleep() @@ -737,7 +852,9 @@ def urlopen( # to throw the connection away unless explicitly told not to. # Close the connection, set the variable to None, and make sure # we put the None back in the pool to avoid leaking it. - conn = conn and conn.close() + if conn: + conn.close() + conn = None release_this_conn = True if release_this_conn: @@ -764,14 +881,20 @@ def urlopen( release_conn=release_conn, chunked=chunked, body_pos=body_pos, - **response_kw + preload_content=preload_content, + decode_content=decode_content, + **response_kw, ) # Handle redirect? redirect_location = redirect and response.get_redirect_location() if redirect_location: if response.status == 303: + # Change the method according to RFC 9110, Section 15.4.4. method = "GET" + # And lose the body not to transfer anything sensitive. + body = None + headers = HTTPHeaderDict(headers)._prepare_for_method_change() try: retries = retries.increment(method, url, response=response, _pool=self) @@ -797,11 +920,13 @@ def urlopen( release_conn=release_conn, chunked=chunked, body_pos=body_pos, - **response_kw + preload_content=preload_content, + decode_content=decode_content, + **response_kw, ) # Check if we should retry the HTTP response. 
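The retry check below is driven entirely by the `Retry` object the caller hands to `urlopen`; status-based retries are opt-in, while `Retry-After` headers are honored by default. A typical configuration (public API, values illustrative):

    import urllib3
    from urllib3.util.retry import Retry

    retry = Retry(
        total=3,
        status_forcelist=[502, 503],  # retry these statuses
        backoff_factor=0.5,           # exponential backoff between attempts
    )
    with urllib3.HTTPSConnectionPool("example.com") as pool:
        resp = pool.urlopen("GET", "/flaky", retries=retry)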
- has_retry_after = bool(response.getheader("Retry-After")) + has_retry_after = bool(response.headers.get("Retry-After")) if retries.is_retry(method, response.status, has_retry_after): try: retries = retries.increment(method, url, response=response, _pool=self) @@ -827,7 +952,9 @@ def urlopen( release_conn=release_conn, chunked=chunked, body_pos=body_pos, - **response_kw + preload_content=preload_content, + decode_content=decode_content, + **response_kw, ) return response @@ -837,11 +964,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ Same as :class:`.HTTPConnectionPool`, but HTTPS. - When Python is compiled with the :mod:`ssl` module, then - :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`.HTTPSConnection`. - - :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + :class:`.HTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. If ``assert_hostname`` is False, no verification is done. @@ -852,37 +975,35 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = "https" - ConnectionCls = HTTPSConnection + ConnectionCls: type[BaseHTTPSConnection] = HTTPSConnection def __init__( self, - host, - port=None, - strict=False, - timeout=Timeout.DEFAULT_TIMEOUT, - maxsize=1, - block=False, - headers=None, - retries=None, - _proxy=None, - _proxy_headers=None, - key_file=None, - cert_file=None, - cert_reqs=None, - key_password=None, - ca_certs=None, - ssl_version=None, - assert_hostname=None, - assert_fingerprint=None, - ca_cert_dir=None, - **conn_kw - ): - - HTTPConnectionPool.__init__( - self, + host: str, + port: int | None = None, + timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT, + maxsize: int = 1, + block: bool = False, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + _proxy: Url | None = None, + _proxy_headers: typing.Mapping[str, str] | None = None, + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + ssl_version: int | str | None = None, + ssl_minimum_version: ssl.TLSVersion | None = None, + ssl_maximum_version: ssl.TLSVersion | None = None, + assert_hostname: str | typing.Literal[False] | None = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + **conn_kw: typing.Any, + ) -> None: + super().__init__( host, port, - strict, timeout, maxsize, block, @@ -890,7 +1011,7 @@ def __init__( retries, _proxy, _proxy_headers, - **conn_kw + **conn_kw, ) self.key_file = key_file @@ -900,40 +1021,29 @@ def __init__( self.ca_certs = ca_certs self.ca_cert_dir = ca_cert_dir self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, conn): - """ - Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` - and establish the tunnel if proxy is used. 
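From the caller's perspective, the `_prepare_proxy` rewrite just below is reached simply by requesting an `https://` URL through a proxy: the pool sends CONNECT, then performs the TLS handshake with the origin itself (sketch; the proxy address is illustrative):

    import urllib3

    proxy = urllib3.ProxyManager("http://127.0.0.1:3128/")
    resp = proxy.request("GET", "https://example.com/")  # CONNECT, then TLS to origin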
- """ - - if isinstance(conn, VerifiedHTTPSConnection): - conn.set_cert( - key_file=self.key_file, - key_password=self.key_password, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - ca_cert_dir=self.ca_cert_dir, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint, - ) - conn.ssl_version = self.ssl_version - return conn + def _prepare_proxy(self, conn: HTTPSConnection) -> None: # type: ignore[override] + """Establishes a tunnel connection through HTTP CONNECT.""" + if self.proxy and self.proxy.scheme == "https": + tunnel_scheme = "https" + else: + tunnel_scheme = "http" - def _prepare_proxy(self, conn): - """ - Establish tunnel connection early, because otherwise httplib - would improperly set Host: header to proxy's IP:port. - """ - conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers) + conn.set_tunnel( + scheme=tunnel_scheme, + host=self._tunnel_host, + port=self.port, + headers=self.proxy_headers, + ) conn.connect() - def _new_conn(self): + def _new_conn(self) -> BaseHTTPSConnection: """ - Return a fresh :class:`httplib.HTTPSConnection`. + Return a fresh :class:`urllib3.connection.HTTPConnection`. """ self.num_connections += 1 log.debug( @@ -943,53 +1053,59 @@ def _new_conn(self): self.port or "443", ) - if not self.ConnectionCls or self.ConnectionCls is DummyConnection: - raise SSLError( + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: # type: ignore[comparison-overlap] + raise ImportError( "Can't connect to HTTPS URL because the SSL module is not available." ) - actual_host = self.host + actual_host: str = self.host actual_port = self.port - if self.proxy is not None: + if self.proxy is not None and self.proxy.host is not None: actual_host = self.proxy.host actual_port = self.proxy.port - conn = self.ConnectionCls( + return self.ConnectionCls( host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - strict=self.strict, cert_file=self.cert_file, key_file=self.key_file, key_password=self.key_password, - **self.conn_kw + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, + **self.conn_kw, ) - return self._prepare_conn(conn) - - def _validate_conn(self, conn): + def _validate_conn(self, conn: BaseHTTPConnection) -> None: """ Called right before a request is made, after the socket is created. """ - super(HTTPSConnectionPool, self)._validate_conn(conn) + super()._validate_conn(conn) # Force connect early to allow us to validate the connection. - if not getattr(conn, "sock", None): # AppEngine might not have `.sock` + if conn.is_closed: conn.connect() - if not conn.is_verified: + # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791 + if not conn.is_verified and not conn.proxy_is_verified: warnings.warn( ( - "Unverified HTTPS request is being made to host '%s'. " + f"Unverified HTTPS request is being made to host '{conn.host}'. " "Adding certificate verification is strongly advised. 
See: " "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" - "#ssl-warnings" % conn.host + "#tls-warnings" ), InsecureRequestWarning, ) -def connection_from_url(url, **kw): +def connection_from_url(url: str, **kw: typing.Any) -> HTTPConnectionPool: """ Given a url, return an :class:`.ConnectionPool` instance of its host. @@ -1009,15 +1125,24 @@ def connection_from_url(url, **kw): >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') """ - scheme, host, port = get_host(url) + scheme, _, host, port, *_ = parse_url(url) + scheme = scheme or "http" port = port or port_by_scheme.get(scheme, 80) if scheme == "https": - return HTTPSConnectionPool(host, port=port, **kw) + return HTTPSConnectionPool(host, port=port, **kw) # type: ignore[arg-type] else: - return HTTPConnectionPool(host, port=port, **kw) + return HTTPConnectionPool(host, port=port, **kw) # type: ignore[arg-type] + + +@typing.overload +def _normalize_host(host: None, scheme: str | None) -> None: ... + +@typing.overload +def _normalize_host(host: str, scheme: str | None) -> str: ... -def _normalize_host(host, scheme): + +def _normalize_host(host: str | None, scheme: str | None) -> str | None: """ Normalize hosts for comparisons and use with sockets. """ @@ -1030,6 +1155,24 @@ def _normalize_host(host, scheme): # Instead, we need to make sure we never pass ``None`` as the port. # However, for backward compatibility reasons we can't actually # *assert* that. See http://bugs.python.org/issue28539 - if host.startswith("[") and host.endswith("]"): + if host and host.startswith("[") and host.endswith("]"): host = host[1:-1] return host + + +def _url_from_pool( + pool: HTTPConnectionPool | HTTPSConnectionPool, path: str | None = None +) -> str: + """Returns the URL from a given connection pool. This is mainly used for testing and logging.""" + return Url(scheme=pool.scheme, host=pool.host, port=pool.port, path=path).url + + +def _close_pool_connections(pool: queue.LifoQueue[typing.Any]) -> None: + """Drains a queue of connections and closes each one.""" + try: + while True: + conn = pool.get(block=False) + if conn: + conn.close() + except queue.Empty: + pass # Done. diff --git a/thirdparty/urllib3/contrib/emscripten/__init__.py b/thirdparty/urllib3/contrib/emscripten/__init__.py new file mode 100644 index 0000000..8a3c5be --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/__init__.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import urllib3.connection + +from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection + + +def inject_into_urllib3() -> None: + # override connection classes to use emscripten specific classes + # n.b. 
mypy complains about the overriding of classes below + # if it isn't ignored + HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection + HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection + urllib3.connection.HTTPConnection = EmscriptenHTTPConnection # type: ignore[misc,assignment] + urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection # type: ignore[misc,assignment] diff --git a/thirdparty/urllib3/contrib/emscripten/connection.py b/thirdparty/urllib3/contrib/emscripten/connection.py new file mode 100644 index 0000000..41bfd27 --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/connection.py @@ -0,0 +1,255 @@ +from __future__ import annotations + +import os +import typing + +# use http.client.HTTPException for consistency with non-emscripten +from http.client import HTTPException as HTTPException # noqa: F401 +from http.client import ResponseNotReady + +from ..._base_connection import _TYPE_BODY +from ...connection import HTTPConnection, ProxyConfig, port_by_scheme +from ...exceptions import TimeoutError +from ...response import BaseHTTPResponse +from ...util.connection import _TYPE_SOCKET_OPTIONS +from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT +from ...util.url import Url +from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request +from .request import EmscriptenRequest +from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + + +class EmscriptenHTTPConnection: + default_port: typing.ClassVar[int] = port_by_scheme["http"] + default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS] + + timeout: None | (float) + + host: str + port: int + blocksize: int + source_address: tuple[str, int] | None + socket_options: _TYPE_SOCKET_OPTIONS | None + + proxy: Url | None + proxy_config: ProxyConfig | None + + is_verified: bool = False + proxy_is_verified: bool | None = None + + _response: EmscriptenResponse | None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 8192, + socket_options: _TYPE_SOCKET_OPTIONS | None = None, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + ) -> None: + self.host = host + self.port = port + self.timeout = timeout if isinstance(timeout, float) else 0.0 + self.scheme = "http" + self._closed = True + self._response = None + # ignore these things because we don't + # have control over that stuff + self.proxy = None + self.proxy_config = None + self.blocksize = blocksize + self.source_address = None + self.socket_options = None + self.is_verified = False + + def set_tunnel( + self, + host: str, + port: int | None = 0, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: + pass + + def connect(self) -> None: + pass + + def request( + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + # We know *at least* botocore is depending on the order of the + # first 3 parameters so to be safe we only mark the later ones + # as keyword-only to ensure we have space to extend. 
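Activating the backend registered by `inject_into_urllib3` above is a single call; it only makes sense under Pyodide/Emscripten:

    from urllib3.contrib.emscripten import inject_into_urllib3

    inject_into_urllib3()  # pools created from here on use the fetch-based classes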
+ *, + chunked: bool = False, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> None: + self._closed = False + if url.startswith("/"): + # no scheme / host / port included, make a full url + url = f"{self.scheme}://{self.host}:{self.port}" + url + request = EmscriptenRequest( + url=url, + method=method, + timeout=self.timeout if self.timeout else 0, + decode_content=decode_content, + ) + request.set_body(body) + if headers: + for k, v in headers.items(): + request.set_header(k, v) + self._response = None + try: + if not preload_content: + self._response = send_streaming_request(request) + if self._response is None: + self._response = send_request(request) + except _TimeoutError as e: + raise TimeoutError(e.message) from e + except _RequestError as e: + raise HTTPException(e.message) from e + + def getresponse(self) -> BaseHTTPResponse: + if self._response is not None: + return EmscriptenHttpResponseWrapper( + internal_response=self._response, + url=self._response.request.url, + connection=self, + ) + else: + raise ResponseNotReady() + + def close(self) -> None: + self._closed = True + self._response = None + + @property + def is_closed(self) -> bool: + """Whether the connection either is brand new or has been previously closed. + If this property is True then both ``is_connected`` and ``has_connected_to_proxy`` + properties must be False. + """ + return self._closed + + @property + def is_connected(self) -> bool: + """Whether the connection is actively connected to any origin (proxy or target)""" + return True + + @property + def has_connected_to_proxy(self) -> bool: + """Whether the connection has successfully connected to its proxy. + This returns False if no proxy is in use. Used to determine whether + errors are coming from the proxy layer or from tunnelling to the target origin. 
+ """ + return False + + +class EmscriptenHTTPSConnection(EmscriptenHTTPConnection): + default_port = port_by_scheme["https"] + # all this is basically ignored, as browser handles https + cert_reqs: int | str | None = None + ca_certs: str | None = None + ca_cert_dir: str | None = None + ca_cert_data: None | str | bytes = None + cert_file: str | None + key_file: str | None + key_password: str | None + ssl_context: typing.Any | None + ssl_version: int | str | None = None + ssl_minimum_version: int | None = None + ssl_maximum_version: int | None = None + assert_hostname: None | str | typing.Literal[False] + assert_fingerprint: str | None = None + + def __init__( + self, + host: str, + port: int = 0, + *, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + blocksize: int = 16384, + socket_options: ( + None | _TYPE_SOCKET_OPTIONS + ) = HTTPConnection.default_socket_options, + proxy: Url | None = None, + proxy_config: ProxyConfig | None = None, + cert_reqs: int | str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + server_hostname: str | None = None, + ssl_context: typing.Any | None = None, + ca_certs: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, + ssl_version: int | str | None = None, # Deprecated + cert_file: str | None = None, + key_file: str | None = None, + key_password: str | None = None, + ) -> None: + super().__init__( + host, + port=port, + timeout=timeout, + source_address=source_address, + blocksize=blocksize, + socket_options=socket_options, + proxy=proxy, + proxy_config=proxy_config, + ) + self.scheme = "https" + + self.key_file = key_file + self.cert_file = cert_file + self.key_password = key_password + self.ssl_context = ssl_context + self.server_hostname = server_hostname + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + self.ca_cert_data = ca_cert_data + + self.cert_reqs = None + + # The browser will automatically verify all requests. + # We have no control over that setting. 
+ self.is_verified = True + + def set_cert( + self, + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + assert_hostname: None | str | typing.Literal[False] = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + ca_cert_data: None | str | bytes = None, + ) -> None: + pass + + +# verify that this class implements BaseHTTP(s) connection correctly +if typing.TYPE_CHECKING: + _supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0) + _supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0) diff --git a/thirdparty/urllib3/contrib/emscripten/emscripten_fetch_worker.js b/thirdparty/urllib3/contrib/emscripten/emscripten_fetch_worker.js new file mode 100644 index 0000000..243b862 --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/emscripten_fetch_worker.js @@ -0,0 +1,110 @@ +let Status = { + SUCCESS_HEADER: -1, + SUCCESS_EOF: -2, + ERROR_TIMEOUT: -3, + ERROR_EXCEPTION: -4, +}; + +let connections = {}; +let nextConnectionID = 1; +const encoder = new TextEncoder(); + +self.addEventListener("message", async function (event) { + if (event.data.close) { + let connectionID = event.data.close; + delete connections[connectionID]; + return; + } else if (event.data.getMore) { + let connectionID = event.data.getMore; + let { curOffset, value, reader, intBuffer, byteBuffer } = + connections[connectionID]; + // if we still have some in buffer, then just send it back straight away + if (!value || curOffset >= value.length) { + // read another buffer if required + try { + let readResponse = await reader.read(); + + if (readResponse.done) { + // read everything - clear connection and return + delete connections[connectionID]; + Atomics.store(intBuffer, 0, Status.SUCCESS_EOF); + Atomics.notify(intBuffer, 0); + // finished reading successfully + // return from event handler + return; + } + curOffset = 0; + connections[connectionID].value = readResponse.value; + value = readResponse.value; + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } + + // send as much buffer as we can + let curLen = value.length - curOffset; + if (curLen > byteBuffer.length) { + curLen = byteBuffer.length; + } + byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0); + + Atomics.store(intBuffer, 0, curLen); // store current length in bytes + Atomics.notify(intBuffer, 0); + curOffset += curLen; + connections[connectionID].curOffset = curOffset; + + return; + } else { + // start fetch + let connectionID = nextConnectionID; + nextConnectionID += 1; + const intBuffer = new Int32Array(event.data.buffer); + const byteBuffer = new Uint8Array(event.data.buffer, 8); + try { + const response = await fetch(event.data.url, event.data.fetchParams); + // return the headers first via textencoder + var headers = []; + for (const pair of response.headers.entries()) { + headers.push([pair[0], pair[1]]); + } + let headerObj = { + headers: headers, + status: response.status, + connectionID, + }; + const headerText = JSON.stringify(headerObj); + let headerBytes = encoder.encode(headerText); + let written = headerBytes.length; + byteBuffer.set(headerBytes); + intBuffer[1] = written; + // make a connection + 
connections[connectionID] = { + reader: response.body.getReader(), + intBuffer: intBuffer, + byteBuffer: byteBuffer, + value: undefined, + curOffset: 0, + }; + // set header ready + Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER); + Atomics.notify(intBuffer, 0); + // all fetching after this goes through a new postmessage call with getMore + // this allows for parallel requests + } catch (error) { + console.log("Request exception:", error); + let errorBytes = encoder.encode(error.message); + let written = errorBytes.length; + byteBuffer.set(errorBytes); + intBuffer[1] = written; + Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION); + Atomics.notify(intBuffer, 0); + } + } +}); +self.postMessage({ inited: true }); diff --git a/thirdparty/urllib3/contrib/emscripten/fetch.py b/thirdparty/urllib3/contrib/emscripten/fetch.py new file mode 100644 index 0000000..a514306 --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/fetch.py @@ -0,0 +1,708 @@ +""" +Support for streaming http requests in emscripten. + +A few caveats - + +If your browser (or Node.js) has WebAssembly JavaScript Promise Integration enabled +https://github.com/WebAssembly/js-promise-integration/blob/main/proposals/js-promise-integration/Overview.md +*and* you launch pyodide using `pyodide.runPythonAsync`, this will fetch data using the +JavaScript asynchronous fetch api (wrapped via `pyodide.ffi.call_sync`). In this case +timeouts and streaming should just work. + +Otherwise, it uses a combination of XMLHttpRequest and a web-worker for streaming. + +This approach has several caveats: + +Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed. +Streaming only works if you're running pyodide in a web worker. + +Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch +operation, so it requires that you have crossOriginIsolation enabled, by serving over https +(or from localhost) with the two headers below set: + + Cross-Origin-Opener-Policy: same-origin + Cross-Origin-Embedder-Policy: require-corp + +You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in +JavaScript console. If it isn't, streaming requests will fallback to XMLHttpRequest, i.e. getting the whole +request into a buffer and then returning it. it shows a warning in the JavaScript console in this case. + +Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once +control is returned to javascript. Call `await wait_for_streaming_ready()` to wait for streaming fetch. + +NB: in this code, there are a lot of JavaScript objects. They are named js_* +to make it clear what type of object they are. +""" + +from __future__ import annotations + +import io +import json +from email.parser import Parser +from importlib.resources import files +from typing import TYPE_CHECKING, Any + +import js # type: ignore[import-not-found] +from pyodide.ffi import ( # type: ignore[import-not-found] + JsArray, + JsException, + JsProxy, + to_js, +) + +if TYPE_CHECKING: + from typing_extensions import Buffer + +from .request import EmscriptenRequest +from .response import EmscriptenResponse + +""" +There are some headers that trigger unintended CORS preflight requests. 
+See also https://github.com/koenvo/pyodide-http/issues/22 +""" +HEADERS_TO_IGNORE = ("user-agent",) + +SUCCESS_HEADER = -1 +SUCCESS_EOF = -2 +ERROR_TIMEOUT = -3 +ERROR_EXCEPTION = -4 + +_STREAMING_WORKER_CODE = ( + files(__package__) + .joinpath("emscripten_fetch_worker.js") + .read_text(encoding="utf-8") +) + + +class _RequestError(Exception): + def __init__( + self, + message: str | None = None, + *, + request: EmscriptenRequest | None = None, + response: EmscriptenResponse | None = None, + ): + self.request = request + self.response = response + self.message = message + super().__init__(self.message) + + +class _StreamingError(_RequestError): + pass + + +class _TimeoutError(_RequestError): + pass + + +def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy: + return to_js(dict_val, dict_converter=js.Object.fromEntries) + + +class _ReadStream(io.RawIOBase): + def __init__( + self, + int_buffer: JsArray, + byte_buffer: JsArray, + timeout: float, + worker: JsProxy, + connection_id: int, + request: EmscriptenRequest, + ): + self.int_buffer = int_buffer + self.byte_buffer = byte_buffer + self.read_pos = 0 + self.read_len = 0 + self.connection_id = connection_id + self.worker = worker + self.timeout = int(1000 * timeout) if timeout > 0 else None + self.is_live = True + self._is_closed = False + self.request: EmscriptenRequest | None = request + + def __del__(self) -> None: + self.close() + + # this is compatible with _base_connection + def is_closed(self) -> bool: + return self._is_closed + + # for compatibility with RawIOBase + @property + def closed(self) -> bool: + return self.is_closed() + + def close(self) -> None: + if self.is_closed(): + return + self.read_len = 0 + self.read_pos = 0 + self.int_buffer = None + self.byte_buffer = None + self._is_closed = True + self.request = None + if self.is_live: + self.worker.postMessage(_obj_from_dict({"close": self.connection_id})) + self.is_live = False + super().close() + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return False + + def seekable(self) -> bool: + return False + + def readinto(self, byte_obj: Buffer) -> int: + if not self.int_buffer: + raise _StreamingError( + "No buffer for stream in _ReadStream.readinto", + request=self.request, + response=None, + ) + if self.read_len == 0: + # wait for the worker to send something + js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT) + self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id})) + if ( + js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout) + == "timed-out" + ): + raise _TimeoutError + data_len = self.int_buffer[0] + if data_len > 0: + self.read_len = data_len + self.read_pos = 0 + elif data_len == ERROR_EXCEPTION: + string_len = self.int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", + request=self.request, + response=None, + ) + else: + # EOF, free the buffers and return zero + # and free the request + self.is_live = False + self.close() + return 0 + # copy from int32array to python bytes + ret_length = min(self.read_len, len(memoryview(byte_obj))) + subarray = self.byte_buffer.subarray( + self.read_pos, self.read_pos + ret_length + ).to_py() + memoryview(byte_obj)[0:ret_length] = subarray + self.read_len -= ret_length + self.read_pos += ret_length + return ret_length + + +class _StreamingFetcher: + def __init__(self) -> None: + # make 
web-worker and data buffer on startup + self.streaming_ready = False + + js_data_blob = js.Blob.new( + to_js([_STREAMING_WORKER_CODE], create_pyproxies=False), + _obj_from_dict({"type": "application/javascript"}), + ) + + def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None: + def onMsg(e: JsProxy) -> None: + self.streaming_ready = True + js_resolve_fn(e) + + def onErr(e: JsProxy) -> None: + js_reject_fn(e) # Defensive: never happens in ci + + self.js_worker.onmessage = onMsg + self.js_worker.onerror = onErr + + js_data_url = js.URL.createObjectURL(js_data_blob) + self.js_worker = js.globalThis.Worker.new(js_data_url) + self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver) + + def send(self, request: EmscriptenRequest) -> EmscriptenResponse: + headers = { + k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE + } + + body = request.body + fetch_data = {"headers": headers, "body": to_js(body), "method": request.method} + # start the request off in the worker + timeout = int(1000 * request.timeout) if request.timeout > 0 else None + js_shared_buffer = js.SharedArrayBuffer.new(1048576) + js_int_buffer = js.Int32Array.new(js_shared_buffer) + js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8) + + js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT) + js.Atomics.notify(js_int_buffer, 0) + js_absolute_url = js.URL.new(request.url, js.location).href + self.js_worker.postMessage( + _obj_from_dict( + { + "buffer": js_shared_buffer, + "url": js_absolute_url, + "fetchParams": fetch_data, + } + ) + ) + # wait for the worker to send something + js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout) + if js_int_buffer[0] == ERROR_TIMEOUT: + raise _TimeoutError( + "Timeout connecting to streaming request", + request=request, + response=None, + ) + elif js_int_buffer[0] == SUCCESS_HEADER: + # got response + # header length is in second int of intBuffer + string_len = js_int_buffer[1] + # decode the rest to a JSON string + js_decoder = js.TextDecoder.new() + # this does a copy (the slice) because decode can't work on shared array + # for some silly reason + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + # get it as an object + response_obj = json.loads(json_str) + return EmscriptenResponse( + request=request, + status_code=response_obj["status"], + headers=response_obj["headers"], + body=_ReadStream( + js_int_buffer, + js_byte_buffer, + request.timeout, + self.js_worker, + response_obj["connectionID"], + request, + ), + ) + elif js_int_buffer[0] == ERROR_EXCEPTION: + string_len = js_int_buffer[1] + # decode the error string + js_decoder = js.TextDecoder.new() + json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len)) + raise _StreamingError( + f"Exception thrown in fetch: {json_str}", request=request, response=None + ) + else: + raise _StreamingError( + f"Unknown status from worker in fetch: {js_int_buffer[0]}", + request=request, + response=None, + ) + + +class _JSPIReadStream(io.RawIOBase): + """ + A read stream that uses pyodide.ffi.run_sync to read from a JavaScript fetch + response. This requires support for WebAssembly JavaScript Promise Integration + in the containing browser, and for pyodide to be launched via runPythonAsync. 
+ + :param js_read_stream: + The JavaScript stream reader + + :param timeout: + Timeout in seconds + + :param request: + The request we're handling + + :param response: + The response this stream relates to + + :param js_abort_controller: + A JavaScript AbortController object, used for timeouts + """ + + def __init__( + self, + js_read_stream: Any, + timeout: float, + request: EmscriptenRequest, + response: EmscriptenResponse, + js_abort_controller: Any, # JavaScript AbortController for timeouts + ): + self.js_read_stream = js_read_stream + self.timeout = timeout + self._is_closed = False + self._is_done = False + self.request: EmscriptenRequest | None = request + self.response: EmscriptenResponse | None = response + self.current_buffer = None + self.current_buffer_pos = 0 + self.js_abort_controller = js_abort_controller + + def __del__(self) -> None: + self.close() + + # this is compatible with _base_connection + def is_closed(self) -> bool: + return self._is_closed + + # for compatibility with RawIOBase + @property + def closed(self) -> bool: + return self.is_closed() + + def close(self) -> None: + if self.is_closed(): + return + self.read_len = 0 + self.read_pos = 0 + self.js_read_stream.cancel() + self.js_read_stream = None + self._is_closed = True + self._is_done = True + self.request = None + self.response = None + super().close() + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return False + + def seekable(self) -> bool: + return False + + def _get_next_buffer(self) -> bool: + result_js = _run_sync_with_timeout( + self.js_read_stream.read(), + self.timeout, + self.js_abort_controller, + request=self.request, + response=self.response, + ) + if result_js.done: + self._is_done = True + return False + else: + self.current_buffer = result_js.value.to_py() + self.current_buffer_pos = 0 + return True + + def readinto(self, byte_obj: Buffer) -> int: + if self.current_buffer is None: + if not self._get_next_buffer() or self.current_buffer is None: + self.close() + return 0 + ret_length = min( + len(byte_obj), len(self.current_buffer) - self.current_buffer_pos + ) + byte_obj[0:ret_length] = self.current_buffer[ + self.current_buffer_pos : self.current_buffer_pos + ret_length + ] + self.current_buffer_pos += ret_length + if self.current_buffer_pos == len(self.current_buffer): + self.current_buffer = None + return ret_length + + +# check if we are in a worker or not +def is_in_browser_main_thread() -> bool: + return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window + + +def is_cross_origin_isolated() -> bool: + return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated + + +def is_in_node() -> bool: + return ( + hasattr(js, "process") + and hasattr(js.process, "release") + and hasattr(js.process.release, "name") + and js.process.release.name == "node" + ) + + +def is_worker_available() -> bool: + return hasattr(js, "Worker") and hasattr(js, "Blob") + + +_fetcher: _StreamingFetcher | None = None + +if is_worker_available() and ( + (is_cross_origin_isolated() and not is_in_browser_main_thread()) + and (not is_in_node()) +): + _fetcher = _StreamingFetcher() +else: + _fetcher = None + + +NODE_JSPI_ERROR = ( + "urllib3 only works in Node.js with pyodide.runPythonAsync" + " and requires the flag --experimental-wasm-stack-switching in " + " versions of node <24." 
+) + + +def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None: + if has_jspi(): + return send_jspi_request(request, True) + elif is_in_node(): + raise _RequestError( + message=NODE_JSPI_ERROR, + request=request, + response=None, + ) + + if _fetcher and streaming_ready(): + return _fetcher.send(request) + else: + _show_streaming_warning() + return None + + +_SHOWN_TIMEOUT_WARNING = False + + +def _show_timeout_warning() -> None: + global _SHOWN_TIMEOUT_WARNING + if not _SHOWN_TIMEOUT_WARNING: + _SHOWN_TIMEOUT_WARNING = True + message = "Warning: Timeout is not available on main browser thread" + js.console.warn(message) + + +_SHOWN_STREAMING_WARNING = False + + +def _show_streaming_warning() -> None: + global _SHOWN_STREAMING_WARNING + if not _SHOWN_STREAMING_WARNING: + _SHOWN_STREAMING_WARNING = True + message = "Can't stream HTTP requests because: \n" + if not is_cross_origin_isolated(): + message += " Page is not cross-origin isolated\n" + if is_in_browser_main_thread(): + message += " Python is running in main browser thread\n" + if not is_worker_available(): + message += " Worker or Blob classes are not available in this environment." # Defensive: this is always False in browsers that we test in + if streaming_ready() is False: + message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch +is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`""" + from js import console + + console.warn(message) + + +def send_request(request: EmscriptenRequest) -> EmscriptenResponse: + if has_jspi(): + return send_jspi_request(request, False) + elif is_in_node(): + raise _RequestError( + message=NODE_JSPI_ERROR, + request=request, + response=None, + ) + try: + js_xhr = js.XMLHttpRequest.new() + + if not is_in_browser_main_thread(): + js_xhr.responseType = "arraybuffer" + if request.timeout: + js_xhr.timeout = int(request.timeout * 1000) + else: + js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15") + if request.timeout: + # timeout isn't available on the main thread - show a warning in console + # if it is set + _show_timeout_warning() + + js_xhr.open(request.method, request.url, False) + for name, value in request.headers.items(): + if name.lower() not in HEADERS_TO_IGNORE: + js_xhr.setRequestHeader(name, value) + + js_xhr.send(to_js(request.body)) + + headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders())) + + if not is_in_browser_main_thread(): + body = js_xhr.response.to_py().tobytes() + else: + body = js_xhr.response.encode("ISO-8859-15") + return EmscriptenResponse( + status_code=js_xhr.status, headers=headers, body=body, request=request + ) + except JsException as err: + if err.name == "TimeoutError": + raise _TimeoutError(err.message, request=request) + elif err.name == "NetworkError": + raise _RequestError(err.message, request=request) + else: + # general http error + raise _RequestError(err.message, request=request) + + +def send_jspi_request( + request: EmscriptenRequest, streaming: bool +) -> EmscriptenResponse: + """ + Send a request using WebAssembly JavaScript Promise Integration + to wrap the asynchronous JavaScript fetch api (experimental). 
+ + :param request: + Request to send + + :param streaming: + Whether to stream the response + + :return: The response object + :rtype: EmscriptenResponse + """ + timeout = request.timeout + js_abort_controller = js.AbortController.new() + headers = {k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE} + req_body = request.body + fetch_data = { + "headers": headers, + "body": to_js(req_body), + "method": request.method, + "signal": js_abort_controller.signal, + } + # Call JavaScript fetch (async api, returns a promise) + fetcher_promise_js = js.fetch(request.url, _obj_from_dict(fetch_data)) + # Now suspend WebAssembly until we resolve that promise + # or time out. + response_js = _run_sync_with_timeout( + fetcher_promise_js, + timeout, + js_abort_controller, + request=request, + response=None, + ) + headers = {} + header_iter = response_js.headers.entries() + while True: + iter_value_js = header_iter.next() + if getattr(iter_value_js, "done", False): + break + else: + headers[str(iter_value_js.value[0])] = str(iter_value_js.value[1]) + status_code = response_js.status + body: bytes | io.RawIOBase = b"" + + response = EmscriptenResponse( + status_code=status_code, headers=headers, body=b"", request=request + ) + if streaming: + # get via inputstream + if response_js.body is not None: + # get a reader from the fetch response + body_stream_js = response_js.body.getReader() + body = _JSPIReadStream( + body_stream_js, timeout, request, response, js_abort_controller + ) + else: + # get directly via arraybuffer + # n.b. this is another async JavaScript call. + body = _run_sync_with_timeout( + response_js.arrayBuffer(), + timeout, + js_abort_controller, + request=request, + response=response, + ).to_py() + response.body = body + return response + + +def _run_sync_with_timeout( + promise: Any, + timeout: float, + js_abort_controller: Any, + request: EmscriptenRequest | None, + response: EmscriptenResponse | None, +) -> Any: + """ + Await a JavaScript promise synchronously with a timeout which is implemented + via the AbortController + + :param promise: + Javascript promise to await + + :param timeout: + Timeout in seconds + + :param js_abort_controller: + A JavaScript AbortController object, used on timeout + + :param request: + The request being handled + + :param response: + The response being handled (if it exists yet) + + :raises _TimeoutError: If the request times out + :raises _RequestError: If the request raises a JavaScript exception + + :return: The result of awaiting the promise. + """ + timer_id = None + if timeout > 0: + timer_id = js.setTimeout( + js_abort_controller.abort.bind(js_abort_controller), int(timeout * 1000) + ) + try: + from pyodide.ffi import run_sync + + # run_sync here uses WebAssembly JavaScript Promise Integration to + # suspend python until the JavaScript promise resolves. + return run_sync(promise) + except JsException as err: + if err.name == "AbortError": + raise _TimeoutError( + message="Request timed out", request=request, response=response + ) + else: + raise _RequestError(message=err.message, request=request, response=response) + finally: + if timer_id is not None: + js.clearTimeout(timer_id) + + +def has_jspi() -> bool: + """ + Return true if jspi can be used. + + This requires both browser support and also WebAssembly + to be in the correct state - i.e. that the javascript + call into python was async not sync. + + :return: True if jspi can be used. 
+ :rtype: bool + """ + try: + from pyodide.ffi import can_run_sync, run_sync # noqa: F401 + + return bool(can_run_sync()) + except ImportError: + return False + + +def streaming_ready() -> bool | None: + if _fetcher: + return _fetcher.streaming_ready + else: + return None # no fetcher, return None to signify that + + +async def wait_for_streaming_ready() -> bool: + if _fetcher: + await _fetcher.js_worker_ready_promise + return True + else: + return False diff --git a/thirdparty/urllib3/contrib/emscripten/request.py b/thirdparty/urllib3/contrib/emscripten/request.py new file mode 100644 index 0000000..e692e69 --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/request.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from ..._base_connection import _TYPE_BODY + + +@dataclass +class EmscriptenRequest: + method: str + url: str + params: dict[str, str] | None = None + body: _TYPE_BODY | None = None + headers: dict[str, str] = field(default_factory=dict) + timeout: float = 0 + decode_content: bool = True + + def set_header(self, name: str, value: str) -> None: + self.headers[name.capitalize()] = value + + def set_body(self, body: _TYPE_BODY | None) -> None: + self.body = body diff --git a/thirdparty/urllib3/contrib/emscripten/response.py b/thirdparty/urllib3/contrib/emscripten/response.py new file mode 100644 index 0000000..b32b402 --- /dev/null +++ b/thirdparty/urllib3/contrib/emscripten/response.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +import json as _json +import logging +import typing +from contextlib import contextmanager +from dataclasses import dataclass +from http.client import HTTPException as HTTPException +from io import BytesIO, IOBase + +from ...exceptions import InvalidHeader, TimeoutError +from ...response import BaseHTTPResponse +from ...util.retry import Retry +from .request import EmscriptenRequest + +if typing.TYPE_CHECKING: + from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection + +log = logging.getLogger(__name__) + + +@dataclass +class EmscriptenResponse: + status_code: int + headers: dict[str, str] + body: IOBase | bytes + request: EmscriptenRequest + + +class EmscriptenHttpResponseWrapper(BaseHTTPResponse): + def __init__( + self, + internal_response: EmscriptenResponse, + url: str | None = None, + connection: BaseHTTPConnection | BaseHTTPSConnection | None = None, + ): + self._pool = None # set by pool class + self._body = None + self._response = internal_response + self._url = url + self._connection = connection + self._closed = False + super().__init__( + headers=internal_response.headers, + status=internal_response.status_code, + request_url=url, + version=0, + version_string="HTTP/?", + reason="", + decode_content=True, + ) + self.length_remaining = self._init_length(self._response.request.method) + self.length_is_certain = False + + @property + def url(self) -> str | None: + return self._url + + @url.setter + def url(self, url: str | None) -> None: + self._url = url + + @property + def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None: + return self._connection + + @property + def retries(self) -> Retry | None: + return self._retries + + @retries.setter + def retries(self, retries: Retry | None) -> None: + # Override the request_url if retries has a redirect location. 
+ self._retries = retries
+
+ def stream(
+ self, amt: int | None = 2**16, decode_content: bool | None = None
+ ) -> typing.Generator[bytes]:
+ """
+ A generator wrapper for the read() method. A call will block until
+ ``amt`` bytes have been read from the connection or until the
+ connection is closed.
+
+ :param amt:
+ How much of the content to read. The generator will return up to
+ that much data per iteration, but may return less. This is particularly
+ likely when using compressed data. However, the empty string will
+ never be returned.
+
+ :param decode_content:
+ If True, will attempt to decode the body based on the
+ 'content-encoding' header.
+ """
+ while True:
+ data = self.read(amt=amt, decode_content=decode_content)
+
+ if data:
+ yield data
+ else:
+ break
+
+ def _init_length(self, request_method: str | None) -> int | None:
+ length: int | None
+ content_length: str | None = self.headers.get("content-length")
+
+ if content_length is not None:
+ try:
+ # RFC 7230 section 3.3.2 specifies multiple content lengths can
+ # be sent in a single Content-Length header
+ # (e.g. Content-Length: 42, 42). This line ensures the values
+ # are all valid ints and that as long as the `set` length is 1,
+ # all values are the same. Otherwise, the header is invalid.
+ lengths = {int(val) for val in content_length.split(",")}
+ if len(lengths) > 1:
+ raise InvalidHeader(
+ "Content-Length contained multiple "
+ "unmatching values (%s)" % content_length
+ )
+ length = lengths.pop()
+ except ValueError:
+ length = None
+ else:
+ if length < 0:
+ length = None
+
+ else: # if content_length is None
+ length = None
+
+ # Check for responses that shouldn't include a body
+ if (
+ self.status in (204, 304)
+ or 100 <= self.status < 200
+ or request_method == "HEAD"
+ ):
+ length = 0
+
+ return length
+
+ def read(
+ self,
+ amt: int | None = None,
+ decode_content: bool | None = None, # ignored because browser decodes always
+ cache_content: bool = False,
+ ) -> bytes:
+ if (
+ self._closed
+ or self._response is None
+ or (isinstance(self._response.body, IOBase) and self._response.body.closed)
+ ):
+ return b""
+
+ with self._error_catcher():
+ # body has been preloaded as a string by XmlHttpRequest
+ if not isinstance(self._response.body, IOBase):
+ self.length_remaining = len(self._response.body)
+ self.length_is_certain = True
+ # wrap body in IOStream
+ self._response.body = BytesIO(self._response.body)
+ if amt is not None and amt >= 0:
+ # don't cache partial content
+ cache_content = False
+ data = self._response.body.read(amt)
+ if self.length_remaining is not None:
+ self.length_remaining = max(self.length_remaining - len(data), 0)
+ if (self.length_is_certain and self.length_remaining == 0) or len(
+ data
+ ) < amt:
+ # definitely finished reading, close response stream
+ self._response.body.close()
+ return typing.cast(bytes, data)
+ else: # read all we can (and cache it)
+ data = self._response.body.read()
+ if cache_content:
+ self._body = data
+ if self.length_remaining is not None:
+ self.length_remaining = max(self.length_remaining - len(data), 0)
+ if len(data) == 0 or (
+ self.length_is_certain and self.length_remaining == 0
+ ):
+ # definitely finished reading, close response stream
+ self._response.body.close()
+ return typing.cast(bytes, data)
+
+ def read_chunked(
+ self,
+ amt: int | None = None,
+ decode_content: bool | None = None,
+ ) -> typing.Generator[bytes]:
+ # chunked is handled by browser
+ while True:
+ bytes = self.read(amt, decode_content)
+ if not bytes:
+ break
+ yield bytes
+
+ def release_conn(self) -> None:
+ if not self._pool or not self._connection:
+ return None
+
+ self._pool._put_conn(self._connection)
+ self._connection = None
+
+ def drain_conn(self) -> None:
+ self.close()
+
+ @property
+ def data(self) -> bytes:
+ if self._body:
+ return self._body
+ else:
+ return self.read(cache_content=True)
+
+ def json(self) -> typing.Any:
+ """
+ Deserializes the body of the HTTP response as a Python object.
+
+ The body of the HTTP response must be encoded using UTF-8, as per
+ `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.
+
+ To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
+ your custom decoder instead.
+
+ If the body of the HTTP response is not decodable to UTF-8, a
+ `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
+ valid JSON document, a `json.JSONDecodeError` will be raised.
+
+ Read more :ref:`here `.
+
+ :returns: The body of the HTTP response as a Python object.
+ """
+ data = self.data.decode("utf-8")
+ return _json.loads(data)
+
+ def close(self) -> None:
+ if not self._closed:
+ if isinstance(self._response.body, IOBase):
+ self._response.body.close()
+ if self._connection:
+ self._connection.close()
+ self._connection = None
+ self._closed = True
+
+ @contextmanager
+ def _error_catcher(self) -> typing.Generator[None]:
+ """
+ Catch Emscripten specific exceptions thrown by fetch.py,
+ instead re-raising urllib3 variants, so that low-level exceptions
+ are not leaked in the high-level api.
+
+ On exit, release the connection back to the pool.
+ """
+ from .fetch import _RequestError, _TimeoutError # avoid circular import
+
+ clean_exit = False
+
+ try:
+ yield
+ # If no exception is thrown, we should avoid cleaning up
+ # unnecessarily.
+ clean_exit = True
+ except _TimeoutError as e:
+ raise TimeoutError(str(e))
+ except _RequestError as e:
+ raise HTTPException(str(e))
+ finally:
+ # If we didn't terminate cleanly, we need to throw away our
+ # connection.
+ if not clean_exit:
+ # The response may not be closed but we're not going to use it
+ # anymore so close it now
+ if (
+ isinstance(self._response.body, IOBase)
+ and not self._response.body.closed
+ ):
+ self._response.body.close()
+ # release the connection back to the pool
+ self.release_conn()
+ else:
+ # If we have read everything from the response stream,
+ # return the connection back to the pool.
+ if (
+ isinstance(self._response.body, IOBase)
+ and self._response.body.closed
+ ):
+ self.release_conn()
diff --git a/thirdparty/urllib3/contrib/pyopenssl.py b/thirdparty/urllib3/contrib/pyopenssl.py
index 9ae65ea..ed65430 100644
--- a/thirdparty/urllib3/contrib/pyopenssl.py
+++ b/thirdparty/urllib3/contrib/pyopenssl.py
@@ -1,27 +1,31 @@
 """
-SSL with SNI_-support for Python 2. Follow these instructions if you would
-like to verify SSL certificates in Python 2. Note, the default libraries do
-*not* do certificate checking; you need to do additional work to validate
-certificates yourself.
+Module for using pyOpenSSL as a TLS backend. This module was relevant before
+the standard library ``ssl`` module supported SNI, but now that we've dropped
+support for Python 2.7 all relevant Python versions support SNI so
+**this module is no longer recommended**.

This needs the following packages installed: -* pyOpenSSL (tested with 16.0.0) -* cryptography (minimum 1.3.4, from pyopenssl) -* idna (minimum 2.0, from cryptography) +* `pyOpenSSL`_ (tested with 16.0.0) +* `cryptography`_ (minimum 1.3.4, from pyopenssl) +* `idna`_ (minimum 2.0) -However, pyopenssl depends on cryptography, which depends on idna, so while we -use all three directly here we end up having relatively few packages required. +However, pyOpenSSL depends on cryptography, so while we use all three directly here we +end up having relatively few packages required. You can install them with the following command: - pip install pyopenssl cryptography idna +.. code-block:: bash + + $ python -m pip install pyopenssl cryptography idna To activate certificate checking, call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code before you begin making HTTP requests. This can be done in a ``sitecustomize`` module, or at any other time before your application begins using ``urllib3``, -like this:: +like this: + +.. code-block:: python try: import urllib3.contrib.pyopenssl @@ -29,64 +33,46 @@ except ImportError: pass -Now you can use :mod:`urllib3` as you normally would, and it will support SNI -when the required modules are installed. - -Activating this module also has the positive side effect of disabling SSL/TLS -compression in Python 2 (see `CRIME attack`_). - -If you want to configure the default list of supported cipher suites, you can -set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. - -.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication -.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +.. _pyopenssl: https://www.pyopenssl.org +.. _cryptography: https://cryptography.io +.. _idna: https://github.com/kjd/idna """ -from __future__ import absolute_import -import OpenSSL.SSL +from __future__ import annotations + +import OpenSSL.SSL # type: ignore[import-untyped] from cryptography import x509 -from cryptography.hazmat.backends.openssl import backend as openssl_backend -from cryptography.hazmat.backends.openssl.x509 import _Certificate try: - from cryptography.x509 import UnsupportedExtension + from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined] except ImportError: # UnsupportedExtension is gone in cryptography >= 2.1.0 - class UnsupportedExtension(Exception): + class UnsupportedExtension(Exception): # type: ignore[no-redef] pass -from socket import timeout, error as SocketError -from io import BytesIO - -try: # Platform-specific: Python 2 - from socket import _fileobject -except ImportError: # Platform-specific: Python 3 - _fileobject = None - from ..packages.backports.makefile import backport_makefile - import logging import ssl -from ..packages import six -import sys +import typing +from io import BytesIO +from socket import socket as socket_cls +from socket import timeout from .. import util +if typing.TYPE_CHECKING: + from OpenSSL.crypto import X509 # type: ignore[import-untyped] -__all__ = ["inject_into_urllib3", "extract_from_urllib3"] -# SNI always works. -HAS_SNI = True +__all__ = ["inject_into_urllib3", "extract_from_urllib3"] # Map from urllib3 to PyOpenSSL compatible parameter-values. 
-_openssl_versions = { - util.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, +_openssl_versions: dict[int, int] = { + util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] + util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined] ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, } -if hasattr(ssl, "PROTOCOL_SSLv3") and hasattr(OpenSSL.SSL, "SSLv3_METHOD"): - _openssl_versions[ssl.PROTOCOL_SSLv3] = OpenSSL.SSL.SSLv3_METHOD - if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"): _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD @@ -100,43 +86,77 @@ class UnsupportedExtension(Exception): ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -_openssl_to_stdlib_verify = dict((v, k) for k, v in _stdlib_to_openssl_verify.items()) +_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()} + +# The SSLvX values are the most likely to be missing in the future +# but we check them all just to be sure. +_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr( + OpenSSL.SSL, "OP_NO_SSLv3", 0 +) +_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0) +_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0) +_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0) +_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0) + +_openssl_to_ssl_minimum_version: dict[int, int] = { + ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1, + ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1, + ssl.TLSVersion.TLSv1_3: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 + ), + ssl.TLSVersion.MAXIMUM_SUPPORTED: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 + ), +} +_openssl_to_ssl_maximum_version: dict[int, int] = { + ssl.TLSVersion.MINIMUM_SUPPORTED: ( + _OP_NO_SSLv2_OR_SSLv3 + | _OP_NO_TLSv1 + | _OP_NO_TLSv1_1 + | _OP_NO_TLSv1_2 + | _OP_NO_TLSv1_3 + ), + ssl.TLSVersion.TLSv1: ( + _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3 + ), + ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3, + ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3, + ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3, + ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3, +} # OpenSSL will only write 16K at a time SSL_WRITE_BLOCKSIZE = 16384 -orig_util_HAS_SNI = util.HAS_SNI orig_util_SSLContext = util.ssl_.SSLContext log = logging.getLogger(__name__) -def inject_into_urllib3(): +def inject_into_urllib3() -> None: "Monkey-patch urllib3 with PyOpenSSL-backed SSL-support." _validate_dependencies_met() - util.SSLContext = PyOpenSSLContext - util.ssl_.SSLContext = PyOpenSSLContext - util.HAS_SNI = HAS_SNI - util.ssl_.HAS_SNI = HAS_SNI + util.SSLContext = PyOpenSSLContext # type: ignore[assignment] + util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment] util.IS_PYOPENSSL = True util.ssl_.IS_PYOPENSSL = True -def extract_from_urllib3(): +def extract_from_urllib3() -> None: "Undo monkey-patching by :func:`inject_into_urllib3`." 
util.SSLContext = orig_util_SSLContext util.ssl_.SSLContext = orig_util_SSLContext - util.HAS_SNI = orig_util_HAS_SNI - util.ssl_.HAS_SNI = orig_util_HAS_SNI util.IS_PYOPENSSL = False util.ssl_.IS_PYOPENSSL = False -def _validate_dependencies_met(): +def _validate_dependencies_met() -> None: """ Verifies that PyOpenSSL's package-level dependencies have been met. Throws `ImportError` if they are not met. @@ -162,7 +182,7 @@ def _validate_dependencies_met(): ) -def _dnsname_to_stdlib(name): +def _dnsname_to_stdlib(name: str) -> str | None: """ Converts a dNSName SubjectAlternativeName field to the form used by the standard library on the given Python version. @@ -176,16 +196,16 @@ def _dnsname_to_stdlib(name): the name given should be skipped. """ - def idna_encode(name): + def idna_encode(name: str) -> bytes | None: """ Borrowed wholesale from the Python Cryptography Project. It turns out that we can't just safely call `idna.encode`: it can explode for wildcard names. This avoids that problem. """ - import thirdparty.idna as idna + import idna try: - for prefix in [u"*.", u"."]: + for prefix in ["*.", "."]: if name.startswith(prefix): name = name[len(prefix) :] return prefix.encode("ascii") + idna.encode(name) @@ -197,25 +217,17 @@ def idna_encode(name): if ":" in name: return name - name = idna_encode(name) - if name is None: + encoded_name = idna_encode(name) + if encoded_name is None: return None - elif sys.version_info >= (3, 0): - name = name.decode("utf-8") - return name + return encoded_name.decode("utf-8") -def get_subj_alt_name(peer_cert): +def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]: """ Given an PyOpenSSL certificate, provides all the subject alternative names. """ - # Pass the cert to cryptography, which has much better APIs for this. - if hasattr(peer_cert, "to_cryptography"): - cert = peer_cert.to_cryptography() - else: - # This is technically using private APIs, but should work across all - # relevant versions before PyOpenSSL got a proper API for this. - cert = _Certificate(openssl_backend, peer_cert._x509) + cert = peer_cert.to_cryptography() # We want to find the SAN extension. Ask Cryptography to locate it (it's # faster than looping in Python) @@ -259,93 +271,94 @@ def get_subj_alt_name(peer_cert): return names -class WrappedSocket(object): - """API-compatibility wrapper for Python OpenSSL's Connection-class. - - Note: _makefile_refs, _drop() and _reuse() are needed for the garbage - collector of pypy. 
- """ +class WrappedSocket: + """API-compatibility wrapper for Python OpenSSL's Connection-class.""" - def __init__(self, connection, socket, suppress_ragged_eofs=True): + def __init__( + self, + connection: OpenSSL.SSL.Connection, + socket: socket_cls, + suppress_ragged_eofs: bool = True, + ) -> None: self.connection = connection self.socket = socket self.suppress_ragged_eofs = suppress_ragged_eofs - self._makefile_refs = 0 + self._io_refs = 0 self._closed = False - def fileno(self): + def fileno(self) -> int: return self.socket.fileno() # Copy-pasted from Python 3.5 source code - def _decref_socketios(self): - if self._makefile_refs > 0: - self._makefile_refs -= 1 + def _decref_socketios(self) -> None: + if self._io_refs > 0: + self._io_refs -= 1 if self._closed: self.close() - def recv(self, *args, **kwargs): + def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes: try: data = self.connection.recv(*args, **kwargs) except OpenSSL.SSL.SysCallError as e: if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): return b"" else: - raise SocketError(str(e)) + raise OSError(e.args[0], str(e)) from e except OpenSSL.SSL.ZeroReturnError: if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: return b"" else: raise - except OpenSSL.SSL.WantReadError: + except OpenSSL.SSL.WantReadError as e: if not util.wait_for_read(self.socket, self.socket.gettimeout()): - raise timeout("The read operation timed out") + raise timeout("The read operation timed out") from e else: return self.recv(*args, **kwargs) # TLS 1.3 post-handshake authentication except OpenSSL.SSL.Error as e: - raise ssl.SSLError("read error: %r" % e) + raise ssl.SSLError(f"read error: {e!r}") from e else: - return data + return data # type: ignore[no-any-return] - def recv_into(self, *args, **kwargs): + def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int: try: - return self.connection.recv_into(*args, **kwargs) + return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return] except OpenSSL.SSL.SysCallError as e: if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"): return 0 else: - raise SocketError(str(e)) + raise OSError(e.args[0], str(e)) from e except OpenSSL.SSL.ZeroReturnError: if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: return 0 else: raise - except OpenSSL.SSL.WantReadError: + except OpenSSL.SSL.WantReadError as e: if not util.wait_for_read(self.socket, self.socket.gettimeout()): - raise timeout("The read operation timed out") + raise timeout("The read operation timed out") from e else: return self.recv_into(*args, **kwargs) # TLS 1.3 post-handshake authentication except OpenSSL.SSL.Error as e: - raise ssl.SSLError("read error: %r" % e) + raise ssl.SSLError(f"read error: {e!r}") from e - def settimeout(self, timeout): + def settimeout(self, timeout: float) -> None: return self.socket.settimeout(timeout) - def _send_until_done(self, data): + def _send_until_done(self, data: bytes) -> int: while True: try: - return self.connection.send(data) - except OpenSSL.SSL.WantWriteError: + return self.connection.send(data) # type: ignore[no-any-return] + except OpenSSL.SSL.WantWriteError as e: if not util.wait_for_write(self.socket, self.socket.gettimeout()): - raise timeout() + raise timeout() from e continue except OpenSSL.SSL.SysCallError as e: - raise SocketError(str(e)) + raise OSError(e.args[0], str(e)) from e - def sendall(self, data): + def sendall(self, data: bytes) -> None: total_sent = 0 while total_sent < len(data): sent = 
self._send_until_done( @@ -353,132 +366,141 @@ def sendall(self, data): ) total_sent += sent - def shutdown(self): - # FIXME rethrow compatible exceptions should we ever use this - self.connection.shutdown() + def shutdown(self, how: int) -> None: + try: + self.connection.shutdown() + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"shutdown error: {e!r}") from e - def close(self): - if self._makefile_refs < 1: - try: - self._closed = True - return self.connection.close() - except OpenSSL.SSL.Error: - return - else: - self._makefile_refs -= 1 + def close(self) -> None: + self._closed = True + if self._io_refs <= 0: + self._real_close() + + def _real_close(self) -> None: + try: + return self.connection.close() # type: ignore[no-any-return] + except OpenSSL.SSL.Error: + return - def getpeercert(self, binary_form=False): + def getpeercert( + self, binary_form: bool = False + ) -> dict[str, list[typing.Any]] | None: x509 = self.connection.get_peer_certificate() if not x509: - return x509 + return x509 # type: ignore[no-any-return] if binary_form: - return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) + return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return] return { - "subject": ((("commonName", x509.get_subject().CN),),), + "subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item] "subjectAltName": get_subj_alt_name(x509), } - def version(self): - return self.connection.get_protocol_version_name() + def version(self) -> str: + return self.connection.get_protocol_version_name() # type: ignore[no-any-return] - def _reuse(self): - self._makefile_refs += 1 - - def _drop(self): - if self._makefile_refs < 1: - self.close() - else: - self._makefile_refs -= 1 + def selected_alpn_protocol(self) -> str | None: + alpn_proto = self.connection.get_alpn_proto_negotiated() + return alpn_proto.decode() if alpn_proto else None -if _fileobject: # Platform-specific: Python 2 +WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined] - def makefile(self, mode, bufsize=-1): - self._makefile_refs += 1 - return _fileobject(self, mode, bufsize, close=True) - -else: # Platform-specific: Python 3 - makefile = backport_makefile - -WrappedSocket.makefile = makefile - - -class PyOpenSSLContext(object): +class PyOpenSSLContext: """ I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible for translating the interface of the standard library ``SSLContext`` object to calls into PyOpenSSL. 
""" - def __init__(self, protocol): + def __init__(self, protocol: int) -> None: self.protocol = _openssl_versions[protocol] self._ctx = OpenSSL.SSL.Context(self.protocol) self._options = 0 self.check_hostname = False + self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED + self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED @property - def options(self): + def options(self) -> int: return self._options @options.setter - def options(self, value): + def options(self, value: int) -> None: self._options = value - self._ctx.set_options(value) + self._set_ctx_options() @property - def verify_mode(self): + def verify_mode(self) -> int: return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()] @verify_mode.setter - def verify_mode(self, value): + def verify_mode(self, value: ssl.VerifyMode) -> None: self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback) - def set_default_verify_paths(self): + def set_default_verify_paths(self) -> None: self._ctx.set_default_verify_paths() - def set_ciphers(self, ciphers): - if isinstance(ciphers, six.text_type): + def set_ciphers(self, ciphers: bytes | str) -> None: + if isinstance(ciphers, str): ciphers = ciphers.encode("utf-8") self._ctx.set_cipher_list(ciphers) - def load_verify_locations(self, cafile=None, capath=None, cadata=None): + def load_verify_locations( + self, + cafile: str | None = None, + capath: str | None = None, + cadata: bytes | None = None, + ) -> None: if cafile is not None: - cafile = cafile.encode("utf-8") + cafile = cafile.encode("utf-8") # type: ignore[assignment] if capath is not None: - capath = capath.encode("utf-8") + capath = capath.encode("utf-8") # type: ignore[assignment] try: self._ctx.load_verify_locations(cafile, capath) if cadata is not None: self._ctx.load_verify_locations(BytesIO(cadata)) except OpenSSL.SSL.Error as e: - raise ssl.SSLError("unable to load trusted certificates: %r" % e) + raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e - def load_cert_chain(self, certfile, keyfile=None, password=None): - self._ctx.use_certificate_chain_file(certfile) - if password is not None: - if not isinstance(password, six.binary_type): - password = password.encode("utf-8") - self._ctx.set_passwd_cb(lambda *_: password) - self._ctx.use_privatekey_file(keyfile or certfile) + def load_cert_chain( + self, + certfile: str, + keyfile: str | None = None, + password: str | None = None, + ) -> None: + try: + self._ctx.use_certificate_chain_file(certfile) + if password is not None: + if not isinstance(password, bytes): + password = password.encode("utf-8") # type: ignore[assignment] + self._ctx.set_passwd_cb(lambda *_: password) + self._ctx.use_privatekey_file(keyfile or certfile) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e + + def set_alpn_protocols(self, protocols: list[bytes | str]) -> None: + protocols = [util.util.to_bytes(p, "ascii") for p in protocols] + return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return] def wrap_socket( self, - sock, - server_side=False, - do_handshake_on_connect=True, - suppress_ragged_eofs=True, - server_hostname=None, - ): + sock: socket_cls, + server_side: bool = False, + do_handshake_on_connect: bool = True, + suppress_ragged_eofs: bool = True, + server_hostname: bytes | str | None = None, + ) -> WrappedSocket: cnx = OpenSSL.SSL.Connection(self._ctx, sock) - if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3 - server_hostname = 
server_hostname.encode("utf-8") - - if server_hostname is not None: + # If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3 + if server_hostname and not util.ssl_.is_ipaddress(server_hostname): + if isinstance(server_hostname, str): + server_hostname = server_hostname.encode("utf-8") cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() @@ -486,16 +508,47 @@ def wrap_socket( while True: try: cnx.do_handshake() - except OpenSSL.SSL.WantReadError: + except OpenSSL.SSL.WantReadError as e: if not util.wait_for_read(sock, sock.gettimeout()): - raise timeout("select timed out") + raise timeout("select timed out") from e continue except OpenSSL.SSL.Error as e: - raise ssl.SSLError("bad handshake: %r" % e) + raise ssl.SSLError(f"bad handshake: {e!r}") from e break return WrappedSocket(cnx, sock) + def _set_ctx_options(self) -> None: + self._ctx.set_options( + self._options + | _openssl_to_ssl_minimum_version[self._minimum_version] + | _openssl_to_ssl_maximum_version[self._maximum_version] + ) + + @property + def minimum_version(self) -> int: + return self._minimum_version -def _verify_callback(cnx, x509, err_no, err_depth, return_code): + @minimum_version.setter + def minimum_version(self, minimum_version: int) -> None: + self._minimum_version = minimum_version + self._set_ctx_options() + + @property + def maximum_version(self) -> int: + return self._maximum_version + + @maximum_version.setter + def maximum_version(self, maximum_version: int) -> None: + self._maximum_version = maximum_version + self._set_ctx_options() + + +def _verify_callback( + cnx: OpenSSL.SSL.Connection, + x509: X509, + err_no: int, + err_depth: int, + return_code: int, +) -> bool: return err_no == 0 diff --git a/thirdparty/urllib3/contrib/socks.py b/thirdparty/urllib3/contrib/socks.py index 9e97f7a..c62b5e0 100644 --- a/thirdparty/urllib3/contrib/socks.py +++ b/thirdparty/urllib3/contrib/socks.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ This module contains provisional support for SOCKS proxies from within urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and @@ -14,45 +13,52 @@ - SOCKS5 with local DNS (``proxy_url='socks5://...``) - Usernames and passwords for the SOCKS proxy - .. note:: - It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in - your ``proxy_url`` to ensure that DNS resolution is done from the remote - server instead of client-side when connecting to a domain name. +.. note:: + It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in + your ``proxy_url`` to ensure that DNS resolution is done from the remote + server instead of client-side when connecting to a domain name. SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 supports IPv4, IPv6, and domain names. When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` -will be sent as the ``userid`` section of the SOCKS request:: +will be sent as the ``userid`` section of the SOCKS request: + +.. code-block:: python proxy_url="socks4a://@proxy-host" When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion of the ``proxy_url`` will be sent as the username/password to authenticate -with the proxy:: +with the proxy: + +.. 
code-block:: python proxy_url="socks5h://:@proxy-host" """ -from __future__ import absolute_import + +from __future__ import annotations try: - import socks + import socks # type: ignore[import-not-found] except ImportError: import warnings + from ..exceptions import DependencyWarning warnings.warn( ( "SOCKS support in urllib3 requires the installation of optional " "dependencies: specifically, PySocks. For more information, see " - "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies" + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies" ), DependencyWarning, ) raise -from socket import error as SocketError, timeout as SocketTimeout +import typing +from socket import timeout as SocketTimeout from ..connection import HTTPConnection, HTTPSConnection from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool @@ -63,7 +69,16 @@ try: import ssl except ImportError: - ssl = None + ssl = None # type: ignore[assignment] + + +class _TYPE_SOCKS_OPTIONS(typing.TypedDict): + socks_version: int + proxy_host: str | None + proxy_port: str | None + username: str | None + password: str | None + rdns: bool class SOCKSConnection(HTTPConnection): @@ -71,15 +86,20 @@ class SOCKSConnection(HTTPConnection): A plain-text HTTP connection that connects via a SOCKS proxy. """ - def __init__(self, *args, **kwargs): - self._socks_options = kwargs.pop("_socks_options") - super(SOCKSConnection, self).__init__(*args, **kwargs) - - def _new_conn(self): + def __init__( + self, + _socks_options: _TYPE_SOCKS_OPTIONS, + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: + self._socks_options = _socks_options + super().__init__(*args, **kwargs) + + def _new_conn(self) -> socks.socksocket: """ Establish a new connection via the SOCKS proxy. """ - extra_kw = {} + extra_kw: dict[str, typing.Any] = {} if self.source_address: extra_kw["source_address"] = self.source_address @@ -96,15 +116,14 @@ def _new_conn(self): proxy_password=self._socks_options["password"], proxy_rdns=self._socks_options["rdns"], timeout=self.timeout, - **extra_kw + **extra_kw, ) - except SocketTimeout: + except SocketTimeout as e: raise ConnectTimeoutError( self, - "Connection to %s timed out. (connect timeout=%s)" - % (self.host, self.timeout), - ) + f"Connection to {self.host} timed out. (connect timeout={self.timeout})", + ) from e except socks.ProxyError as e: # This is fragile as hell, but it seems to be the only way to raise @@ -114,22 +133,23 @@ def _new_conn(self): if isinstance(error, SocketTimeout): raise ConnectTimeoutError( self, - "Connection to %s timed out. (connect timeout=%s)" - % (self.host, self.timeout), - ) + f"Connection to {self.host} timed out. (connect timeout={self.timeout})", + ) from e else: + # Adding `from e` messes with coverage somehow, so it's omitted. + # See #2386. raise NewConnectionError( - self, "Failed to establish a new connection: %s" % error + self, f"Failed to establish a new connection: {error}" ) else: raise NewConnectionError( - self, "Failed to establish a new connection: %s" % e - ) + self, f"Failed to establish a new connection: {e}" + ) from e - except SocketError as e: # Defensive: PySocks should catch all these. + except OSError as e: # Defensive: PySocks should catch all these. 
raise NewConnectionError( - self, "Failed to establish a new connection: %s" % e - ) + self, f"Failed to establish a new connection: {e}" + ) from e return conn @@ -163,12 +183,12 @@ class SOCKSProxyManager(PoolManager): def __init__( self, - proxy_url, - username=None, - password=None, - num_pools=10, - headers=None, - **connection_pool_kw + proxy_url: str, + username: str | None = None, + password: str | None = None, + num_pools: int = 10, + headers: typing.Mapping[str, str] | None = None, + **connection_pool_kw: typing.Any, ): parsed = parse_url(proxy_url) @@ -189,7 +209,7 @@ def __init__( socks_version = socks.PROXY_TYPE_SOCKS4 rdns = True else: - raise ValueError("Unable to determine SOCKS version from %s" % proxy_url) + raise ValueError(f"Unable to determine SOCKS version from {proxy_url}") self.proxy_url = proxy_url @@ -203,8 +223,6 @@ def __init__( } connection_pool_kw["_socks_options"] = socks_options - super(SOCKSProxyManager, self).__init__( - num_pools, headers, **connection_pool_kw - ) + super().__init__(num_pools, headers, **connection_pool_kw) self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme diff --git a/thirdparty/urllib3/exceptions.py b/thirdparty/urllib3/exceptions.py index 3799f8e..0394578 100644 --- a/thirdparty/urllib3/exceptions.py +++ b/thirdparty/urllib3/exceptions.py @@ -1,64 +1,76 @@ -from __future__ import absolute_import -from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead +from __future__ import annotations + +import socket +import typing +import warnings +from email.errors import MessageDefect +from http.client import IncompleteRead as httplib_IncompleteRead + +if typing.TYPE_CHECKING: + from .connection import HTTPConnection + from .connectionpool import ConnectionPool + from .response import HTTPResponse + from .util.retry import Retry # Base Exceptions class HTTPError(Exception): - "Base exception used by this module." - pass + """Base exception used by this module.""" class HTTPWarning(Warning): - "Base warning used by this module." - pass + """Base warning used by this module.""" + + +_TYPE_REDUCE_RESULT = tuple[typing.Callable[..., object], tuple[object, ...]] class PoolError(HTTPError): - "Base exception for errors caused within a pool." + """Base exception for errors caused within a pool.""" - def __init__(self, pool, message): + def __init__(self, pool: ConnectionPool, message: str) -> None: self.pool = pool - HTTPError.__init__(self, "%s: %s" % (pool, message)) + super().__init__(f"{pool}: {message}") - def __reduce__(self): + def __reduce__(self) -> _TYPE_REDUCE_RESULT: # For pickling purposes. return self.__class__, (None, None) class RequestError(PoolError): - "Base exception for PoolErrors that have associated URLs." + """Base exception for PoolErrors that have associated URLs.""" - def __init__(self, pool, url, message): + def __init__(self, pool: ConnectionPool, url: str, message: str) -> None: self.url = url - PoolError.__init__(self, pool, message) + super().__init__(pool, message) - def __reduce__(self): + def __reduce__(self) -> _TYPE_REDUCE_RESULT: # For pickling purposes. return self.__class__, (None, self.url, None) class SSLError(HTTPError): - "Raised when SSL certificate fails in an HTTPS connection." - pass + """Raised when SSL certificate fails in an HTTPS connection.""" class ProxyError(HTTPError): - "Raised when the connection to a proxy fails." + """Raised when the connection to a proxy fails.""" + + # The original error is also available as __cause__. 
+ original_error: Exception - def __init__(self, message, error, *args): - super(ProxyError, self).__init__(message, error, *args) + def __init__(self, message: str, error: Exception) -> None: + super().__init__(message, error) self.original_error = error class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." - pass + """Raised when automatic decoding based on Content-Type fails.""" class ProtocolError(HTTPError): - "Raised when something unexpected happens mid-request/response." - pass + """Raised when something unexpected happens mid-request/response.""" #: Renamed to ProtocolError but aliased for backwards compatibility. @@ -73,32 +85,35 @@ class MaxRetryError(RequestError): :param pool: The connection pool :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` - :param string url: The requested Url - :param exceptions.Exception reason: The underlying error + :param str url: The requested Url + :param reason: The underlying error + :type reason: :class:`Exception` """ - def __init__(self, pool, url, reason=None): + def __init__( + self, pool: ConnectionPool, url: str, reason: Exception | None = None + ) -> None: self.reason = reason - message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason) + message = f"Max retries exceeded with url: {url} (Caused by {reason!r})" - RequestError.__init__(self, pool, url, message) + super().__init__(pool, url, message) class HostChangedError(RequestError): - "Raised when an existing pool gets a request for a foreign host." + """Raised when an existing pool gets a request for a foreign host.""" - def __init__(self, pool, url, retries=3): - message = "Tried to open a foreign host with url: %s" % url - RequestError.__init__(self, pool, url, message) + def __init__( + self, pool: ConnectionPool, url: str, retries: Retry | int = 3 + ) -> None: + message = f"Tried to open a foreign host with url: {url}" + super().__init__(pool, url, message) self.retries = retries class TimeoutStateError(HTTPError): - """ Raised when passing an invalid state to a timeout """ - - pass + """Raised when passing an invalid state to a timeout""" class TimeoutError(HTTPError): @@ -108,85 +123,113 @@ class TimeoutError(HTTPError): ` and :exc:`ConnectTimeoutErrors `. """ - pass - class ReadTimeoutError(TimeoutError, RequestError): - "Raised when a socket timeout occurs while receiving data from a server" - pass + """Raised when a socket timeout occurs while receiving data from a server""" # This timeout error does not have a URL attached and needs to inherit from the # base HTTPError class ConnectTimeoutError(TimeoutError): - "Raised when a socket timeout occurs while connecting to a server" - pass + """Raised when a socket timeout occurs while connecting to a server""" + + +class NewConnectionError(ConnectTimeoutError, HTTPError): + """Raised when we fail to establish a new connection. Usually ECONNREFUSED.""" + + def __init__(self, conn: HTTPConnection, message: str) -> None: + self.conn = conn + super().__init__(f"{conn}: {message}") + + def __reduce__(self) -> _TYPE_REDUCE_RESULT: + # For pickling purposes. + return self.__class__, (None, None) + + @property + def pool(self) -> HTTPConnection: + warnings.warn( + "The 'pool' property is deprecated and will be removed " + "in urllib3 v2.1.0. 
Use 'conn' instead.", + DeprecationWarning, + stacklevel=2, + ) + + return self.conn + + +class NameResolutionError(NewConnectionError): + """Raised when host name resolution fails.""" + def __init__(self, host: str, conn: HTTPConnection, reason: socket.gaierror): + message = f"Failed to resolve '{host}' ({reason})" + super().__init__(conn, message) -class NewConnectionError(ConnectTimeoutError, PoolError): - "Raised when we fail to establish a new connection. Usually ECONNREFUSED." - pass + def __reduce__(self) -> _TYPE_REDUCE_RESULT: + # For pickling purposes. + return self.__class__, (None, None, None) class EmptyPoolError(PoolError): - "Raised when a pool runs out of connections and no more are allowed." - pass + """Raised when a pool runs out of connections and no more are allowed.""" + + +class FullPoolError(PoolError): + """Raised when we try to add a connection to a full pool in blocking mode.""" class ClosedPoolError(PoolError): - "Raised when a request enters a pool after the pool has been closed." - pass + """Raised when a request enters a pool after the pool has been closed.""" class LocationValueError(ValueError, HTTPError): - "Raised when there is something wrong with a given URL input." - pass + """Raised when there is something wrong with a given URL input.""" class LocationParseError(LocationValueError): - "Raised when get_host or similar fails to parse the URL input." + """Raised when get_host or similar fails to parse the URL input.""" - def __init__(self, location): - message = "Failed to parse: %s" % location - HTTPError.__init__(self, message) + def __init__(self, location: str) -> None: + message = f"Failed to parse: {location}" + super().__init__(message) self.location = location +class URLSchemeUnknown(LocationValueError): + """Raised when a URL input has an unsupported scheme.""" + + def __init__(self, scheme: str): + message = f"Not supported URL scheme {scheme}" + super().__init__(message) + + self.scheme = scheme + + class ResponseError(HTTPError): - "Used as a container for an error reason supplied in a MaxRetryError." + """Used as a container for an error reason supplied in a MaxRetryError.""" + GENERIC_ERROR = "too many error responses" SPECIFIC_ERROR = "too many {status_code} error responses" class SecurityWarning(HTTPWarning): - "Warned when performing security reducing actions" - pass + """Warned when performing security reducing actions""" -class SubjectAltNameWarning(SecurityWarning): - "Warned when connecting to a host with a certificate missing a SAN." - pass +class InsecureRequestWarning(SecurityWarning): + """Warned when making an unverified HTTPS request.""" -class InsecureRequestWarning(SecurityWarning): - "Warned when making an unverified HTTPS request." - pass +class NotOpenSSLWarning(SecurityWarning): + """Warned when using unsupported SSL library""" class SystemTimeWarning(SecurityWarning): - "Warned when system time is suspected to be wrong" - pass + """Warned when system time is suspected to be wrong""" class InsecurePlatformWarning(SecurityWarning): - "Warned when certain SSL configuration is not available on a platform." - pass - - -class SNIMissingWarning(HTTPWarning): - "Warned when making a HTTPS request without SNI available." - pass + """Warned when certain TLS/SSL configuration is not available on a platform.""" class DependencyWarning(HTTPWarning): @@ -195,78 +238,90 @@ class DependencyWarning(HTTPWarning): dependencies. 
""" - pass - - -class InvalidProxyConfigurationWarning(HTTPWarning): - """ - Warned when using an HTTPS proxy and an HTTPS URL. Currently - urllib3 doesn't support HTTPS proxies and the proxy will be - contacted via HTTP instead. This warning can be fixed by - changing your HTTPS proxy URL into an HTTP proxy URL. - - If you encounter this warning read this: - https://github.com/urllib3/urllib3/issues/1850 - """ - - pass - class ResponseNotChunked(ProtocolError, ValueError): - "Response needs to be chunked in order to read it as chunks." - pass + """Response needs to be chunked in order to read it as chunks.""" class BodyNotHttplibCompatible(HTTPError): """ - Body should be httplib.HTTPResponse like (have an fp attribute which - returns raw chunks) for read_chunked(). + Body should be :class:`http.client.HTTPResponse` like + (have an fp attribute which returns raw chunks) for read_chunked(). """ - pass - class IncompleteRead(HTTPError, httplib_IncompleteRead): """ Response length doesn't match expected Content-Length - Subclass of http_client.IncompleteRead to allow int value - for `partial` to avoid creating large objects on streamed - reads. + Subclass of :class:`http.client.IncompleteRead` to allow int value + for ``partial`` to avoid creating large objects on streamed reads. """ - def __init__(self, partial, expected): - super(IncompleteRead, self).__init__(partial, expected) + partial: int # type: ignore[assignment] + expected: int + + def __init__(self, partial: int, expected: int) -> None: + self.partial = partial + self.expected = expected - def __repr__(self): + def __repr__(self) -> str: return "IncompleteRead(%i bytes read, %i more expected)" % ( self.partial, self.expected, ) +class InvalidChunkLength(HTTPError, httplib_IncompleteRead): + """Invalid chunk length in a chunked response.""" + + def __init__(self, response: HTTPResponse, length: bytes) -> None: + self.partial: int = response.tell() # type: ignore[assignment] + self.expected: int | None = response.length_remaining + self.response = response + self.length = length + + def __repr__(self) -> str: + return "InvalidChunkLength(got length %r, %i bytes read)" % ( + self.length, + self.partial, + ) + + class InvalidHeader(HTTPError): - "The header provided was somehow invalid." - pass + """The header provided was somehow invalid.""" -class ProxySchemeUnknown(AssertionError, ValueError): - "ProxyManager does not support the supplied scheme" +class ProxySchemeUnknown(AssertionError, URLSchemeUnknown): + """ProxyManager does not support the supplied scheme""" + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. - def __init__(self, scheme): - message = "Not supported proxy scheme %s" % scheme - super(ProxySchemeUnknown, self).__init__(message) + def __init__(self, scheme: str | None) -> None: + # 'localhost' is here because our URL parser parses + # localhost:8080 -> scheme=localhost, remove if we fix this. + if scheme == "localhost": + scheme = None + if scheme is None: + message = "Proxy URL had no scheme, should start with http:// or https://" + else: + message = f"Proxy URL had unsupported scheme {scheme}, should use http:// or https://" + super().__init__(message) + + +class ProxySchemeUnsupported(ValueError): + """Fetching HTTPS resources through HTTPS proxies is unsupported""" class HeaderParsingError(HTTPError): - "Raised by assert_header_parsing, but we convert it to a log.warning statement." 
+ """Raised by assert_header_parsing, but we convert it to a log.warning statement.""" - def __init__(self, defects, unparsed_data): - message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) - super(HeaderParsingError, self).__init__(message) + def __init__( + self, defects: list[MessageDefect], unparsed_data: bytes | str | None + ) -> None: + message = f"{defects or 'Unknown'}, unparsed data: {unparsed_data!r}" + super().__init__(message) class UnrewindableBodyError(HTTPError): - "urllib3 encountered an error when trying to rewind a body" - pass + """urllib3 encountered an error when trying to rewind a body""" diff --git a/thirdparty/urllib3/fields.py b/thirdparty/urllib3/fields.py index 8715b22..97c4730 100644 --- a/thirdparty/urllib3/fields.py +++ b/thirdparty/urllib3/fields.py @@ -1,12 +1,20 @@ -from __future__ import absolute_import +from __future__ import annotations + import email.utils import mimetypes -import re +import typing -from .packages import six +_TYPE_FIELD_VALUE = typing.Union[str, bytes] +_TYPE_FIELD_VALUE_TUPLE = typing.Union[ + _TYPE_FIELD_VALUE, + tuple[str, _TYPE_FIELD_VALUE], + tuple[str, _TYPE_FIELD_VALUE, str], +] -def guess_content_type(filename, default="application/octet-stream"): +def guess_content_type( + filename: str | None, default: str = "application/octet-stream" +) -> str: """ Guess the "Content-Type" of a file. @@ -20,26 +28,41 @@ def guess_content_type(filename, default="application/octet-stream"): return default -def format_header_param_rfc2231(name, value): +def format_header_param_rfc2231(name: str, value: _TYPE_FIELD_VALUE) -> str: """ Helper function to format and quote a single header parameter using the strategy defined in RFC 2231. Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows RFC 2388 Section 4.4. + non-ASCII values, like file names. This follows + `RFC 2388 Section 4.4 `_. :param name: The name of the parameter, a string expected to be ASCII only. :param value: The value of the parameter, provided as ``bytes`` or `str``. - :ret: + :returns: An RFC-2231-formatted unicode string. + + .. deprecated:: 2.0.0 + Will be removed in urllib3 v2.1.0. This is not valid for + ``multipart/form-data`` header parameters. """ - if isinstance(value, six.binary_type): + import warnings + + warnings.warn( + "'format_header_param_rfc2231' is deprecated and will be " + "removed in urllib3 v2.1.0. This is not valid for " + "multipart/form-data header parameters.", + DeprecationWarning, + stacklevel=2, + ) + + if isinstance(value, bytes): value = value.decode("utf-8") if not any(ch in value for ch in '"\\\r\n'): - result = u'%s="%s"' % (name, value) + result = f'{name}="{value}"' try: result.encode("ascii") except (UnicodeEncodeError, UnicodeDecodeError): @@ -47,82 +70,87 @@ def format_header_param_rfc2231(name, value): else: return result - if six.PY2: # Python 2: - value = value.encode("utf-8") - - # encode_rfc2231 accepts an encoded string and returns an ascii-encoded - # string in Python 2 but accepts and returns unicode strings in Python 3 value = email.utils.encode_rfc2231(value, "utf-8") - value = "%s*=%s" % (name, value) - - if six.PY2: # Python 2: - value = value.decode("utf-8") + value = f"{name}*={value}" return value -_HTML5_REPLACEMENTS = { - u"\u0022": u"%22", - # Replace "\" with "\\". - u"\u005C": u"\u005C\u005C", - u"\u005C": u"\u005C\u005C", -} - -# All control characters from 0x00 to 0x1F *except* 0x1B. 
-_HTML5_REPLACEMENTS.update( - { - six.unichr(cc): u"%{:02X}".format(cc) - for cc in range(0x00, 0x1F + 1) - if cc not in (0x1B,) - } -) - - -def _replace_multiple(value, needles_and_replacements): - def replacer(match): - return needles_and_replacements[match.group(0)] - - pattern = re.compile( - r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()]) - ) - - result = pattern.sub(replacer, value) - - return result - - -def format_header_param_html5(name, value): +def format_multipart_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str: """ - Helper function to format and quote a single header parameter using the - HTML5 strategy. + Format and quote a single multipart header parameter. - Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows the `HTML5 Working Draft - Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. + This follows the `WHATWG HTML Standard`_ as of 2021/06/10, matching + the behavior of current browser and curl versions. Values are + assumed to be UTF-8. The ``\\n``, ``\\r``, and ``"`` characters are + percent encoded. - .. _HTML5 Working Draft Section 4.10.22.7: - https://w3c.github.io/html/sec-forms.html#multipart-form-data + .. _WHATWG HTML Standard: + https://html.spec.whatwg.org/multipage/ + form-control-infrastructure.html#multipart-form-data :param name: - The name of the parameter, a string expected to be ASCII only. + The name of the parameter, an ASCII-only ``str``. :param value: - The value of the parameter, provided as ``bytes`` or `str``. - :ret: - A unicode string, stripped of troublesome characters. + The value of the parameter, a ``str`` or UTF-8 encoded + ``bytes``. + :returns: + A string ``name="value"`` with the escaped value. + + .. versionchanged:: 2.0.0 + Matches the WHATWG HTML Standard as of 2021/06/10. Control + characters are no longer percent encoded. + + .. versionchanged:: 2.0.0 + Renamed from ``format_header_param_html5`` and + ``format_header_param``. The old names will be removed in + urllib3 v2.1.0. """ - if isinstance(value, six.binary_type): + if isinstance(value, bytes): value = value.decode("utf-8") - value = _replace_multiple(value, _HTML5_REPLACEMENTS) + # percent encode \n \r " + value = value.translate({10: "%0A", 13: "%0D", 34: "%22"}) + return f'{name}="{value}"' - return u'%s="%s"' % (name, value) +def format_header_param_html5(name: str, value: _TYPE_FIELD_VALUE) -> str: + """ + .. deprecated:: 2.0.0 + Renamed to :func:`format_multipart_header_param`. Will be + removed in urllib3 v2.1.0. + """ + import warnings + + warnings.warn( + "'format_header_param_html5' has been renamed to " + "'format_multipart_header_param'. The old name will be " + "removed in urllib3 v2.1.0.", + DeprecationWarning, + stacklevel=2, + ) + return format_multipart_header_param(name, value) -# For backwards-compatibility. -format_header_param = format_header_param_html5 + +def format_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str: + """ + .. deprecated:: 2.0.0 + Renamed to :func:`format_multipart_header_param`. Will be + removed in urllib3 v2.1.0. + """ + import warnings + + warnings.warn( + "'format_header_param' has been renamed to " + "'format_multipart_header_param'. The old name will be " + "removed in urllib3 v2.1.0.", + DeprecationWarning, + stacklevel=2, + ) + return format_multipart_header_param(name, value) -class RequestField(object): +class RequestField: """ A data container for request body parameters. 
@@ -134,29 +162,47 @@ class RequestField(object): An optional filename of the request field. Must be unicode. :param headers: An optional dict-like object of headers to initially use for the field. - :param header_formatter: - An optional callable that is used to encode and format the headers. By - default, this is :func:`format_header_param_html5`. + + .. versionchanged:: 2.0.0 + The ``header_formatter`` parameter is deprecated and will + be removed in urllib3 v2.1.0. """ def __init__( self, - name, - data, - filename=None, - headers=None, - header_formatter=format_header_param_html5, + name: str, + data: _TYPE_FIELD_VALUE, + filename: str | None = None, + headers: typing.Mapping[str, str] | None = None, + header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None, ): self._name = name self._filename = filename self.data = data - self.headers = {} + self.headers: dict[str, str | None] = {} if headers: self.headers = dict(headers) - self.header_formatter = header_formatter + + if header_formatter is not None: + import warnings + + warnings.warn( + "The 'header_formatter' parameter is deprecated and " + "will be removed in urllib3 v2.1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.header_formatter = header_formatter + else: + self.header_formatter = format_multipart_header_param @classmethod - def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5): + def from_tuples( + cls, + fieldname: str, + value: _TYPE_FIELD_VALUE_TUPLE, + header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None, + ) -> RequestField: """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. @@ -173,6 +219,10 @@ def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html Field names and filenames must be unicode. """ + filename: str | None + content_type: str | None + data: _TYPE_FIELD_VALUE + if isinstance(value, tuple): if len(value) == 3: filename, data, content_type = value @@ -191,20 +241,29 @@ def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html return request_param - def _render_part(self, name, value): + def _render_part(self, name: str, value: _TYPE_FIELD_VALUE) -> str: """ - Overridable helper function to format a single header parameter. By - default, this calls ``self.header_formatter``. + Override this method to change how each multipart header + parameter is formatted. By default, this calls + :func:`format_multipart_header_param`. :param name: - The name of the parameter, a string expected to be ASCII only. + The name of the parameter, an ASCII-only ``str``. :param value: - The value of the parameter, provided as a unicode string. - """ + The value of the parameter, a ``str`` or UTF-8 encoded + ``bytes``. + :meta public: + """ return self.header_formatter(name, value) - def _render_parts(self, header_parts): + def _render_parts( + self, + header_parts: ( + dict[str, _TYPE_FIELD_VALUE | None] + | typing.Sequence[tuple[str, _TYPE_FIELD_VALUE | None]] + ), + ) -> str: """ Helper function to format and quote a single header. @@ -215,18 +274,21 @@ def _render_parts(self, header_parts): A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format as `k1="v1"; k2="v2"; ...`. 
""" + iterable: typing.Iterable[tuple[str, _TYPE_FIELD_VALUE | None]] + parts = [] - iterable = header_parts if isinstance(header_parts, dict): iterable = header_parts.items() + else: + iterable = header_parts for name, value in iterable: if value is not None: parts.append(self._render_part(name, value)) - return u"; ".join(parts) + return "; ".join(parts) - def render_headers(self): + def render_headers(self) -> str: """ Renders the headers for this request field. """ @@ -235,39 +297,45 @@ def render_headers(self): sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"] for sort_key in sort_keys: if self.headers.get(sort_key, False): - lines.append(u"%s: %s" % (sort_key, self.headers[sort_key])) + lines.append(f"{sort_key}: {self.headers[sort_key]}") for header_name, header_value in self.headers.items(): if header_name not in sort_keys: if header_value: - lines.append(u"%s: %s" % (header_name, header_value)) + lines.append(f"{header_name}: {header_value}") - lines.append(u"\r\n") - return u"\r\n".join(lines) + lines.append("\r\n") + return "\r\n".join(lines) def make_multipart( - self, content_disposition=None, content_type=None, content_location=None - ): + self, + content_disposition: str | None = None, + content_type: str | None = None, + content_location: str | None = None, + ) -> None: """ Makes this request field into a multipart request field. This method overrides "Content-Disposition", "Content-Type" and "Content-Location" headers to the request parameter. + :param content_disposition: + The 'Content-Disposition' of the request body. Defaults to 'form-data' :param content_type: The 'Content-Type' of the request body. :param content_location: The 'Content-Location' of the request body. """ - self.headers["Content-Disposition"] = content_disposition or u"form-data" - self.headers["Content-Disposition"] += u"; ".join( + content_disposition = (content_disposition or "form-data") + "; ".join( [ - u"", + "", self._render_parts( - ((u"name", self._name), (u"filename", self._filename)) + (("name", self._name), ("filename", self._filename)) ), ] ) + + self.headers["Content-Disposition"] = content_disposition self.headers["Content-Type"] = content_type self.headers["Content-Location"] = content_location diff --git a/thirdparty/urllib3/filepost.py b/thirdparty/urllib3/filepost.py index b7b0099..14f70b0 100644 --- a/thirdparty/urllib3/filepost.py +++ b/thirdparty/urllib3/filepost.py @@ -1,28 +1,32 @@ -from __future__ import absolute_import +from __future__ import annotations + import binascii import codecs import os - +import typing from io import BytesIO -from .packages import six -from .packages.six import b -from .fields import RequestField +from .fields import _TYPE_FIELD_VALUE_TUPLE, RequestField writer = codecs.lookup("utf-8")[3] +_TYPE_FIELDS_SEQUENCE = typing.Sequence[ + typing.Union[tuple[str, _TYPE_FIELD_VALUE_TUPLE], RequestField] +] +_TYPE_FIELDS = typing.Union[ + _TYPE_FIELDS_SEQUENCE, + typing.Mapping[str, _TYPE_FIELD_VALUE_TUPLE], +] -def choose_boundary(): + +def choose_boundary() -> str: """ Our embarrassingly-simple replacement for mimetools.choose_boundary. """ - boundary = binascii.hexlify(os.urandom(16)) - if not six.PY2: - boundary = boundary.decode("ascii") - return boundary + return binascii.hexlify(os.urandom(16)).decode() -def iter_field_objects(fields): +def iter_field_objects(fields: _TYPE_FIELDS) -> typing.Iterable[RequestField]: """ Iterate over fields. @@ -30,42 +34,29 @@ def iter_field_objects(fields): :class:`~urllib3.fields.RequestField`. 
""" - if isinstance(fields, dict): - i = six.iteritems(fields) + iterable: typing.Iterable[RequestField | tuple[str, _TYPE_FIELD_VALUE_TUPLE]] + + if isinstance(fields, typing.Mapping): + iterable = fields.items() else: - i = iter(fields) + iterable = fields - for field in i: + for field in iterable: if isinstance(field, RequestField): yield field else: yield RequestField.from_tuples(*field) -def iter_fields(fields): - """ - .. deprecated:: 1.6 - - Iterate over fields. - - The addition of :class:`~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - :class:`~urllib3.fields.RequestField` objects. - - Supports list of (k, v) tuples and dicts. - """ - if isinstance(fields, dict): - return ((k, v) for k, v in six.iteritems(fields)) - - return ((k, v) for k, v in fields) - - -def encode_multipart_formdata(fields, boundary=None): +def encode_multipart_formdata( + fields: _TYPE_FIELDS, boundary: str | None = None +) -> tuple[bytes, str]: """ Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). + Values are processed by :func:`urllib3.fields.RequestField.from_tuples`. :param boundary: If not specified, then a random boundary will be generated using @@ -76,7 +67,7 @@ def encode_multipart_formdata(fields, boundary=None): boundary = choose_boundary() for field in iter_field_objects(fields): - body.write(b("--%s\r\n" % (boundary))) + body.write(f"--{boundary}\r\n".encode("latin-1")) writer(body).write(field.render_headers()) data = field.data @@ -84,15 +75,15 @@ def encode_multipart_formdata(fields, boundary=None): if isinstance(data, int): data = str(data) # Backwards compatibility - if isinstance(data, six.text_type): + if isinstance(data, str): writer(body).write(data) else: body.write(data) body.write(b"\r\n") - body.write(b("--%s--\r\n" % (boundary))) + body.write(f"--{boundary}--\r\n".encode("latin-1")) - content_type = str("multipart/form-data; boundary=%s" % boundary) + content_type = f"multipart/form-data; boundary={boundary}" return body.getvalue(), content_type diff --git a/thirdparty/urllib3/http2/__init__.py b/thirdparty/urllib3/http2/__init__.py new file mode 100644 index 0000000..133e1d8 --- /dev/null +++ b/thirdparty/urllib3/http2/__init__.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from importlib.metadata import version + +__all__ = [ + "inject_into_urllib3", + "extract_from_urllib3", +] + +import typing + +orig_HTTPSConnection: typing.Any = None + + +def inject_into_urllib3() -> None: + # First check if h2 version is valid + h2_version = version("h2") + if not h2_version.startswith("4."): + raise ImportError( + "urllib3 v2 supports h2 version 4.x.x, currently " + f"the 'h2' module is compiled with {h2_version!r}. " + "See: https://github.com/urllib3/urllib3/issues/3290" + ) + + # Import here to avoid circular dependencies. + from .. import connection as urllib3_connection + from .. import util as urllib3_util + from ..connectionpool import HTTPSConnectionPool + from ..util import ssl_ as urllib3_util_ssl + from .connection import HTTP2Connection + + global orig_HTTPSConnection + orig_HTTPSConnection = urllib3_connection.HTTPSConnection + + HTTPSConnectionPool.ConnectionCls = HTTP2Connection + urllib3_connection.HTTPSConnection = HTTP2Connection # type: ignore[misc] + + # TODO: Offer 'http/1.1' as well, but for testing purposes this is handy. 
+ urllib3_util.ALPN_PROTOCOLS = ["h2"] + urllib3_util_ssl.ALPN_PROTOCOLS = ["h2"] + + +def extract_from_urllib3() -> None: + from .. import connection as urllib3_connection + from .. import util as urllib3_util + from ..connectionpool import HTTPSConnectionPool + from ..util import ssl_ as urllib3_util_ssl + + HTTPSConnectionPool.ConnectionCls = orig_HTTPSConnection + urllib3_connection.HTTPSConnection = orig_HTTPSConnection # type: ignore[misc] + + urllib3_util.ALPN_PROTOCOLS = ["http/1.1"] + urllib3_util_ssl.ALPN_PROTOCOLS = ["http/1.1"] diff --git a/thirdparty/urllib3/http2/connection.py b/thirdparty/urllib3/http2/connection.py new file mode 100644 index 0000000..f486145 --- /dev/null +++ b/thirdparty/urllib3/http2/connection.py @@ -0,0 +1,356 @@ +from __future__ import annotations + +import logging +import re +import threading +import types +import typing + +import h2.config # type: ignore[import-untyped] +import h2.connection # type: ignore[import-untyped] +import h2.events # type: ignore[import-untyped] + +from .._base_connection import _TYPE_BODY +from .._collections import HTTPHeaderDict +from ..connection import HTTPSConnection, _get_default_user_agent +from ..exceptions import ConnectionError +from ..response import BaseHTTPResponse + +orig_HTTPSConnection = HTTPSConnection + +T = typing.TypeVar("T") + +log = logging.getLogger(__name__) + +RE_IS_LEGAL_HEADER_NAME = re.compile(rb"^[!#$%&'*+\-.^_`|~0-9a-z]+$") +RE_IS_ILLEGAL_HEADER_VALUE = re.compile(rb"[\0\x00\x0a\x0d\r\n]|^[ \r\n\t]|[ \r\n\t]$") + + +def _is_legal_header_name(name: bytes) -> bool: + """ + "An implementation that validates fields according to the definitions in Sections + 5.1 and 5.5 of [HTTP] only needs an additional check that field names do not + include uppercase characters." (https://httpwg.org/specs/rfc9113.html#n-field-validity) + + `http.client._is_legal_header_name` does not validate the field name according to the + HTTP 1.1 spec, so we do that here, in addition to checking for uppercase characters. + + This does not allow for the `:` character in the header name, so should not + be used to validate pseudo-headers. + """ + return bool(RE_IS_LEGAL_HEADER_NAME.match(name)) + + +def _is_illegal_header_value(value: bytes) -> bool: + """ + "A field value MUST NOT contain the zero value (ASCII NUL, 0x00), line feed + (ASCII LF, 0x0a), or carriage return (ASCII CR, 0x0d) at any position. A field + value MUST NOT start or end with an ASCII whitespace character (ASCII SP or HTAB, + 0x20 or 0x09)." (https://httpwg.org/specs/rfc9113.html#n-field-validity) + """ + return bool(RE_IS_ILLEGAL_HEADER_VALUE.search(value)) + + +class _LockedObject(typing.Generic[T]): + """ + A wrapper class that hides a specific object behind a lock. + The goal here is to provide a simple way to protect access to an object + that cannot safely be simultaneously accessed from multiple threads. The + intended use of this class is simple: take hold of it with a context + manager, which returns the protected object. 
+ """ + + __slots__ = ( + "lock", + "_obj", + ) + + def __init__(self, obj: T): + self.lock = threading.RLock() + self._obj = obj + + def __enter__(self) -> T: + self.lock.acquire() + return self._obj + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: types.TracebackType | None, + ) -> None: + self.lock.release() + + +class HTTP2Connection(HTTPSConnection): + def __init__( + self, host: str, port: int | None = None, **kwargs: typing.Any + ) -> None: + self._h2_conn = self._new_h2_conn() + self._h2_stream: int | None = None + self._headers: list[tuple[bytes, bytes]] = [] + + if "proxy" in kwargs or "proxy_config" in kwargs: # Defensive: + raise NotImplementedError("Proxies aren't supported with HTTP/2") + + super().__init__(host, port, **kwargs) + + if self._tunnel_host is not None: + raise NotImplementedError("Tunneling isn't supported with HTTP/2") + + def _new_h2_conn(self) -> _LockedObject[h2.connection.H2Connection]: + config = h2.config.H2Configuration(client_side=True) + return _LockedObject(h2.connection.H2Connection(config=config)) + + def connect(self) -> None: + super().connect() + with self._h2_conn as conn: + conn.initiate_connection() + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + def putrequest( # type: ignore[override] + self, + method: str, + url: str, + **kwargs: typing.Any, + ) -> None: + """putrequest + This deviates from the HTTPConnection method signature since we never need to override + sending accept-encoding headers or the host header. + """ + if "skip_host" in kwargs: + raise NotImplementedError("`skip_host` isn't supported") + if "skip_accept_encoding" in kwargs: + raise NotImplementedError("`skip_accept_encoding` isn't supported") + + self._request_url = url or "/" + self._validate_path(url) # type: ignore[attr-defined] + + if ":" in self.host: + authority = f"[{self.host}]:{self.port or 443}" + else: + authority = f"{self.host}:{self.port or 443}" + + self._headers.append((b":scheme", b"https")) + self._headers.append((b":method", method.encode())) + self._headers.append((b":authority", authority.encode())) + self._headers.append((b":path", url.encode())) + + with self._h2_conn as conn: + self._h2_stream = conn.get_next_available_stream_id() + + def putheader(self, header: str | bytes, *values: str | bytes) -> None: + # TODO SKIPPABLE_HEADERS from urllib3 are ignored. + header = header.encode() if isinstance(header, str) else header + header = header.lower() # A lot of upstream code uses capitalized headers. + if not _is_legal_header_name(header): + raise ValueError(f"Illegal header name {str(header)}") + + for value in values: + value = value.encode() if isinstance(value, str) else value + if _is_illegal_header_value(value): + raise ValueError(f"Illegal header value {str(value)}") + self._headers.append((header, value)) + + def endheaders(self, message_body: typing.Any = None) -> None: # type: ignore[override] + if self._h2_stream is None: + raise ConnectionError("Must call `putrequest` first.") + + with self._h2_conn as conn: + conn.send_headers( + stream_id=self._h2_stream, + headers=self._headers, + end_stream=(message_body is None), + ) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + self._headers = [] # Reset headers for the next request. + + def send(self, data: typing.Any) -> None: + """Send data to the server. + `data` can be: `str`, `bytes`, an iterable, or file-like objects + that support a .read() method. 
+ """ + if self._h2_stream is None: + raise ConnectionError("Must call `putrequest` first.") + + with self._h2_conn as conn: + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + if hasattr(data, "read"): # file-like objects + while True: + chunk = data.read(self.blocksize) + if not chunk: + break + if isinstance(chunk, str): + chunk = chunk.encode() # pragma: no cover + conn.send_data(self._h2_stream, chunk, end_stream=False) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + conn.end_stream(self._h2_stream) + return + + if isinstance(data, str): # str -> bytes + data = data.encode() + + try: + if isinstance(data, bytes): + conn.send_data(self._h2_stream, data, end_stream=True) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + else: + for chunk in data: + conn.send_data(self._h2_stream, chunk, end_stream=False) + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + conn.end_stream(self._h2_stream) + except TypeError: + raise TypeError( + "`data` should be str, bytes, iterable, or file. got %r" + % type(data) + ) + + def set_tunnel( + self, + host: str, + port: int | None = None, + headers: typing.Mapping[str, str] | None = None, + scheme: str = "http", + ) -> None: + raise NotImplementedError( + "HTTP/2 does not support setting up a tunnel through a proxy" + ) + + def getresponse( # type: ignore[override] + self, + ) -> HTTP2Response: + status = None + data = bytearray() + with self._h2_conn as conn: + end_stream = False + while not end_stream: + # TODO: Arbitrary read value. + if received_data := self.sock.recv(65535): + events = conn.receive_data(received_data) + for event in events: + if isinstance(event, h2.events.ResponseReceived): + headers = HTTPHeaderDict() + for header, value in event.headers: + if header == b":status": + status = int(value.decode()) + else: + headers.add( + header.decode("ascii"), value.decode("ascii") + ) + + elif isinstance(event, h2.events.DataReceived): + data += event.data + conn.acknowledge_received_data( + event.flow_controlled_length, event.stream_id + ) + + elif isinstance(event, h2.events.StreamEnded): + end_stream = True + + if data_to_send := conn.data_to_send(): + self.sock.sendall(data_to_send) + + assert status is not None + return HTTP2Response( + status=status, + headers=headers, + request_url=self._request_url, + data=bytes(data), + ) + + def request( # type: ignore[override] + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + *, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + **kwargs: typing.Any, + ) -> None: + """Send an HTTP/2 request""" + if "chunked" in kwargs: + # TODO this is often present from upstream. 
+ # raise NotImplementedError("`chunked` isn't supported with HTTP/2") + pass + + if self.sock is not None: + self.sock.settimeout(self.timeout) + + self.putrequest(method, url) + + headers = headers or {} + for k, v in headers.items(): + if k.lower() == "transfer-encoding" and v == "chunked": + continue + else: + self.putheader(k, v) + + if b"user-agent" not in dict(self._headers): + self.putheader(b"user-agent", _get_default_user_agent()) + + if body: + self.endheaders(message_body=body) + self.send(body) + else: + self.endheaders() + + def close(self) -> None: + with self._h2_conn as conn: + try: + conn.close_connection() + if data := conn.data_to_send(): + self.sock.sendall(data) + except Exception: + pass + + # Reset all our HTTP/2 connection state. + self._h2_conn = self._new_h2_conn() + self._h2_stream = None + self._headers = [] + + super().close() + + +class HTTP2Response(BaseHTTPResponse): + # TODO: This is a woefully incomplete response object, but works for non-streaming. + def __init__( + self, + status: int, + headers: HTTPHeaderDict, + request_url: str, + data: bytes, + decode_content: bool = False, # TODO: support decoding + ) -> None: + super().__init__( + status=status, + headers=headers, + # Following CPython, we map HTTP versions to major * 10 + minor integers + version=20, + version_string="HTTP/2", + # No reason phrase in HTTP/2 + reason=None, + decode_content=decode_content, + request_url=request_url, + ) + self._data = data + self.length_remaining = 0 + + @property + def data(self) -> bytes: + return self._data + + def get_redirect_location(self) -> None: + return None + + def close(self) -> None: + pass diff --git a/thirdparty/urllib3/http2/probe.py b/thirdparty/urllib3/http2/probe.py new file mode 100644 index 0000000..9ea9007 --- /dev/null +++ b/thirdparty/urllib3/http2/probe.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +import threading + + +class _HTTP2ProbeCache: + __slots__ = ( + "_lock", + "_cache_locks", + "_cache_values", + ) + + def __init__(self) -> None: + self._lock = threading.Lock() + self._cache_locks: dict[tuple[str, int], threading.RLock] = {} + self._cache_values: dict[tuple[str, int], bool | None] = {} + + def acquire_and_get(self, host: str, port: int) -> bool | None: + # By the end of this block we know that + # _cache_[values,locks] is available. + value = None + with self._lock: + key = (host, port) + try: + value = self._cache_values[key] + # If it's a known value we return right away. + if value is not None: + return value + except KeyError: + self._cache_locks[key] = threading.RLock() + self._cache_values[key] = None + + # If the value is unknown, we acquire the lock to signal + # to the requesting thread that the probe is in progress + # or that the current thread needs to return their findings. + key_lock = self._cache_locks[key] + key_lock.acquire() + try: + # If the by the time we get the lock the value has been + # updated we want to return the updated value. + value = self._cache_values[key] + + # In case an exception like KeyboardInterrupt is raised here. + except BaseException as e: # Defensive: + assert not isinstance(e, KeyError) # KeyError shouldn't be possible. + key_lock.release() + raise + + return value + + def set_and_release( + self, host: str, port: int, supports_http2: bool | None + ) -> None: + key = (host, port) + key_lock = self._cache_locks[key] + with key_lock: # Uses an RLock, so can be locked again from same thread. 
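# A sketch of the probe-cache handshake above: the first caller per
# (host, port) gets None back while holding that origin's lock, performs
# the probe, then publishes the result; later callers read the cached answer.
from urllib3.http2 import probe

supported = probe.acquire_and_get("example.com", 443)
if supported is None:
    # ... attempt the actual HTTP/2 handshake here ...
    probe.set_and_release("example.com", 443, True)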
+ if supports_http2 is None and self._cache_values[key] is not None: + raise ValueError( + "Cannot reset HTTP/2 support for origin after value has been set." + ) # Defensive: not expected in normal usage + + self._cache_values[key] = supports_http2 + key_lock.release() + + def _values(self) -> dict[tuple[str, int], bool | None]: + """This function is for testing purposes only. Gets the current state of the probe cache""" + with self._lock: + return {k: v for k, v in self._cache_values.items()} + + def _reset(self) -> None: + """This function is for testing purposes only. Reset the cache values""" + with self._lock: + self._cache_locks = {} + self._cache_values = {} + + +_HTTP2_PROBE_CACHE = _HTTP2ProbeCache() + +set_and_release = _HTTP2_PROBE_CACHE.set_and_release +acquire_and_get = _HTTP2_PROBE_CACHE.acquire_and_get +_values = _HTTP2_PROBE_CACHE._values +_reset = _HTTP2_PROBE_CACHE._reset + +__all__ = [ + "set_and_release", + "acquire_and_get", +] diff --git a/thirdparty/urllib3/packages/six/six.py b/thirdparty/urllib3/packages/six/six.py new file mode 100644 index 0000000..3de5969 --- /dev/null +++ b/thirdparty/urllib3/packages/six/six.py @@ -0,0 +1,1003 @@ +# Copyright (c) 2010-2024 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Utilities for writing code that runs on Python 2 and 3""" + +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson " +__version__ = "1.17.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). 
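# A tiny sketch of the compatibility constants the vendored six module
# above defines; on a modern interpreter PY3 is true and the type aliases
# map straight to the builtins.
import sys
import six  # or the vendored thirdparty copy

assert six.PY3 and six.text_type is str and six.binary_type is bytes
print(six.MAXSIZE == sys.maxsize)  # True on Python 3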
+ class X(object): + + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + +if PY34: + from importlib.util import spec_from_loader +else: + spec_from_loader = None + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. + delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." 
+ fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def find_spec(self, fullname, path, target=None): + if fullname in self.known_modules: + return spec_from_loader(fullname, self) + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + 
MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." 
+ attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", 
"urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), +] +if sys.version_info[:2] < (3, 14): + _urllib_request_moved_attributes.extend( + [ + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + ] + ) +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + 
".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, 
"Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + del io + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" + _assertNotRegex = "assertNotRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +def assertNotRegex(self, *args, **kwargs): + return getattr(self, _assertNotRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + try: + raise tp, value, tb + finally: + tb = None +""") + + +if sys.version_info[:2] > (3,): + exec_("""def raise_from(value, from_value): + try: + raise value from from_value + finally: + value = None +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. 
+ if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + # This does exactly the same what the :func:`py3:functools.update_wrapper` + # function does on Python versions after 3.2. It sets the ``__wrapped__`` + # attribute on ``wrapper`` object and it doesn't raise an error if any of + # the attributes mentioned in ``assigned`` and ``updated`` are missing on + # ``wrapped`` object. + def _update_wrapper(wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + continue + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + wrapper.__wrapped__ = wrapped + return wrapper + _update_wrapper.__doc__ = functools.update_wrapper.__doc__ + + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + return functools.partial(_update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) + wraps.__doc__ = functools.wraps.__doc__ + +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + + def __new__(cls, name, this_bases, d): + if sys.version_info[:2] >= (3, 7): + # This version introduced PEP 560 that requires a bit + # of extra care (we mimic what is done by __build_class__). 
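+                # types.resolve_bases() (PEP 560) replaces any base entry that
+                # defines __mro_entries__ (e.g. typing.Generic[T]) with the
+                # concrete class it stands for, before the metaclass is called.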
+ resolved_bases = types.resolve_bases(bases) + if resolved_bases is not bases: + d['__orig_bases__'] = bases + else: + resolved_bases = bases + return meta(name, resolved_bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + if hasattr(cls, '__qualname__'): + orig_vars['__qualname__'] = cls.__qualname__ + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def ensure_binary(s, encoding='utf-8', errors='strict'): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, binary_type): + return s + if isinstance(s, text_type): + return s.encode(encoding, errors) + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding='utf-8', errors='strict'): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + # Optimization: Fast return for the common case. + if type(s) is str: + return s + if PY2 and isinstance(s, text_type): + return s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + return s.decode(encoding, errors) + elif not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + return s + + +def ensure_text(s, encoding='utf-8', errors='strict'): + """Coerce *s* to six.text_type. + + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def python_2_unicode_compatible(klass): + """ + A class decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) 
+if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/thirdparty/urllib3/poolmanager.py b/thirdparty/urllib3/poolmanager.py index e2bd3bd..085d1db 100644 --- a/thirdparty/urllib3/poolmanager.py +++ b/thirdparty/urllib3/poolmanager.py @@ -1,24 +1,33 @@ -from __future__ import absolute_import -import collections +from __future__ import annotations + import functools import logging +import typing import warnings +from types import TracebackType +from urllib.parse import urljoin -from ._collections import RecentlyUsedContainer -from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import port_by_scheme +from ._collections import HTTPHeaderDict, RecentlyUsedContainer +from ._request_methods import RequestMethods +from .connection import ProxyConfig +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme from .exceptions import ( LocationValueError, MaxRetryError, ProxySchemeUnknown, - InvalidProxyConfigurationWarning, + URLSchemeUnknown, ) -from .packages import six -from .packages.six.moves.urllib.parse import urljoin -from .request import RequestMethods -from .util.url import parse_url +from .response import BaseHTTPResponse +from .util.connection import _TYPE_SOCKET_OPTIONS +from .util.proxy import connection_requires_http_tunnel from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import Url, parse_url + +if typing.TYPE_CHECKING: + import ssl + from typing_extensions import Self __all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] @@ -30,48 +39,62 @@ "cert_file", "cert_reqs", "ca_certs", + "ca_cert_data", "ssl_version", + "ssl_minimum_version", + "ssl_maximum_version", "ca_cert_dir", "ssl_context", "key_password", + "server_hostname", ) +# Default value for `blocksize` - a new parameter introduced to +# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7 +_DEFAULT_BLOCKSIZE = 16384 -# All known keyword arguments that could be provided to the pool manager, its -# pools, or the underlying connections. This is used to construct a pool key. -_key_fields = ( - "key_scheme", # str - "key_host", # str - "key_port", # int - "key_timeout", # int or float or Timeout - "key_retries", # int or Retry - "key_strict", # bool - "key_block", # bool - "key_source_address", # str - "key_key_file", # str - "key_key_password", # str - "key_cert_file", # str - "key_cert_reqs", # str - "key_ca_certs", # str - "key_ssl_version", # str - "key_ca_cert_dir", # str - "key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext - "key_maxsize", # int - "key_headers", # dict - "key__proxy", # parsed proxy url - "key__proxy_headers", # dict - "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples - "key__socks_options", # dict - "key_assert_hostname", # bool or string - "key_assert_fingerprint", # str - "key_server_hostname", # str -) -#: The namedtuple class used to construct keys for the connection pool. 
-#: All custom key schemes should include the fields in this key at a minimum. -PoolKey = collections.namedtuple("PoolKey", _key_fields) +class PoolKey(typing.NamedTuple): + """ + All known keyword arguments that could be provided to the pool manager, its + pools, or the underlying connections. + All custom key schemes should include the fields in this key at a minimum. + """ -def _default_key_normalizer(key_class, request_context): + key_scheme: str + key_host: str + key_port: int | None + key_timeout: Timeout | float | int | None + key_retries: Retry | bool | int | None + key_block: bool | None + key_source_address: tuple[str, int] | None + key_key_file: str | None + key_key_password: str | None + key_cert_file: str | None + key_cert_reqs: str | None + key_ca_certs: str | None + key_ca_cert_data: str | bytes | None + key_ssl_version: int | str | None + key_ssl_minimum_version: ssl.TLSVersion | None + key_ssl_maximum_version: ssl.TLSVersion | None + key_ca_cert_dir: str | None + key_ssl_context: ssl.SSLContext | None + key_maxsize: int | None + key_headers: frozenset[tuple[str, str]] | None + key__proxy: Url | None + key__proxy_headers: frozenset[tuple[str, str]] | None + key__proxy_config: ProxyConfig | None + key_socket_options: _TYPE_SOCKET_OPTIONS | None + key__socks_options: frozenset[tuple[str, str]] | None + key_assert_hostname: bool | str | None + key_assert_fingerprint: str | None + key_server_hostname: str | None + key_blocksize: int | None + + +def _default_key_normalizer( + key_class: type[PoolKey], request_context: dict[str, typing.Any] +) -> PoolKey: """ Create a pool key out of a request context dictionary. @@ -117,6 +140,10 @@ def _default_key_normalizer(key_class, request_context): if field not in context: context[field] = None + # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context + if context.get("key_blocksize") is None: + context["key_blocksize"] = _DEFAULT_BLOCKSIZE + return key_class(**context) @@ -149,40 +176,65 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example:: + Example: + + .. code-block:: python - >>> manager = PoolManager(num_pools=2) - >>> r = manager.request('GET', 'http://google.com/') - >>> r = manager.request('GET', 'http://google.com/mail') - >>> r = manager.request('GET', 'http://yahoo.com/') - >>> len(manager.pools) - 2 + import urllib3 + + http = urllib3.PoolManager(num_pools=2) + + resp1 = http.request("GET", "https://google.com/") + resp2 = http.request("GET", "https://google.com/mail") + resp3 = http.request("GET", "https://yahoo.com/") + + print(len(http.pools)) + # 2 """ - proxy = None + proxy: Url | None = None + proxy_config: ProxyConfig | None = None - def __init__(self, num_pools=10, headers=None, **connection_pool_kw): - RequestMethods.__init__(self, headers) + def __init__( + self, + num_pools: int = 10, + headers: typing.Mapping[str, str] | None = None, + **connection_pool_kw: typing.Any, + ) -> None: + super().__init__(headers) self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + + self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool] + self.pools = RecentlyUsedContainer(num_pools) # Locally set the pool classes and keys so other PoolManagers can # override them. 
self.pool_classes_by_scheme = pool_classes_by_scheme self.key_fn_by_scheme = key_fn_by_scheme.copy() - def __enter__(self): + def __enter__(self) -> Self: return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> typing.Literal[False]: self.clear() # Return False to re-raise any potential exceptions return False - def _new_pool(self, scheme, host, port, request_context=None): + def _new_pool( + self, + scheme: str, + host: str, + port: int, + request_context: dict[str, typing.Any] | None = None, + ) -> HTTPConnectionPool: """ - Create a new :class:`ConnectionPool` based on host, port, scheme, and + Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and any additional pool keyword arguments. If ``request_context`` is provided, it is provided as keyword arguments @@ -190,10 +242,15 @@ def _new_pool(self, scheme, host, port, request_context=None): connection pools handed out by :meth:`connection_from_url` and companion methods. It is intended to be overridden for customization. """ - pool_cls = self.pool_classes_by_scheme[scheme] + pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme] if request_context is None: request_context = self.connection_pool_kw.copy() + # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly + # set to 'None' in the request_context. + if request_context.get("blocksize") is None: + request_context["blocksize"] = _DEFAULT_BLOCKSIZE + # Although the context has everything necessary to create the pool, # this function has historically only used the scheme, host, and port # in the positional args. When an API change is acceptable these can @@ -207,7 +264,7 @@ def _new_pool(self, scheme, host, port, request_context=None): return pool_cls(host, port, **request_context) - def clear(self): + def clear(self) -> None: """ Empty our store of pools and direct them all to close. @@ -216,9 +273,15 @@ def clear(self): """ self.pools.clear() - def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): + def connection_from_host( + self, + host: str | None, + port: int | None = None, + scheme: str | None = "http", + pool_kwargs: dict[str, typing.Any] | None = None, + ) -> HTTPConnectionPool: """ - Get a :class:`ConnectionPool` based on the host, port, and scheme. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme. If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is @@ -239,22 +302,36 @@ def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None) return self.connection_from_context(request_context) - def connection_from_context(self, request_context): + def connection_from_context( + self, request_context: dict[str, typing.Any] + ) -> HTTPConnectionPool: """ - Get a :class:`ConnectionPool` based on the request context. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context. ``request_context`` must at least contain the ``scheme`` key and its value must be a key in ``key_fn_by_scheme`` instance variable. """ + if "strict" in request_context: + warnings.warn( + "The 'strict' parameter is no longer needed on Python 3+. 
" + "This will raise an error in urllib3 v2.1.0.", + DeprecationWarning, + ) + request_context.pop("strict") + scheme = request_context["scheme"].lower() - pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key_constructor = self.key_fn_by_scheme.get(scheme) + if not pool_key_constructor: + raise URLSchemeUnknown(scheme) pool_key = pool_key_constructor(request_context) return self.connection_from_pool_key(pool_key, request_context=request_context) - def connection_from_pool_key(self, pool_key, request_context=None): + def connection_from_pool_key( + self, pool_key: PoolKey, request_context: dict[str, typing.Any] + ) -> HTTPConnectionPool: """ - Get a :class:`ConnectionPool` based on the provided pool key. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key. ``pool_key`` should be a namedtuple that only contains immutable objects. At a minimum it must have the ``scheme``, ``host``, and @@ -276,7 +353,9 @@ def connection_from_pool_key(self, pool_key, request_context=None): return pool - def connection_from_url(self, url, pool_kwargs=None): + def connection_from_url( + self, url: str, pool_kwargs: dict[str, typing.Any] | None = None + ) -> HTTPConnectionPool: """ Similar to :func:`urllib3.connectionpool.connection_from_url`. @@ -292,7 +371,9 @@ def connection_from_url(self, url, pool_kwargs=None): u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs ) - def _merge_pool_kwargs(self, override): + def _merge_pool_kwargs( + self, override: dict[str, typing.Any] | None + ) -> dict[str, typing.Any]: """ Merge a dictionary of override values for self.connection_pool_kw. @@ -312,9 +393,24 @@ def _merge_pool_kwargs(self, override): base_pool_kwargs[key] = value return base_pool_kwargs - def urlopen(self, method, url, redirect=True, **kw): + def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool: + """ + Indicates if the proxy requires the complete destination URL in the + request. Normally this is only needed when not using an HTTP CONNECT + tunnel. + """ + if self.proxy is None: + return False + + return not connection_requires_http_tunnel( + self.proxy, self.proxy_config, parsed_url.scheme + ) + + def urlopen( # type: ignore[override] + self, method: str, url: str, redirect: bool = True, **kw: typing.Any + ) -> BaseHTTPResponse: """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + Same as :meth:`urllib3.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri portion of the ``url``. @@ -322,15 +418,26 @@ def urlopen(self, method, url, redirect=True, **kw): :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ u = parse_url(url) + + if u.scheme is None: + warnings.warn( + "URLs without a scheme (ie 'https://') are deprecated and will raise an error " + "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs " + "start with 'https://' or 'http://'. 
Read more in this issue: " + "https://github.com/urllib3/urllib3/issues/2920", + category=DeprecationWarning, + stacklevel=2, + ) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) kw["assert_same_host"] = False kw["redirect"] = False if "headers" not in kw: - kw["headers"] = self.headers.copy() + kw["headers"] = self.headers - if self.proxy is not None and u.scheme == "http": + if self._proxy_requires_url_absolute_form(u): response = conn.urlopen(method, url, **kw) else: response = conn.urlopen(method, u.request_uri, **kw) @@ -342,9 +449,12 @@ def urlopen(self, method, url, redirect=True, **kw): # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) - # RFC 7231, Section 6.4.4 if response.status == 303: + # Change the method according to RFC 9110, Section 15.4.4. method = "GET" + # And lose the body not to transfer anything sensitive. + kw["body"] = None + kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change() retries = kw.get("retries") if not isinstance(retries, Retry): @@ -356,10 +466,11 @@ def urlopen(self, method, url, redirect=True, **kw): if retries.remove_headers_on_redirect and not conn.is_same_host( redirect_location ): - headers = list(six.iterkeys(kw["headers"])) - for header in headers: + new_headers = kw["headers"].copy() + for header in kw["headers"]: if header.lower() in retries.remove_headers_on_redirect: - kw["headers"].pop(header, None) + new_headers.pop(header, None) + kw["headers"] = new_headers try: retries = retries.increment(method, url, response=response, _pool=conn) @@ -392,61 +503,107 @@ class ProxyManager(PoolManager): HTTPS/CONNECT case they are sent only once. Could be used for proxy authentication. + :param proxy_ssl_context: + The proxy SSL context is used to establish the TLS connection to the + proxy when using HTTPS proxies. + + :param use_forwarding_for_https: + (Defaults to False) If set to True will forward requests to the HTTPS + proxy to be made on behalf of the client instead of creating a TLS + tunnel via the CONNECT method. **Enabling this flag means that request + and response headers and content will be visible from the HTTPS proxy** + whereas tunneling keeps request and response headers and content + private. IP address, target hostname, SNI, and port are always visible + to an HTTPS proxy even when this flag is disabled. + + :param proxy_assert_hostname: + The hostname of the certificate to verify against. + + :param proxy_assert_fingerprint: + The fingerprint of the certificate to verify against. + Example: - >>> proxy = urllib3.ProxyManager('http://localhost:3128/') - >>> r1 = proxy.request('GET', 'http://google.com/') - >>> r2 = proxy.request('GET', 'http://httpbin.org/') - >>> len(proxy.pools) - 1 - >>> r3 = proxy.request('GET', 'https://httpbin.org/') - >>> r4 = proxy.request('GET', 'https://twitter.com/') - >>> len(proxy.pools) - 3 + + .. 
code-block:: python + + import urllib3 + + proxy = urllib3.ProxyManager("https://localhost:3128/") + + resp1 = proxy.request("GET", "https://google.com/") + resp2 = proxy.request("GET", "https://httpbin.org/") + + print(len(proxy.pools)) + # 1 + + resp3 = proxy.request("GET", "https://httpbin.org/") + resp4 = proxy.request("GET", "https://twitter.com/") + + print(len(proxy.pools)) + # 3 """ def __init__( self, - proxy_url, - num_pools=10, - headers=None, - proxy_headers=None, - **connection_pool_kw - ): - + proxy_url: str, + num_pools: int = 10, + headers: typing.Mapping[str, str] | None = None, + proxy_headers: typing.Mapping[str, str] | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + use_forwarding_for_https: bool = False, + proxy_assert_hostname: None | str | typing.Literal[False] = None, + proxy_assert_fingerprint: str | None = None, + **connection_pool_kw: typing.Any, + ) -> None: if isinstance(proxy_url, HTTPConnectionPool): - proxy_url = "%s://%s:%i" % ( - proxy_url.scheme, - proxy_url.host, - proxy_url.port, - ) - proxy = parse_url(proxy_url) - if not proxy.port: - port = port_by_scheme.get(proxy.scheme, 80) - proxy = proxy._replace(port=port) + str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}" + else: + str_proxy_url = proxy_url + proxy = parse_url(str_proxy_url) if proxy.scheme not in ("http", "https"): raise ProxySchemeUnknown(proxy.scheme) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + self.proxy = proxy self.proxy_headers = proxy_headers or {} + self.proxy_ssl_context = proxy_ssl_context + self.proxy_config = ProxyConfig( + proxy_ssl_context, + use_forwarding_for_https, + proxy_assert_hostname, + proxy_assert_fingerprint, + ) connection_pool_kw["_proxy"] = self.proxy connection_pool_kw["_proxy_headers"] = self.proxy_headers + connection_pool_kw["_proxy_config"] = self.proxy_config - super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) + super().__init__(num_pools, headers, **connection_pool_kw) - def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): + def connection_from_host( + self, + host: str | None, + port: int | None = None, + scheme: str | None = "http", + pool_kwargs: dict[str, typing.Any] | None = None, + ) -> HTTPConnectionPool: if scheme == "https": - return super(ProxyManager, self).connection_from_host( + return super().connection_from_host( host, port, scheme, pool_kwargs=pool_kwargs ) - return super(ProxyManager, self).connection_from_host( - self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs + return super().connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs # type: ignore[union-attr] ) - def _set_proxy_headers(self, url, headers=None): + def _set_proxy_headers( + self, url: str, headers: typing.Mapping[str, str] | None = None + ) -> typing.Mapping[str, str]: """ Sets headers needed by proxies: specifically, the Accept and Host headers. Only sets headers not provided by the user. @@ -461,32 +618,20 @@ def _set_proxy_headers(self, url, headers=None): headers_.update(headers) return headers_ - def _validate_proxy_scheme_url_selection(self, url_scheme): - if url_scheme == "https" and self.proxy.scheme == "https": - warnings.warn( - "Your proxy configuration specified an HTTPS scheme for the proxy. " - "Are you sure you want to use HTTPS to contact the proxy? " - "This most likely indicates an error in your configuration. 
" - "Read this issue for more info: " - "https://github.com/urllib3/urllib3/issues/1850", - InvalidProxyConfigurationWarning, - stacklevel=3, - ) - - def urlopen(self, method, url, redirect=True, **kw): + def urlopen( # type: ignore[override] + self, method: str, url: str, redirect: bool = True, **kw: typing.Any + ) -> BaseHTTPResponse: "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." u = parse_url(url) - self._validate_proxy_scheme_url_selection(u.scheme) - - if u.scheme == "http": - # For proxied HTTPS requests, httplib sets the necessary headers - # on the CONNECT to the proxy. For HTTP, we'll definitely - # need to set 'Host' at the very least. + if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme): + # For connections using HTTP CONNECT, httplib sets the necessary + # headers on the CONNECT to the proxy. If we're not using CONNECT, + # we'll definitely need to set 'Host' at the very least. headers = kw.get("headers", self.headers) kw["headers"] = self._set_proxy_headers(url, headers) - return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) + return super().urlopen(method, url, redirect=redirect, **kw) -def proxy_from_url(url, **kw): +def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager: return ProxyManager(proxy_url=url, **kw) diff --git a/thirdparty/urllib3/py.typed b/thirdparty/urllib3/py.typed new file mode 100644 index 0000000..5f3ea3d --- /dev/null +++ b/thirdparty/urllib3/py.typed @@ -0,0 +1,2 @@ +# Instruct type checkers to look for inline type annotations in this package. +# See PEP 561. diff --git a/thirdparty/urllib3/response.py b/thirdparty/urllib3/response.py index 43a85ac..66c6a68 100644 --- a/thirdparty/urllib3/response.py +++ b/thirdparty/urllib3/response.py @@ -1,45 +1,88 @@ -from __future__ import absolute_import -from contextlib import contextmanager -import zlib +from __future__ import annotations + +import collections import io +import json as _json import logging +import re +import socket +import sys +import typing +import warnings +import zlib +from contextlib import contextmanager +from http.client import HTTPMessage as _HttplibHTTPMessage +from http.client import HTTPResponse as _HttplibHTTPResponse from socket import timeout as SocketTimeout -from socket import error as SocketError + +if typing.TYPE_CHECKING: + from ._base_connection import BaseHTTPConnection try: - import brotli + try: + import brotlicffi as brotli # type: ignore[import-not-found] + except ImportError: + import brotli # type: ignore[import-not-found] except ImportError: brotli = None +try: + import zstandard as zstd +except (AttributeError, ImportError, ValueError): # Defensive: + HAS_ZSTD = False +else: + # The package 'zstandard' added the 'eof' property starting + # in v0.18.0 which we require to ensure a complete and + # valid zstd stream was fed into the ZstdDecoder. + # See: https://github.com/urllib3/urllib3/pull/2624 + _zstd_version = tuple( + map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr] + ) + if _zstd_version < (0, 18): # Defensive: + HAS_ZSTD = False + else: + HAS_ZSTD = True + +from . 
import util +from ._base_connection import _TYPE_BODY from ._collections import HTTPHeaderDict +from .connection import BaseSSLError, HTTPConnection, HTTPException from .exceptions import ( BodyNotHttplibCompatible, - ProtocolError, DecodeError, - ReadTimeoutError, - ResponseNotChunked, + HTTPError, IncompleteRead, + InvalidChunkLength, InvalidHeader, - HTTPError, + ProtocolError, + ReadTimeoutError, + ResponseNotChunked, + SSLError, ) -from .packages.six import string_types as basestring, PY3 -from .packages.six.moves import http_client as httplib -from .connection import HTTPException, BaseSSLError from .util.response import is_fp_closed, is_response_to_head +from .util.retry import Retry + +if typing.TYPE_CHECKING: + from .connectionpool import HTTPConnectionPool log = logging.getLogger(__name__) -class DeflateDecoder(object): - def __init__(self): +class ContentDecoder: + def decompress(self, data: bytes) -> bytes: + raise NotImplementedError() + + def flush(self) -> bytes: + raise NotImplementedError() + + +class DeflateDecoder(ContentDecoder): + def __init__(self) -> None: self._first_try = True self._data = b"" self._obj = zlib.decompressobj() - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): + def decompress(self, data: bytes) -> bytes: if not data: return data @@ -51,7 +94,7 @@ def decompress(self, data): decompressed = self._obj.decompress(data) if decompressed: self._first_try = False - self._data = None + self._data = None # type: ignore[assignment] return decompressed except zlib.error: self._first_try = False @@ -59,25 +102,24 @@ def decompress(self, data): try: return self.decompress(self._data) finally: - self._data = None + self._data = None # type: ignore[assignment] + def flush(self) -> bytes: + return self._obj.flush() -class GzipDecoderState(object): +class GzipDecoderState: FIRST_MEMBER = 0 OTHER_MEMBERS = 1 SWALLOW_DATA = 2 -class GzipDecoder(object): - def __init__(self): +class GzipDecoder(ContentDecoder): + def __init__(self) -> None: self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) self._state = GzipDecoderState.FIRST_MEMBER - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): + def decompress(self, data: bytes) -> bytes: ret = bytearray() if self._state == GzipDecoderState.SWALLOW_DATA or not data: return bytes(ret) @@ -98,27 +140,53 @@ def decompress(self, data): self._state = GzipDecoderState.OTHER_MEMBERS self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + def flush(self) -> bytes: + return self._obj.flush() + if brotli is not None: - class BrotliDecoder(object): + class BrotliDecoder(ContentDecoder): # Supports both 'brotlipy' and 'Brotli' packages # since they share an import name. 
The top branches # are for 'brotlipy' and bottom branches for 'Brotli' - def __init__(self): + def __init__(self) -> None: self._obj = brotli.Decompressor() if hasattr(self._obj, "decompress"): - self.decompress = self._obj.decompress + setattr(self, "decompress", self._obj.decompress) else: - self.decompress = self._obj.process + setattr(self, "decompress", self._obj.process) - def flush(self): + def flush(self) -> bytes: if hasattr(self._obj, "flush"): - return self._obj.flush() + return self._obj.flush() # type: ignore[no-any-return] return b"" -class MultiDecoder(object): +if HAS_ZSTD: + + class ZstdDecoder(ContentDecoder): + def __init__(self) -> None: + self._obj = zstd.ZstdDecompressor().decompressobj() + + def decompress(self, data: bytes) -> bytes: + if not data: + return b"" + data_parts = [self._obj.decompress(data)] + while self._obj.eof and self._obj.unused_data: + unused_data = self._obj.unused_data + self._obj = zstd.ZstdDecompressor().decompressobj() + data_parts.append(self._obj.decompress(unused_data)) + return b"".join(data_parts) + + def flush(self) -> bytes: + ret = self._obj.flush() # note: this is a no-op + if not self._obj.eof: + raise DecodeError("Zstandard data is incomplete") + return ret + + +class MultiDecoder(ContentDecoder): """ From RFC7231: If one or more encodings have been applied to a representation, the @@ -127,42 +195,363 @@ class MultiDecoder(object): they were applied. """ - def __init__(self, modes): + def __init__(self, modes: str) -> None: self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")] - def flush(self): + def flush(self) -> bytes: return self._decoders[0].flush() - def decompress(self, data): + def decompress(self, data: bytes) -> bytes: for d in reversed(self._decoders): data = d.decompress(data) return data -def _get_decoder(mode): +def _get_decoder(mode: str) -> ContentDecoder: if "," in mode: return MultiDecoder(mode) - if mode == "gzip": + # According to RFC 9110 section 8.4.1.3, recipients should + # consider x-gzip equivalent to gzip + if mode in ("gzip", "x-gzip"): return GzipDecoder() if brotli is not None and mode == "br": return BrotliDecoder() + if HAS_ZSTD and mode == "zstd": + return ZstdDecoder() + return DeflateDecoder() -class HTTPResponse(io.IOBase): +class BytesQueueBuffer: + """Memory-efficient bytes buffer + + To return decoded data in read() and still follow the BufferedIOBase API, we need a + buffer to always return the correct amount of bytes. + + This buffer should be filled using calls to put() + + Our maximum memory usage is determined by the sum of the size of: + + * self.buffer, which contains the full data + * the largest chunk that we will copy in get() + + The worst case scenario is a single chunk, in which case we'll make a full copy of + the data inside get(). 
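+
+    Example::
+
+        buf = BytesQueueBuffer()
+        buf.put(b"hello")
+        buf.put(b"world")
+        assert buf.get(7) == b"hellowo"  # spans both queued chunks
+        assert len(buf) == 3  # b"rld" stays buffered for the next get()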
+ """ + + def __init__(self) -> None: + self.buffer: typing.Deque[bytes] = collections.deque() + self._size: int = 0 + + def __len__(self) -> int: + return self._size + + def put(self, data: bytes) -> None: + self.buffer.append(data) + self._size += len(data) + + def get(self, n: int) -> bytes: + if n == 0: + return b"" + elif not self.buffer: + raise RuntimeError("buffer is empty") + elif n < 0: + raise ValueError("n should be > 0") + + fetched = 0 + ret = io.BytesIO() + while fetched < n: + remaining = n - fetched + chunk = self.buffer.popleft() + chunk_length = len(chunk) + if remaining < chunk_length: + left_chunk, right_chunk = chunk[:remaining], chunk[remaining:] + ret.write(left_chunk) + self.buffer.appendleft(right_chunk) + self._size -= remaining + break + else: + ret.write(chunk) + self._size -= chunk_length + fetched += chunk_length + + if not self.buffer: + break + + return ret.getvalue() + + def get_all(self) -> bytes: + buffer = self.buffer + if not buffer: + assert self._size == 0 + return b"" + if len(buffer) == 1: + result = buffer.pop() + else: + ret = io.BytesIO() + ret.writelines(buffer.popleft() for _ in range(len(buffer))) + result = ret.getvalue() + self._size = 0 + return result + + +class BaseHTTPResponse(io.IOBase): + CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"] + if brotli is not None: + CONTENT_DECODERS += ["br"] + if HAS_ZSTD: + CONTENT_DECODERS += ["zstd"] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error) + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.error,) + + if HAS_ZSTD: + DECODER_ERROR_CLASSES += (zstd.ZstdError,) + + def __init__( + self, + *, + headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, + status: int, + version: int, + version_string: str, + reason: str | None, + decode_content: bool, + request_url: str | None, + retries: Retry | None = None, + ) -> None: + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) # type: ignore[arg-type] + self.status = status + self.version = version + self.version_string = version_string + self.reason = reason + self.decode_content = decode_content + self._has_decoded_content = False + self._request_url: str | None = request_url + self.retries = retries + + self.chunked = False + tr_enc = self.headers.get("transfer-encoding", "").lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + self._decoder: ContentDecoder | None = None + self.length_remaining: int | None + + def get_redirect_location(self) -> str | None | typing.Literal[False]: + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get("location") + return False + + @property + def data(self) -> bytes: + raise NotImplementedError() + + def json(self) -> typing.Any: + """ + Deserializes the body of the HTTP response as a Python object. + + The body of the HTTP response must be encoded using UTF-8, as per + `RFC 8529 Section 8.1 `_. + + To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to + your custom decoder instead. 
+ + If the body of the HTTP response is not decodable to UTF-8, a + `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a + valid JSON document, a `json.JSONDecodeError` will be raised. + + Read more :ref:`here `. + + :returns: The body of the HTTP response as a Python object. + """ + data = self.data.decode("utf-8") + return _json.loads(data) + + @property + def url(self) -> str | None: + raise NotImplementedError() + + @url.setter + def url(self, url: str | None) -> None: + raise NotImplementedError() + + @property + def connection(self) -> BaseHTTPConnection | None: + raise NotImplementedError() + + @property + def retries(self) -> Retry | None: + return self._retries + + @retries.setter + def retries(self, retries: Retry | None) -> None: + # Override the request_url if retries has a redirect location. + if retries is not None and retries.history: + self.url = retries.history[-1].redirect_location + self._retries = retries + + def stream( + self, amt: int | None = 2**16, decode_content: bool | None = None + ) -> typing.Iterator[bytes]: + raise NotImplementedError() + + def read( + self, + amt: int | None = None, + decode_content: bool | None = None, + cache_content: bool = False, + ) -> bytes: + raise NotImplementedError() + + def read1( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> bytes: + raise NotImplementedError() + + def read_chunked( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> typing.Iterator[bytes]: + raise NotImplementedError() + + def release_conn(self) -> None: + raise NotImplementedError() + + def drain_conn(self) -> None: + raise NotImplementedError() + + def shutdown(self) -> None: + raise NotImplementedError() + + def close(self) -> None: + raise NotImplementedError() + + def _init_decoder(self) -> None: + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get("content-encoding", "").lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + elif "," in content_encoding: + encodings = [ + e.strip() + for e in content_encoding.split(",") + if e.strip() in self.CONTENT_DECODERS + ] + if encodings: + self._decoder = _get_decoder(content_encoding) + + def _decode( + self, data: bytes, decode_content: bool | None, flush_decoder: bool + ) -> bytes: + """ + Decode the data passed in and potentially flush the decoder. + """ + if not decode_content: + if self._has_decoded_content: + raise RuntimeError( + "Calling read(decode_content=False) is not supported after " + "read(decode_content=True) was called." + ) + return data + + try: + if self._decoder: + data = self._decoder.decompress(data) + self._has_decoded_content = True + except self.DECODER_ERROR_CLASSES as e: + content_encoding = self.headers.get("content-encoding", "").lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, + e, + ) from e + if flush_decoder: + data += self._flush_decoder() + + return data + + def _flush_decoder(self) -> bytes: + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. 
+ """ + if self._decoder: + return self._decoder.decompress(b"") + self._decoder.flush() + return b"" + + # Compatibility methods for `io` module + def readinto(self, b: bytearray) -> int: + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[: len(temp)] = temp + return len(temp) + + # Compatibility methods for http.client.HTTPResponse + def getheaders(self) -> HTTPHeaderDict: + warnings.warn( + "HTTPResponse.getheaders() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", + category=DeprecationWarning, + stacklevel=2, + ) + return self.headers + + def getheader(self, name: str, default: str | None = None) -> str | None: + warnings.warn( + "HTTPResponse.getheader() is deprecated and will be removed " + "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).", + category=DeprecationWarning, + stacklevel=2, + ) + return self.headers.get(name, default) + + # Compatibility method for http.cookiejar + def info(self) -> HTTPHeaderDict: + return self.headers + + def geturl(self) -> str | None: + return self.url + + +class HTTPResponse(BaseHTTPResponse): """ HTTP Response container. - Backwards-compatible to httplib's HTTPResponse but the response ``body`` is + Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is loaded and decoded on-demand when the ``data`` property is accessed. This class is also compatible with the Python standard library's :mod:`io` module, and can hence be treated as a readable object in the context of that framework. - Extra parameters for behaviour not present in httplib.HTTPResponse: + Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: :param preload_content: If True, the response's body will be preloaded during construction. @@ -172,7 +561,7 @@ class is also compatible with the Python standard library's :mod:`io` 'content-encoding' header. :param original_response: - When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` object, it's convenient to include the original for debug purposes. It's otherwise unused. @@ -185,99 +574,78 @@ class is also compatible with the Python standard library's :mod:`io` value of Content-Length header, if present. Otherwise, raise error. 
""" - CONTENT_DECODERS = ["gzip", "deflate"] - if brotli is not None: - CONTENT_DECODERS += ["br"] - REDIRECT_STATUSES = [301, 302, 303, 307, 308] - def __init__( self, - body="", - headers=None, - status=0, - version=0, - reason=None, - strict=0, - preload_content=True, - decode_content=True, - original_response=None, - pool=None, - connection=None, - msg=None, - retries=None, - enforce_content_length=False, - request_method=None, - request_url=None, - auto_close=True, - ): + body: _TYPE_BODY = "", + headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, + status: int = 0, + version: int = 0, + version_string: str = "HTTP/?", + reason: str | None = None, + preload_content: bool = True, + decode_content: bool = True, + original_response: _HttplibHTTPResponse | None = None, + pool: HTTPConnectionPool | None = None, + connection: HTTPConnection | None = None, + msg: _HttplibHTTPMessage | None = None, + retries: Retry | None = None, + enforce_content_length: bool = True, + request_method: str | None = None, + request_url: str | None = None, + auto_close: bool = True, + sock_shutdown: typing.Callable[[int], None] | None = None, + ) -> None: + super().__init__( + headers=headers, + status=status, + version=version, + version_string=version_string, + reason=reason, + decode_content=decode_content, + request_url=request_url, + retries=retries, + ) - if isinstance(headers, HTTPHeaderDict): - self.headers = headers - else: - self.headers = HTTPHeaderDict(headers) - self.status = status - self.version = version - self.reason = reason - self.strict = strict - self.decode_content = decode_content - self.retries = retries self.enforce_content_length = enforce_content_length self.auto_close = auto_close - self._decoder = None self._body = None - self._fp = None + self._fp: _HttplibHTTPResponse | None = None self._original_response = original_response self._fp_bytes_read = 0 self.msg = msg - self._request_url = request_url - if body and isinstance(body, (basestring, bytes)): + if body and isinstance(body, (str, bytes)): self._body = body self._pool = pool self._connection = connection if hasattr(body, "read"): - self._fp = body + self._fp = body # type: ignore[assignment] + self._sock_shutdown = sock_shutdown # Are we using the chunked-style of transfer encoding? - self.chunked = False - self.chunk_left = None - tr_enc = self.headers.get("transfer-encoding", "").lower() - # Don't incur the penalty of creating a list and then discarding it - encodings = (enc.strip() for enc in tr_enc.split(",")) - if "chunked" in encodings: - self.chunked = True + self.chunk_left: int | None = None # Determine length of response self.length_remaining = self._init_length(request_method) + # Used to return the correct amount of bytes for partial read()s + self._decoded_buffer = BytesQueueBuffer() + # If requested, preload the body. if preload_content and not self._body: self._body = self.read(decode_content=decode_content) - def get_redirect_location(self): - """ - Should we redirect and where to? - - :returns: Truthy redirect location string if we got a redirect status - code and valid location. ``None`` if redirect status and no - location. ``False`` if not a redirect status code. 
- """ - if self.status in self.REDIRECT_STATUSES: - return self.headers.get("location") - - return False - - def release_conn(self): + def release_conn(self) -> None: if not self._pool or not self._connection: - return + return None self._pool._put_conn(self._connection) self._connection = None - def drain_conn(self): + def drain_conn(self) -> None: """ Read and discard any remaining HTTP response data in the response connection. @@ -285,40 +653,43 @@ def drain_conn(self): """ try: self.read() - except (HTTPError, SocketError, BaseSSLError, HTTPException): + except (HTTPError, OSError, BaseSSLError, HTTPException): pass @property - def data(self): - # For backwords-compat with earlier urllib3 0.4 and earlier. + def data(self) -> bytes: + # For backwards-compat with earlier urllib3 0.4 and earlier. if self._body: - return self._body + return self._body # type: ignore[return-value] if self._fp: return self.read(cache_content=True) + return None # type: ignore[return-value] + @property - def connection(self): + def connection(self) -> HTTPConnection | None: return self._connection - def isclosed(self): + def isclosed(self) -> bool: return is_fp_closed(self._fp) - def tell(self): + def tell(self) -> int: """ Obtain the number of bytes pulled over the wire so far. May differ from - the amount of content returned by :meth:``HTTPResponse.read`` if bytes - are encoded on the wire (e.g, compressed). + the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` + if bytes are encoded on the wire (e.g, compressed). """ return self._fp_bytes_read - def _init_length(self, request_method): + def _init_length(self, request_method: str | None) -> int | None: """ Set initial length value for Response content if available. """ - length = self.headers.get("content-length") + length: int | None + content_length: str | None = self.headers.get("content-length") - if length is not None: + if content_length is not None: if self.chunked: # This Response will fail with an IncompleteRead if it can't be # received as chunked. This method falls back to attempt reading @@ -338,11 +709,11 @@ def _init_length(self, request_method): # (e.g. Content-Length: 42, 42). This line ensures the values # are all valid ints and that as long as the `set` length is 1, # all values are the same. Otherwise, the header is invalid. - lengths = set([int(val) for val in length.split(",")]) + lengths = {int(val) for val in content_length.split(",")} if len(lengths) > 1: raise InvalidHeader( "Content-Length contained multiple " - "unmatching values (%s)" % length + "unmatching values (%s)" % content_length ) length = lengths.pop() except ValueError: @@ -351,6 +722,9 @@ def _init_length(self, request_method): if length < 0: length = None + else: # if content_length is None + length = None + # Convert status to int for comparison # In some cases, httplib returns a status of "_UNKNOWN" try: @@ -364,64 +738,8 @@ def _init_length(self, request_method): return length - def _init_decoder(self): - """ - Set-up the _decoder attribute if necessary. 
- """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get("content-encoding", "").lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) - elif "," in content_encoding: - encodings = [ - e.strip() - for e in content_encoding.split(",") - if e.strip() in self.CONTENT_DECODERS - ] - if len(encodings): - self._decoder = _get_decoder(content_encoding) - - DECODER_ERROR_CLASSES = (IOError, zlib.error) - if brotli is not None: - DECODER_ERROR_CLASSES += (brotli.error,) - - def _decode(self, data, decode_content, flush_decoder): - """ - Decode the data passed in and potentially flush the decoder. - """ - if not decode_content: - return data - - try: - if self._decoder: - data = self._decoder.decompress(data) - except self.DECODER_ERROR_CLASSES as e: - content_encoding = self.headers.get("content-encoding", "").lower() - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e, - ) - if flush_decoder: - data += self._flush_decoder() - - return data - - def _flush_decoder(self): - """ - Flushes the decoder. Should only be called if the decoder is actually - being used. - """ - if self._decoder: - buf = self._decoder.decompress(b"") - return buf + self._decoder.flush() - - return b"" - @contextmanager - def _error_catcher(self): + def _error_catcher(self) -> typing.Generator[None]: """ Catch low-level python exceptions, instead re-raising urllib3 variants, so that low-level exceptions are not leaked in the @@ -435,23 +753,32 @@ def _error_catcher(self): try: yield - except SocketTimeout: + except SocketTimeout as e: # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but # there is yet no clean way to get at it from this context. - raise ReadTimeoutError(self._pool, None, "Read timed out.") + raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type] except BaseSSLError as e: # FIXME: Is there a better way to differentiate between SSLErrors? - if "read operation timed out" not in str(e): # Defensive: - # This shouldn't happen but just in case we're missing an edge - # case, let's avoid swallowing SSL errors. - raise + if "read operation timed out" not in str(e): + # SSL errors related to framing/MAC get wrapped and reraised here + raise SSLError(e) from e - raise ReadTimeoutError(self._pool, None, "Read timed out.") + raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type] - except (HTTPException, SocketError) as e: - # This includes IncompleteRead. - raise ProtocolError("Connection broken: %r" % e, e) + except IncompleteRead as e: + if ( + e.expected is not None + and e.partial is not None + and e.expected == -e.partial + ): + arg = "Response may not contain content." + else: + arg = f"Connection broken: {e!r}" + raise ProtocolError(arg, e) from e + + except (HTTPException, OSError) as e: + raise ProtocolError(f"Connection broken: {e!r}", e) from e # If no exception is thrown, we should avoid cleaning up # unnecessarily. 
@@ -477,9 +804,123 @@ def _error_catcher(self): if self._original_response and self._original_response.isclosed(): self.release_conn() - def read(self, amt=None, decode_content=None, cache_content=False): + def _fp_read( + self, + amt: int | None = None, + *, + read1: bool = False, + ) -> bytes: + """ + Read a response with the thought that reading the number of bytes + larger than can fit in a 32-bit int at a time via SSL in some + known cases leads to an overflow error that has to be prevented + if `amt` or `self.length_remaining` indicate that a problem may + happen. + + The known cases: + * CPython < 3.9.7 because of a bug + https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900. + * urllib3 injected with pyOpenSSL-backed SSL-support. + * CPython < 3.10 only when `amt` does not fit 32-bit int. """ - Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + assert self._fp + c_int_max = 2**31 - 1 + if ( + (amt and amt > c_int_max) + or ( + amt is None + and self.length_remaining + and self.length_remaining > c_int_max + ) + ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)): + if read1: + return self._fp.read1(c_int_max) + buffer = io.BytesIO() + # Besides `max_chunk_amt` being a maximum chunk size, it + # affects memory overhead of reading a response by this + # method in CPython. + # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum + # chunk size that does not lead to an overflow error, but + # 256 MiB is a compromise. + max_chunk_amt = 2**28 + while amt is None or amt != 0: + if amt is not None: + chunk_amt = min(amt, max_chunk_amt) + amt -= chunk_amt + else: + chunk_amt = max_chunk_amt + data = self._fp.read(chunk_amt) + if not data: + break + buffer.write(data) + del data # to reduce peak memory usage by `max_chunk_amt`. + return buffer.getvalue() + elif read1: + return self._fp.read1(amt) if amt is not None else self._fp.read1() + else: + # StringIO doesn't like amt=None + return self._fp.read(amt) if amt is not None else self._fp.read() + + def _raw_read( + self, + amt: int | None = None, + *, + read1: bool = False, + ) -> bytes: + """ + Reads `amt` of bytes from the socket. + """ + if self._fp is None: + return None # type: ignore[return-value] + + fp_closed = getattr(self._fp, "closed", False) + + with self._error_catcher(): + data = self._fp_read(amt, read1=read1) if not fp_closed else b"" + if amt is not None and amt != 0 and not data: + # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + if ( + self.enforce_content_length + and self.length_remaining is not None + and self.length_remaining != 0 + ): + # This is an edge case that httplib failed to cover due + # to concerns of backward compatibility. We're + # addressing it here to make sure IncompleteRead is + # raised during streaming, so all calls with incorrect + # Content-Length are caught. + raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + elif read1 and ( + (amt != 0 and not data) or self.length_remaining == len(data) + ): + # All data has been read, but `self._fp.read1` in + # CPython 3.12 and older doesn't always close + # `http.client.HTTPResponse`, so we close it here. 
+ # See https://github.com/python/cpython/issues/113199 + self._fp.close() + + if data: + self._fp_bytes_read += len(data) + if self.length_remaining is not None: + self.length_remaining -= len(data) + return data + + def read( + self, + amt: int | None = None, + decode_content: bool | None = None, + cache_content: bool = False, + ) -> bytes: + """ + Similar to :meth:`http.client.HTTPResponse.read`, but with two additional parameters: ``decode_content`` and ``cache_content``. :param amt: @@ -502,56 +943,107 @@ def read(self, amt=None, decode_content=None, cache_content=False): if decode_content is None: decode_content = self.decode_content - if self._fp is None: - return + if amt and amt < 0: + # Negative numbers and `None` should be treated the same. + amt = None + elif amt is not None: + cache_content = False - flush_decoder = False - fp_closed = getattr(self._fp, "closed", False) + if len(self._decoded_buffer) >= amt: + return self._decoded_buffer.get(amt) - with self._error_catcher(): - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() if not fp_closed else b"" - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) if not fp_closed else b"" - if ( - amt != 0 and not data - ): # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do - # not properly close the connection in all cases. There is - # no harm in redundantly calling close. - self._fp.close() - flush_decoder = True - if self.enforce_content_length and self.length_remaining not in ( - 0, - None, - ): - # This is an edge case that httplib failed to cover due - # to concerns of backward compatibility. We're - # addressing it here to make sure IncompleteRead is - # raised during streaming, so all calls with incorrect - # Content-Length are caught. - raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + data = self._raw_read(amt) - if data: - self._fp_bytes_read += len(data) - if self.length_remaining is not None: - self.length_remaining -= len(data) + flush_decoder = amt is None or (amt != 0 and not data) - data = self._decode(data, decode_content, flush_decoder) + if not data and len(self._decoded_buffer) == 0: + return data + if amt is None: + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data + else: + # do not waste memory on buffer when not decoding + if not decode_content: + if self._has_decoded_content: + raise RuntimeError( + "Calling read(decode_content=False) is not supported after " + "read(decode_content=True) was called." 
+ ) + return data + + decoded_data = self._decode(data, decode_content, flush_decoder) + self._decoded_buffer.put(decoded_data) + + while len(self._decoded_buffer) < amt and data: + # TODO make sure to initially read enough data to get past the headers + # For example, the GZ file header takes 10 bytes, we don't want to read + # it one byte at a time + data = self._raw_read(amt) + decoded_data = self._decode(data, decode_content, flush_decoder) + self._decoded_buffer.put(decoded_data) + data = self._decoded_buffer.get(amt) return data - def stream(self, amt=2 ** 16, decode_content=None): + def read1( + self, + amt: int | None = None, + decode_content: bool | None = None, + ) -> bytes: + """ + Similar to ``http.client.HTTPResponse.read1`` and documented + in :meth:`io.BufferedReader.read1`, but with an additional parameter: + ``decode_content``. + + :param amt: + How much of the content to read. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if decode_content is None: + decode_content = self.decode_content + if amt and amt < 0: + # Negative numbers and `None` should be treated the same. + amt = None + # try and respond without going to the network + if self._has_decoded_content: + if not decode_content: + raise RuntimeError( + "Calling read1(decode_content=False) is not supported after " + "read1(decode_content=True) was called." + ) + if len(self._decoded_buffer) > 0: + if amt is None: + return self._decoded_buffer.get_all() + return self._decoded_buffer.get(amt) + if amt == 0: + return b"" + + # FIXME, this method's type doesn't say returning None is possible + data = self._raw_read(amt, read1=True) + if not decode_content or data is None: + return data + + self._init_decoder() + while True: + flush_decoder = not data + decoded_data = self._decode(data, decode_content, flush_decoder) + self._decoded_buffer.put(decoded_data) + if decoded_data or flush_decoder: + break + data = self._raw_read(8192, read1=True) + + if amt is None: + return self._decoded_buffer.get_all() + return self._decoded_buffer.get(amt) + + def stream( + self, amt: int | None = 2**16, decode_content: bool | None = None + ) -> typing.Generator[bytes]: """ A generator wrapper for the read() method. A call will block until ``amt`` bytes have been read from the connection or until the @@ -568,61 +1060,27 @@ def stream(self, amt=2 ** 16, decode_content=None): 'content-encoding' header. """ if self.chunked and self.supports_chunked_reads(): - for line in self.read_chunked(amt, decode_content=decode_content): - yield line + yield from self.read_chunked(amt, decode_content=decode_content) else: - while not is_fp_closed(self._fp): + while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: data = self.read(amt=amt, decode_content=decode_content) if data: yield data - @classmethod - def from_httplib(ResponseCls, r, **response_kw): - """ - Given an :class:`httplib.HTTPResponse` instance ``r``, return a - corresponding :class:`urllib3.response.HTTPResponse` object. - - Remaining parameters are passed to the HTTPResponse constructor, along - with ``original_response=r``. 
- """ - headers = r.msg - - if not isinstance(headers, HTTPHeaderDict): - if PY3: - headers = HTTPHeaderDict(headers.items()) - else: - # Python 2.7 - headers = HTTPHeaderDict.from_httplib(headers) - - # HTTPResponse objects in Python 3 don't have a .strict attribute - strict = getattr(r, "strict", 0) - resp = ResponseCls( - body=r, - headers=headers, - status=r.status, - version=r.version, - reason=r.reason, - strict=strict, - original_response=r, - **response_kw - ) - return resp - - # Backwards-compatibility methods for httplib.HTTPResponse - def getheaders(self): - return self.headers + # Overrides from io.IOBase + def readable(self) -> bool: + return True - def getheader(self, name, default=None): - return self.headers.get(name, default) + def shutdown(self) -> None: + if not self._sock_shutdown: + raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set") + self._sock_shutdown(socket.SHUT_RD) - # Backwards compatibility for http.cookiejar - def info(self): - return self.headers + def close(self) -> None: + self._sock_shutdown = None - # Overrides from io.IOBase - def close(self): - if not self.closed: + if not self.closed and self._fp: self._fp.close() if self._connection: @@ -632,9 +1090,9 @@ def close(self): io.IOBase.close(self) @property - def closed(self): + def closed(self) -> bool: if not self.auto_close: - return io.IOBase.closed.__get__(self) + return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] elif self._fp is None: return True elif hasattr(self._fp, "isclosed"): @@ -644,18 +1102,18 @@ def closed(self): else: return True - def fileno(self): + def fileno(self) -> int: if self._fp is None: - raise IOError("HTTPResponse has no file to get a fileno from") + raise OSError("HTTPResponse has no file to get a fileno from") elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError( + raise OSError( "The file-like object this HTTPResponse is wrapped " "around has no file descriptor" ) - def flush(self): + def flush(self) -> None: if ( self._fp is not None and hasattr(self._fp, "flush") @@ -663,65 +1121,58 @@ def flush(self): ): return self._fp.flush() - def readable(self): - # This method is required for `io` module compatibility. - return True - - def readinto(self, b): - # This method is required for `io` module compatibility. - temp = self.read(len(b)) - if len(temp) == 0: - return 0 - else: - b[: len(temp)] = temp - return len(temp) - - def supports_chunked_reads(self): + def supports_chunked_reads(self) -> bool: """ Checks if the underlying file-like object looks like a - httplib.HTTPResponse object. We do this by testing for the fp - attribute. If it is present we assume it returns raw chunks as + :class:`http.client.HTTPResponse` object. We do this by testing for + the fp attribute. If it is present we assume it returns raw chunks as processed by read_chunked(). """ return hasattr(self._fp, "fp") - def _update_chunk_length(self): + def _update_chunk_length(self) -> None: # First, we'll figure out length of a chunk and then # we'll try to read it from socket. if self.chunk_left is not None: - return - line = self._fp.fp.readline() + return None + line = self._fp.fp.readline() # type: ignore[union-attr] line = line.split(b";", 1)[0] try: self.chunk_left = int(line, 16) except ValueError: - # Invalid chunked protocol response, abort. self.close() - raise httplib.IncompleteRead(line) + if line: + # Invalid chunked protocol response, abort. 
+ raise InvalidChunkLength(self, line) from None + else: + # Truncated at start of next chunk + raise ProtocolError("Response ended prematurely") from None - def _handle_chunk(self, amt): + def _handle_chunk(self, amt: int | None) -> bytes: returned_chunk = None if amt is None: - chunk = self._fp._safe_read(self.chunk_left) + chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] returned_chunk = chunk - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. self.chunk_left = None - elif amt < self.chunk_left: - value = self._fp._safe_read(amt) + elif self.chunk_left is not None and amt < self.chunk_left: + value = self._fp._safe_read(amt) # type: ignore[union-attr] self.chunk_left = self.chunk_left - amt returned_chunk = value elif amt == self.chunk_left: - value = self._fp._safe_read(amt) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + value = self._fp._safe_read(amt) # type: ignore[union-attr] + self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. self.chunk_left = None returned_chunk = value else: # amt > self.chunk_left - returned_chunk = self._fp._safe_read(self.chunk_left) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] + self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. self.chunk_left = None - return returned_chunk + return returned_chunk # type: ignore[no-any-return] - def read_chunked(self, amt=None, decode_content=None): + def read_chunked( + self, amt: int | None = None, decode_content: bool | None = None + ) -> typing.Generator[bytes]: """ Similar to :meth:`HTTPResponse.read`, but with an additional parameter: ``decode_content``. @@ -744,7 +1195,7 @@ def read_chunked(self, amt=None, decode_content=None): ) if not self.supports_chunked_reads(): raise BodyNotHttplibCompatible( - "Body should be httplib.HTTPResponse like. " + "Body should be http.client.HTTPResponse like. " "It should have have an fp attribute which returns raw chunks." ) @@ -752,12 +1203,17 @@ def read_chunked(self, amt=None, decode_content=None): # Don't bother reading the body of a HEAD request. if self._original_response and is_response_to_head(self._original_response): self._original_response.close() - return + return None # If a response is already read and closed # then return immediately. - if self._fp.fp is None: - return + if self._fp.fp is None: # type: ignore[union-attr] + return None + + if amt and amt < 0: + # Negative numbers and `None` should be treated the same, + # but httplib handles only `None` correctly. + amt = None while True: self._update_chunk_length() @@ -779,7 +1235,7 @@ def read_chunked(self, amt=None, decode_content=None): yield decoded # Chunk content ends with \r\n: discard it. - while True: + while self._fp is not None: line = self._fp.fp.readline() if not line: # Some sites may not end with '\r\n'. @@ -791,27 +1247,29 @@ def read_chunked(self, amt=None, decode_content=None): if self._original_response: self._original_response.close() - def geturl(self): + @property + def url(self) -> str | None: """ Returns the URL that was the source of this response. If the request that generated this response redirected, this method will return the final redirect location. 
""" - if self.retries is not None and len(self.retries.history): - return self.retries.history[-1].redirect_location - else: - return self._request_url + return self._request_url + + @url.setter + def url(self, url: str) -> None: + self._request_url = url - def __iter__(self): - buffer = [] + def __iter__(self) -> typing.Iterator[bytes]: + buffer: list[bytes] = [] for chunk in self.stream(decode_content=True): if b"\n" in chunk: - chunk = chunk.split(b"\n") - yield b"".join(buffer) + chunk[0] + b"\n" - for x in chunk[1:-1]: + chunks = chunk.split(b"\n") + yield b"".join(buffer) + chunks[0] + b"\n" + for x in chunks[1:-1]: yield x + b"\n" - if chunk[-1]: - buffer = [chunk[-1]] + if chunks[-1]: + buffer = [chunks[-1]] else: buffer = [] else: diff --git a/thirdparty/urllib3/util/__init__.py b/thirdparty/urllib3/util/__init__.py index a96c73a..5341260 100644 --- a/thirdparty/urllib3/util/__init__.py +++ b/thirdparty/urllib3/util/__init__.py @@ -1,46 +1,42 @@ -from __future__ import absolute_import - # For backwards compatibility, provide imports that used to be here. +from __future__ import annotations + from .connection import is_connection_dropped -from .request import make_headers +from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers from .response import is_fp_closed +from .retry import Retry from .ssl_ import ( - SSLContext, - HAS_SNI, + ALPN_PROTOCOLS, IS_PYOPENSSL, - IS_SECURETRANSPORT, + SSLContext, assert_fingerprint, + create_urllib3_context, resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, - PROTOCOL_TLS, ) -from .timeout import current_time, Timeout - -from .retry import Retry -from .url import get_host, parse_url, split_first, Url +from .timeout import Timeout +from .url import Url, parse_url from .wait import wait_for_read, wait_for_write __all__ = ( - "HAS_SNI", "IS_PYOPENSSL", - "IS_SECURETRANSPORT", "SSLContext", - "PROTOCOL_TLS", + "ALPN_PROTOCOLS", "Retry", "Timeout", "Url", "assert_fingerprint", - "current_time", + "create_urllib3_context", "is_connection_dropped", "is_fp_closed", - "get_host", "parse_url", "make_headers", "resolve_cert_reqs", "resolve_ssl_version", - "split_first", "ssl_wrap_socket", "wait_for_read", "wait_for_write", + "SKIP_HEADER", + "SKIPPABLE_HEADERS", ) diff --git a/thirdparty/urllib3/util/connection.py b/thirdparty/urllib3/util/connection.py index 86f0a3b..f92519e 100644 --- a/thirdparty/urllib3/util/connection.py +++ b/thirdparty/urllib3/util/connection.py @@ -1,29 +1,23 @@ -from __future__ import absolute_import +from __future__ import annotations + import socket -from .wait import NoWayToWaitForSocketError, wait_for_read -from ..contrib import _appengine_environ +import typing +from ..exceptions import LocationParseError +from .timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. +_TYPE_SOCKET_OPTIONS = list[tuple[int, int, typing.Union[int, bytes]]] - :param conn: - :class:`httplib.HTTPConnection` object. +if typing.TYPE_CHECKING: + from .._base_connection import BaseHTTPConnection - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. + +def is_connection_dropped(conn: BaseHTTPConnection) -> bool: # Platform-specific """ - sock = getattr(conn, "sock", False) - if sock is False: # Platform-specific: AppEngine - return False - if sock is None: # Connection already closed (such as by httplib). 
- return True - try: - # Returns True if readable, which here means it's been dropped - return wait_for_read(sock, timeout=0.0) - except NoWayToWaitForSocketError: # Platform-specific: AppEngine - return False + Returns True if the connection is dropped and should be closed. + :param conn: :class:`urllib3.connection.HTTPConnection` object. + """ + return not conn.is_connected # This function is copied from socket.py in the Python 2.7 standard @@ -31,18 +25,18 @@ def is_connection_dropped(conn): # Platform-specific # One additional modification is that we avoid binding to IPv6 servers # discovered in DNS if the system doesn't have IPv6 functionality. def create_connection( - address, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, - socket_options=None, -): + address: tuple[str, int], + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + source_address: tuple[str, int] | None = None, + socket_options: _TYPE_SOCKET_OPTIONS | None = None, +) -> socket.socket: """Connect to *address* and return the socket object. Convenience function. Connect to *address* (a 2-tuple ``(host, port)``) and return the socket object. Passing the optional *timeout* parameter will set the timeout on the socket instance before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` + global default timeout setting returned by :func:`socket.getdefaulttimeout` is used. If *source_address* is set it must be a tuple of (host, port) for the socket to bind as a source address before making the connection. An host of '' or port 0 tells the OS to use the default. @@ -58,6 +52,11 @@ def create_connection( # The original create_connection function always returns all records. family = allowed_gai_family() + try: + host.encode("idna") + except UnicodeError: + raise LocationParseError(f"'{host}', label empty or too long") from None + for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res sock = None @@ -67,26 +66,33 @@ def create_connection( # If provided, set socket level options before connecting. _set_socket_options(sock, socket_options) - if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + if timeout is not _DEFAULT_TIMEOUT: sock.settimeout(timeout) if source_address: sock.bind(source_address) sock.connect(sa) + # Break explicitly a reference cycle + err = None return sock - except socket.error as e: - err = e + except OSError as _: + err = _ if sock is not None: sock.close() - sock = None if err is not None: - raise err - - raise socket.error("getaddrinfo returns an empty list") + try: + raise err + finally: + # Break explicitly a reference cycle + err = None + else: + raise OSError("getaddrinfo returns an empty list") -def _set_socket_options(sock, options): +def _set_socket_options( + sock: socket.socket, options: _TYPE_SOCKET_OPTIONS | None +) -> None: if options is None: return @@ -94,7 +100,7 @@ def _set_socket_options(sock, options): sock.setsockopt(*opt) -def allowed_gai_family(): +def allowed_gai_family() -> socket.AddressFamily: """This function is designed to work in the context of getaddrinfo, where family=socket.AF_UNSPEC is the default and will perform a DNS search for both IPv6 and IPv4 records.""" @@ -105,18 +111,11 @@ def allowed_gai_family(): return family -def _has_ipv6(host): - """ Returns True if the system can bind an IPv6 address. 
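A quick usage sketch for `create_connection` as rewritten above; the optional `socket_options` are applied to the socket before `connect()` is attempted. Illustrative only; assumes the vendored module path `urllib3.util.connection`::

    import socket
    from urllib3.util.connection import create_connection

    # Disable Nagle's algorithm on the new socket before connecting.
    sock = create_connection(
        ("example.com", 80),
        timeout=5.0,
        socket_options=[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)],
    )
    sock.close()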
""" +def _has_ipv6(host: str) -> bool: + """Returns True if the system can bind an IPv6 address.""" sock = None has_ipv6 = False - # App Engine doesn't support IPV6 sockets and actually has a quota on the - # number of sockets that can be used, so just early out here instead of - # creating a socket needlessly. - # See https://github.com/urllib3/urllib3/issues/1446 - if _appengine_environ.is_appengine_sandbox(): - return False - if socket.has_ipv6: # has_ipv6 returns true if cPython was compiled with IPv6 support. # It does not tell us if the system has IPv6 support enabled. To diff --git a/thirdparty/urllib3/util/proxy.py b/thirdparty/urllib3/util/proxy.py new file mode 100644 index 0000000..908fc66 --- /dev/null +++ b/thirdparty/urllib3/util/proxy.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import typing + +from .url import Url + +if typing.TYPE_CHECKING: + from ..connection import ProxyConfig + + +def connection_requires_http_tunnel( + proxy_url: Url | None = None, + proxy_config: ProxyConfig | None = None, + destination_scheme: str | None = None, +) -> bool: + """ + Returns True if the connection requires an HTTP CONNECT through the proxy. + + :param URL proxy_url: + URL of the proxy. + :param ProxyConfig proxy_config: + Proxy configuration from poolmanager.py + :param str destination_scheme: + The scheme of the destination. (i.e https, http, etc) + """ + # If we're not using a proxy, no way to use a tunnel. + if proxy_url is None: + return False + + # HTTP destinations never require tunneling, we always forward. + if destination_scheme == "http": + return False + + # Support for forwarding with HTTPS proxies and HTTPS destinations. + if ( + proxy_url.scheme == "https" + and proxy_config + and proxy_config.use_forwarding_for_https + ): + return False + + # Otherwise always use a tunnel. + return True diff --git a/thirdparty/urllib3/util/request.py b/thirdparty/urllib3/util/request.py index 3b7bb54..94392a1 100644 --- a/thirdparty/urllib3/util/request.py +++ b/thirdparty/urllib3/util/request.py @@ -1,28 +1,65 @@ -from __future__ import absolute_import +from __future__ import annotations + +import io +import typing from base64 import b64encode +from enum import Enum -from ..packages.six import b, integer_types from ..exceptions import UnrewindableBodyError +from .util import to_bytes + +if typing.TYPE_CHECKING: + from typing import Final + +# Pass as a value within ``headers`` to skip +# emitting some HTTP headers that are added automatically. +# The only headers that are supported are ``Accept-Encoding``, +# ``Host``, and ``User-Agent``. 
+SKIP_HEADER = "@@@SKIP_HEADER@@@" +SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"]) ACCEPT_ENCODING = "gzip,deflate" try: - import brotli as _unused_module_brotli # noqa: F401 + try: + import brotlicffi as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401 + except ImportError: + import brotli as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401 except ImportError: pass else: ACCEPT_ENCODING += ",br" +try: + import zstandard as _unused_module_zstd # noqa: F401 +except ImportError: + pass +else: + ACCEPT_ENCODING += ",zstd" + + +class _TYPE_FAILEDTELL(Enum): + token = 0 -_FAILEDTELL = object() + +_FAILEDTELL: Final[_TYPE_FAILEDTELL] = _TYPE_FAILEDTELL.token + +_TYPE_BODY_POSITION = typing.Union[int, _TYPE_FAILEDTELL] + +# When sending a request with these methods we aren't expecting +# a body so don't need to set an explicit 'Content-Length: 0' +# The reason we do this in the negative instead of tracking methods +# which 'should' have a body is because unknown methods should be +# treated as if they were 'POST' which *does* expect a body. +_METHODS_NOT_EXPECTING_BODY = {"GET", "HEAD", "DELETE", "TRACE", "OPTIONS", "CONNECT"} def make_headers( - keep_alive=None, - accept_encoding=None, - user_agent=None, - basic_auth=None, - proxy_basic_auth=None, - disable_cache=None, -): + keep_alive: bool | None = None, + accept_encoding: bool | list[str] | str | None = None, + user_agent: str | None = None, + basic_auth: str | None = None, + proxy_basic_auth: str | None = None, + disable_cache: bool | None = None, +) -> dict[str, str]: """ Shortcuts for generating request headers. @@ -31,7 +68,10 @@ def make_headers( :param accept_encoding: Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. + ``True`` translates to 'gzip,deflate'. If the dependencies for + Brotli (either the ``brotli`` or ``brotlicffi`` package) and/or Zstandard + (the ``zstandard`` package) algorithms are installed, then their encodings are + included in the string ('br' and 'zstd', respectively). List will get joined by comma. String will be used as provided. @@ -50,14 +90,18 @@ def make_headers( :param disable_cache: If ``True``, adds 'cache-control: no-cache' header. - Example:: + Example: + + .. 
code-block:: python - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} + import urllib3 + + print(urllib3.util.make_headers(keep_alive=True, user_agent="Batman/1.0")) + # {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + print(urllib3.util.make_headers(accept_encoding=True)) + # {'accept-encoding': 'gzip,deflate'} """ - headers = {} + headers: dict[str, str] = {} if accept_encoding: if isinstance(accept_encoding, str): pass @@ -74,12 +118,14 @@ def make_headers( headers["connection"] = "keep-alive" if basic_auth: - headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8") + headers["authorization"] = ( + f"Basic {b64encode(basic_auth.encode('latin-1')).decode()}" + ) if proxy_basic_auth: - headers["proxy-authorization"] = "Basic " + b64encode( - b(proxy_basic_auth) - ).decode("utf-8") + headers["proxy-authorization"] = ( + f"Basic {b64encode(proxy_basic_auth.encode('latin-1')).decode()}" + ) if disable_cache: headers["cache-control"] = "no-cache" @@ -87,7 +133,9 @@ def make_headers( return headers -def set_file_position(body, pos): +def set_file_position( + body: typing.Any, pos: _TYPE_BODY_POSITION | None +) -> _TYPE_BODY_POSITION | None: """ If a position is provided, move file to that point. Otherwise, we'll attempt to record a position for future use. @@ -97,7 +145,7 @@ def set_file_position(body, pos): elif getattr(body, "tell", None) is not None: try: pos = body.tell() - except (IOError, OSError): + except OSError: # This differentiates from None, allowing us to catch # a failed `tell()` later when trying to rewind the body. pos = _FAILEDTELL @@ -105,7 +153,7 @@ def set_file_position(body, pos): return pos -def rewind_body(body, body_pos): +def rewind_body(body: typing.IO[typing.AnyStr], body_pos: _TYPE_BODY_POSITION) -> None: """ Attempt to rewind body to a certain position. Primarily used for request redirects and retries. @@ -117,13 +165,13 @@ def rewind_body(body, body_pos): Position to seek to in file. """ body_seek = getattr(body, "seek", None) - if body_seek is not None and isinstance(body_pos, integer_types): + if body_seek is not None and isinstance(body_pos, int): try: body_seek(body_pos) - except (IOError, OSError): + except OSError as e: raise UnrewindableBodyError( "An error occurred when rewinding request body for redirect/retry." - ) + ) from e elif body_pos is _FAILEDTELL: raise UnrewindableBodyError( "Unable to record file position for rewinding " @@ -131,5 +179,80 @@ def rewind_body(body, body_pos): ) else: raise ValueError( - "body_pos must be of type integer, instead it was %s." % type(body_pos) + f"body_pos must be of type integer, instead it was {type(body_pos)}." ) + + +class ChunksAndContentLength(typing.NamedTuple): + chunks: typing.Iterable[bytes] | None + content_length: int | None + + +def body_to_chunks( + body: typing.Any | None, method: str, blocksize: int +) -> ChunksAndContentLength: + """Takes the HTTP request method, body, and blocksize and + transforms them into an iterable of chunks to pass to + socket.sendall() and an optional 'Content-Length' header. + + A 'Content-Length' of 'None' indicates the length of the body + can't be determined so should use 'Transfer-Encoding: chunked' + for framing instead. 
+ """ + + chunks: typing.Iterable[bytes] | None + content_length: int | None + + # No body, we need to make a recommendation on 'Content-Length' + # based on whether that request method is expected to have + # a body or not. + if body is None: + chunks = None + if method.upper() not in _METHODS_NOT_EXPECTING_BODY: + content_length = 0 + else: + content_length = None + + # Bytes or strings become bytes + elif isinstance(body, (str, bytes)): + chunks = (to_bytes(body),) + content_length = len(chunks[0]) + + # File-like object, TODO: use seek() and tell() for length? + elif hasattr(body, "read"): + + def chunk_readable() -> typing.Iterable[bytes]: + nonlocal body, blocksize + encode = isinstance(body, io.TextIOBase) + while True: + datablock = body.read(blocksize) + if not datablock: + break + if encode: + datablock = datablock.encode("utf-8") + yield datablock + + chunks = chunk_readable() + content_length = None + + # Otherwise we need to start checking via duck-typing. + else: + try: + # Check if the body implements the buffer API. + mv = memoryview(body) + except TypeError: + try: + # Check if the body is an iterable + chunks = iter(body) + content_length = None + except TypeError: + raise TypeError( + f"'body' must be a bytes-like object, file-like " + f"object, or iterable. Instead was {body!r}" + ) from None + else: + # Since it implements the buffer API can be passed directly to socket.sendall() + chunks = (body,) + content_length = mv.nbytes + + return ChunksAndContentLength(chunks=chunks, content_length=content_length) diff --git a/thirdparty/urllib3/util/response.py b/thirdparty/urllib3/util/response.py index 715868d..0f45786 100644 --- a/thirdparty/urllib3/util/response.py +++ b/thirdparty/urllib3/util/response.py @@ -1,10 +1,12 @@ -from __future__ import absolute_import -from ..packages.six.moves import http_client as httplib +from __future__ import annotations + +import http.client as httplib +from email.errors import MultipartInvariantViolationDefect, StartBoundaryNotFoundDefect from ..exceptions import HeaderParsingError -def is_fp_closed(obj): +def is_fp_closed(obj: object) -> bool: """ Checks whether a given file-like object is closed. @@ -15,35 +17,34 @@ def is_fp_closed(obj): try: # Check `isclosed()` first, in case Python3 doesn't set `closed`. # GH Issue #928 - return obj.isclosed() + return obj.isclosed() # type: ignore[no-any-return, attr-defined] except AttributeError: pass try: # Check via the official file-like-object way. - return obj.closed + return obj.closed # type: ignore[no-any-return, attr-defined] except AttributeError: pass try: # Check if the object is a container for another file-like object that # gets released on exhaustion (e.g. HTTPResponse). - return obj.fp is None + return obj.fp is None # type: ignore[attr-defined] except AttributeError: pass raise ValueError("Unable to determine whether fp is closed.") -def assert_header_parsing(headers): +def assert_header_parsing(headers: httplib.HTTPMessage) -> None: """ Asserts whether all headers have been successfully parsed. Extracts encountered errors from the result of parsing headers. Only works on Python 3. - :param headers: Headers to verify. - :type headers: `httplib.HTTPMessage`. + :param http.client.HTTPMessage headers: Headers to verify. :raises urllib3.exceptions.HeaderParsingError: If parsing errors are found. @@ -52,35 +53,49 @@ def assert_header_parsing(headers): # This will fail silently if we pass in the wrong kind of parameter. # To make debugging easier add an explicit check. 
if not isinstance(headers, httplib.HTTPMessage): - raise TypeError("expected httplib.Message, got {0}.".format(type(headers))) - - defects = getattr(headers, "defects", None) - get_payload = getattr(headers, "get_payload", None) + raise TypeError(f"expected httplib.Message, got {type(headers)}.") unparsed_data = None - if get_payload: - # get_payload is actually email.message.Message.get_payload; - # we're only interested in the result if it's not a multipart message - if not headers.is_multipart(): - payload = get_payload() - if isinstance(payload, (bytes, str)): - unparsed_data = payload + # get_payload is actually email.message.Message.get_payload; + # we're only interested in the result if it's not a multipart message + if not headers.is_multipart(): + payload = headers.get_payload() + + if isinstance(payload, (bytes, str)): + unparsed_data = payload + + # httplib is assuming a response body is available + # when parsing headers even when httplib only sends + # header data to parse_headers() This results in + # defects on multipart responses in particular. + # See: https://github.com/urllib3/urllib3/issues/800 + + # So we ignore the following defects: + # - StartBoundaryNotFoundDefect: + # The claimed start boundary was never found. + # - MultipartInvariantViolationDefect: + # A message claimed to be a multipart but no subparts were found. + defects = [ + defect + for defect in headers.defects + if not isinstance( + defect, (StartBoundaryNotFoundDefect, MultipartInvariantViolationDefect) + ) + ] if defects or unparsed_data: raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) -def is_response_to_head(response): +def is_response_to_head(response: httplib.HTTPResponse) -> bool: """ Checks whether the request of a response has been a HEAD-request. - Handles the quirks of AppEngine. - :param conn: - :type conn: :class:`httplib.HTTPResponse` + :param http.client.HTTPResponse response: + Response to check if the originating request + used 'HEAD' as a method. """ # FIXME: Can we do this somehow without accessing private httplib _method? - method = response._method - if isinstance(method, int): # Platform-specific: Appengine - return method == 3 - return method.upper() == "HEAD" + method_str = response._method # type: str # type: ignore[attr-defined] + return method_str.upper() == "HEAD" diff --git a/thirdparty/urllib3/util/retry.py b/thirdparty/urllib3/util/retry.py index b639c0c..0456cce 100644 --- a/thirdparty/urllib3/util/retry.py +++ b/thirdparty/urllib3/util/retry.py @@ -1,51 +1,68 @@ -from __future__ import absolute_import -import time -import logging -from collections import namedtuple -from itertools import takewhile +from __future__ import annotations + import email +import logging +import random import re +import time +import typing +from itertools import takewhile +from types import TracebackType from ..exceptions import ( ConnectTimeoutError, + InvalidHeader, MaxRetryError, ProtocolError, + ProxyError, ReadTimeoutError, ResponseError, - InvalidHeader, - ProxyError, ) -from ..packages import six +from .util import reraise + +if typing.TYPE_CHECKING: + from typing_extensions import Self + from ..connectionpool import ConnectionPool + from ..response import BaseHTTPResponse log = logging.getLogger(__name__) # Data structure for representing the metadata of requests that result in a retry. 
-RequestHistory = namedtuple( - "RequestHistory", ["method", "url", "error", "status", "redirect_location"] -) +class RequestHistory(typing.NamedTuple): + method: str | None + url: str | None + error: Exception | None + status: int | None + redirect_location: str | None -class Retry(object): +class Retry: """Retry configuration. Each retry attempt will create a new Retry object with updated values, so they can be safely reused. - Retries can be defined as a default for a pool:: + Retries can be defined as a default for a pool: + + .. code-block:: python retries = Retry(connect=5, read=2, redirect=5) http = PoolManager(retries=retries) - response = http.request('GET', 'http://example.com/') + response = http.request("GET", "https://example.com/") + + Or per-request (which overrides the default for the pool): + + .. code-block:: python - Or per-request (which overrides the default for the pool):: + response = http.request("GET", "https://example.com/", retries=Retry(10)) - response = http.request('GET', 'http://example.com/', retries=Retry(10)) + Retries can be disabled by passing ``False``: - Retries can be disabled by passing ``False``:: + .. code-block:: python - response = http.request('GET', 'http://example.com/', retries=False) + response = http.request("GET", "https://example.com/", retries=False) Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. @@ -54,8 +71,7 @@ class Retry(object): Total number of retries to allow. Takes precedence over other counts. Set to ``None`` to remove this constraint and fall back on other - counts. It's a good idea to set this to some sensibly-high value to - account for unexpected edge cases and avoid infinite retry loops. + counts. Set to ``0`` to fail on the first retry. @@ -96,18 +112,30 @@ class Retry(object): Set to ``0`` to fail on the first retry of this type. - :param iterable method_whitelist: + :param int other: + How many times to retry on other errors. + + Other errors are errors that are not connect, read, redirect or status errors. + These errors might be raised after the request was sent to the server, so the + request might have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + If ``total`` is not set, it's a good idea to set this to 0 to account + for unexpected edge cases and avoid infinite retry loops. + + :param Collection allowed_methods: Set of uppercased HTTP method verbs that we should retry on. By default, we only retry on methods which are considered to be idempotent (multiple requests with the same parameters end with the - same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + same state). See :attr:`Retry.DEFAULT_ALLOWED_METHODS`. - Set to a ``False`` value to retry on any verb. + Set to a ``None`` value to retry on any verb. - :param iterable status_forcelist: + :param Collection status_forcelist: A set of integer HTTP status codes that we should force a retry on. - A retry is initiated if the request method is in ``method_whitelist`` + A retry is initiated if the request method is in ``allowed_methods`` and the response status code is in ``status_forcelist``. By default, this is disabled with ``None``. @@ -117,13 +145,17 @@ class Retry(object): (most errors are resolved immediately by a second try without a delay). urllib3 will sleep for:: - {backoff factor} * (2 ** ({number of total retries} - 1)) + {backoff factor} * (2 ** ({number of previous retries})) + + seconds. 
If `backoff_jitter` is non-zero, this sleep is extended by:: - seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep - for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer - than :attr:`Retry.BACKOFF_MAX`. + random.uniform(0, {backoff jitter}) - By default, backoff is disabled (set to 0). + seconds. For example, if the backoff_factor is 0.1, then :func:`Retry.sleep` will + sleep for [0.0s, 0.2s, 0.4s, 0.8s, ...] between retries. No backoff will ever + be longer than `backoff_max`. + + By default, backoff is disabled (factor set to 0). :param bool raise_on_redirect: Whether, if the number of redirects is exhausted, to raise a MaxRetryError, or to return a response with a @@ -142,44 +174,57 @@ class Retry(object): Whether to respect Retry-After header on status codes defined as :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not. - :param iterable remove_headers_on_redirect: + :param Collection remove_headers_on_redirect: Sequence of headers to remove from the request when a response indicating a redirect is returned before firing off the redirected request. """ - DEFAULT_METHOD_WHITELIST = frozenset( + #: Default methods to be used for ``allowed_methods`` + DEFAULT_ALLOWED_METHODS = frozenset( ["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"] ) + #: Default status codes to be used for ``status_forcelist`` RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) - DEFAULT_REDIRECT_HEADERS_BLACKLIST = frozenset(["Authorization"]) + #: Default headers to be used for ``remove_headers_on_redirect`` + DEFAULT_REMOVE_HEADERS_ON_REDIRECT = frozenset( + ["Cookie", "Authorization", "Proxy-Authorization"] + ) + + #: Default maximum backoff time. + DEFAULT_BACKOFF_MAX = 120 - #: Maximum backoff time. - BACKOFF_MAX = 120 + # Backward compatibility; assigned outside of the class. + DEFAULT: typing.ClassVar[Retry] def __init__( self, - total=10, - connect=None, - read=None, - redirect=None, - status=None, - method_whitelist=DEFAULT_METHOD_WHITELIST, - status_forcelist=None, - backoff_factor=0, - raise_on_redirect=True, - raise_on_status=True, - history=None, - respect_retry_after_header=True, - remove_headers_on_redirect=DEFAULT_REDIRECT_HEADERS_BLACKLIST, - ): - + total: bool | int | None = 10, + connect: int | None = None, + read: int | None = None, + redirect: bool | int | None = None, + status: int | None = None, + other: int | None = None, + allowed_methods: typing.Collection[str] | None = DEFAULT_ALLOWED_METHODS, + status_forcelist: typing.Collection[int] | None = None, + backoff_factor: float = 0, + backoff_max: float = DEFAULT_BACKOFF_MAX, + raise_on_redirect: bool = True, + raise_on_status: bool = True, + history: tuple[RequestHistory, ...] 
| None = None, + respect_retry_after_header: bool = True, + remove_headers_on_redirect: typing.Collection[ + str + ] = DEFAULT_REMOVE_HEADERS_ON_REDIRECT, + backoff_jitter: float = 0.0, + ) -> None: self.total = total self.connect = connect self.read = read self.status = status + self.other = other if redirect is False or total is False: redirect = 0 @@ -187,38 +232,49 @@ def __init__( self.redirect = redirect self.status_forcelist = status_forcelist or set() - self.method_whitelist = method_whitelist + self.allowed_methods = allowed_methods self.backoff_factor = backoff_factor + self.backoff_max = backoff_max self.raise_on_redirect = raise_on_redirect self.raise_on_status = raise_on_status - self.history = history or tuple() + self.history = history or () self.respect_retry_after_header = respect_retry_after_header self.remove_headers_on_redirect = frozenset( - [h.lower() for h in remove_headers_on_redirect] + h.lower() for h in remove_headers_on_redirect ) + self.backoff_jitter = backoff_jitter - def new(self, **kw): + def new(self, **kw: typing.Any) -> Self: params = dict( total=self.total, connect=self.connect, read=self.read, redirect=self.redirect, status=self.status, - method_whitelist=self.method_whitelist, + other=self.other, + allowed_methods=self.allowed_methods, status_forcelist=self.status_forcelist, backoff_factor=self.backoff_factor, + backoff_max=self.backoff_max, raise_on_redirect=self.raise_on_redirect, raise_on_status=self.raise_on_status, history=self.history, remove_headers_on_redirect=self.remove_headers_on_redirect, respect_retry_after_header=self.respect_retry_after_header, + backoff_jitter=self.backoff_jitter, ) + params.update(kw) - return type(self)(**params) + return type(self)(**params) # type: ignore[arg-type] @classmethod - def from_int(cls, retries, redirect=True, default=None): - """ Backwards-compatibility for the old retries format.""" + def from_int( + cls, + retries: Retry | bool | int | None, + redirect: bool | int | None = True, + default: Retry | bool | int | None = None, + ) -> Retry: + """Backwards-compatibility for the old retries format.""" if retries is None: retries = default if default is not None else cls.DEFAULT @@ -230,7 +286,7 @@ def from_int(cls, retries, redirect=True, default=None): log.debug("Converted retries value: %r -> %r", retries, new_retries) return new_retries - def get_backoff_time(self): + def get_backoff_time(self) -> float: """Formula for computing the current backoff :rtype: float @@ -245,42 +301,38 @@ def get_backoff_time(self): return 0 backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1)) - return min(self.BACKOFF_MAX, backoff_value) + if self.backoff_jitter != 0.0: + backoff_value += random.random() * self.backoff_jitter + return float(max(0, min(self.backoff_max, backoff_value))) - def parse_retry_after(self, retry_after): + def parse_retry_after(self, retry_after: str) -> float: + seconds: float # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4 if re.match(r"^\s*[0-9]+\s*$", retry_after): seconds = int(retry_after) else: retry_date_tuple = email.utils.parsedate_tz(retry_after) if retry_date_tuple is None: - raise InvalidHeader("Invalid Retry-After header: %s" % retry_after) - if retry_date_tuple[9] is None: # Python 2 - # Assume UTC if no timezone was specified - # On Python2.7, parsedate_tz returns None for a timezone offset - # instead of 0 if no timezone is given, where mktime_tz treats - # a None timezone offset as local time. 
- retry_date_tuple = retry_date_tuple[:9] + (0,) + retry_date_tuple[10:] + raise InvalidHeader(f"Invalid Retry-After header: {retry_after}") retry_date = email.utils.mktime_tz(retry_date_tuple) seconds = retry_date - time.time() - if seconds < 0: - seconds = 0 + seconds = max(seconds, 0) return seconds - def get_retry_after(self, response): - """ Get the value of Retry-After in seconds. """ + def get_retry_after(self, response: BaseHTTPResponse) -> float | None: + """Get the value of Retry-After in seconds.""" - retry_after = response.getheader("Retry-After") + retry_after = response.headers.get("Retry-After") if retry_after is None: return None return self.parse_retry_after(retry_after) - def sleep_for_retry(self, response=None): + def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: retry_after = self.get_retry_after(response) if retry_after: time.sleep(retry_after) @@ -288,13 +340,13 @@ def sleep_for_retry(self, response=None): return False - def _sleep_backoff(self): + def _sleep_backoff(self) -> None: backoff = self.get_backoff_time() if backoff <= 0: return time.sleep(backoff) - def sleep(self, response=None): + def sleep(self, response: BaseHTTPResponse | None = None) -> None: """Sleep between retry attempts. This method will respect a server's ``Retry-After`` response header @@ -310,7 +362,7 @@ def sleep(self, response=None): self._sleep_backoff() - def _is_connection_error(self, err): + def _is_connection_error(self, err: Exception) -> bool: """Errors when we're fairly sure that the server did not receive the request, so it should be safe to retry. """ @@ -318,23 +370,24 @@ def _is_connection_error(self, err): err = err.original_error return isinstance(err, ConnectTimeoutError) - def _is_read_error(self, err): + def _is_read_error(self, err: Exception) -> bool: """Errors that occur after the request has been started, so we should assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) - def _is_method_retryable(self, method): + def _is_method_retryable(self, method: str) -> bool: """Checks if a given HTTP method should be retried upon, depending if - it is included on the method whitelist. + it is included in the allowed_methods """ - if self.method_whitelist and method.upper() not in self.method_whitelist: + if self.allowed_methods and method.upper() not in self.allowed_methods: return False - return True - def is_retry(self, method, status_code, has_retry_after=False): - """Is this method/status code retryable? (Based on whitelists and control + def is_retry( + self, method: str, status_code: int, has_retry_after: bool = False + ) -> bool: + """Is this method/status code retryable? (Based on allowlists and control variables such as the number of total retries to allow, whether to respect the Retry-After header, whether this header is present, and whether the returned status code is on the list of status codes to @@ -346,17 +399,27 @@ def is_retry(self, method, status_code, has_retry_after=False): if self.status_forcelist and status_code in self.status_forcelist: return True - return ( + return bool( self.total and self.respect_retry_after_header and has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES) ) - def is_exhausted(self): - """ Are we out of retries? 
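As a worked example of the backoff formula documented on `Retry` above, the sleep sequence for `backoff_factor=0.1` with the default `backoff_max` comes out as follows. Illustrative only; this mirrors `Retry.get_backoff_time`, which returns 0 until there is more than one consecutive error in the retry history::

    import random

    def backoff(consecutive_errors, factor=0.1, maximum=120.0, jitter=0.0):
        # min(backoff_max, backoff_factor * 2 ** (errors - 1)), plus
        # an optional random extension of up to backoff_jitter seconds.
        if consecutive_errors <= 1:
            return 0.0
        value = factor * 2 ** (consecutive_errors - 1)
        if jitter:
            value += random.random() * jitter
        return max(0.0, min(maximum, value))

    print([backoff(n) for n in range(1, 6)])  # [0.0, 0.2, 0.4, 0.8, 1.6]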
""" - retry_counts = (self.total, self.connect, self.read, self.redirect, self.status) - retry_counts = list(filter(None, retry_counts)) + def is_exhausted(self) -> bool: + """Are we out of retries?""" + retry_counts = [ + x + for x in ( + self.total, + self.connect, + self.read, + self.redirect, + self.status, + self.other, + ) + if x + ] if not retry_counts: return False @@ -364,18 +427,18 @@ def is_exhausted(self): def increment( self, - method=None, - url=None, - response=None, - error=None, - _pool=None, - _stacktrace=None, - ): + method: str | None = None, + url: str | None = None, + response: BaseHTTPResponse | None = None, + error: Exception | None = None, + _pool: ConnectionPool | None = None, + _stacktrace: TracebackType | None = None, + ) -> Self: """Return a new Retry object with incremented retry counters. :param response: A response object, or None, if the server did not return a response. - :type response: :class:`~urllib3.response.HTTPResponse` + :type response: :class:`~urllib3.response.BaseHTTPResponse` :param Exception error: An error encountered during the request, or None if the response was received successfully. @@ -383,7 +446,7 @@ def increment( """ if self.total is False and error: # Disabled, indicate to re-raise the error. - raise six.reraise(type(error), error, _stacktrace) + raise reraise(type(error), error, _stacktrace) total = self.total if total is not None: @@ -393,6 +456,7 @@ def increment( read = self.read redirect = self.redirect status_count = self.status + other = self.other cause = "unknown" status = None redirect_location = None @@ -400,28 +464,35 @@ def increment( if error and self._is_connection_error(error): # Connect retry? if connect is False: - raise six.reraise(type(error), error, _stacktrace) + raise reraise(type(error), error, _stacktrace) elif connect is not None: connect -= 1 elif error and self._is_read_error(error): # Read retry? - if read is False or not self._is_method_retryable(method): - raise six.reraise(type(error), error, _stacktrace) + if read is False or method is None or not self._is_method_retryable(method): + raise reraise(type(error), error, _stacktrace) elif read is not None: read -= 1 + elif error: + # Other retry? + if other is not None: + other -= 1 + elif response and response.get_redirect_location(): # Redirect retry? 
if redirect is not None: redirect -= 1 cause = "too many redirects" - redirect_location = response.get_redirect_location() + response_redirect_location = response.get_redirect_location() + if response_redirect_location: + redirect_location = response_redirect_location status = response.status else: # Incrementing because of a server error like a 500 in - # status_forcelist and a the given method is in the whitelist + # status_forcelist and the given method is in the allowed_methods cause = ResponseError.GENERIC_ERROR if response and response.status: if status_count is not None: @@ -439,21 +510,23 @@ def increment( read=read, redirect=redirect, status=status_count, + other=other, history=history, ) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error or ResponseError(cause)) + reason = error or ResponseError(cause) + raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) return new_retry - def __repr__(self): + def __repr__(self) -> str: return ( - "{cls.__name__}(total={self.total}, connect={self.connect}, " - "read={self.read}, redirect={self.redirect}, status={self.status})" - ).format(cls=type(self), self=self) + f"{type(self).__name__}(total={self.total}, connect={self.connect}, " + f"read={self.read}, redirect={self.redirect}, status={self.status})" + ) # For backwards compatibility (equivalent to pre-v1.9): diff --git a/thirdparty/urllib3/util/ssl_.py b/thirdparty/urllib3/util/ssl_.py index bbe040f..278128e 100644 --- a/thirdparty/urllib3/util/ssl_.py +++ b/thirdparty/urllib3/util/ssl_.py @@ -1,159 +1,149 @@ -from __future__ import absolute_import -import errno -import warnings +from __future__ import annotations + +import hashlib import hmac import os +import socket import sys +import typing +import warnings +from binascii import unhexlify -from binascii import hexlify, unhexlify -from hashlib import md5, sha1, sha256 - -from .url import IPV4_RE, BRACELESS_IPV6_ADDRZ_RE -from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning -from ..packages import six - +from ..exceptions import ProxySchemeUnsupported, SSLError +from .url import _BRACELESS_IPV6_ADDRZ_RE, _IPV4_RE SSLContext = None -HAS_SNI = False +SSLTransport = None +HAS_NEVER_CHECK_COMMON_NAME = False IS_PYOPENSSL = False -IS_SECURETRANSPORT = False - -# Maps the length of a digest to a possible hash function producing this digest -HASHFUNC_MAP = {32: md5, 40: sha1, 64: sha256} +ALPN_PROTOCOLS = ["http/1.1"] +_TYPE_VERSION_INFO = tuple[int, int, int, str, int] -def _const_compare_digest_backport(a, b): - """ - Compare two digests of equal length in constant time. - - The digests must be of type str/bytes. - Returns True if the digests match, and False otherwise. +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = { + length: getattr(hashlib, algorithm, None) + for length, algorithm in ((32, "md5"), (40, "sha1"), (64, "sha256")) +} + + +def _is_bpo_43522_fixed( + implementation_name: str, + version_info: _TYPE_VERSION_INFO, + pypy_version_info: _TYPE_VERSION_INFO | None, +) -> bool: + """Return True for CPython 3.9.3+ or 3.10+ and PyPy 7.3.8+ where + setting SSLContext.hostname_checks_common_name to False works. + + Outside of CPython and PyPy we don't know which implementations work + or not so we conservatively use our hostname matching as we know that works + on all implementations. 
+ + https://github.com/urllib3/urllib3/issues/2192#issuecomment-821832963 + https://foss.heptapod.net/pypy/pypy/-/issues/3539 """ - result = abs(len(a) - len(b)) - for left, right in zip(bytearray(a), bytearray(b)): - result |= left ^ right - return result == 0 - - -_const_compare_digest = getattr(hmac, "compare_digest", _const_compare_digest_backport) - -try: # Test for SSL features + if implementation_name == "pypy": + # https://foss.heptapod.net/pypy/pypy/-/issues/3129 + return pypy_version_info >= (7, 3, 8) # type: ignore[operator] + elif implementation_name == "cpython": + major_minor = version_info[:2] + micro = version_info[2] + return (major_minor == (3, 9) and micro >= 3) or major_minor >= (3, 10) + else: # Defensive: + return False + + +def _is_has_never_check_common_name_reliable( + openssl_version: str, + openssl_version_number: int, + implementation_name: str, + version_info: _TYPE_VERSION_INFO, + pypy_version_info: _TYPE_VERSION_INFO | None, +) -> bool: + # As of May 2023, all released versions of LibreSSL fail to reject certificates with + # only common names, see https://github.com/urllib3/urllib3/pull/3024 + is_openssl = openssl_version.startswith("OpenSSL ") + # Before fixing OpenSSL issue #14579, the SSL_new() API was not copying hostflags + # like X509_CHECK_FLAG_NEVER_CHECK_SUBJECT, which tripped up CPython. + # https://github.com/openssl/openssl/issues/14579 + # This was released in OpenSSL 1.1.1l+ (>=0x101010cf) + is_openssl_issue_14579_fixed = openssl_version_number >= 0x101010CF + + return is_openssl and ( + is_openssl_issue_14579_fixed + or _is_bpo_43522_fixed(implementation_name, version_info, pypy_version_info) + ) + + +if typing.TYPE_CHECKING: + from ssl import VerifyMode + from typing import TypedDict + + from .ssltransport import SSLTransport as SSLTransportType + + class _TYPE_PEER_CERT_RET_DICT(TypedDict, total=False): + subjectAltName: tuple[tuple[str, str], ...] + subject: tuple[tuple[tuple[str, str], ...], ...] + serialNumber: str + + +# Mapping from 'ssl.PROTOCOL_TLSX' to 'TLSVersion.X' +_SSL_VERSION_TO_TLS_VERSION: dict[int, int] = {} + +try: # Do we have ssl at all? import ssl - from ssl import wrap_socket, CERT_REQUIRED - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -try: # Platform-specific: Python 3.6 - from ssl import PROTOCOL_TLS + from ssl import ( # type: ignore[assignment] + CERT_REQUIRED, + HAS_NEVER_CHECK_COMMON_NAME, + OP_NO_COMPRESSION, + OP_NO_TICKET, + OPENSSL_VERSION, + OPENSSL_VERSION_NUMBER, + PROTOCOL_TLS, + PROTOCOL_TLS_CLIENT, + OP_NO_SSLv2, + OP_NO_SSLv3, + SSLContext, + TLSVersion, + ) PROTOCOL_SSLv23 = PROTOCOL_TLS -except ImportError: - try: - from ssl import PROTOCOL_SSLv23 as PROTOCOL_TLS - - PROTOCOL_SSLv23 = PROTOCOL_TLS - except ImportError: - PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 + # Setting SSLContext.hostname_checks_common_name = False didn't work before CPython + # 3.9.3, and 3.10 (but OK on PyPy) or OpenSSL 1.1.1l+ + if HAS_NEVER_CHECK_COMMON_NAME and not _is_has_never_check_common_name_reliable( + OPENSSL_VERSION, + OPENSSL_VERSION_NUMBER, + sys.implementation.name, + sys.version_info, + sys.pypy_version_info if sys.implementation.name == "pypy" else None, # type: ignore[attr-defined] + ): + HAS_NEVER_CHECK_COMMON_NAME = False + + # Need to be careful here in case old TLS versions get + # removed in future 'ssl' module implementations. 
+ for attr in ("TLSv1", "TLSv1_1", "TLSv1_2"): + try: + _SSL_VERSION_TO_TLS_VERSION[getattr(ssl, f"PROTOCOL_{attr}")] = getattr( + TLSVersion, attr + ) + except AttributeError: # Defensive: + continue -try: - from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION -except ImportError: - OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 - OP_NO_COMPRESSION = 0x20000 - - -# A secure default. -# Sources for more information on TLS ciphers: -# -# - https://wiki.mozilla.org/Security/Server_Side_TLS -# - https://www.ssllabs.com/projects/best-practices/index.html -# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ -# -# The general intent is: -# - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), -# - prefer ECDHE over DHE for better performance, -# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and -# security, -# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common, -# - disable NULL authentication, MD5 MACs, DSS, and other -# insecure ciphers for security reasons. -# - NOTE: TLS 1.3 cipher suites are managed through a different interface -# not exposed by CPython (yet!) and are enabled by default if they're available. -DEFAULT_CIPHERS = ":".join( - [ - "ECDHE+AESGCM", - "ECDHE+CHACHA20", - "DHE+AESGCM", - "DHE+CHACHA20", - "ECDH+AESGCM", - "DH+AESGCM", - "ECDH+AES", - "DH+AES", - "RSA+AESGCM", - "RSA+AES", - "!aNULL", - "!eNULL", - "!MD5", - "!DSS", - ] -) - -try: - from ssl import SSLContext # Modern SSL? + from .ssltransport import SSLTransport # type: ignore[assignment] except ImportError: + OP_NO_COMPRESSION = 0x20000 # type: ignore[assignment] + OP_NO_TICKET = 0x4000 # type: ignore[assignment] + OP_NO_SSLv2 = 0x1000000 # type: ignore[assignment] + OP_NO_SSLv3 = 0x2000000 # type: ignore[assignment] + PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 # type: ignore[assignment] + PROTOCOL_TLS_CLIENT = 16 # type: ignore[assignment] - class SSLContext(object): # Platform-specific: Python 2 - def __init__(self, protocol_version): - self.protocol = protocol_version - # Use default values from a real SSLContext - self.check_hostname = False - self.verify_mode = ssl.CERT_NONE - self.ca_certs = None - self.options = 0 - self.certfile = None - self.keyfile = None - self.ciphers = None - - def load_cert_chain(self, certfile, keyfile): - self.certfile = certfile - self.keyfile = keyfile - - def load_verify_locations(self, cafile=None, capath=None, cadata=None): - self.ca_certs = cafile - if capath is not None: - raise SSLError("CA directories not supported in older Pythons") +_TYPE_PEER_CERT_RET = typing.Union["_TYPE_PEER_CERT_RET_DICT", bytes, None] - if cadata is not None: - raise SSLError("CA data not supported in older Pythons") - def set_ciphers(self, cipher_suite): - self.ciphers = cipher_suite - - def wrap_socket(self, socket, server_hostname=None, server_side=False): - warnings.warn( - "A true SSLContext object is not available. This prevents " - "urllib3 from configuring SSL appropriately and may cause " - "certain SSL connections to fail. You can upgrade to a newer " - "version of Python to solve this. 
For more information, see " - "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" - "#ssl-warnings", - InsecurePlatformWarning, - ) - kwargs = { - "keyfile": self.keyfile, - "certfile": self.certfile, - "ca_certs": self.ca_certs, - "cert_reqs": self.verify_mode, - "ssl_version": self.protocol, - "server_side": server_side, - } - return wrap_socket(socket, ciphers=self.ciphers, **kwargs) - - -def assert_fingerprint(cert, fingerprint): +def assert_fingerprint(cert: bytes | None, fingerprint: str) -> None: """ Checks if given fingerprint matches the supplied certificate. @@ -163,26 +153,31 @@ def assert_fingerprint(cert, fingerprint): Fingerprint as string of hexdigits, can be interspersed by colons. """ + if cert is None: + raise SSLError("No certificate for the peer.") + fingerprint = fingerprint.replace(":", "").lower() digest_length = len(fingerprint) + if digest_length not in HASHFUNC_MAP: + raise SSLError(f"Fingerprint of invalid length: {fingerprint}") hashfunc = HASHFUNC_MAP.get(digest_length) - if not hashfunc: - raise SSLError("Fingerprint of invalid length: {0}".format(fingerprint)) + if hashfunc is None: + raise SSLError( + f"Hash function implementation unavailable for fingerprint length: {digest_length}" + ) # We need encode() here for py32; works on py2 and p33. fingerprint_bytes = unhexlify(fingerprint.encode()) cert_digest = hashfunc(cert).digest() - if not _const_compare_digest(cert_digest, fingerprint_bytes): + if not hmac.compare_digest(cert_digest, fingerprint_bytes): raise SSLError( - 'Fingerprints did not match. Expected "{0}", got "{1}".'.format( - fingerprint, hexlify(cert_digest) - ) + f'Fingerprints did not match. Expected "{fingerprint}", got "{cert_digest.hex()}"' ) -def resolve_cert_reqs(candidate): +def resolve_cert_reqs(candidate: None | int | str) -> VerifyMode: """ Resolves the argument to a numeric constant, which can be passed to the wrap_socket function/method from the ssl module. @@ -200,12 +195,12 @@ def resolve_cert_reqs(candidate): res = getattr(ssl, candidate, None) if res is None: res = getattr(ssl, "CERT_" + candidate) - return res + return res # type: ignore[no-any-return] - return candidate + return candidate # type: ignore[return-value] -def resolve_ssl_version(candidate): +def resolve_ssl_version(candidate: None | int | str) -> int: """ like resolve_cert_reqs """ @@ -216,50 +211,94 @@ def resolve_ssl_version(candidate): res = getattr(ssl, candidate, None) if res is None: res = getattr(ssl, "PROTOCOL_" + candidate) - return res + return typing.cast(int, res) return candidate def create_urllib3_context( - ssl_version=None, cert_reqs=None, options=None, ciphers=None -): - """All arguments have the same meaning as ``ssl_wrap_socket``. - - By default, this function does a lot of the same work that - ``ssl.create_default_context`` does on Python 3.4+. It: - - - Disables SSLv2, SSLv3, and compression - - Sets a restricted set of server ciphers - - If you wish to enable SSLv3, you can do:: - - from urllib3.util import ssl_ - context = ssl_.create_urllib3_context() - context.options &= ~ssl_.OP_NO_SSLv3 - - You can do the same to enable compression (substituting ``COMPRESSION`` - for ``SSLv3`` in the last line above). + ssl_version: int | None = None, + cert_reqs: int | None = None, + options: int | None = None, + ciphers: str | None = None, + ssl_minimum_version: int | None = None, + ssl_maximum_version: int | None = None, +) -> ssl.SSLContext: + """Creates and configures an :class:`ssl.SSLContext` instance for use with urllib3. 
:param ssl_version: The desired protocol version to use. This will default to PROTOCOL_SSLv23 which will negotiate the highest protocol that both the server and your installation of OpenSSL support.
+
+ This parameter is deprecated, instead use 'ssl_minimum_version'.
+ :param ssl_minimum_version:
+ The minimum version of TLS to be used. Use the 'ssl.TLSVersion' enum for specifying the value.
+ :param ssl_maximum_version:
+ The maximum version of TLS to be used. Use the 'ssl.TLSVersion' enum for specifying the value.
+ Not recommended to set to anything other than 'ssl.TLSVersion.MAXIMUM_SUPPORTED' which is the
+ default value.
 :param cert_reqs: Whether to require the certificate verification. This defaults to ``ssl.CERT_REQUIRED``.
 :param options: Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
- ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
+ ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``, and ``ssl.OP_NO_TICKET``.
 :param ciphers:
- Which cipher suites to allow the server to select.
+ Which cipher suites to allow the server to select. Defaults to either system configured
+ ciphers if OpenSSL 1.1.1+, otherwise uses a secure default set of ciphers.
 :returns: Constructed SSLContext object with specified options
 :rtype: SSLContext
 """
- context = SSLContext(ssl_version or PROTOCOL_TLS)
+ if SSLContext is None:
+ raise TypeError("Can't create an SSLContext object without an ssl module")
+
+ # This means 'ssl_version' was specified as an exact value.
+ if ssl_version not in (None, PROTOCOL_TLS, PROTOCOL_TLS_CLIENT):
+ # Disallow setting 'ssl_version' and 'ssl_minimum|maximum_version'
+ # to avoid conflicts.
+ if ssl_minimum_version is not None or ssl_maximum_version is not None:
+ raise ValueError(
+ "Can't specify both 'ssl_version' and either "
+ "'ssl_minimum_version' or 'ssl_maximum_version'"
+ )
+
+ # 'ssl_version' is deprecated and will be removed in the future.
+ else:
+ # Use 'ssl_minimum_version' and 'ssl_maximum_version' instead.
+ ssl_minimum_version = _SSL_VERSION_TO_TLS_VERSION.get(
+ ssl_version, TLSVersion.MINIMUM_SUPPORTED
+ )
+ ssl_maximum_version = _SSL_VERSION_TO_TLS_VERSION.get(
+ ssl_version, TLSVersion.MAXIMUM_SUPPORTED
+ )
+
+ # This warning message is pushing users to use 'ssl_minimum_version'
+ # instead of both min/max. Best practice is to only set the minimum version and
+ # keep the maximum version at its default value: 'TLSVersion.MAXIMUM_SUPPORTED'
+ warnings.warn(
+ "'ssl_version' option is deprecated and will be "
+ "removed in urllib3 v2.1.0. Instead use 'ssl_minimum_version'",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
- context.set_ciphers(ciphers or DEFAULT_CIPHERS)
+ # PROTOCOL_TLS is deprecated in Python 3.10 so we always use PROTOCOL_TLS_CLIENT
+ context = SSLContext(PROTOCOL_TLS_CLIENT)
+
+ if ssl_minimum_version is not None:
+ context.minimum_version = ssl_minimum_version
+ else: # Python <3.10 defaults to 'MINIMUM_SUPPORTED' so explicitly set TLSv1.2 here
+ context.minimum_version = TLSVersion.TLSv1_2
+
+ if ssl_maximum_version is not None:
+ context.maximum_version = ssl_maximum_version
+
+ # Unless we're given ciphers defer to either system ciphers in
+ # the case of OpenSSL 1.1.1+ or use our own secure default ciphers.
+ if ciphers: + context.set_ciphers(ciphers) # Setting the default here, as we may have no ssl module on import cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs @@ -273,55 +312,101 @@ def create_urllib3_context( # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ # (issue #309) options |= OP_NO_COMPRESSION + # TLSv1.2 only. Unless set explicitly, do not request tickets. + # This may save some bandwidth on wire, and although the ticket is encrypted, + # there is a risk associated with it being on wire, + # if the server is not rotating its ticketing keys properly. + options |= OP_NO_TICKET context.options |= options # Enable post-handshake authentication for TLS 1.3, see GH #1634. PHA is # necessary for conditional client cert authentication with TLS 1.3. - # The attribute is None for OpenSSL <= 1.1.0 or does not exist in older - # versions of Python. We only enable on Python 3.7.4+ or if certificate - # verification is enabled to work around Python issue #37428 - # See: https://bugs.python.org/issue37428 - if (cert_reqs == ssl.CERT_REQUIRED or sys.version_info >= (3, 7, 4)) and getattr( - context, "post_handshake_auth", None - ) is not None: + # The attribute is None for OpenSSL <= 1.1.0 or does not exist when using + # an SSLContext created by pyOpenSSL. + if getattr(context, "post_handshake_auth", None) is not None: context.post_handshake_auth = True - context.verify_mode = cert_reqs - if ( - getattr(context, "check_hostname", None) is not None - ): # Platform-specific: Python 3.2 - # We do our own verification, including fingerprints and alternative - # hostnames. So disable it here + # The order of the below lines setting verify_mode and check_hostname + # matter due to safe-guards SSLContext has to prevent an SSLContext with + # check_hostname=True, verify_mode=NONE/OPTIONAL. + # We always set 'check_hostname=False' for pyOpenSSL so we rely on our own + # 'ssl.match_hostname()' implementation. + if cert_reqs == ssl.CERT_REQUIRED and not IS_PYOPENSSL: + context.verify_mode = cert_reqs + context.check_hostname = True + else: context.check_hostname = False + context.verify_mode = cert_reqs + + try: + context.hostname_checks_common_name = False + except AttributeError: # Defensive: for CPython < 3.9.3; for PyPy < 7.3.8 + pass - # Enable logging of TLS session keys via defacto standard environment variable - # 'SSLKEYLOGFILE', if the feature is available (Python 3.8+). Skip empty values. - if hasattr(context, "keylog_filename"): - sslkeylogfile = os.environ.get("SSLKEYLOGFILE") - if sslkeylogfile: - context.keylog_filename = sslkeylogfile + sslkeylogfile = os.environ.get("SSLKEYLOGFILE") + if sslkeylogfile: + context.keylog_filename = sslkeylogfile return context +@typing.overload +def ssl_wrap_socket( + sock: socket.socket, + keyfile: str | None = ..., + certfile: str | None = ..., + cert_reqs: int | None = ..., + ca_certs: str | None = ..., + server_hostname: str | None = ..., + ssl_version: int | None = ..., + ciphers: str | None = ..., + ssl_context: ssl.SSLContext | None = ..., + ca_cert_dir: str | None = ..., + key_password: str | None = ..., + ca_cert_data: None | str | bytes = ..., + tls_in_tls: typing.Literal[False] = ..., +) -> ssl.SSLSocket: ... 
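# [editorial note -- illustration only, not part of this patch]
# The @typing.overload pair here uses a Literal[False] default so type
# checkers can narrow the return type: omitting tls_in_tls (or passing False)
# yields a plain ssl.SSLSocket, while an arbitrary bool yields the union with
# SSLTransport. A minimal sketch of the same pattern, with made-up names:
#
#     from __future__ import annotations
#     import typing
#
#     @typing.overload
#     def wrap(tls_in_tls: typing.Literal[False] = ...) -> bytes: ...
#     @typing.overload
#     def wrap(tls_in_tls: bool = ...) -> bytes | str: ...
#     def wrap(tls_in_tls: bool = False) -> bytes | str:
#         # bytes stands in for the SSLSocket case, str for SSLTransport
#         return "tunnelled" if tls_in_tls else b"direct"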
+ + +@typing.overload def ssl_wrap_socket( - sock, - keyfile=None, - certfile=None, - cert_reqs=None, - ca_certs=None, - server_hostname=None, - ssl_version=None, - ciphers=None, - ssl_context=None, - ca_cert_dir=None, - key_password=None, - ca_cert_data=None, -): + sock: socket.socket, + keyfile: str | None = ..., + certfile: str | None = ..., + cert_reqs: int | None = ..., + ca_certs: str | None = ..., + server_hostname: str | None = ..., + ssl_version: int | None = ..., + ciphers: str | None = ..., + ssl_context: ssl.SSLContext | None = ..., + ca_cert_dir: str | None = ..., + key_password: str | None = ..., + ca_cert_data: None | str | bytes = ..., + tls_in_tls: bool = ..., +) -> ssl.SSLSocket | SSLTransportType: ... + + +def ssl_wrap_socket( + sock: socket.socket, + keyfile: str | None = None, + certfile: str | None = None, + cert_reqs: int | None = None, + ca_certs: str | None = None, + server_hostname: str | None = None, + ssl_version: int | None = None, + ciphers: str | None = None, + ssl_context: ssl.SSLContext | None = None, + ca_cert_dir: str | None = None, + key_password: str | None = None, + ca_cert_data: None | str | bytes = None, + tls_in_tls: bool = False, +) -> ssl.SSLSocket | SSLTransportType: """ - All arguments except for server_hostname, ssl_context, and ca_cert_dir have - the same meaning as they do when using :func:`ssl.wrap_socket`. + All arguments except for server_hostname, ssl_context, tls_in_tls, ca_cert_data and + ca_cert_dir have the same meaning as they do when using + :func:`ssl.create_default_context`, :meth:`ssl.SSLContext.load_cert_chain`, + :meth:`ssl.SSLContext.set_ciphers` and :meth:`ssl.SSLContext.wrap_socket`. :param server_hostname: When SNI is supported, the expected hostname of the certificate @@ -339,28 +424,23 @@ def ssl_wrap_socket( :param ca_cert_data: Optional string containing CA certificates in PEM format suitable for passing as the cadata parameter to SSLContext.load_verify_locations() + :param tls_in_tls: + Use SSLTransport to wrap the existing socket. """ context = ssl_context if context is None: - # Note: This branch of code and all the variables in it are no longer - # used by urllib3 itself. We should consider deprecating and removing - # this code. + # Note: This branch of code and all the variables in it are only used in tests. + # We should consider deprecating and removing this code. context = create_urllib3_context(ssl_version, cert_reqs, ciphers=ciphers) if ca_certs or ca_cert_dir or ca_cert_data: try: context.load_verify_locations(ca_certs, ca_cert_dir, ca_cert_data) - except IOError as e: # Platform-specific: Python 2.7 - raise SSLError(e) - # Py33 raises FileNotFoundError which subclasses OSError - # These are not equivalent unless we check the errno attribute - except OSError as e: # Platform-specific: Python 3.3 and beyond - if e.errno == errno.ENOENT: - raise SSLError(e) - raise + except OSError as e: + raise SSLError(e) from e elif ssl_context is None and hasattr(context, "load_default_certs"): - # try to load OS default certs; works well on Windows (require Python3.4+) + # try to load OS default certs; works well on Windows. 
context.load_default_certs() # Attempt to detect if we get the goofy behavior of the @@ -375,49 +455,50 @@ def ssl_wrap_socket( else: context.load_cert_chain(certfile, keyfile, key_password) - # If we detect server_hostname is an IP address then the SNI - # extension should not be used according to RFC3546 Section 3.1 - # We shouldn't warn the user if SNI isn't available but we would - # not be using SNI anyways due to IP address for server_hostname. - if ( - server_hostname is not None and not is_ipaddress(server_hostname) - ) or IS_SECURETRANSPORT: - if HAS_SNI and server_hostname is not None: - return context.wrap_socket(sock, server_hostname=server_hostname) - - warnings.warn( - "An HTTPS request has been made, but the SNI (Server Name " - "Indication) extension to TLS is not available on this platform. " - "This may cause the server to present an incorrect TLS " - "certificate, which can cause validation failures. You can upgrade to " - "a newer version of Python to solve this. For more information, see " - "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" - "#ssl-warnings", - SNIMissingWarning, - ) + context.set_alpn_protocols(ALPN_PROTOCOLS) - return context.wrap_socket(sock) + ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname) + return ssl_sock -def is_ipaddress(hostname): +def is_ipaddress(hostname: str | bytes) -> bool: """Detects whether the hostname given is an IPv4 or IPv6 address. Also detects IPv6 addresses with Zone IDs. :param str hostname: Hostname to examine. :return: True if the hostname is an IP address, False otherwise. """ - if not six.PY2 and isinstance(hostname, bytes): + if isinstance(hostname, bytes): # IDN A-label bytes are ASCII compatible. hostname = hostname.decode("ascii") - return bool(IPV4_RE.match(hostname) or BRACELESS_IPV6_ADDRZ_RE.match(hostname)) + return bool(_IPV4_RE.match(hostname) or _BRACELESS_IPV6_ADDRZ_RE.match(hostname)) -def _is_key_file_encrypted(key_file): +def _is_key_file_encrypted(key_file: str) -> bool: """Detects if a key file is encrypted or not.""" - with open(key_file, "r") as f: + with open(key_file) as f: for line in f: # Look for Proc-Type: 4,ENCRYPTED if "ENCRYPTED" in line: return True return False + + +def _ssl_wrap_socket_impl( + sock: socket.socket, + ssl_context: ssl.SSLContext, + tls_in_tls: bool, + server_hostname: str | None = None, +) -> ssl.SSLSocket | SSLTransportType: + if tls_in_tls: + if not SSLTransport: + # Import error, ssl is not available. + raise ProxySchemeUnsupported( + "TLS in TLS requires support for the 'ssl' module" + ) + + SSLTransport._validate_ssl_context_for_tls_in_tls(ssl_context) + return SSLTransport(sock, ssl_context, server_hostname) + + return ssl_context.wrap_socket(sock, server_hostname=server_hostname) diff --git a/thirdparty/urllib3/util/ssl_match_hostname.py b/thirdparty/urllib3/util/ssl_match_hostname.py new file mode 100644 index 0000000..453cfd4 --- /dev/null +++ b/thirdparty/urllib3/util/ssl_match_hostname.py @@ -0,0 +1,159 @@ +"""The match_hostname() function from Python 3.5, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html +# It is modified to remove commonName support. 
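# [editorial note -- usage sketch for the ssl_.py changes above, not part of
# this patch. The import path assumes this vendored tree is importable as
# 'thirdparty.urllib3'; adjust to match the actual layout.]
#
#     import socket
#     from thirdparty.urllib3.util.ssl_ import (
#         create_urllib3_context,
#         ssl_wrap_socket,
#     )
#
#     ctx = create_urllib3_context()  # TLSv1.2+ and CERT_REQUIRED by default
#     ctx.load_default_certs()        # trust the OS store for this sketch
#     raw = socket.create_connection(("example.com", 443))
#     tls = ssl_wrap_socket(raw, server_hostname="example.com", ssl_context=ctx)
#     print(tls.version())            # e.g. 'TLSv1.3'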
+ +from __future__ import annotations + +import ipaddress +import re +import typing +from ipaddress import IPv4Address, IPv6Address + +if typing.TYPE_CHECKING: + from .ssl_ import _TYPE_PEER_CERT_RET_DICT + +__version__ = "3.5.0.1" + + +class CertificateError(ValueError): + pass + + +def _dnsname_match( + dn: typing.Any, hostname: str, max_wildcards: int = 1 +) -> typing.Match[str] | None | bool: + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r".") + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count("*") + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn) + ) + + # speed up common case w/o wildcards + if not wildcards: + return bool(dn.lower() == hostname.lower()) + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == "*": + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append("[^.]+") + elif leftmost.startswith("xn--") or hostname.startswith("xn--"): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r"\*", "[^.]*")) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE) + return pat.match(hostname) + + +def _ipaddress_match(ipname: str, host_ip: IPv4Address | IPv6Address) -> bool: + """Exact matching of IP addresses. + + RFC 9110 section 4.3.5: "A reference identity of IP-ID contains the decoded + bytes of the IP address. An IP version 4 address is 4 octets, and an IP + version 6 address is 16 octets. [...] A reference identity of type IP-ID + matches if the address is identical to an iPAddress value of the + subjectAltName extension of the certificate." + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(ipname.rstrip()) + return bool(ip.packed == host_ip.packed) + + +def match_hostname( + cert: _TYPE_PEER_CERT_RET_DICT | None, + hostname: str, + hostname_checks_common_name: bool = False, +) -> None: + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. 
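    A hedged, editorial illustration (not part of this patch), using a cert
    dict of the shape returned by ``SSLSocket.getpeercert()``:

    .. code-block:: python

        cert = {
            "subjectAltName": (("DNS", "*.example.com"), ("DNS", "example.com")),
        }
        match_hostname(cert, "www.example.com")   # returns None: wildcard hit
        match_hostname(cert, "a.b.example.com")   # raises CertificateError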
+ """ + if not cert: + raise ValueError( + "empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED" + ) + try: + # Divergence from upstream: ipaddress can't handle byte str + # + # The ipaddress module shipped with Python < 3.9 does not support + # scoped IPv6 addresses so we unconditionally strip the Zone IDs for + # now. Once we drop support for Python 3.9 we can remove this branch. + if "%" in hostname: + host_ip = ipaddress.ip_address(hostname[: hostname.rfind("%")]) + else: + host_ip = ipaddress.ip_address(hostname) + + except ValueError: + # Not an IP address (common case) + host_ip = None + dnsnames = [] + san: tuple[tuple[str, str], ...] = cert.get("subjectAltName", ()) + key: str + value: str + for key, value in san: + if key == "DNS": + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == "IP Address": + if host_ip is not None and _ipaddress_match(value, host_ip): + return + dnsnames.append(value) + + # We only check 'commonName' if it's enabled and we're not verifying + # an IP address. IP addresses aren't valid within 'commonName'. + if hostname_checks_common_name and host_ip is None and not dnsnames: + for sub in cert.get("subject", ()): + for key, value in sub: + if key == "commonName": + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + + if len(dnsnames) > 1: + raise CertificateError( + "hostname %r " + "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames))) + ) + elif len(dnsnames) == 1: + raise CertificateError(f"hostname {hostname!r} doesn't match {dnsnames[0]!r}") + else: + raise CertificateError("no appropriate subjectAltName fields were found") diff --git a/thirdparty/urllib3/util/ssltransport.py b/thirdparty/urllib3/util/ssltransport.py new file mode 100644 index 0000000..6d59bc3 --- /dev/null +++ b/thirdparty/urllib3/util/ssltransport.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import io +import socket +import ssl +import typing + +from ..exceptions import ProxySchemeUnsupported + +if typing.TYPE_CHECKING: + from typing_extensions import Self + + from .ssl_ import _TYPE_PEER_CERT_RET, _TYPE_PEER_CERT_RET_DICT + + +_WriteBuffer = typing.Union[bytearray, memoryview] +_ReturnValue = typing.TypeVar("_ReturnValue") + +SSL_BLOCKSIZE = 16384 + + +class SSLTransport: + """ + The SSLTransport wraps an existing socket and establishes an SSL connection. + + Contrary to Python's implementation of SSLSocket, it allows you to chain + multiple TLS connections together. It's particularly useful if you need to + implement TLS within TLS. + + The class supports most of the socket API operations. + """ + + @staticmethod + def _validate_ssl_context_for_tls_in_tls(ssl_context: ssl.SSLContext) -> None: + """ + Raises a ProxySchemeUnsupported if the provided ssl_context can't be used + for TLS in TLS. + + The only requirement is that the ssl_context provides the 'wrap_bio' + methods. + """ + + if not hasattr(ssl_context, "wrap_bio"): + raise ProxySchemeUnsupported( + "TLS in TLS requires SSLContext.wrap_bio() which isn't " + "available on non-native SSLContext" + ) + + def __init__( + self, + socket: socket.socket, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + suppress_ragged_eofs: bool = True, + ) -> None: + """ + Create an SSLTransport around socket using the provided ssl_context. 
+ """ + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, self.outgoing, server_hostname=server_hostname + ) + + # Perform initial handshake. + self._ssl_io_loop(self.sslobj.do_handshake) + + def __enter__(self) -> Self: + return self + + def __exit__(self, *_: typing.Any) -> None: + self.close() + + def fileno(self) -> int: + return self.socket.fileno() + + def read(self, len: int = 1024, buffer: typing.Any | None = None) -> int | bytes: + return self._wrap_ssl_read(len, buffer) + + def recv(self, buflen: int = 1024, flags: int = 0) -> int | bytes: + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv") + return self._wrap_ssl_read(buflen) + + def recv_into( + self, + buffer: _WriteBuffer, + nbytes: int | None = None, + flags: int = 0, + ) -> None | int | bytes: + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv_into") + if nbytes is None: + nbytes = len(buffer) + return self.read(nbytes, buffer) + + def sendall(self, data: bytes, flags: int = 0) -> None: + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to sendall") + count = 0 + with memoryview(data) as view, view.cast("B") as byte_view: + amount = len(byte_view) + while count < amount: + v = self.send(byte_view[count:]) + count += v + + def send(self, data: bytes, flags: int = 0) -> int: + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to send") + return self._ssl_io_loop(self.sslobj.write, data) + + def makefile( + self, + mode: str, + buffering: int | None = None, + *, + encoding: str | None = None, + errors: str | None = None, + newline: str | None = None, + ) -> typing.BinaryIO | typing.TextIO | socket.SocketIO: + """ + Python's httpclient uses makefile and buffered io when reading HTTP + messages and we need to support it. + + This is unfortunately a copy and paste of socket.py makefile with small + changes to point to the socket directly. + """ + if not set(mode) <= {"r", "w", "b"}: + raise ValueError(f"invalid mode {mode!r} (only r, w, b allowed)") + + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = socket.SocketIO(self, rawmode) # type: ignore[arg-type] + self.socket._io_refs += 1 # type: ignore[attr-defined] + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + buffer: typing.BinaryIO + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) # type: ignore[assignment] + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode # type: ignore[misc] + return text + + def unwrap(self) -> None: + self._ssl_io_loop(self.sslobj.unwrap) + + def close(self) -> None: + self.socket.close() + + @typing.overload + def getpeercert( + self, binary_form: typing.Literal[False] = ... + ) -> _TYPE_PEER_CERT_RET_DICT | None: ... + + @typing.overload + def getpeercert(self, binary_form: typing.Literal[True]) -> bytes | None: ... 
+ + def getpeercert(self, binary_form: bool = False) -> _TYPE_PEER_CERT_RET: + return self.sslobj.getpeercert(binary_form) # type: ignore[return-value] + + def version(self) -> str | None: + return self.sslobj.version() + + def cipher(self) -> tuple[str, str, int] | None: + return self.sslobj.cipher() + + def selected_alpn_protocol(self) -> str | None: + return self.sslobj.selected_alpn_protocol() + + def shared_ciphers(self) -> list[tuple[str, str, int]] | None: + return self.sslobj.shared_ciphers() + + def compression(self) -> str | None: + return self.sslobj.compression() + + def settimeout(self, value: float | None) -> None: + self.socket.settimeout(value) + + def gettimeout(self) -> float | None: + return self.socket.gettimeout() + + def _decref_socketios(self) -> None: + self.socket._decref_socketios() # type: ignore[attr-defined] + + def _wrap_ssl_read(self, len: int, buffer: bytearray | None = None) -> int | bytes: + try: + return self._ssl_io_loop(self.sslobj.read, len, buffer) + except ssl.SSLError as e: + if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs: + return 0 # eof, return 0. + else: + raise + + # func is sslobj.do_handshake or sslobj.unwrap + @typing.overload + def _ssl_io_loop(self, func: typing.Callable[[], None]) -> None: ... + + # func is sslobj.write, arg1 is data + @typing.overload + def _ssl_io_loop(self, func: typing.Callable[[bytes], int], arg1: bytes) -> int: ... + + # func is sslobj.read, arg1 is len, arg2 is buffer + @typing.overload + def _ssl_io_loop( + self, + func: typing.Callable[[int, bytearray | None], bytes], + arg1: int, + arg2: bytearray | None, + ) -> bytes: ... + + def _ssl_io_loop( + self, + func: typing.Callable[..., _ReturnValue], + arg1: None | bytes | int = None, + arg2: bytearray | None = None, + ) -> _ReturnValue: + """Performs an I/O loop between incoming/outgoing and the socket.""" + should_loop = True + ret = None + + while should_loop: + errno = None + try: + if arg1 is None and arg2 is None: + ret = func() + elif arg2 is None: + ret = func(arg1) + else: + ret = func(arg1, arg2) + except ssl.SSLError as e: + if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE): + # WANT_READ, and WANT_WRITE are expected, others are not. + raise e + errno = e.errno + + buf = self.outgoing.read() + self.socket.sendall(buf) + + if errno is None: + should_loop = False + elif errno == ssl.SSL_ERROR_WANT_READ: + buf = self.socket.recv(SSL_BLOCKSIZE) + if buf: + self.incoming.write(buf) + else: + self.incoming.write_eof() + return typing.cast(_ReturnValue, ret) diff --git a/thirdparty/urllib3/util/timeout.py b/thirdparty/urllib3/util/timeout.py index d7201f8..4bb1be1 100644 --- a/thirdparty/urllib3/util/timeout.py +++ b/thirdparty/urllib3/util/timeout.py @@ -1,38 +1,56 @@ -from __future__ import absolute_import +from __future__ import annotations -# The default socket timeout, used by httplib to indicate that no timeout was -# specified by the user -from socket import _GLOBAL_DEFAULT_TIMEOUT import time +import typing +from enum import Enum +from socket import getdefaulttimeout from ..exceptions import TimeoutStateError -# A sentinel value to indicate that no timeout was specified by the user in -# urllib3 -_Default = object() +if typing.TYPE_CHECKING: + from typing import Final -# Use time.monotonic if available. -current_time = getattr(time, "monotonic", time.time) +class _TYPE_DEFAULT(Enum): + # This value should never be passed to socket.settimeout() so for safety we use a -1. 
+ # socket.settimeout() raises a ValueError for negative values.
+ token = -1

-class Timeout(object):
+_DEFAULT_TIMEOUT: Final[_TYPE_DEFAULT] = _TYPE_DEFAULT.token
+
+_TYPE_TIMEOUT = typing.Optional[typing.Union[float, _TYPE_DEFAULT]]
+
+
+class Timeout:
 """Timeout configuration.
- Timeouts can be defined as a default for a pool::
+ Timeouts can be defined as a default for a pool:
+
+ .. code-block:: python
+
+ import urllib3
+
+ timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
+
+ http = urllib3.PoolManager(timeout=timeout)
+
+ resp = http.request("GET", "https://example.com/")
+
+ print(resp.status)
- timeout = Timeout(connect=2.0, read=7.0)
- http = PoolManager(timeout=timeout)
- response = http.request('GET', 'http://example.com/')
+ Or per-request (which overrides the default for the pool):
- Or per-request (which overrides the default for the pool)::
+ .. code-block:: python
- response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+ response = http.request("GET", "https://example.com/", timeout=Timeout(10))
- Timeouts can be disabled by setting all the parameters to ``None``::
+ Timeouts can be disabled by setting all the parameters to ``None``:
- no_timeout = Timeout(connect=None, read=None)
- response = http.request('GET', 'http://example.com/, timeout=no_timeout)
+ .. code-block:: python
+
+ no_timeout = Timeout(connect=None, read=None)
+ response = http.request("GET", "https://example.com/", timeout=no_timeout)
 :param total: @@ -43,7 +61,7 @@ class Timeout(object): Defaults to None.
- :type total: integer, float, or None
+ :type total: int, float, or None
 :param connect: The maximum amount of time (in seconds) to wait for a connection @@ -53,7 +71,7 @@ class Timeout(object): `_. None will set an infinite timeout for connection attempts.
- :type connect: integer, float, or None
+ :type connect: int, float, or None
 :param read: The maximum amount of time (in seconds) to wait between consecutive @@ -63,7 +81,7 @@ class Timeout(object): `_. None will set an infinite timeout.
- :type read: integer, float, or None
+ :type read: int, float, or None
 .. note:: @@ -83,34 +101,34 @@ class Timeout(object): the case; if a server streams one byte every fifteen seconds, a timeout of 20 seconds will not trigger, even though the request will take several minutes to complete.
-
- If your goal is to cut off any request after a set amount of wall clock
- time, consider having a second "watcher" thread to cut off a slow
- request.
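    An editorial illustration (not part of this patch) of how ``total``
    interacts with ``connect`` and ``read`` through the methods below:

    .. code-block:: python

        import urllib3

        t = urllib3.util.Timeout(total=5.0, connect=2.0, read=4.0).clone()
        t.start_connect()
        # ... connect here; afterwards the effective read timeout is
        # max(0, min(total - elapsed_connect_time, read))
        print(t.read_timeout)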
""" #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): + DEFAULT_TIMEOUT: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT + + def __init__( + self, + total: _TYPE_TIMEOUT = None, + connect: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + read: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + ) -> None: self._connect = self._validate_timeout(connect, "connect") self._read = self._validate_timeout(read, "read") self.total = self._validate_timeout(total, "total") - self._start_connect = None + self._start_connect: float | None = None - def __repr__(self): - return "%s(connect=%r, read=%r, total=%r)" % ( - type(self).__name__, - self._connect, - self._read, - self.total, - ) + def __repr__(self) -> str: + return f"{type(self).__name__}(connect={self._connect!r}, read={self._read!r}, total={self.total!r})" # __str__ provided for backwards compatibility __str__ = __repr__ + @staticmethod + def resolve_default_timeout(timeout: _TYPE_TIMEOUT) -> float | None: + return getdefaulttimeout() if timeout is _DEFAULT_TIMEOUT else timeout + @classmethod - def _validate_timeout(cls, value, name): + def _validate_timeout(cls, value: _TYPE_TIMEOUT, name: str) -> _TYPE_TIMEOUT: """Check that a timeout attribute is valid. :param value: The timeout value to validate @@ -120,10 +138,7 @@ def _validate_timeout(cls, value, name): :raises ValueError: If it is a numeric value less than or equal to zero, or the type is not an integer, float, or None. """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: + if value is None or value is _DEFAULT_TIMEOUT: return value if isinstance(value, bool): @@ -137,7 +152,7 @@ def _validate_timeout(cls, value, name): raise ValueError( "Timeout value %s was %s, but it must be an " "int, float or None." % (name, value) - ) + ) from None try: if value <= 0: @@ -147,16 +162,15 @@ def _validate_timeout(cls, value, name): "than or equal to 0." % (name, value) ) except TypeError: - # Python 3 raise ValueError( "Timeout value %s was %s, but it must be an " "int, float or None." % (name, value) - ) + ) from None return value @classmethod - def from_float(cls, timeout): + def from_float(cls, timeout: _TYPE_TIMEOUT) -> Timeout: """Create a new Timeout from a legacy timeout value. The timeout value used by httplib.py sets the same timeout on the @@ -165,13 +179,13 @@ def from_float(cls, timeout): passed to this function. :param timeout: The legacy timeout value. - :type timeout: integer, float, sentinel default object, or None + :type timeout: integer, float, :attr:`urllib3.util.Timeout.DEFAULT_TIMEOUT`, or None :return: Timeout object :rtype: :class:`Timeout` """ return Timeout(read=timeout, connect=timeout) - def clone(self): + def clone(self) -> Timeout: """Create a copy of the timeout object Timeout properties are stored per-pool but each request needs a fresh @@ -185,7 +199,7 @@ def clone(self): # detect the user default. 
return Timeout(connect=self._connect, read=self._read, total=self.total) - def start_connect(self): + def start_connect(self) -> float: """Start the timeout clock, used during a connect() attempt :raises urllib3.exceptions.TimeoutStateError: if you attempt @@ -193,10 +207,10 @@ def start_connect(self): """ if self._start_connect is not None: raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() + self._start_connect = time.monotonic() return self._start_connect - def get_connect_duration(self): + def get_connect_duration(self) -> float: """Gets the time elapsed since the call to :meth:`start_connect`. :return: Elapsed time in seconds. @@ -208,10 +222,10 @@ def get_connect_duration(self): raise TimeoutStateError( "Can't get connect duration for timer that has not started." ) - return current_time() - self._start_connect + return time.monotonic() - self._start_connect @property - def connect_timeout(self): + def connect_timeout(self) -> _TYPE_TIMEOUT: """Get the value to use when setting a connection timeout. This will be a positive float or integer, the value None @@ -223,13 +237,13 @@ def connect_timeout(self): if self.total is None: return self._connect - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + if self._connect is None or self._connect is _DEFAULT_TIMEOUT: return self.total - return min(self._connect, self.total) + return min(self._connect, self.total) # type: ignore[type-var] @property - def read_timeout(self): + def read_timeout(self) -> float | None: """Get the value for the read timeout. This assumes some time has elapsed in the connection timeout and @@ -241,21 +255,21 @@ def read_timeout(self): raised. :return: Value to use for the read timeout. - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :rtype: int, float or None :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` has not yet been called on this object. """ if ( self.total is not None - and self.total is not self.DEFAULT_TIMEOUT + and self.total is not _DEFAULT_TIMEOUT and self._read is not None - and self._read is not self.DEFAULT_TIMEOUT + and self._read is not _DEFAULT_TIMEOUT ): # In case the connect timeout has not yet been established. if self._start_connect is None: return self._read return max(0, min(self.total - self.get_connect_duration(), self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + elif self.total is not None and self.total is not _DEFAULT_TIMEOUT: return max(0, self.total - self.get_connect_duration()) else: - return self._read + return self.resolve_default_timeout(self._read) diff --git a/thirdparty/urllib3/util/url.py b/thirdparty/urllib3/util/url.py index 57d701b..db057f1 100644 --- a/thirdparty/urllib3/util/url.py +++ b/thirdparty/urllib3/util/url.py @@ -1,22 +1,20 @@ -from __future__ import absolute_import +from __future__ import annotations + import re -from collections import namedtuple +import typing from ..exceptions import LocationParseError -from ..packages import six - - -url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"] +from .util import to_str # We only want to normalize urls with an HTTP(S) scheme. # urllib3 infers URLs without a scheme (None) to be http. 
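# [editorial note -- illustration only, not part of this patch. Scheme
# inference as implemented by parse_url() further down in this file; the
# import path assumes the vendored tree is importable as 'thirdparty.urllib3'.]
#
#     from thirdparty.urllib3.util.url import parse_url
#
#     print(parse_url("example.com:80"))
#     # Url(scheme=None, host='example.com', port=80, ...) -- http downstream
#     print(parse_url("https://example.com/a/../b?q#f").url)
#     # 'https://example.com/b?q#f' -- dot segments normalized per RFC 3986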
-NORMALIZABLE_SCHEMES = ("http", "https", None) +_NORMALIZABLE_SCHEMES = ("http", "https", None) # Almost all of these patterns were derived from the # 'rfc3986' module: https://github.com/python-hyper/rfc3986 -PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}") -SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)") -URI_RE = re.compile( +_PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}") +_SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)") +_URI_RE = re.compile( r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" @@ -25,10 +23,10 @@ re.UNICODE | re.DOTALL, ) -IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}" -HEX_PAT = "[0-9A-Fa-f]{1,4}" -LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT) -_subs = {"hex": HEX_PAT, "ls32": LS32_PAT} +_IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}" +_HEX_PAT = "[0-9A-Fa-f]{1,4}" +_LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT) +_subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT} _variations = [ # 6( h16 ":" ) ls32 "(?:%(hex)s:){6}%(ls32)s", @@ -50,69 +48,78 @@ "(?:(?:%(hex)s:){0,6}%(hex)s)?::", ] -UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~" -IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")" -ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+" -IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]" -REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*" -TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$") - -IPV4_RE = re.compile("^" + IPV4_PAT + "$") -IPV6_RE = re.compile("^" + IPV6_PAT + "$") -IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$") -BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$") -ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$") - -SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % ( - REG_NAME_PAT, - IPV4_PAT, - IPV6_ADDRZ_PAT, +_UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~" +_IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")" +_ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+" +_IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]" +_REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*" +_TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$") + +_IPV4_RE = re.compile("^" + _IPV4_PAT + "$") +_IPV6_RE = re.compile("^" + _IPV6_PAT + "$") +_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$") +_BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$") +_ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$") + +_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % ( + _REG_NAME_PAT, + _IPV4_PAT, + _IPV6_ADDRZ_PAT, ) -SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL) +_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL) -UNRESERVED_CHARS = set( +_UNRESERVED_CHARS = set( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~" ) -SUB_DELIM_CHARS = set("!$&'()*+,;=") -USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"} -PATH_CHARS = USERINFO_CHARS | {"@", "/"} -QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"} - - -class Url(namedtuple("Url", url_attrs)): +_SUB_DELIM_CHARS = set("!$&'()*+,;=") +_USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"} +_PATH_CHARS = _USERINFO_CHARS | {"@", "/"} +_QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"} + + +class Url( + typing.NamedTuple( + "Url", + [ + ("scheme", typing.Optional[str]), + ("auth", 
typing.Optional[str]), + ("host", typing.Optional[str]), + ("port", typing.Optional[int]), + ("path", typing.Optional[str]), + ("query", typing.Optional[str]), + ("fragment", typing.Optional[str]), + ], + ) +): """ Data structure for representing an HTTP URL. Used as a return value for :func:`parse_url`. Both the scheme and host are normalized as they are both case-insensitive according to RFC 3986. """ - __slots__ = () - - def __new__( + def __new__( # type: ignore[no-untyped-def] cls, - scheme=None, - auth=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, + scheme: str | None = None, + auth: str | None = None, + host: str | None = None, + port: int | None = None, + path: str | None = None, + query: str | None = None, + fragment: str | None = None, ): if path and not path.startswith("/"): path = "/" + path if scheme is not None: scheme = scheme.lower() - return super(Url, cls).__new__( - cls, scheme, auth, host, port, path, query, fragment - ) + return super().__new__(cls, scheme, auth, host, port, path, query, fragment) @property - def hostname(self): + def hostname(self) -> str | None: """For backwards-compatibility with urlparse. We're nice like that.""" return self.host @property - def request_uri(self): + def request_uri(self) -> str: """Absolute path including the query string.""" uri = self.path or "/" @@ -122,14 +129,37 @@ def request_uri(self): return uri @property - def netloc(self): - """Network location including host and port""" + def authority(self) -> str | None: + """ + Authority component as defined in RFC 3986 3.2. + This includes userinfo (auth), host and port. + + i.e. + userinfo@host:port + """ + userinfo = self.auth + netloc = self.netloc + if netloc is None or userinfo is None: + return netloc + else: + return f"{userinfo}@{netloc}" + + @property + def netloc(self) -> str | None: + """ + Network location including host and port. + + If you need the equivalent of urllib.parse's ``netloc``, + use the ``authority`` property instead. + """ + if self.host is None: + return None if self.port: - return "%s:%d" % (self.host, self.port) + return f"{self.host}:{self.port}" return self.host @property - def url(self): + def url(self) -> str: """ Convert self into a url @@ -138,88 +168,77 @@ def url(self): :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls with a blank port will have : removed). - Example: :: + Example: + + .. code-block:: python + + import urllib3 + + U = urllib3.util.parse_url("https://google.com/mail/") + + print(U.url) + # "https://google.com/mail/" - >>> U = parse_url('http://google.com/mail/') - >>> U.url - 'http://google.com/mail/' - >>> Url('http', 'username:password', 'host.com', 80, - ... '/path', 'query', 'fragment').url - 'http://username:password@host.com:80/path?query#fragment' + print( urllib3.util.Url("https", "username:password", + "host.com", 80, "/path", "query", "fragment" + ).url + ) + # "https://username:password@host.com:80/path?query#fragment" """ scheme, auth, host, port, path, query, fragment = self - url = u"" + url = "" # We use "is not None" we want things to happen with empty strings (or 0 port) if scheme is not None: - url += scheme + u"://" + url += scheme + "://" if auth is not None: - url += auth + u"@" + url += auth + "@" if host is not None: url += host if port is not None: - url += u":" + str(port) + url += ":" + str(port) if path is not None: url += path if query is not None: - url += u"?" + query + url += "?" 
+ query if fragment is not None: - url += u"#" + fragment + url += "#" + fragment return url - def __str__(self): + def __str__(self) -> str: return self.url -def split_first(s, delims): - """ - .. deprecated:: 1.25 - - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example:: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) +@typing.overload +def _encode_invalid_chars( + component: str, allowed_chars: typing.Container[str] +) -> str: # Abstract + ... - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, "", None - return s[:min_idx], s[min_idx + 1 :], min_delim +@typing.overload +def _encode_invalid_chars( + component: None, allowed_chars: typing.Container[str] +) -> None: # Abstract + ... -def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): +def _encode_invalid_chars( + component: str | None, allowed_chars: typing.Container[str] +) -> str | None: """Percent-encodes a URI component without reapplying onto an already percent-encoded component. """ if component is None: return component - component = six.ensure_text(component) + component = to_str(component) # Normalize existing percent-encoded bytes. # Try to see if the component we're encoding is already percent-encoded # so we can skip all '%' characters but still encode all others. - component, percent_encodings = PERCENT_RE.subn( + component, percent_encodings = _PERCENT_RE.subn( lambda match: match.group(0).upper(), component ) @@ -228,7 +247,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): encoded_component = bytearray() for i in range(0, len(uri_bytes)): - # Will return a single character bytestring on both Python 2 & 3 + # Will return a single character bytestring byte = uri_bytes[i : i + 1] byte_ord = ord(byte) if (is_percent_encoded and byte == b"%") or ( @@ -238,10 +257,10 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): continue encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper())) - return encoded_component.decode(encoding) + return encoded_component.decode() -def _remove_path_dot_segments(path): +def _remove_path_dot_segments(path: str) -> str: # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code segments = path.split("/") # Turn the path into a list of segments output = [] # Initialize the variable to use to store output @@ -251,7 +270,7 @@ def _remove_path_dot_segments(path): if segment == ".": continue # Anything other than '..', should be appended to the output - elif segment != "..": + if segment != "..": output.append(segment) # In this case segment == '..', if we can, we should pop the last # element @@ -271,15 +290,23 @@ def _remove_path_dot_segments(path): return "/".join(output) -def _normalize_host(host, scheme): - if host: - if isinstance(host, six.binary_type): - host = six.ensure_str(host) +@typing.overload +def _normalize_host(host: None, scheme: str | None) -> None: ... + + +@typing.overload +def _normalize_host(host: str, scheme: str | None) -> str: ... 
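# [editorial note -- illustration only, not part of this patch]
# Behaviour of the private helper _remove_path_dot_segments() defined above,
# per the RFC 3986 section 5.2.4 algorithm:
#
#     assert _remove_path_dot_segments("/a/b/../c/./d") == "/a/c/d"
#     assert _remove_path_dot_segments("/..") == "/"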
- if scheme in NORMALIZABLE_SCHEMES: - is_ipv6 = IPV6_ADDRZ_RE.match(host) + +def _normalize_host(host: str | None, scheme: str | None) -> str | None: + if host: + if scheme in _NORMALIZABLE_SCHEMES: + is_ipv6 = _IPV6_ADDRZ_RE.match(host) if is_ipv6: - match = ZONE_ID_RE.search(host) + # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as + # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID + # separator as necessary to return a valid RFC 4007 scoped IP. + match = _ZONE_ID_RE.search(host) if match: start, end = match.span(1) zone_id = host[start:end] @@ -288,108 +315,138 @@ def _normalize_host(host, scheme): zone_id = zone_id[3:] else: zone_id = zone_id[1:] - zone_id = "%" + _encode_invalid_chars(zone_id, UNRESERVED_CHARS) - return host[:start].lower() + zone_id + host[end:] + zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS) + return f"{host[:start].lower()}%{zone_id}{host[end:]}" else: return host.lower() - elif not IPV4_RE.match(host): - return six.ensure_str( - b".".join([_idna_encode(label) for label in host.split(".")]) + elif not _IPV4_RE.match(host): + return to_str( + b".".join([_idna_encode(label) for label in host.split(".")]), + "ascii", ) return host -def _idna_encode(name): - if name and any([ord(x) > 128 for x in name]): +def _idna_encode(name: str) -> bytes: + if not name.isascii(): try: - import thirdparty.idna as idna + import idna except ImportError: - six.raise_from( - LocationParseError("Unable to parse URL without the 'idna' module"), - None, - ) + raise LocationParseError( + "Unable to parse URL without the 'idna' module" + ) from None + try: return idna.encode(name.lower(), strict=True, std3_rules=True) except idna.IDNAError: - six.raise_from( - LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None - ) + raise LocationParseError( + f"Name '{name}' is not a valid IDNA label" + ) from None + return name.lower().encode("ascii") -def _encode_target(target): - """Percent-encodes a request target so that there are no invalid characters""" - path, query = TARGET_RE.match(target).groups() - target = _encode_invalid_chars(path, PATH_CHARS) - query = _encode_invalid_chars(query, QUERY_CHARS) +def _encode_target(target: str) -> str: + """Percent-encodes a request target so that there are no invalid characters + + Pre-condition for this function is that 'target' must start with '/'. + If that is the case then _TARGET_RE will always produce a match. + """ + match = _TARGET_RE.match(target) + if not match: # Defensive: + raise LocationParseError(f"{target!r} is not a valid request URI") + + path, query = match.groups() + encoded_target = _encode_invalid_chars(path, _PATH_CHARS) if query is not None: - target += "?" + query - return target + query = _encode_invalid_chars(query, _QUERY_CHARS) + encoded_target += "?" + query + return encoded_target -def parse_url(url): +def parse_url(url: str) -> Url: """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is performed to parse incomplete urls. Fields not provided will be None. - This parser is RFC 3986 compliant. + This parser is RFC 3986 and RFC 6874 compliant. The parser logic and helper functions are based heavily on work done in the ``rfc3986`` module. :param str url: URL to parse into a :class:`.Url` namedtuple. - Partly backwards-compatible with :mod:`urlparse`. + Partly backwards-compatible with :mod:`urllib.parse`. + + Example: + + .. 
code-block:: python + + import urllib3 - Example:: + print( urllib3.util.parse_url('http://google.com/mail/')) + # Url(scheme='http', host='google.com', port=None, path='/mail/', ...) - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/mail/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + print( urllib3.util.parse_url('google.com:80')) + # Url(scheme=None, host='google.com', port=80, path=None, ...) + + print( urllib3.util.parse_url('/foo?bar')) + # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) """ if not url: # Empty return Url() source_url = url - if not SCHEME_RE.search(url): + if not _SCHEME_RE.search(url): url = "//" + url + scheme: str | None + authority: str | None + auth: str | None + host: str | None + port: str | None + port_int: int | None + path: str | None + query: str | None + fragment: str | None + try: - scheme, authority, path, query, fragment = URI_RE.match(url).groups() - normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES + scheme, authority, path, query, fragment = _URI_RE.match(url).groups() # type: ignore[union-attr] + normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES if scheme: scheme = scheme.lower() if authority: - auth, host, port = SUBAUTHORITY_RE.match(authority).groups() + auth, _, host_port = authority.rpartition("@") + auth = auth or None + host, port = _HOST_PORT_RE.match(host_port).groups() # type: ignore[union-attr] if auth and normalize_uri: - auth = _encode_invalid_chars(auth, USERINFO_CHARS) + auth = _encode_invalid_chars(auth, _USERINFO_CHARS) if port == "": port = None else: auth, host, port = None, None, None if port is not None: - port = int(port) - if not (0 <= port <= 65535): + port_int = int(port) + if not (0 <= port_int <= 65535): raise LocationParseError(url) + else: + port_int = None host = _normalize_host(host, scheme) if normalize_uri and path: path = _remove_path_dot_segments(path) - path = _encode_invalid_chars(path, PATH_CHARS) + path = _encode_invalid_chars(path, _PATH_CHARS) if normalize_uri and query: - query = _encode_invalid_chars(query, QUERY_CHARS) + query = _encode_invalid_chars(query, _QUERY_CHARS) if normalize_uri and fragment: - fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS) + fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS) - except (ValueError, AttributeError): - return six.raise_from(LocationParseError(source_url), None) + except (ValueError, AttributeError) as e: + raise LocationParseError(source_url) from e # For the sake of backwards compatibility we put empty # string values for path if there are any defined values @@ -401,30 +458,12 @@ def parse_url(url): else: path = None - # Ensure that each part of the URL is a `str` for - # backwards compatibility. - if isinstance(url, six.text_type): - ensure_func = six.ensure_text - else: - ensure_func = six.ensure_str - - def ensure_type(x): - return x if x is None else ensure_func(x) - return Url( - scheme=ensure_type(scheme), - auth=ensure_type(auth), - host=ensure_type(host), - port=port, - path=ensure_type(path), - query=ensure_type(query), - fragment=ensure_type(fragment), + scheme=scheme, + auth=auth, + host=host, + port=port_int, + path=path, + query=query, + fragment=fragment, ) - - -def get_host(url): - """ - Deprecated. Use :func:`parse_url` instead. 
- """ - p = parse_url(url) - return p.scheme or "http", p.hostname, p.port diff --git a/thirdparty/urllib3/util/util.py b/thirdparty/urllib3/util/util.py new file mode 100644 index 0000000..35c77e4 --- /dev/null +++ b/thirdparty/urllib3/util/util.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import typing +from types import TracebackType + + +def to_bytes( + x: str | bytes, encoding: str | None = None, errors: str | None = None +) -> bytes: + if isinstance(x, bytes): + return x + elif not isinstance(x, str): + raise TypeError(f"not expecting type {type(x).__name__}") + if encoding or errors: + return x.encode(encoding or "utf-8", errors=errors or "strict") + return x.encode() + + +def to_str( + x: str | bytes, encoding: str | None = None, errors: str | None = None +) -> str: + if isinstance(x, str): + return x + elif not isinstance(x, bytes): + raise TypeError(f"not expecting type {type(x).__name__}") + if encoding or errors: + return x.decode(encoding or "utf-8", errors=errors or "strict") + return x.decode() + + +def reraise( + tp: type[BaseException] | None, + value: BaseException, + tb: TracebackType | None = None, +) -> typing.NoReturn: + try: + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None # type: ignore[assignment] + tb = None diff --git a/thirdparty/urllib3/util/wait.py b/thirdparty/urllib3/util/wait.py index 53d10ee..aeca0c7 100644 --- a/thirdparty/urllib3/util/wait.py +++ b/thirdparty/urllib3/util/wait.py @@ -1,18 +1,10 @@ -import errno -from functools import partial -import select -import sys - -try: - from time import monotonic -except ImportError: - from time import time as monotonic - -__all__ = ["NoWayToWaitForSocketError", "wait_for_read", "wait_for_write"] +from __future__ import annotations +import select +import socket +from functools import partial -class NoWayToWaitForSocketError(Exception): - pass +__all__ = ["wait_for_read", "wait_for_write"] # How should we wait on sockets? @@ -37,38 +29,13 @@ class NoWayToWaitForSocketError(Exception): # So: on Windows we use select(), and everywhere else we use poll(). We also # fall back to select() in case poll() is somehow broken or missing. -if sys.version_info >= (3, 5): - # Modern Python, that retries syscalls by default - def _retry_on_intr(fn, timeout): - return fn(timeout) - - -else: - # Old and broken Pythons. - def _retry_on_intr(fn, timeout): - if timeout is None: - deadline = float("inf") - else: - deadline = monotonic() + timeout - - while True: - try: - return fn(timeout) - # OSError for 3 <= pyver < 3.5, select.error for pyver <= 2.7 - except (OSError, select.error) as e: - # 'e.args[0]' incantation works for both OSError and select.error - if e.args[0] != errno.EINTR: - raise - else: - timeout = deadline - monotonic() - if timeout < 0: - timeout = 0 - if timeout == float("inf"): - timeout = None - continue - - -def select_wait_for_socket(sock, read=False, write=False, timeout=None): + +def select_wait_for_socket( + sock: socket.socket, + read: bool = False, + write: bool = False, + timeout: float | None = None, +) -> bool: if not read and not write: raise RuntimeError("must specify at least one of read=True, write=True") rcheck = [] @@ -83,11 +50,16 @@ def select_wait_for_socket(sock, read=False, write=False, timeout=None): # sockets for both conditions. (The stdlib selectors module does the same # thing.) 
fn = partial(select.select, rcheck, wcheck, wcheck) - rready, wready, xready = _retry_on_intr(fn, timeout) + rready, wready, xready = fn(timeout) return bool(rready or wready or xready) -def poll_wait_for_socket(sock, read=False, write=False, timeout=None): +def poll_wait_for_socket( + sock: socket.socket, + read: bool = False, + write: bool = False, + timeout: float | None = None, +) -> bool: if not read and not write: raise RuntimeError("must specify at least one of read=True, write=True") mask = 0 @@ -99,32 +71,33 @@ def poll_wait_for_socket(sock, read=False, write=False, timeout=None): poll_obj.register(sock, mask) # For some reason, poll() takes timeout in milliseconds - def do_poll(t): + def do_poll(t: float | None) -> list[tuple[int, int]]: if t is not None: t *= 1000 return poll_obj.poll(t) - return bool(_retry_on_intr(do_poll, timeout)) - - -def null_wait_for_socket(*args, **kwargs): - raise NoWayToWaitForSocketError("no select-equivalent available") + return bool(do_poll(timeout)) -def _have_working_poll(): +def _have_working_poll() -> bool: # Apparently some systems have a select.poll that fails as soon as you try # to use it, either due to strange configuration or broken monkeypatching # from libraries like eventlet/greenlet. try: poll_obj = select.poll() - _retry_on_intr(poll_obj.poll, 0) + poll_obj.poll(0) except (AttributeError, OSError): return False else: return True -def wait_for_socket(*args, **kwargs): +def wait_for_socket( + sock: socket.socket, + read: bool = False, + write: bool = False, + timeout: float | None = None, +) -> bool: # We delay choosing which implementation to use until the first time we're # called. We could do it at import time, but then we might make the wrong # decision if someone goes wild with monkeypatching select.poll after @@ -134,19 +107,17 @@ def wait_for_socket(*args, **kwargs): wait_for_socket = poll_wait_for_socket elif hasattr(select, "select"): wait_for_socket = select_wait_for_socket - else: # Platform-specific: Appengine. - wait_for_socket = null_wait_for_socket - return wait_for_socket(*args, **kwargs) + return wait_for_socket(sock, read, write, timeout) -def wait_for_read(sock, timeout=None): +def wait_for_read(sock: socket.socket, timeout: float | None = None) -> bool: """Waits for reading to be available on a given socket. Returns True if the socket is readable, or False if the timeout expired. """ return wait_for_socket(sock, read=True, timeout=timeout) -def wait_for_write(sock, timeout=None): +def wait_for_write(sock: socket.socket, timeout: float | None = None) -> bool: """Waits for writing to be available on a given socket. Returns True if the socket is readable, or False if the timeout expired. 
""" From 3827f62e2607737ab021722691adac0f5f028962 Mon Sep 17 00:00:00 2001 From: 4n4l1st-ch4rl3s <63286909+4n4l1st-ch4rl3s@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:49:45 +0300 Subject: [PATCH 2/3] fix: improve error handling and connection reliability Key improvements: - Add robust DNS resolution error handling with specific error messages - Suppress SSL verification warnings and improve SSL handling - Reduce timeouts and add retry limits for faster failure detection - Improve connection handling with proper cleanup and retries - Add graceful fallbacks for DNS and HTTP(S) connections Modified files: - lib/tools/dnslookup.py: Better DNS lookup handling and timeouts - lib/tools/ispcheck.py: Improved connection checks and error handling - lib/tools/sublist3r.py: Better subdomain filtering and error handling - lib/tools/bruter.py: Add proper error handling for None returns --- data/APIs/api.conf | 12 ---- data/txt/domains.txt | 22 ++++++ lib/tools/bruter.py | 3 + lib/tools/dnslookup.py | 152 +++++++++++++++++++++++++++++++++-------- lib/tools/ispcheck.py | 74 +++++++++++++++----- lib/tools/sublist3r.py | 135 ++++++++++++++++++------------------ 6 files changed, 273 insertions(+), 125 deletions(-) delete mode 100644 data/APIs/api.conf diff --git a/data/APIs/api.conf b/data/APIs/api.conf deleted file mode 100644 index 9add3b4..0000000 --- a/data/APIs/api.conf +++ /dev/null @@ -1,12 +0,0 @@ -[VTOTAL] -api_key = - -[CENSYS] -api_id = -secret = - -[SHODAN] -api_key = - -[SECURITYTRAILS] -api_key = \ No newline at end of file diff --git a/data/txt/domains.txt b/data/txt/domains.txt index 653fa3f..5cd7bc7 100644 --- a/data/txt/domains.txt +++ b/data/txt/domains.txt @@ -17,8 +17,30 @@ .us .uk .co.uk +.in +.asia +.eu .aero .mobi .int .edu .coop +.io +.app +.tech +.store +.website +.online +.club +.blog +.xyz +.ai +.dev +.me +.app +.gg +.host +.cloud +.inc +.bot +.wiki diff --git a/lib/tools/bruter.py b/lib/tools/bruter.py index 282763c..f30d147 100644 --- a/lib/tools/bruter.py +++ b/lib/tools/bruter.py @@ -51,6 +51,9 @@ def nameserver(domain): checking = bruter(domain) good_dns = [] print(info + 'Bruteforcing domain extensions and getting DNS records') + if checking is None: + print(tab + bad + "No domains to check - bruter function failed") + return good_dns print(tab + warn + f'Total domain extension used: {Y}{len(checking)}{W}') for item in checking: try: diff --git a/lib/tools/dnslookup.py b/lib/tools/dnslookup.py index 6f05c9b..e03ae66 100644 --- a/lib/tools/dnslookup.py +++ b/lib/tools/dnslookup.py @@ -1,45 +1,143 @@ import random from os import environ +import socket +import warnings +import urllib3 import thirdparty.requests as requests -from thirdparty.dns.resolver import Resolver +from thirdparty.dns.resolver import Resolver, NXDOMAIN, NoAnswer, NoNameservers, Timeout from thirdparty.html_similarity import similarity +from urllib3.util.retry import Retry +from requests.adapters import HTTPAdapter -from ..utils.colors import Y, bad, good, info, tab -from ..utils.settings import config +from ..utils.colors import Y, bad, good, info, tab, warn +# Suppress only the specific InsecureRequestWarning +warnings.filterwarnings('ignore', category=urllib3.exceptions.InsecureRequestWarning) + +def create_session(): + session = requests.Session() + retry_strategy = Retry( + total=3, + backoff_factor=0.5, + status_forcelist=[500, 502, 503, 504], + allowed_methods=frozenset(['GET', 'HEAD', 'OPTIONS']) + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + session.mount("http://", adapter) + 
session.mount("https://", adapter) + return session def scan(domain, host, userAgent, randomAgent, header): + session = create_session() headers = dict(x.replace(' ', '').split(':') for x in header.split(',')) if header is not None else {} - (headers.update({'User-agent': random.choice(open("data/txt/random_agents.txt").readlines()).rstrip("\n")}) - if randomAgent is True else '') - headers.update({'User-agent': userAgent}) if userAgent is not None else '' - try: - print("\n" + Y + "Attempting to track real IP using: %s\n" % host) - print(info + "Checking if {0} is similar to {1}".format(host, domain)) - get_domain = requests.get('http://' + domain, headers=headers, timeout=config['http_timeout_seconds']) - get_host = requests.get('http://' + host, headers=headers, timeout=config['http_timeout_seconds']) - page_similarity = similarity(get_domain.text, get_host.text) - if page_similarity > config['response_similarity_threshold']: - print(tab + good + 'HTML content is %d%% structurally similar to: %s' - % (round(100 * page_similarity, 2), domain)) - else: - print(tab + bad + 'Sorry, but HTML content is %d%% structurally similar to: %s' - % (round(100 * page_similarity, 2), domain)) - except Exception: - print(tab + bad + 'Connection cannot be established with: %s' % (host)) + if randomAgent: + with open("data/txt/random_agents.txt") as f: + headers.update({'User-agent': random.choice(f.readlines()).strip()}) + elif userAgent: + headers.update({'User-agent': userAgent}) + print("\n" + Y + "Attempting to track real IP using: %s\n" % host) + print(info + "Checking if {0} is similar to {1}".format(host, domain)) + + try: + # Try HTTPS first, then fallback to HTTP + for protocol in ['https://', 'http://']: + try: + domain_url = protocol + domain + host_url = protocol + host + + # Set a shorter timeout and verify=False for both requests + get_domain = session.get(domain_url, headers=headers, timeout=3, verify=False) + get_host = session.get(host_url, headers=headers, timeout=3, verify=False) + + page_similarity = similarity(get_domain.text, get_host.text) + if page_similarity > 0.75: # 75% similarity threshold + print(tab + good + 'HTML content is %d%% structurally similar to: %s' + % (round(100 * page_similarity, 2), domain)) + else: + print(tab + bad + 'HTML content is only %d%% structurally similar to: %s' + % (round(100 * page_similarity, 2), domain)) + return + except requests.exceptions.SSLError: + print(tab + warn + f'SSL verification failed for {protocol}, trying without verification') + continue + except requests.exceptions.ReadTimeout: + print(tab + warn + f'Connection timed out for {protocol}, trying alternative') + continue + except requests.exceptions.RequestException as e: + print(tab + warn + f'Connection failed with {protocol}: {str(e)}') + continue + + print(tab + bad + f'Connection cannot be established with: {host} (tried both HTTP and HTTPS)') + + except Exception as e: + print(tab + bad + f'Unexpected error while scanning {host}: {str(e)}') def DNSLookup(domain, host): isAndroid = "ANDROID_DATA" in environ sys_r = Resolver(filename='/data/data/com.termux/files/usr/etc/resolv.conf') if isAndroid else Resolver() - dns = [host] + sys_r.timeout = 2 # Reduce timeout to 2 seconds + sys_r.lifetime = 4 # Total lookup time + sys_r.tries = 2 # Number of retries + try: - dream_dns = [item.address for server in dns for item in sys_r.query(server)] + # Try to resolve the host first + try: + dream_dns = [] + answers = sys_r.query(host, 'A') + for rdata in answers: + 
 
 
 def DNSLookup(domain, host):
     isAndroid = "ANDROID_DATA" in environ
     sys_r = Resolver(filename='/data/data/com.termux/files/usr/etc/resolv.conf') if isAndroid else Resolver()
-    dns = [host]
+    sys_r.timeout = 2  # Per-query timeout in seconds
+    sys_r.lifetime = 4  # Total lookup time, allowing roughly two attempts
+
     try:
-        dream_dns = [item.address for server in dns for item in sys_r.query(server)]
+        # Try to resolve the host first
+        try:
+            dream_dns = []
+            answers = sys_r.query(host, 'A')
+            for rdata in answers:
+                dream_dns.append(rdata.address)
+        except NXDOMAIN:
+            print(tab + bad + f'Domain {host} does not exist')
+            return None
+        except NoAnswer:
+            print(tab + bad + f'No DNS records found for {host}')
+            return None
+        except NoNameservers:
+            print(tab + bad + f'No nameservers available for {host}')
+            return None
+        except Timeout:
+            print(tab + bad + f'DNS lookup timed out for {host}')
+            return None
+        except Exception as e:
+            print(tab + bad + f'DNS resolution failed for {host}: {str(e)}')
+            return None
+
+        if not dream_dns:
+            print(tab + bad + f'No DNS records found for {host}')
+            return None
+
+        # Now try to resolve the domain using the host's nameservers
         dream_r = Resolver()
         dream_r.nameservers = dream_dns
-        answer = dream_r.query(domain, 'A')
-        for A in answer.rrset.items:
-            return A
-    except Exception:
-        pass
+        dream_r.timeout = 2  # Per-query timeout in seconds
+        dream_r.lifetime = 4  # Total lookup time
+
+        try:
+            answer = dream_r.query(domain, 'A')
+            if answer and answer.rrset and answer.rrset.items:
+                return answer.rrset.items[0]
+            else:
+                print(tab + bad + f'No A records found for {domain} using {host} nameservers')
+                return None
+        except NXDOMAIN:
+            print(tab + bad + f'Domain {domain} not found using {host} nameservers')
+            return None
+        except NoAnswer:
+            print(tab + bad + f'No answer from {host} nameservers for {domain}')
+            return None
+        except NoNameservers:
+            print(tab + bad + f'No working nameservers found from {host} for {domain}')
+            return None
+        except Timeout:
+            print(tab + bad + f'DNS lookup timed out using {host} nameservers')
+            return None
+        except Exception as e:
+            print(tab + bad + f'Failed to resolve {domain} using {host} nameservers: {str(e)}')
+            return None
+
+    except Exception as e:
+        print(tab + bad + f'Unexpected error in DNS lookup: {str(e)}')
+        return None
diff --git a/lib/tools/ispcheck.py b/lib/tools/ispcheck.py
index f50c597..ed967ba 100644
--- a/lib/tools/ispcheck.py
+++ b/lib/tools/ispcheck.py
@@ -1,7 +1,10 @@
 import re
 import socket
+import time
+from thirdparty.urllib3.util.retry import Retry
+from thirdparty.requests.adapters import HTTPAdapter
 
-from lib.utils.colors import R, W, Y, tab, warn
+from lib.utils.colors import R, W, Y, tab, warn, bad, good
 from thirdparty import requests
 from thirdparty.bs4 import BeautifulSoup
 
@@ -9,18 +12,48 @@
 'cloudflare', 'incapsula']
 
+def create_session():
+    session = requests.Session()
+    retry_strategy = Retry(
+        total=3,  # number of retries
+        backoff_factor=0.5,  # wait roughly 0.5, 1, 2 seconds between retries
+        status_forcelist=[500, 502, 503, 504]  # retry on these status codes
+    )
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
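+
+# ISPCheck tries, in order: DNS resolution, a direct look at the HTTP
+# 'Server' header, a check-host.net lookup, and finally a raw probe of
+# ports 80/443. None means "no protection detected"; any returned string
+# describes the WAF/CDN or the error that was hit.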
 
 def ISPCheck(domain):
     reg = re.compile(rf'(?i){"|".join(cloudlist)}')
+    session = create_session()
+
+    # Try to resolve the domain first
     try:
-        header = requests.get('http://' + domain, timeout=1).headers['server'].lower()
-        if reg.search(header):
-            return f' is protected by {Y}{header.capitalize()}{W}'
-        return None
-    except Exception:
-        req = requests.get(f'https://check-host.net/ip-info?host={domain}').text
-        UrlHTML = BeautifulSoup(req, "lxml")
-        print(f'{tab*2}{warn}Something has gone wrong. Retrying connection')
+        socket.gethostbyname(domain)
+    except socket.gaierror:
+        print(f'{tab*2}{warn}DNS resolution failed for {domain}')
+        return f' [{R}DNS resolution failed{W}]'
+
+    # Try direct connection first
+    try:
+        response = session.get('http://' + domain, timeout=3)
+        if 'server' in response.headers:
+            header = response.headers['server'].lower()
+            if reg.search(header):
+                return f' is protected by {Y}{header.capitalize()}{W}'
+        return None
+    except (requests.exceptions.RequestException, socket.error):
+        pass  # Continue to fallback method
+
+    # Fallback to check-host.net
+    try:
+        print(f'{tab*2}{warn}Direct connection failed, trying check-host.net...')
+        response = session.get(f'https://check-host.net/ip-info?host={domain}', timeout=5)
+        if response.status_code != 200:
+            return f' [{R}cannot retrieve information{W}]'
+
+        UrlHTML = BeautifulSoup(response.text, "lxml")
         if UrlHTML.findAll('div', {'class': 'error'}):
             return f' [{R}cannot retrieve information{W}]'
 
@@ -30,13 +63,22 @@ def ISPCheck(domain):
         if reg.search(org):
             return f' is protected by {Y}{reg.match(org).group().capitalize()}{W}'
 
+        # Check ports as last resort
         ports = [80, 443]
         for port in ports:
-            checker = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            checker.settimeout(0.5)
-            if checker.connect_ex((domain, port)) == 0:
+            try:
+                checker = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                checker.settimeout(1)
+                if checker.connect_ex((domain, port)) == 0:
+                    checker.close()
+                    print(f'{tab*2}{good}Port {port} is open')
+                    return None
                 checker.close()
-                return
-            checker.close()
-        return f' [{R}http{W}/{R}https{W}] ports filtered or closed'
+            except socket.error:
+                continue
+
+        return f' [{R}http{W}/{R}https{W}] ports filtered or closed'
+
+    except Exception as e:
+        print(f'{tab*2}{bad}Error checking domain: {str(e)}')
+        return f' [{R}cannot retrieve information{W}]'
diff --git a/lib/tools/sublist3r.py b/lib/tools/sublist3r.py
index d8348c4..fbc4614 100644
--- a/lib/tools/sublist3r.py
+++ b/lib/tools/sublist3r.py
@@ -316,6 +316,10 @@ def generate_query(self):
             query = "site:{domain} -www.{domain}".format(domain=self.domain)
         return query
 
+    def send_req(self, query, page_no=1):
+        self.should_sleep()
+        return super(GoogleEnum, self).send_req(query, page_no)
+
 
 class YahooEnum(enumeratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
@@ -642,13 +646,19 @@ def req(self, req_method, url, params=None):
 
     def get_csrftoken(self, resp):
         csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
-        token = csrf_regex.findall(resp)[0]
-        return token.strip()
+        try:
+            token = csrf_regex.findall(resp)[0]
+            return token.strip()
+        except (IndexError, TypeError):
+            self.print_(f"{R}[-]ERROR: Could not find CSRF token. DNSdumpster's response format may have changed.{W}")
+            return None
 
     def enumerate(self):
         self.lock = threading.BoundedSemaphore(value=70)
         resp = self.req('GET', self.base_url)
         token = self.get_csrftoken(resp)
+        if token is None:
+            return []
         params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
         post_resp = self.req('POST', self.base_url, params)
         self.extract_domains(post_resp)
@@ -691,78 +701,52 @@ def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
 
         try:
             self.apikey = self.parser.get('VTOTAL', 'api_key')
+            if not self.apikey or self.apikey.isspace():
+                raise Exception("Empty API key")
         except Exception:
-            self.print_(bad + R + "Couldn't Open \"api.conf\" file and read it!" + W)
+            self.print_(bad + R + "Virustotal API key not found or invalid in api.conf!" + W)
+            self.apikey = ""
             return
 
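+    # send_req below talks to the VirusTotal v3 REST API: the key travels in
+    # the 'X-Apikey' header, a 401 signals a rejected key, and any status
+    # other than 200 is treated as an error. Only the first page of results
+    # is fetched; 'links.next' pagination is not followed.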
-    # the main send_req need to be rewritten
     def send_req(self, url):
         try:
-            if self.apikey == "":
-                cmd = """
-from lib.utils.colors import *
-try:
-    from configparser import ConfigParser
-except ImportError:
-    from ConfigParser import ConfigParser
-
-parser = ConfigParser()
-parser.read("data/APIs/api.conf")
-parser.set('VTOTAL', 'api_key',
-                """
-                end_cmd = """)
-with open('data/APIs/api.conf', 'w') as configfile:
-    parser.write(configfile)"""
-                quest(f"{warn}Virustotal API key not found! Do you want to enter your API key?: ",
-                      cmd + f"input(\"{tab}{warn}\" + \"Virustotal API key: \")" + end_cmd,
-                      cmd + "'noapi'" + end_cmd,
-                      "no",
-                      **{"printSuccess": "{good}API key saved.",
-                         "printError": "{bad}API key not saved.",
-                         }
-                      )
+            if not self.apikey:
+                return None
+
             apiHeads = {
                 "X-Apikey": self.apikey
             }
             resp = requests.request(method="GET", url=url, headers=apiHeads, timeout=self.timeout)
+            if resp.status_code == 401:
+                self.print_(bad + R + "Invalid Virustotal API key!" + W)
+                return None
+            elif resp.status_code != 200:
+                self.print_(bad + R + f"Virustotal API error (status code: {resp.status_code})" + W)
+                return None
+
+            return resp.text  # Return text instead of raw response
+
         except Exception as e:
-            self.print_(e)
-            resp = None
-
-        return self.get_response(resp)
+            self.print_(bad + R + f"Virustotal API error: {str(e)}" + W)
+            return None
 
-    # once the send_req is rewritten we don't need to call this function, the stock one should be ok
     def enumerate(self):
-        while self.url != '':
-            resp = self.send_req(self.url)
-            resp = json.loads(resp)
-            # - Old debugging
-            if 'error' in resp and self.apikey != "":
-                self.print_(
-                    f"{bad}{R}Virustotal probably now is blocking our requests! Or you have not a valid api key!{W}"
-                )
-                break
-            if 'links' in resp and 'next' in resp['links']:
-                self.url = resp['links']['next']
-            else:
-                self.url = ''
-            self.extract_domains(resp)
-        return self.subdomains
-
-    def extract_domains(self, resp):
-        # resp is already parsed as json
+        resp = self.send_req(self.url)
+        if resp is None:
+            return self.subdomains
+
         try:
-            for i in resp['data']:
-                if i['type'] == 'domain':
-                    subdomain = i['id']
-                    if not subdomain.endswith(self.domain):
-                        continue
-                    if subdomain not in self.subdomains and subdomain != self.domain:
-                        if self.verbose:
-                            self.print_(tab*2 + "%s%s: %s%s" % (R, self.engine_name, W, subdomain))
-                        self.subdomains.append(subdomain.strip())
-        except Exception:
-            pass
+            resp_json = json.loads(resp)
+            if 'data' in resp_json:
+                for entry in resp_json['data']:
+                    if entry.get('type') == 'domain' and 'id' in entry:
+                        subdomain = entry['id']
+                        if not subdomain.endswith(self.domain):
+                            continue
+                        if subdomain not in self.subdomains and subdomain != self.domain:
+                            self.subdomains.append(subdomain)
+        except Exception as e:
+            self.print_(bad + R + f"Error parsing Virustotal response: {str(e)}" + W)
+
+        return self.subdomains
 
 
 class ThreatCrowd(enumeratorBaseThreaded):
@@ -1049,16 +1033,27 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
             print(tab*2 + bad + subdomain + f" ({R}error getting ip{W})")
 
     print(info + 'Enumerating misconfigured subdomains')
-    subdomains = [misconfigured for misconfigured in subdomains if ISPCheck(misconfigured) is None]
-
-    print(tab + warn + 'Total Misconfigured Subdomains Found: %s' % len(subdomains))
-    if len(subdomains) == 0:
-        print(tab*2 + bad + 'misconfigured subdomains not found')
-    else:
-        for subdomain in subdomains:
-            print(tab*2 + good + subdomain)
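+    # ISPCheck() returns None only when no WAF/CDN (and no blocking error)
+    # was detected, so a None result marks the subdomain as potentially
+    # misconfigured and worth a closer look.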
+    misconfigured = []
+    for subdomain in subdomains:
+        try:
+            check_result = ISPCheck(subdomain)
+            if check_result is None:
+                misconfigured.append(subdomain)
+            elif 'DNS resolution failed' in check_result:
+                continue  # Skip domains that can't be resolved
+            elif verbose:
+                print(f"{tab*2}{warn}Skipping {subdomain}:{check_result}")
+        except Exception as e:
+            if verbose:
+                print(f"{tab*2}{bad}Error checking {subdomain}: {str(e)}")
+            continue
+
+    if not silent:
+        print(f"{tab}{warn}Total Misconfigured Subdomains Found: {Y}{len(misconfigured)}{W}")
+        for subdomain in misconfigured:
+            print(f"{tab*2}{good}{subdomain}")
 
-    return subdomains
+    return misconfigured
 
 
 def interactive():

From 3682f43ff9bc9394de5ce57bf5b3670150677b5c Mon Sep 17 00:00:00 2001
From: 4n4l1st-ch4rl3s <63286909+4n4l1st-ch4rl3s@users.noreply.github.com>
Date: Thu, 24 Jul 2025 16:51:30 +0300
Subject: [PATCH 3/3] Restore data/APIs/api.conf with placeholder keys

---
 data/APIs/api.conf | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 data/APIs/api.conf

diff --git a/data/APIs/api.conf b/data/APIs/api.conf
new file mode 100644
index 0000000..dfeb3ec
--- /dev/null
+++ b/data/APIs/api.conf
@@ -0,0 +1,13 @@
+[VTOTAL]
+api_key = xxxx
+
+[CENSYS]
+api_id = 
+secret = 
+
+[SHODAN]
+api_key = xxxx
+
+[SECURITYTRAILS]
+api_key = xxxx