diff --git a/docs/conf.py b/docs/conf.py index 3d48f2f1..604c43fa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,6 +46,8 @@ sys.path.insert(0, os.path.abspath("..")) from docs.utils import _validate_git_ref # noqa from docs.utils import slugify_header # noqa +from docs.utils import sanitize_url # noqa +from docs.utils import sanitize_intersphinx_mapping # noqa # Define the branch reference for linkcode_resolve DOCS_BUILD_REF: str = _validate_git_ref(os.environ.get("DOCS_BUILD_REF", "stable")) @@ -309,7 +311,7 @@ myst_gfm_only = False myst_html_meta = { - "github_url": f"https://github.com/reactive-firewall/{project}" + "github_url": sanitize_url(f"https://github.com/reactive-firewall/{project}"), } # For GH-style admonitions to MyST conversion @@ -419,7 +421,7 @@ # -- Link resolver ------------------------------------------------------------- -linkcode_url_prefix: str = f"https://github.com/reactive-firewall/{project}" +linkcode_url_prefix: str = sanitize_url(f"https://github.com/reactive-firewall/{project}") suffix = "/issues/%s" @@ -432,9 +434,11 @@ # try to link with official python3 documentation. # see https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html for more -intersphinx_mapping = { - "python": ("https://docs.python.org/3", (None, "python-inv.txt")) -} +intersphinx_mapping = sanitize_intersphinx_mapping( + { + "python": ("https://docs.python.org/3", (None, "python-inv.txt")), + }, +) def linkcode_resolve(domain, info): @@ -450,4 +454,4 @@ def linkcode_resolve(domain, info): theResult = theResult.replace("/multicast.py", "/multicast/__init__.py") if "/tests.py" in theResult: theResult = theResult.replace("/tests.py", "/tests/__init__.py") - return quote(theResult, safe=":/-._") + return sanitize_url(quote(theResult, safe=":/-._")) diff --git a/docs/utils.py b/docs/utils.py index ba7d2dd7..a596ecdd 100644 --- a/docs/utils.py +++ b/docs/utils.py @@ -17,6 +17,7 @@ # limitations under the License. 
import re
from urllib.parse import urlparse, urlunparse, quote


# Git reference validation pattern
GIT_REF_PATTERN = r'^[a-zA-Z0-9][a-zA-Z0-9_\-./]*$'


# URL allowed scheme list
# Enforces:
#  - URLs Must start with https
URL_ALLOWED_SCHEMES = frozenset({"https"})


# URL allowed domain list
# Enforces:
#  - URLs Must belong to one of these domains
URL_ALLOWED_NETLOCS = frozenset({"github.com", "readthedocs.com", "docs.python.org"})


# Maximum allowed URL length
MAX_URL_LENGTH = 2048  # Common browser limit
"""Maximum allowed length for URL validation.

Should be large enough for most URLs but no larger than common browser limits.

Unit-Testing:

    First set up test fixtures by importing utils.

        >>> import docs.utils as _utils
        >>>

        >>> _utils.MAX_URL_LENGTH is not None
        True
        >>> type(_utils.MAX_URL_LENGTH) is type(int())
        True
        >>> _utils.MAX_URL_LENGTH > 0
        True
        >>> _utils.MAX_URL_LENGTH >= 256
        True
        >>> _utils.MAX_URL_LENGTH <= 2048
        True
        >>>

"""


# Error messages for URL validation
INVALID_LENGTH_ERROR = f"URL exceeds maximum length of {MAX_URL_LENGTH} characters."
"""Length error message for URL validation.

Unit-Testing:

    First set up test fixtures by importing utils.

        >>> import docs.utils as _utils
        >>>

        >>> _utils.INVALID_LENGTH_ERROR is not None
        True
        >>> type(_utils.INVALID_LENGTH_ERROR) is type(str())
        True
        >>> len(_utils.INVALID_LENGTH_ERROR) > 0
        True
        >>>

"""


INVALID_SCHEME_ERROR = "Invalid URL scheme. Only 'https' is allowed."
"""Scheme error message for URL validation.

Unit-Testing:

    First set up test fixtures by importing utils.

        >>> import docs.utils as _utils
        >>>

        >>> _utils.INVALID_SCHEME_ERROR is not None
        True
        >>> type(_utils.INVALID_SCHEME_ERROR) is type(str())
        True
        >>> len(_utils.INVALID_SCHEME_ERROR) > 0
        True
        >>>

"""


# The allowed domains are joined in sorted order so the message is human readable
# and identical on every run (a raw frozenset repr varies with string hash randomization).
INVALID_DOMAIN_ERROR = (
    f"Invalid or untrusted domain. Only {', '.join(sorted(URL_ALLOWED_NETLOCS))} are allowed."
)
"""Domain error message for URL validation.

Unit-Testing:

    First set up test fixtures by importing utils.

        >>> import docs.utils as _utils
        >>>

        >>> _utils.INVALID_DOMAIN_ERROR is not None
        True
        >>> type(_utils.INVALID_DOMAIN_ERROR) is type(str())
        True
        >>> len(_utils.INVALID_DOMAIN_ERROR) > 0
        True
        >>>

"""


# Matches a "%" that does NOT begin a well-formed percent-escape ("%" + two hex digits).
_STRAY_PERCENT_PATTERN = re.compile(r'%(?![0-9A-Fa-f]{2})')


def _quote_component(value: str, safe: str) -> str:
    """Percent-encode one URL component without double-encoding existing escapes."""
    # Treat "%" as safe so well-formed escapes such as "%20" pass through untouched,
    # then encode only the stray "%" characters that are not part of an escape.
    return _STRAY_PERCENT_PATTERN.sub("%25", quote(value, safe=safe + "%"))


def sanitize_url(url: str) -> str:
    """
    Sanitize and validate a URL according to allowed schemes and domains.

    This function validates that the URL is not longer than MAX_URL_LENGTH, uses an
    allowed scheme (https) and points to a trusted domain, then percent-encodes its
    path, params, query and fragment components. Well-formed percent-escapes already
    present in the input are preserved, so the function is idempotent and is safe to
    apply to a URL that was previously passed through `urllib.parse.quote`.

    Args:
        url (str) -- The URL to sanitize.

    Returns:
        str -- The sanitized URL.

    Raises:
        ValueError -- If the URL is too long, has an invalid scheme, or points to an
            untrusted domain.


    Unit-Testing:

        Testcase 0: First set up test fixtures by importing utils.

            >>> import docs.utils as _utils
            >>>

        Testcase 1: Basic URL with spaces and special characters.

            >>> url_fxtr = "https://github.com/user/Hello World!"
            >>> _utils.sanitize_url(url_fxtr)
            'https://github.com/user/Hello%20World%21'
            >>>

        Testcase 2: Already encoded URLs are not encoded a second time.

            >>> url_fxtr = "https://github.com/user/Hello%20World%21"
            >>> _utils.sanitize_url(url_fxtr)
            'https://github.com/user/Hello%20World%21'
            >>>

        Testcase 3: URLs with a scheme other than https are rejected.

            >>> _utils.sanitize_url("http://github.com/user/repo")
            Traceback (most recent call last):
            ...
            ValueError: Invalid URL scheme. Only 'https' is allowed.
            >>>

    """
    # Validate length
    if len(url) > MAX_URL_LENGTH:
        raise ValueError(INVALID_LENGTH_ERROR)
    parsed_url = urlparse(url)
    # Validate scheme
    if parsed_url.scheme not in URL_ALLOWED_SCHEMES:
        raise ValueError(INVALID_SCHEME_ERROR)
    # Validate netloc - exact match only, so userinfo ("github.com@evil.example")
    # and explicit ports are rejected rather than normalized.
    if parsed_url.netloc not in URL_ALLOWED_NETLOCS:
        raise ValueError(INVALID_DOMAIN_ERROR)
    # Sanitize every component after the netloc - the safe parameter preserves the
    # structural delimiters of each component while everything else is encoded.
    return urlunparse((
        parsed_url.scheme,
        parsed_url.netloc,
        _quote_component(parsed_url.path, "/="),
        _quote_component(parsed_url.params, ";="),
        _quote_component(parsed_url.query, "&="),
        _quote_component(parsed_url.fragment, "/?&="),
    ))


def sanitize_intersphinx_mapping(mapping: dict) -> dict:
    """
    Sanitize URLs in an intersphinx mapping dictionary.

    This function applies `sanitize_url` to the URL of each entry in the mapping while
    passing the associated extra value through unchanged. The input mapping is not
    modified; a new dictionary is returned.

    Args:
        mapping (dict) -- A dictionary mapping names to tuples of (url, extra_value).

    Returns:
        dict -- A dictionary with the same structure but with sanitized URLs.

    Raises:
        ValueError -- If any URL is rejected by `sanitize_url`, or if a value is not
            a two-item (url, extra_value) sequence.

    Unit-Testing:

        Testcase 0: First set up test fixtures by importing utils.

            >>> import docs.utils as _utils
            >>>

        Testcase 1: Basic intersphinx mapping.

            >>> mapping = {'python': ('https://docs.python.org/3', None)}
            >>> _utils.sanitize_intersphinx_mapping(mapping)
            {'python': ('https://docs.python.org/3', None)}

        Testcase 2: Mapping with URL containing special characters.

            >>> mapping = {'project': ('https://github.com/user/project with spaces', None)}
            >>> result = _utils.sanitize_intersphinx_mapping(mapping)
            >>> result['project'][0]
            'https://github.com/user/project%20with%20spaces'
            >>>

    """
    return {key: (sanitize_url(url), extra_value) for key, (url, extra_value) in mapping.items()}
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Extra Test module for docs.utils functionality.

This module provides extra test cases for the docs.utils module, focusing on the
utils.sanitize_url method for url encoding.
"""


__module__ = "tests"


try:
    try:
        import context
    except Exception as _:  # pragma: no branch
        del _  # skipcq - cleanup any error vars early
        from . import context
    if context.__name__ is None:
        raise ModuleNotFoundError("[CWE-758] Failed to import context") from None
    else:
        from context import unittest
        import docs.utils
except Exception as err:
    raise ImportError("[CWE-758] Failed to import test context") from err


@context.markWithMetaTag("extra", "security")
class ExtraDocsUtilsTestSuite(context.BasicUsageTestSuite):
    """Test cases for docs.utils module."""

    __module__ = "tests.test_extra"

    # Each fixture pairs a raw URL with the exact output expected from sanitize_url.
    URL_TEST_FIXTURES = [
        {
            "input_url": "https://github.com/user/repo",
            "expected": "https://github.com/user/repo",
        },
        {
            "input_url": "https://github.com/user/repo with spaces",
            "expected": "https://github.com/user/repo%20with%20spaces",
        },
        {
            # "=" and "&" are kept as safe query delimiters by sanitize_url,
            # so a well-formed query string must pass through unchanged.
            "input_url": "https://github.com/user/repo?q=test&sort=desc",
            "expected": "https://github.com/user/repo?q=test&sort=desc",
        },
        {
            "input_url": "https://github.com/user/repo#section",
            "expected": "https://github.com/user/repo#section",
        },
        {
            # Markup injected into the path must come back fully percent-encoded.
            "input_url": "https://github.com/user/repo/<script>alert('xss')</script>",
            "expected": "https://github.com/user/repo/%3Cscript%3Ealert%28%27xss%27%29%3C/script%3E",
        },
    ]

    # URLs that sanitize_url must refuse with ValueError.
    URL_REJECT_FIXTURES = [
        "http://github.com/user/repo",  # scheme is not https
        "ftp://github.com/user/repo",  # scheme is not https
        "https://example.com/user/repo",  # domain is not in the allowed list
        "https://github.com@example.com/user/repo",  # userinfo trick, real host is untrusted
        "https://github.com/" + ("a" * docs.utils.MAX_URL_LENGTH),  # exceeds maximum length
    ]

    def test_sanitize_url_GIVEN_raw_url_IS_reliable(self) -> None:
        """Test case 1: Test to ensure reliable URL sanitization."""
        for test_params in self.URL_TEST_FIXTURES:
            # subTest keeps one failing fixture from hiding the results of the others
            with self.subTest(input_url=test_params["input_url"]):
                sanitized_url = docs.utils.sanitize_url(test_params["input_url"])
                # check for results
                self.assertIsNotNone(sanitized_url)
                # Verify results
                if test_params["input_url"] == test_params["expected"]:
                    self.assertEqual(
                        test_params["input_url"], sanitized_url,
                        "Input and output URLs were different, should be the same.",
                    )
                else:
                    self.assertNotEqual(
                        test_params["input_url"], sanitized_url,
                        "Input and output URLs were the same, should be different.",
                    )
                self.assertEqual(sanitized_url, test_params["expected"])

    def test_sanitize_url_GIVEN_untrusted_url_IS_rejected(self) -> None:
        """Test case 2: Test to ensure untrusted or malformed URLs raise ValueError."""
        for rejected_url in self.URL_REJECT_FIXTURES:
            with self.subTest(input_url=rejected_url[:64]):
                with self.assertRaises(ValueError):
                    docs.utils.sanitize_url(rejected_url)


if __name__ == '__main__':
    unittest.main()