Closed
Description
Consider something like the following:
from urllib.parse import urlparse, urlunparse, quote
def sanitize_url(url):
    """Validate *url* against an allow-list and return a percent-encoded copy.

    Only ``https`` URLs whose network location is exactly ``github.com`` or
    ``readthedocs.com`` are accepted. The comparison is made against the full
    netloc, so a URL carrying a port or userinfo (``github.com:443``,
    ``user@github.com``) is rejected.

    The path and query are percent-encoded; params and fragment are passed
    through unchanged.

    Args:
        url: The URL string to validate and sanitize.

    Returns:
        The reconstructed URL with unsafe path/query characters escaped.

    Raises:
        ValueError: If the scheme or the network location is not allowed.
    """
    allowed_schemes = {'https'}
    allowed_netlocs = {'github.com', 'readthedocs.com'}

    parsed_url = urlparse(url)

    # Validate scheme
    if parsed_url.scheme not in allowed_schemes:
        raise ValueError("Invalid URL scheme. Only 'https' is allowed.")

    # Validate netloc
    if parsed_url.netloc not in allowed_netlocs:
        raise ValueError("Invalid or untrusted domain. Only 'github.com' and 'readthedocs.com' are allowed.")

    # Sanitize path and query.
    # '%' is kept safe so existing escapes (e.g. "%20") are not double-encoded
    # into "%2520".
    sanitized_path = quote(parsed_url.path, safe="/%")
    # '=' and '&' delimit key/value pairs and '+' encodes a space in form
    # data; escaping them would change the meaning of the query string.
    sanitized_query = quote(parsed_url.query, safe="=&%+")

    # Reconstruct the sanitized URL
    return urlunparse((
        parsed_url.scheme,
        parsed_url.netloc,
        sanitized_path,
        parsed_url.params,
        sanitized_query,
        parsed_url.fragment,
    ))
# Example usage: sanitize a known-good URL and report the outcome.
original_url = "https://github.com/user/repo?query=1"
try:
    safe_url = sanitize_url(original_url)
    print("Sanitized URL:", safe_url)
except ValueError as err:
    # sanitize_url signals a rejected scheme or domain with ValueError.
    print("Error:", err)
Metadata
Metadata
Assignees
Labels
Type
Projects
Status
Archive Backlog