From fc8864b8d6dd5e1203f7c6194f8cb799b3e592f6 Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Fri, 22 Dec 2023 12:45:32 +0100 Subject: [PATCH 1/7] Format RFC links with anchors nicely Previously if we wrote :rfc:`1234#section-2` links would be formatted like RFC 1234#section-2 which isn't quite nice. After this patch: RFC 1234 Section 2 which looks and reads better. Resolves: https://github.com/sphinx-doc/sphinx/issues/7027 --- sphinx/roles.py | 16 +++++++++++++--- tests/test_markup.py | 12 ++++++------ tests/test_roles.py | 14 +++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/sphinx/roles.py b/sphinx/roles.py index d734429ebad..fb8eb93a696 100644 --- a/sphinx/roles.py +++ b/sphinx/roles.py @@ -210,8 +210,8 @@ def build_uri(self) -> str: class RFC(ReferenceRole): def run(self) -> tuple[list[Node], list[system_message]]: target_id = 'index-%s' % self.env.new_serialno('index') - entries = [('single', 'RFC; RFC %s' % self.target, target_id, '', None)] - + formatted_target = _format_rfc_target(self.target) + entries = [('single', 'RFC; %s' % formatted_target, target_id, '', None)] index = addnodes.index(entries=entries) target = nodes.target('', '', ids=[target_id]) self.inliner.document.note_explicit_target(target) @@ -222,7 +222,7 @@ def run(self) -> tuple[list[Node], list[system_message]]: if self.has_explicit_title: reference += nodes.strong(self.title, self.title) else: - title = "RFC " + self.title + title = formatted_target reference += nodes.strong(title, title) except ValueError: msg = self.inliner.reporter.error(__('invalid RFC number %s') % self.target, @@ -241,6 +241,16 @@ def build_uri(self) -> str: return base_url + self.inliner.rfc_url % int(ret[0]) +def _format_rfc_target(target: str): + """ + Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) and returns + a nicely formatted title for it. + """ + parts = target.replace('#', ' ').replace('-', ' ').split() + if len(parts) >= 2: + parts[1] = parts[1].title() + return ' '.join(['RFC', *parts]) + _amp_re = re.compile(r'(?' - 'RFC 2324#id1

'), + 'href="https://datatracker.ietf.org/doc/html/rfc2324.html#section-1">' + 'RFC 2324 Section 1

'), ('\\sphinxAtStartPar\n' - '\\index{RFC@\\spxentry{RFC}!RFC 2324\\#id1@\\spxentry{RFC 2324\\#id1}}' - '\\sphinxhref{https://datatracker.ietf.org/doc/html/rfc2324.html\\#id1}' - '{\\sphinxstylestrong{RFC 2324\\#id1}}'), + '\\index{RFC@\\spxentry{RFC}!RFC 2324 Section 1@\\spxentry{RFC 2324 Section 1}}' + '\\sphinxhref{https://datatracker.ietf.org/doc/html/rfc2324.html\\#section-1}' + '{\\sphinxstylestrong{RFC 2324 Section 1}}'), ), ( # correct interpretation of code with whitespace diff --git a/tests/test_roles.py b/tests/test_roles.py index 67a13c8ef1b..96b2b52f360 100644 --- a/tests/test_roles.py +++ b/tests/test_roles.py @@ -3,8 +3,9 @@ from unittest.mock import Mock from docutils import nodes +import pytest -from sphinx.roles import EmphasizedLiteral +from sphinx.roles import _format_rfc_target, EmphasizedLiteral from sphinx.testing.util import assert_node @@ -73,3 +74,14 @@ def test_samp(): assert_node(ret[0], [nodes.literal, ("print 1+\\", [nodes.emphasis, "variable"])]) assert msg == [] + + +@pytest.mark.parametrize('target,expected_output', [ + ['123', 'RFC 123'], + ['123#section-1', 'RFC 123 Section 1'], + ['123#section-2.5.3', 'RFC 123 Section 2.5.3'], + ['123#page-13', 'RFC 123 Page 13'], + ['123#appendix-B', 'RFC 123 Appendix B'], +]) +def test_format_rfc_target(target: str, expected_output: str) -> None: + assert _format_rfc_target(target) == expected_output From 7ed70a8c702b836f48e08e416033540b6770ee5a Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Fri, 22 Dec 2023 12:58:49 +0100 Subject: [PATCH 2/7] Fix stuff --- sphinx/roles.py | 3 ++- tests/test_roles.py | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sphinx/roles.py b/sphinx/roles.py index fb8eb93a696..4cf2ffbf2ea 100644 --- a/sphinx/roles.py +++ b/sphinx/roles.py @@ -241,7 +241,7 @@ def build_uri(self) -> str: return base_url + self.inliner.rfc_url % int(ret[0]) -def _format_rfc_target(target: str): +def _format_rfc_target(target: str) -> str: """ Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) and returns a nicely formatted title for it. @@ -251,6 +251,7 @@ def _format_rfc_target(target: str): parts[1] = parts[1].title() return ' '.join(['RFC', *parts]) + _amp_re = re.compile(r'(? None: assert _format_rfc_target(target) == expected_output From 9c834c7c20121a0e1ebb8048d27b2bd2d20c6479 Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Fri, 22 Dec 2023 13:04:57 +0100 Subject: [PATCH 3/7] Change the type here --- tests/test_roles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_roles.py b/tests/test_roles.py index 916f67a5b0b..2ed01fe13d9 100644 --- a/tests/test_roles.py +++ b/tests/test_roles.py @@ -76,7 +76,7 @@ def test_samp(): assert msg == [] -@pytest.mark.parametrize('target,expected_output', [ +@pytest.mark.parametrize(('target', 'expected_output'), [ ('123', 'RFC 123'), ('123#section-1', 'RFC 123 Section 1'), ('123#section-2.5.3', 'RFC 123 Section 2.5.3'), From 98c44f04f45a1534fa24840cf56430d6938fd0b9 Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Fri, 22 Dec 2023 21:52:49 +0100 Subject: [PATCH 4/7] Make it safer --- sphinx/roles.py | 18 ++++++++++++------ tests/test_roles.py | 4 ++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sphinx/roles.py b/sphinx/roles.py index 4cf2ffbf2ea..73618ee7910 100644 --- a/sphinx/roles.py +++ b/sphinx/roles.py @@ -243,13 +243,19 @@ def build_uri(self) -> str: def _format_rfc_target(target: str) -> str: """ - Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) and returns - a nicely formatted title for it. + Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) and attempts to + produce a human-friendly title for it. + + We have a set of known anchors that we format nicely, everything else we leave alone. """ - parts = target.replace('#', ' ').replace('-', ' ').split() - if len(parts) >= 2: - parts[1] = parts[1].title() - return ' '.join(['RFC', *parts]) + [number, *anchor] = target.split('#', maxsplit=1) + anchor_parts = anchor[0].split('-') if anchor else None + if not anchor_parts or anchor_parts[0] not in ['section', 'page', 'appendix']: + return 'RFC %s' % (target,) + return 'RFC %s %s' % ( + number, + ' '.join([anchor_parts[0].title(), *anchor_parts[1:]]), + ) _amp_re = re.compile(r'(? None: assert _format_rfc_target(target) == expected_output From 259cf09071228249d9f1a62fb833c1914f360cb5 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 6 Oct 2024 03:52:34 +0100 Subject: [PATCH 5/7] post-merge --- sphinx/roles.py | 3 ++- tests/test_markup/test_markup.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sphinx/roles.py b/sphinx/roles.py index 3596bc353b1..3d0e53341e5 100644 --- a/sphinx/roles.py +++ b/sphinx/roles.py @@ -333,7 +333,8 @@ class RFC(ReferenceRole): def run(self) -> tuple[list[Node], list[system_message]]: target_id = 'index-%s' % self.env.new_serialno('index') formatted_target = _format_rfc_target(self.target) - entries = [('single', 'RFC; %s' % formatted_target, target_id, '', None)] + entries = [('single', f'RFC; {formatted_target}', target_id, '', None)] + index = addnodes.index(entries=entries) target = nodes.target('', '', ids=[target_id]) self.inliner.document.note_explicit_target(target) diff --git a/tests/test_markup/test_markup.py b/tests/test_markup/test_markup.py index 1a045c6529f..98052de2572 100644 --- a/tests/test_markup/test_markup.py +++ b/tests/test_markup/test_markup.py @@ -278,17 +278,17 @@ def get(name): ( # rfc role with anchor 'verify', - ':rfc:`2324#id1`', + ':rfc:`2324#section-1`', ( '

' - 'RFC 2324#id1

' + 'href="https://datatracker.ietf.org/doc/html/rfc2324.html#section-1">' + 'RFC 2324 Section 1

' ), ( '\\sphinxAtStartPar\n' - '\\index{RFC@\\spxentry{RFC}!RFC 2324\\#id1@\\spxentry{RFC 2324\\#id1}}' - '\\sphinxhref{https://datatracker.ietf.org/doc/html/rfc2324.html\\#id1}' - '{\\sphinxstylestrong{RFC 2324\\#id1}}' + '\\index{RFC@\\spxentry{RFC}!RFC 2324 Section 1@\\spxentry{RFC 2324 Section 1}}' + '\\sphinxhref{https://datatracker.ietf.org/doc/html/rfc2324.html\\#section-1}' + '{\\sphinxstylestrong{RFC 2324 Section 1}}' ), ), ( From f78df8cac5fd792334be9358edccb44dadbfe6ea Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 6 Oct 2024 04:12:19 +0100 Subject: [PATCH 6/7] post-merge --- sphinx/roles.py | 25 +++++++++++++------------ tests/test_roles.py | 28 +++++++++++++++++----------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/sphinx/roles.py b/sphinx/roles.py index 3d0e53341e5..27ceed29532 100644 --- a/sphinx/roles.py +++ b/sphinx/roles.py @@ -367,21 +367,22 @@ def build_uri(self) -> str: return base_url + self.inliner.rfc_url % int(ret[0]) -def _format_rfc_target(target: str) -> str: +def _format_rfc_target(target: str, /) -> str: """ - Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) and attempts to - produce a human-friendly title for it. + Takes an RFC number with an optional anchor (like ``123#section-2.5.3``) + and attempts to produce a human-friendly title for it. - We have a set of known anchors that we format nicely, everything else we leave alone. + We have a set of known anchors that we format nicely, + everything else we leave alone. """ - [number, *anchor] = target.split('#', maxsplit=1) - anchor_parts = anchor[0].split('-') if anchor else None - if not anchor_parts or anchor_parts[0] not in ['section', 'page', 'appendix']: - return 'RFC %s' % (target,) - return 'RFC %s %s' % ( - number, - ' '.join([anchor_parts[0].title(), *anchor_parts[1:]]), - ) + number, _, anchor = target.partition('#') + if anchor: + first, _, remaining = anchor.partition('-') + if first in {'appendix', 'page', 'section'}: + if remaining: + return f'RFC {number} {first.title()} {remaining}' + return f'RFC {number} {first.title()}' + return f'RFC {target}' class GUILabel(SphinxRole): diff --git a/tests/test_roles.py b/tests/test_roles.py index c399f55a62a..127f671a145 100644 --- a/tests/test_roles.py +++ b/tests/test_roles.py @@ -116,16 +116,22 @@ def test_samp(): assert msg == [] -@pytest.mark.parametrize(('target', 'expected_output'), [ - ('123', 'RFC 123'), - ('123#section-1', 'RFC 123 Section 1'), - ('123#section-2.5.3', 'RFC 123 Section 2.5.3'), - ('123#page-13', 'RFC 123 Page 13'), - ('123#appendix-B', 'RFC 123 Appendix B'), - # Section names are also present in some RFC anchors. Until we come up with a reliable way - # to reconstruct the section names from the corresponding anchors with the correct - # capitalization it's probably better to leave them alone. - ('9076#name-risks-in-the-dns-data', 'RFC 9076#name-risks-in-the-dns-data'), -]) +@pytest.mark.parametrize( + ('target', 'expected_output'), + [ + ('123', 'RFC 123'), + ('123#', 'RFC 123#'), + ('123#id1', 'RFC 123#id1'), + ('123#section', 'RFC 123 Section'), + ('123#section-1', 'RFC 123 Section 1'), + ('123#section-2.5.3', 'RFC 123 Section 2.5.3'), + ('123#page-13', 'RFC 123 Page 13'), + ('123#appendix-B', 'RFC 123 Appendix B'), + # Section names are also present in some RFC anchors. Until we come up with a reliable way + # to reconstruct the section names from the corresponding anchors with the correct + # capitalization it's probably better to leave them alone. + ('9076#name-risks-in-the-dns-data', 'RFC 9076#name-risks-in-the-dns-data'), + ], +) def test_format_rfc_target(target: str, expected_output: str) -> None: assert _format_rfc_target(target) == expected_output From f26cc63fc375a21642f28205404351fd5af00e2d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 6 Oct 2024 04:13:40 +0100 Subject: [PATCH 7/7] CHANGES --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index ababf23e98e..a0acf5227ab 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -64,6 +64,8 @@ Features added * #11781: Add roles for referencing CVEs (:rst:role:`:cve: `) and CWEs (:rst:role:`:cwe: `). Patch by Hugo van Kemenade. +* #11809: Improve the formatting for RFC section anchors. + Patch by Jakub Stasiak and Adam Turner. Bugs fixed ----------