Skip to content

Commit bdecdb7

Browse files
committed
fix: HTML clean-up with specific word between double-anchors
1 parent 9a67c27 commit bdecdb7

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

wikidict/context.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,9 @@ def clean_html_input(code: str, locale: str) -> str:
395395
'ab'
396396
>>> clean_html_input('a<references>xcv</references>b', "fr")
397397
'ab'
398+
399+
>>> clean_html_input("# {{lb|en|<<transitive>> or (obsolete) <<reflexive>>}} to [[ask]] politely, to say [[please]]", "en")
400+
'# {{lb|en|<<transitive>> or (obsolete) <<reflexive>>}} to [[ask]] politely, to say [[please]]'
398401
"""
399402
sub = re.sub
400403

@@ -436,7 +439,7 @@ def clean_html_input(code: str, locale: str) -> str:
436439
# <ref>foo</ref> → ''
437440
# <ref name="CFC">{{Import:CFC}}</ref> → ''
438441
# <ref name="CFC"><tag>...</tag></ref> → ''
439-
code = sub(r"<ref[^>]*/?>[\s\S]*?(?:</\s*ref[^>]*>|$)", "", code)
442+
code = sub(r"<+ref[^>]*/?>[\s\S]*?(?:</\s*ref[^>]*>|$)", lambda m: m[0] if m[0].startswith("<<") else "", code)
440443

441444
# <ref> → ''
442445
# </ref> → ''

0 commit comments

Comments
 (0)