Skip to content
This repository was archived by the owner on Oct 18, 2024. It is now read-only.

Commit b8cd295

Browse files
committed
submod: common: remove redundant interpunction
(cherry picked from commit d3ff49e)
1 parent d4833f1 commit b8cd295

File tree

3 files changed

+11
-6
lines changed

3 files changed

+11
-6
lines changed

Contents/Libraries/Shared/submod_test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
if debug:
2121
logging.basicConfig(level=logging.DEBUG)
2222

23-
sub = Subtitle(Language.fromietf("pol"), mods=["OCR_fixes", "common", "remove_tags", "OCR_fixes", "shift_offset(s=-5, ms=-350)"])
23+
sub = Subtitle(Language.fromietf("eng"), mods=["OCR_fixes", "common", "remove_tags", "OCR_fixes", "shift_offset(s=-5, ms=-350)"])
2424
sub.content = open(fn).read()
2525
sub.normalize()
2626
content = sub.get_modified_content(debug=True)
@@ -35,10 +35,10 @@
3535
#content = fix_text(Subtitle.pysubs2_to_unicode(submod.f, format=format), **ftfy_defaults)\
3636
# .encode(encoding="utf-8")
3737
#print submod.f.to_string("srt", encoding="utf-8")
38-
#print repr(srt)
39-
f = codecs.open("testout.srt", "w+")
40-
f.write(content)
41-
f.close()
38+
#print repr(content)
39+
#f = codecs.open("testout.srt", "w+")
40+
#f.write(content)
41+
#f.close()
4242
#print submod.f.to_string("srt")
4343
#submod.modify("OCR_fixes")
4444
#submod.modify("change_FPS(from=24,to=25)")

Contents/Libraries/Shared/subzero/modification/mods/common.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ class CommonFixes(SubtitleTextModification):
7171
NReProcessor(re.compile(ur'(?u)((?:[^.\s])+\.\s+)([a-zà-ž])'),
7272
lambda match: ur'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
7373

74+
# remove double interpunction
75+
NReProcessor(re.compile(ur'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
76+
lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
77+
name="CM_double_interpunct"),
78+
7479
# remove spaces before punctuation
7580
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]))'), r"\1", name="CM_punctuation_space"),
7681
]

Contents/Libraries/Shared/test.srt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Mah numbar is wrong: 1 91 7
2222
"less text before colons: Earth. Utah, North America."
2323
MUSIC PLAYS What is that sound?!
2424
ls it , and a punctuation issue ? lol
25-
take them balls it. L like turtles
25+
take them balls it. L like turtles !! ! this, . is bad .
2626

2727
6
2828
00:00:19,686 --> 00:00:21,103

0 commit comments

Comments
 (0)