File tree: 5 files changed, +25 -8 lines changed
Sources/_RegexParser/Regex/Parse Expand file tree Collapse file tree 5 files changed +25
-8
lines changed Original file line number Diff line number Diff line change @@ -43,6 +43,7 @@ enum ParseError: Error, Hashable {
43
43
case expectedEscape
44
44
case invalidEscape( Character )
45
45
case confusableCharacter( Character )
46
+ case literalCharMustBeNFC( Character )
46
47
47
48
case quoteMayNotSpanMultipleLines
48
49
case unsetExtendedSyntaxMayNotSpanMultipleLines
@@ -145,6 +146,8 @@ extension ParseError: CustomStringConvertible {
145
146
return " invalid escape sequence ' \\ \( c) ' "
146
147
case . confusableCharacter( let c) :
147
148
return " ' \( c) ' is confusable for a metacharacter; use ' \\ u{...}' instead "
149
+ case . literalCharMustBeNFC( let c) :
150
+ return " ' \( c) ' may not have the expected scalars; specify them explicitly with ' \\ u{...}' instead "
148
151
case . quoteMayNotSpanMultipleLines:
149
152
return " quoted sequence may not span multiple lines in multi-line literal "
150
153
case . unsetExtendedSyntaxMayNotSpanMultipleLines:
Original file line number Diff line number Diff line change @@ -2055,13 +2055,14 @@ extension Parser {
2055
2055
return . invalid
2056
2056
}
2057
2057
2058
- guard let charLoc = p. tryEatWithLoc ( ) else {
2058
+ guard let charWithLoc = p. tryEatWithLoc ( ) else {
2059
2059
// We check at the beginning of the function for `isEmpty`, so we should
2060
2060
// not be at the end of the input here.
2061
2061
p. unreachable ( " Unexpected end of input " )
2062
2062
return nil
2063
2063
}
2064
- let char = charLoc. value
2064
+ let char = charWithLoc. value
2065
+ let charLoc = charWithLoc. location
2065
2066
switch char {
2066
2067
case " ) " , " | " :
2067
2068
if customCC {
@@ -2092,7 +2093,11 @@ extension Parser {
2092
2093
let scalars = char. unicodeScalars
2093
2094
if scalars. count > 1 && scalars. first!. isASCII && char != " \r \n " &&
2094
2095
!char. isLetter && !char. isNumber {
2095
- p. error ( . confusableCharacter( char) , at: charLoc. location)
2096
+ p. error ( . confusableCharacter( char) , at: charLoc)
2097
+ }
2098
+ // Reject unescaped non-NFC characters.
2099
+ if !char. isNFC {
2100
+ p. error ( . literalCharMustBeNFC( char) , at: charLoc)
2096
2101
}
2097
2102
break
2098
2103
}
Original file line number Diff line number Diff line change @@ -332,7 +332,7 @@ extension RegexTests {
332
332
doesNotContain: [ . match, . consumeBy, . matchScalarUnchecked] )
333
333
// quoted literal is not all ascii -> match scalar when possible, always do boundary checks
334
334
expectProgram (
335
- for: " aaa \u{301} " ,
335
+ for: # "aaa\u{301}"# ,
336
336
contains: [ . match, . matchScalar] ,
337
337
doesNotContain: [ . consumeBy, . matchScalarUnchecked] )
338
338
// scalar mode -> always emit match scalar without boundary checks
@@ -347,7 +347,7 @@ extension RegexTests {
347
347
contains: [ . matchScalarUnchecked] ,
348
348
doesNotContain: [ . match, . consumeBy, . matchScalar] )
349
349
expectProgram (
350
- for: " aaa \u{301} " ,
350
+ for: # "aaa\u{301}"# ,
351
351
semanticLevel: . unicodeScalar,
352
352
contains: [ . matchScalarUnchecked] ,
353
353
doesNotContain: [ . match, . consumeBy, . matchScalar] )
Original file line number Diff line number Diff line change @@ -226,7 +226,7 @@ extension RegexTests {
226
226
227
227
// MARK: Allowed combining characters
228
228
229
- firstMatchTest ( " e \u{301} " , input: " e \u{301} " , match: " e \u{301} " )
229
+ firstMatchTest ( # "e\u{301}"# , input: " e \u{301} " , match: " e \u{301} " )
230
230
firstMatchTest ( " 1 \u{358} " , input: " 1 \u{358} " , match: " 1 \u{358} " )
231
231
firstMatchTest ( #"\ \#u{361}"# , input: " \u{361} " , match: " \u{361} " )
232
232
@@ -774,7 +774,7 @@ extension RegexTests {
774
774
firstMatchTest ( #"[\d]"# , input: " 1️⃣ " , match: " 1️⃣ " )
775
775
firstMatchTest ( #"(?P)[\d]"# , input: " 1️⃣ " , match: nil )
776
776
firstMatchTest ( " [0-2&&1-3] " , input: " 1️⃣ " , match: nil )
777
- firstMatchTest ( " [1-2e \u{301} ] " , input: " 1️⃣ " , match: nil )
777
+ firstMatchTest ( # "[1-2e\u{301}]"# , input: " 1️⃣ " , match: nil )
778
778
779
779
firstMatchTest ( #"[\u{3A9}-\u{3A9}]"# , input: " \u{3A9} " , match: " \u{3A9} " )
780
780
Original file line number Diff line number Diff line change @@ -374,10 +374,19 @@ extension RegexTests {
374
374
375
375
// MARK: Allowed combining characters
376
376
377
- parseTest ( " e \u{301} " , " e \u{301} " )
378
377
parseTest ( " 1 \u{358} " , " 1 \u{358} " )
379
378
parseTest ( #"\ \#u{361}"# , " \u{361} " )
380
379
380
+ // We don't allow non-NFC literal characters.
381
+ parseTest (
382
+ " e \u{301} " , " e \u{301} " , throwsError: . literalCharMustBeNFC( " e \u{301} " )
383
+ )
384
+ parseTest ( " \u{E9} " , " e \u{301} " )
385
+
386
+ // It can't be escaped either; it must be written using `\u{...}`.
387
+ parseTest (
388
+ " \\ e \u{301} " , " e \u{301} " , throwsError: . invalidEscape( " e \u{301} " ) )
389
+
381
390
// MARK: Alternations
382
391
383
392
parseTest (
You can’t perform that action at this time.
0 commit comments