Skip to content

Commit dc8b762

Browse files
authored
Merge pull request #15 from YOCKOW/development
Pitch#6
2 parents 0387b67 + f04e449 commit dc8b762

File tree

10 files changed

+273
-1206
lines changed

10 files changed

+273
-1206
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ This repository is also a Swift package[^dependencies], accordingly you can try
1010
let package = Package(
1111
...
1212
dependencies: [
13-
.package(url: "https://github.com/YOCKOW/SF-StringEncodingNameImpl.git", from: "0.3.1"),
13+
.package(url: "https://github.com/YOCKOW/SF-StringEncodingNameImpl.git", from: "0.4.0"),
1414
],
1515
...
1616
)
@@ -21,8 +21,8 @@ let package = Package(
2121
import StringEncodingNameImpl
2222

2323
// Based on IANA registry
24-
print(String.Encoding.utf8.name!) // Prints "UTF-8"
25-
print(String.Encoding(name: "ISO_646.irv:1991") == .ascii) // Prints "true"
24+
print(String.Encoding.utf8.ianaName!) // Prints "UTF-8"
25+
print(String.Encoding(ianaName: "ISO_646.irv:1991") == .ascii) // Prints "true"
2626
```
2727

2828
[^dependencies]: This package depends on other packages which are required for the code generator. `StringEncodingNameImpl` itself has no dependencies.

Sources/CodeGenerator/main.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,20 @@ let requiredIANACharsets: [String] = [
2828
"US-ASCII",
2929
"EUC-JP",
3030
"ISO-8859-1",
31-
"Adobe-Symbol-Encoding",
32-
"Shift_JIS", "Windows-31J",
31+
// "Adobe-Symbol-Encoding",
32+
"Shift_JIS", // "Windows-31J",
3333
"ISO-8859-2",
34-
"UTF-16", "ISO-10646-UCS-2", "UNICODE-1-1",
34+
"UTF-16", // "ISO-10646-UCS-2", "UNICODE-1-1",
3535
"windows-1251",
36-
"windows-1252", "ISO-8859-1-Windows-3.0-Latin-1", "ISO-8859-1-Windows-3.1-Latin-1",
36+
"windows-1252", // "ISO-8859-1-Windows-3.0-Latin-1", "ISO-8859-1-Windows-3.1-Latin-1",
3737
"windows-1253",
38-
"windows-1254", "ISO-8859-9-Windows-Latin-5",
39-
"windows-1250", "ISO-8859-2-Windows-Latin-2",
38+
"windows-1254", // "ISO-8859-9-Windows-Latin-5",
39+
"windows-1250", // "ISO-8859-2-Windows-Latin-2",
4040
"ISO-2022-JP",
4141
"macintosh",
4242
"UTF-16BE",
4343
"UTF-16LE",
44-
"UTF-32", "ISO-10646-UCS-4",
44+
"UTF-32", // "ISO-10646-UCS-4",
4545
"UTF-32BE",
4646
"UTF-32LE",
4747
]
@@ -66,7 +66,7 @@ extension IANACharset: Decodable {
6666

6767
var isRequired: Bool {
6868
for req in requiredIANACharsets {
69-
if self.matches(req) {
69+
if self.matches(req, tokenizedBy: UTS22Tokenizer.self) {
7070
return true
7171
}
7272
}

Sources/StringEncodingNameImpl/IANACharset.swift

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,19 @@ package struct IANACharset {
1919
self.aliases = aliases
2020
}
2121

22-
package func matches(_ string: String) -> Bool {
22+
package func matches<T>(
23+
_ string: String,
24+
tokenizedBy tokenizer: T.Type
25+
) -> Bool where T: StringEncodingNameTokenizer {
2326
if let preferredMIMEName = self.preferredMIMEName,
24-
preferredMIMEName.isEqual(to: string, tokenizedBy: IANACharsetNameTokenizer.self) {
27+
preferredMIMEName.isEqual(to: string, tokenizedBy: tokenizer) {
2528
return true
2629
}
27-
if name.isEqual(to: string, tokenizedBy: IANACharsetNameTokenizer.self) {
30+
if name.isEqual(to: string, tokenizedBy: tokenizer) {
2831
return true
2932
}
3033
for alias in aliases {
31-
if alias.isEqual(to: string, tokenizedBy: IANACharsetNameTokenizer.self) {
34+
if alias.isEqual(to: string, tokenizedBy: tokenizer) {
3235
return true
3336
}
3437
}

Sources/StringEncodingNameImpl/IANACharsetNames.swift

Lines changed: 9 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -301,23 +301,9 @@ extension IANACharset {
301301

302302
// SKIPPED: IANA Character Set `KZ-1048`
303303

304-
/// IANA Characater Set `ISO-10646-UCS-2`
305-
static let iso10646UCS2 = IANACharset(
306-
preferredMIMEName: nil,
307-
name: "ISO-10646-UCS-2",
308-
aliases: [
309-
"csUnicode",
310-
]
311-
)
304+
// SKIPPED: IANA Character Set `ISO-10646-UCS-2`
312305

313-
/// IANA Characater Set `ISO-10646-UCS-4`
314-
static let iso10646UCS4 = IANACharset(
315-
preferredMIMEName: nil,
316-
name: "ISO-10646-UCS-4",
317-
aliases: [
318-
"csUCS4",
319-
]
320-
)
306+
// SKIPPED: IANA Character Set `ISO-10646-UCS-4`
321307

322308
// SKIPPED: IANA Character Set `ISO-10646-UCS-Basic`
323309

@@ -335,14 +321,7 @@ extension IANACharset {
335321

336322
// SKIPPED: IANA Character Set `ISO-Unicode-IBM-1265`
337323

338-
/// IANA Characater Set `UNICODE-1-1`
339-
static let unicode1_1 = IANACharset(
340-
preferredMIMEName: nil,
341-
name: "UNICODE-1-1",
342-
aliases: [
343-
"csUnicode11",
344-
]
345-
)
324+
// SKIPPED: IANA Character Set `UNICODE-1-1`
346325

347326
// SKIPPED: IANA Character Set `SCSU`
348327

@@ -408,41 +387,13 @@ extension IANACharset {
408387

409388
// SKIPPED: IANA Character Set `UTF-7-IMAP`
410389

411-
/// IANA Characater Set `ISO-8859-1-Windows-3.0-Latin-1`
412-
static let iso8859_1Windows3_0Latin1 = IANACharset(
413-
preferredMIMEName: nil,
414-
name: "ISO-8859-1-Windows-3.0-Latin-1",
415-
aliases: [
416-
"csWindows30Latin1",
417-
]
418-
)
390+
// SKIPPED: IANA Character Set `ISO-8859-1-Windows-3.0-Latin-1`
419391

420-
/// IANA Characater Set `ISO-8859-1-Windows-3.1-Latin-1`
421-
static let iso8859_1Windows3_1Latin1 = IANACharset(
422-
preferredMIMEName: nil,
423-
name: "ISO-8859-1-Windows-3.1-Latin-1",
424-
aliases: [
425-
"csWindows31Latin1",
426-
]
427-
)
392+
// SKIPPED: IANA Character Set `ISO-8859-1-Windows-3.1-Latin-1`
428393

429-
/// IANA Characater Set `ISO-8859-2-Windows-Latin-2`
430-
static let iso8859_2WindowsLatin2 = IANACharset(
431-
preferredMIMEName: nil,
432-
name: "ISO-8859-2-Windows-Latin-2",
433-
aliases: [
434-
"csWindows31Latin2",
435-
]
436-
)
394+
// SKIPPED: IANA Character Set `ISO-8859-2-Windows-Latin-2`
437395

438-
/// IANA Characater Set `ISO-8859-9-Windows-Latin-5`
439-
static let iso8859_9WindowsLatin5 = IANACharset(
440-
preferredMIMEName: nil,
441-
name: "ISO-8859-9-Windows-Latin-5",
442-
aliases: [
443-
"csWindows31Latin5",
444-
]
445-
)
396+
// SKIPPED: IANA Character Set `ISO-8859-9-Windows-Latin-5`
446397

447398
// SKIPPED: IANA Character Set `hp-roman8`
448399

@@ -472,29 +423,15 @@ extension IANACharset {
472423

473424
// SKIPPED: IANA Character Set `HP-Math8`
474425

475-
/// IANA Characater Set `Adobe-Symbol-Encoding`
476-
static let adobeSymbolEncoding = IANACharset(
477-
preferredMIMEName: nil,
478-
name: "Adobe-Symbol-Encoding",
479-
aliases: [
480-
"csHPPSMath",
481-
]
482-
)
426+
// SKIPPED: IANA Character Set `Adobe-Symbol-Encoding`
483427

484428
// SKIPPED: IANA Character Set `HP-DeskTop`
485429

486430
// SKIPPED: IANA Character Set `Ventura-Math`
487431

488432
// SKIPPED: IANA Character Set `Microsoft-Publishing`
489433

490-
/// IANA Characater Set `Windows-31J`
491-
static let windows31J = IANACharset(
492-
preferredMIMEName: nil,
493-
name: "Windows-31J",
494-
aliases: [
495-
"csWindows31J",
496-
]
497-
)
434+
// SKIPPED: IANA Character Set `Windows-31J`
498435

499436
// SKIPPED: IANA Character Set `GB2312`
500437

Sources/StringEncodingNameImpl/StringEncodingName.swift

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,17 @@ extension String.Encoding {
2727
}
2828

2929
/// The name of this encoding, compatible with the names in the IANA "charset" registry.
30-
public var name: String? {
30+
public var ianaName: String? {
3131
return _ianaCharset?.representativeName
3232
}
3333

3434
/// Creates an instance from a name listed in the IANA "charset" registry.
35-
public init?(name charsetName: String) {
35+
public init?(ianaName charsetName: String) {
3636
func __determineEncoding() -> String.Encoding? {
3737
func __matches(_ charsets: IANACharset...) -> Bool {
3838
assert(!charsets.isEmpty)
3939
for charset in charsets {
40-
if charset.matches(charsetName) {
40+
if charset.matches(charsetName, tokenizedBy: ASCIICaseInsensitiveTokenizer.self) {
4141
return true
4242
}
4343
}
@@ -52,23 +52,21 @@ extension String.Encoding {
5252
.japaneseEUC
5353
} else if __matches(.iso8859_1) {
5454
.isoLatin1
55-
} else if __matches(.adobeSymbolEncoding) {
56-
.symbol
57-
} else if __matches(.shiftJIS, .windows31J) {
55+
} else if __matches(.shiftJIS) {
5856
.shiftJIS
5957
} else if __matches(.iso8859_2) {
6058
.isoLatin2
61-
} else if __matches(.utf16, .iso10646UCS2, .unicode1_1) {
59+
} else if __matches(.utf16) {
6260
.utf16
6361
} else if __matches(.windows1251) {
6462
.windowsCP1251
65-
} else if __matches(.windows1252, .iso8859_1Windows3_0Latin1, .iso8859_1Windows3_1Latin1) {
63+
} else if __matches(.windows1252) {
6664
.windowsCP1252
6765
} else if __matches(.windows1253) {
6866
.windowsCP1253
69-
} else if __matches(.windows1254, .iso8859_9WindowsLatin5) {
67+
} else if __matches(.windows1254) {
7068
.windowsCP1254
71-
} else if __matches(.windows1250, .iso8859_2WindowsLatin2) {
69+
} else if __matches(.windows1250) {
7270
.windowsCP1250
7371
} else if __matches(.iso2022JP) {
7472
.iso2022JP
@@ -78,7 +76,7 @@ extension String.Encoding {
7876
.utf16BigEndian
7977
} else if __matches(.utf16LE) {
8078
.utf16LittleEndian
81-
} else if __matches(.utf32, .iso10646UCS4) {
79+
} else if __matches(.utf32) {
8280
.utf32
8381
} else if __matches(.utf32BE) {
8482
.utf32BigEndian

0 commit comments

Comments
 (0)