Skip to content

Commit 2e6902a

Browse files
authored
Merge pull request #42 from Marcondiro/master
Expose UNICODE_VERSION const
2 parents 9bc6dd3 + 95d8b29 commit 2e6902a

File tree

5 files changed

+46
-5
lines changed

5 files changed

+46
-5
lines changed

generate/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ version = "0.0.0"
44
authors = ["David Tolnay <[email protected]>"]
55
edition = "2018"
66
publish = false
7+
8+
[dependencies]
9+
regex = "1.12.2"

generate/src/parse.rs

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
use regex::Regex;
12
use std::collections::BTreeSet as Set;
23
use std::fs;
34
use std::io::{self, Write};
45
use std::path::Path;
56
use std::process;
67

78
/// Data gathered from `DerivedCoreProperties.txt` by `parse_xid_properties`.
pub struct Properties {
9+
/// Three-number version read from the data file's header comment by
/// `parse_unicode_version`, in the order the numbers appear there.
unicode_version: (u64, u64, u64),
810
/// Code points, stored as `u32`, that `is_xid_start` tests membership against.
xid_start: Set<u32>,
911
/// Code points, stored as `u32`, for the XID_Continue property
/// (presumably queried the same way as `xid_start`; confirm against the rest of the impl).
xid_continue: Set<u32>,
1012
}
1113

1214
impl Properties {
15+
pub const fn unicode_version(&self) -> (u64, u64, u64) {
16+
self.unicode_version
17+
}
18+
1319
pub fn is_xid_start(&self, ch: char) -> bool {
1420
self.xid_start.contains(&(ch as u32))
1521
}
@@ -20,11 +26,6 @@ impl Properties {
2026
}
2127

2228
pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
23-
let mut properties = Properties {
24-
xid_start: Set::new(),
25-
xid_continue: Set::new(),
26-
};
27-
2829
let filename = "DerivedCoreProperties.txt";
2930
let path = ucd_dir.join(filename);
3031
let contents = fs::read_to_string(path).unwrap_or_else(|err| {
@@ -34,6 +35,12 @@ pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
3435
process::exit(1);
3536
});
3637

38+
let mut properties = Properties {
39+
unicode_version: parse_unicode_version(filename, &contents),
40+
xid_start: Set::new(),
41+
xid_continue: Set::new(),
42+
};
43+
3744
for (i, line) in contents.lines().enumerate() {
3845
if line.starts_with('#') || line.trim().is_empty() {
3946
continue;
@@ -73,3 +80,24 @@ fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
7380
/// Parses `s` as a hexadecimal number and returns it as a `u32`.
///
/// Returns `None` when `s` is empty, contains a character that is not a
/// hexadecimal digit, or encodes a value that does not fit in a `u32`.
fn parse_codepoint(s: &str) -> Option<u32> {
    match u32::from_str_radix(s, 16) {
        Ok(code_point) => Some(code_point),
        Err(_) => None,
    }
}
83+
84+
fn parse_unicode_version(filename: &str, contents: &str) -> (u64, u64, u64) {
85+
let (name, extension) = filename
86+
.rsplit_once('.')
87+
.expect("Failed to split file name into name and extension");
88+
let re = Regex::new(&format!(r"# {name}-(\d+).(\d+).(\d+).{extension}")).unwrap();
89+
let caps = re
90+
.captures(contents)
91+
.expect("Failed to find unicode version in unicode data");
92+
let v = caps
93+
.iter()
94+
.skip(1)
95+
.map(|s| {
96+
s.unwrap()
97+
.as_str()
98+
.parse()
99+
.expect("Failed to parse unicode version")
100+
})
101+
.collect::<Vec<u64>>();
102+
(v[0], v[1], v[2])
103+
}

generate/src/write.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ pub fn output(
2424
let mut out = Output::new();
2525
writeln!(out, "{}", HEAD);
2626

27+
writeln!(
28+
out,
29+
"pub const UNICODE_VERSION: (u64, u64, u64) = {:?};",
30+
properties.unicode_version()
31+
);
32+
writeln!(out);
33+
2734
let ascii_start = (0u8..128)
2835
.map(|c| (properties.is_xid_start(c as char) as u128) << c)
2936
.sum::<u128>();

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
#[rustfmt::skip]
253253
mod tables;
254254

255+
pub use crate::tables::UNICODE_VERSION;
255256
use crate::tables::{ASCII_CONTINUE, ASCII_START, CHUNK, LEAF, TRIE_CONTINUE, TRIE_START};
256257

257258
static ZERO: u8 = 0;

src/tables.rs

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)