Skip to content

Commit 29c7237

Browse files
authored
Automate vsb authentication for scraper (#665)
1 parent a72623f commit 29c7237

File tree

6 files changed

+288
-24
lines changed

6 files changed

+288
-24
lines changed

Cargo.lock

Lines changed: 122 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

justfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ dev: services typeshare
4343
dev-deps:
4444
cargo install present
4545
cargo install typeshare-cli
46+
brew install --cask chromedriver
4647
curl -LsSf https://astral.sh/uv/install.sh | sh
4748

4849
e2e:
@@ -71,13 +72,12 @@ initialize *args: restart-services
7172
lint *args:
7273
pnpm run lint {{args}}
7374

74-
load cookie:
75-
cargo run --manifest-path crates/scraper/Cargo.toml -- --source=seed \
75+
load:
76+
cargo run --manifest-path tools/scraper/Cargo.toml -- --source=seed \
7677
--batch-size=5 \
7778
--scrape-vsb \
7879
--user-agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" \
79-
--course-delay 1000 \
80-
--cookie '{{cookie}}'
80+
--course-delay 1000
8181

8282
readme:
8383
present --in-place README.md

tools/scraper/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ reqwest = { version = "0.12.15", default-features = false, features = [ "blockin
1717
scraper = "0.23.1"
1818
serde = "1.0.219"
1919
serde_json = "1.0.140"
20+
thirtyfour = "0.35.0"
21+
tokio = "1.45.1"
22+
totp-rs = "5.7.0"
2023

2124
[dev-dependencies]
2225
include_dir = "0.7.4"

tools/scraper/src/auth.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
use super::*;
2+
3+
/// Port passed to the spawned `chromedriver` via `--port` and used in the
/// `http://localhost:<port>` URL the WebDriver client connects to.
const CHROMEDRIVER_PORT: usize = 9515;
4+
/// Retry bound consulted by `retry_until_visible` while polling for an element.
const MAX_ELEM_RETRIES: usize = 5;
5+
/// Page the browser session navigates to in order to start the login flow.
const VSB_LOGIN_URL: &str = "https://vsb.mcgill.ca/login.jsp";
6+
7+
/// Owns the spawned `chromedriver` child process.
///
/// When the wrapper goes out of scope the process is killed and then waited
/// on, so the server does not outlive the authentication attempt.
struct Driver(Child);

impl Drop for Driver {
    /// Kills the wrapped child and waits for it to exit.
    ///
    /// Both steps are best effort: failures are deliberately discarded
    /// because there is nothing useful to do with them during drop.
    fn drop(&mut self) {
        let Self(child) = self;
        child.kill().ok();
        child.wait().ok();
    }
}
15+
16+
pub(crate) fn authenticate() -> Result<String> {
17+
let email =
18+
env::var("VSB_EMAIL").expect("VSB_EMAIL must be specified for scraping");
19+
20+
let password = env::var("VSB_PASSWORD")
21+
.expect("VSB_PASSWORD must be specified for scraping");
22+
23+
let otp_secret = env::var("VSB_OTP_SECRET")
24+
.expect("VSB_OTP_SECRET must be specified for scraping");
25+
26+
info!("Starting chromedriver server");
27+
28+
let _chromedriver = Driver(
29+
Command::new("chromedriver")
30+
.args([format!("--port={}", CHROMEDRIVER_PORT)])
31+
.spawn()?,
32+
);
33+
34+
thread::sleep(Duration::from_secs(2));
35+
36+
info!("Retrieving cookie for VSB authentication...");
37+
38+
let rt = tokio::runtime::Builder::new_current_thread()
39+
.enable_all()
40+
.build()?;
41+
42+
rt.block_on(get_vsb_cookie(email, password, otp_secret))
43+
}
44+
45+
fn format_cookie(cookies: Vec<Cookie>) -> String {
46+
cookies
47+
.into_iter()
48+
.map(|c| format!("{}={}", c.name, c.value))
49+
.collect::<Vec<String>>()
50+
.join("; ")
51+
}
52+
53+
async fn get_vsb_cookie(
54+
email: String,
55+
password: String,
56+
otp_secret: String,
57+
) -> Result<String> {
58+
let mut caps = DesiredCapabilities::chrome();
59+
caps.set_headless()?;
60+
61+
let driver =
62+
WebDriver::new(format!("http://localhost:{}", CHROMEDRIVER_PORT), caps)
63+
.await?;
64+
65+
// Need to use `new_unchecked` because Microsoft auth secret length is too short.
66+
let totp = TOTP::new_unchecked(
67+
totp_rs::Algorithm::SHA1,
68+
6,
69+
1,
70+
30,
71+
Secret::Encoded(otp_secret.to_uppercase()).to_bytes()?,
72+
);
73+
74+
driver.goto(VSB_LOGIN_URL).await?;
75+
76+
info!("Entering email...");
77+
let email_field = retry_until_visible(&driver, By::Name("loginfmt")).await?;
78+
email_field.send_keys(email).await?;
79+
email_field.send_keys(Key::Return).await?;
80+
tokio::time::sleep(Duration::from_secs(1)).await;
81+
82+
info!("Entering password...");
83+
let password_field = retry_until_visible(&driver, By::Name("passwd")).await?;
84+
password_field.send_keys(password).await?;
85+
tokio::time::sleep(Duration::from_secs(1)).await;
86+
87+
info!("Signing in...");
88+
let submit_button =
89+
retry_until_visible(&driver, By::Id("idSIButton9")).await?;
90+
submit_button.click().await?;
91+
tokio::time::sleep(Duration::from_secs(1)).await;
92+
93+
info!("Entering OTP...");
94+
let otp_field = retry_until_visible(&driver, By::Name("otc")).await?;
95+
otp_field.send_keys(totp.generate_current()?).await?;
96+
otp_field.send_keys(Key::Return).await?;
97+
tokio::time::sleep(Duration::from_secs(3)).await;
98+
99+
info!("Finishing up...");
100+
let no_button = retry_until_visible(&driver, By::Id("idBtn_Back")).await?;
101+
no_button.click().await?;
102+
tokio::time::sleep(Duration::from_secs(1)).await;
103+
104+
let cookies = driver.get_all_cookies().await?;
105+
106+
driver.quit().await?;
107+
108+
Ok(format_cookie(cookies))
109+
}
110+
111+
async fn retry_until_visible(driver: &WebDriver, by: By) -> Result<WebElement> {
112+
let mut retries = 0;
113+
114+
let mut elem = driver.find(by.clone()).await;
115+
116+
while elem.is_err() {
117+
if retries > MAX_ELEM_RETRIES {
118+
break;
119+
}
120+
121+
tokio::time::sleep(Duration::from_secs(1)).await;
122+
123+
retries += 1;
124+
125+
elem = driver.find(by.clone()).await;
126+
}
127+
128+
Ok(elem?)
129+
}

0 commit comments

Comments
 (0)