Skip to content

Commit bd14659

Browse files
authored
Merge pull request #34 from cuappdev/lax-fix
Fix missing team field and bad formatting for lacrosse
2 parents 3dc57b7 + 8990b60 commit bd14659

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

src/scrapers/game_details_scrape.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,26 @@
1+
import re
12
import requests
23
from bs4 import BeautifulSoup
34
from src.utils.constants import *
45

6+
def clean_name(name):
    """Strip extra information from a player name, keeping two name parts.

    Two layouts are recognised:

    * ``"Last, First <extras>"`` -> ``"Last, First"``
    * ``"First Last <extras>"``  -> ``"First Last"``

    Name parts may contain internal apostrophes or hyphens (``O'Brien``,
    ``Smith-Jones``), which are kept intact rather than cut off at the
    punctuation mark.

    Args:
        name: Raw text of a scorer/assist cell.

    Returns:
        The cleaned name. When neither layout matches, the input is returned
        with any parenthesised note or trailing number (and everything after
        it) removed, and surrounding whitespace stripped.
    """
    # A single name part: word characters, optionally joined by an apostrophe
    # or hyphen. Plain ``\w+`` would truncate "O'Brien" to "O".
    token = r"\w+(?:['’-]\w+)*"

    if ',' in name:
        # "Last, First" layout: everything before the comma is the last name.
        match = re.match(rf'^([^,]+),\s*({token})', name)
        if match:
            return f"{match.group(1)}, {match.group(2)}"
    else:
        # "First Last" layout.
        match = re.match(rf'^({token})\s+({token})', name)
        if match:
            return f"{match.group(1)} {match.group(2)}"

    # Fallback: drop a parenthesised note, then a trailing number, along with
    # whatever follows either of them.
    cleaned = re.sub(r'\s*\([^)]*\).*$', '', name)
    cleaned = re.sub(r'\s*\d+.*$', '', cleaned)
    return cleaned.strip()
23+
524
def fetch_page(url, timeout=30):
    """Download *url* and return its body parsed as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection, hanging the whole scrape. Pass ``None`` to restore
            the old wait-indefinitely behaviour.

    Returns:
        A ``BeautifulSoup`` tree built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
@@ -150,16 +169,25 @@ def lacrosse_summary(box_score_section):
150169
scoring_rows = scoring_table.find(TAG_TBODY)
151170
if scoring_rows:
152171
for row in scoring_rows.find_all(TAG_TR):
172+
team = row.find_all(TAG_TD)[1].find(TAG_IMG)[ATTR_ALT]
153173
period = row.find_all(TAG_TD)[2].text.strip()
154174
time = row.find_all(TAG_TD)[3].text.strip()
155-
scorer = row.find_all(TAG_TD)[4].text.strip()
156-
assist = row.find_all(TAG_TD)[5].text.strip()
175+
scorer = clean_name(row.find_all(TAG_TD)[4].text.strip())
176+
assist = clean_name(row.find_all(TAG_TD)[5].text.strip())
157177
opp_score = row.find_all(TAG_TD)[7].text.strip()
158178
cor_score = row.find_all(TAG_TD)[6].text.strip()
179+
180+
if assist and assist != "Unassisted":
181+
desc = f"Scored by {scorer}, assisted by {assist}"
182+
else:
183+
desc = f"Scored by {scorer}"
184+
159185
summary.append({
186+
'team': team,
160187
'period': period,
161188
'time': time,
162189
'scorer': scorer,
190+
'description': desc,
163191
'assist': assist,
164192
'cor_score': cor_score,
165193
'opp_score': opp_score,

src/scrapers/games_scraper.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,9 @@ def process_game_data(game_data):
170170
city = geo_location
171171
state = geo_location
172172
else:
173-
city, state = map(str.strip, geo_location.split(","))
173+
parts = [part.strip() for part in geo_location.split(",")]
174+
city = parts[0]
175+
state = parts[-1]
174176
location = location_data[1] if len(location_data) > 1 else None
175177

176178
team = TeamService.get_team_by_name(game_data["opponent_name"])
@@ -228,7 +230,6 @@ def process_game_data(game_data):
228230

229231
# If they don't match, flip the arrays
230232
if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final):
231-
print("flipping")
232233
game_data["score_breakdown"] = game_data["score_breakdown"][::-1]
233234

234235
# finds any existing game with the same key fields regardless of time

0 commit comments

Comments
 (0)