Skip to content

Commit d9f00e4

Browse files
authored
fix(collector): fix exception in collector (#161)
1 parent 3d28917 commit d9f00e4

File tree

8 files changed

+3066
-125
lines changed

8 files changed

+3066
-125
lines changed

COLORSCHEMES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,4 @@ ColorSchemes List (43)
146146
- duskfox
147147
- nightfox
148148
- nordfox
149-
- terafox
149+
- terafox

collect.py

Lines changed: 80 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ def __init__(
188188
if isinstance(config.git_branch, str):
189189
self.git_branch = config.git_branch
190190
self.color_names: list[str] = []
191+
logging.debug(f"initialize ColorSpec:{self}")
191192

192193
def _init_url(self, handle: str) -> str:
193194
handle = handle.strip()
@@ -270,15 +271,17 @@ def truncate() -> None:
270271
def all() -> list:
    """Load every record stored in ``ColorSpec.DB`` as a ``ColorSpec``.

    Returns:
        One ``ColorSpec`` per stored record. If reading the database or
        converting any record fails, the failure is logged and an empty
        list is returned instead (best-effort load).
    """
    try:
        records = ColorSpec.DB.all()
        for i, r in enumerate(records):
            logging.debug(f"all records-{i}:{r}")
        return [
            ColorSpec(
                handle=r[ColorSpec.HANDLE],
                github_stars=r[ColorSpec.GITHUB_STARS],
                last_git_commit=datetime_fromstring(r[ColorSpec.LAST_GIT_COMMIT]),
                priority=r[ColorSpec.PRIORITY],
                source=r[ColorSpec.SOURCE],
            )
            for r in records
        ]
    except Exception:
        # Still best-effort, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the reason for the empty result is
        # recorded with a traceback instead of being dropped silently.
        logging.exception("failed to load ColorSpec records, returning empty list")
        return []
@@ -290,6 +293,7 @@ def download_git_object(self) -> bool:
290293
clone_cmd = f"git clone --depth=1 --single-branch --branch {self.git_branch} {self.url} {self.candidate_path}"
291294
else:
292295
clone_cmd = f"git clone --depth=1 {self.url} {self.candidate_path}"
296+
logging.debug(f"self:{self}, candidate_path:{self.candidate_path}")
293297
candidate_dir = pathlib.Path(f"{self.candidate_path}")
294298
logging.debug(
295299
f"{candidate_dir} exist: {candidate_dir.exists()}, isdir: {candidate_dir.is_dir()}"
@@ -351,6 +355,9 @@ def make_driver() -> Chrome:
351355

352356
# https://vimcolorschemes.com/top
353357
class VimColorSchemes:
358+
def __init__(self) -> None:
    """Initialize the running counter at 1."""
    # 1-based sequence number; fetch() prints it in its
    # "fetch (vcs) spec-N" log line and increments it after each save.
    self.counter = 1
360+
354361
def _pages(self) -> typing.Iterable[str]:
355362
i = 0
356363
while True:
@@ -360,61 +367,81 @@ def _pages(self) -> typing.Iterable[str]:
360367
yield f"https://vimcolorschemes.com/top/page/{i+1}"
361368
i += 1
362369

363-
def _parse_spec(
    self, element: WebElement, source: str
) -> typing.Optional[ColorSpec]:
    """Build a ``ColorSpec`` from one ``<article class='card'>`` element.

    Args:
        element: the card element to read the repository link, star count
            and timestamp from.
        source: stored as the spec's ``source`` (``fetch`` passes the URL
            of the page the card was found on).

    Returns:
        The parsed ``ColorSpec`` (priority 0), or ``None`` when the card
        cannot be parsed. Parsing is best-effort: one bad card is skipped
        rather than aborting the whole scrape.
    """
    logging.debug(f"parsing (vsc) spec element:{element}")
    try:
        url = element.find_element(
            By.XPATH, "./a[@class='card__link']"
        ).get_attribute("href")
        if not url:
            # get_attribute yields nothing usable when the link has no href;
            # report it explicitly instead of failing later on None.
            logging.warning(f"skip (vsc) card without href from {source}")
            return None
        if url.endswith("/"):
            url = url[:-1]
        logging.debug(f"parsing (vsc) spec handle_elem:{url}")
        # The handle is the last two path segments of the link ("owner/repo").
        handle = "/".join(url.split("/")[-2:])
        logging.debug(f"parsing (vsc) spec handle:{handle}")
        github_stars = int(
            element.find_element(
                By.XPATH,
                "./a/section/header[@class='meta-header']//div[@class='meta-header__statistic']//b",
            ).text
        )
        logging.debug(f"parsing (vsc) spec github_stars:{github_stars}")
        creates_updates = element.find_elements(
            By.XPATH,
            "./a/section/footer[@class='meta-footer']//div[@class='meta-footer__column']//p[@class='meta-footer__row']",
        )
        logging.debug(f"parsing (vsc) spec creates_updates:{creates_updates}")
        # The second footer row's <time datetime=...> is used as the last
        # commit time (presumably row 0 is "created", row 1 "updated").
        last_git_commit = datetime.datetime.strptime(
            creates_updates[1]
            .find_element(By.XPATH, "./b/time")
            .get_attribute("datetime"),
            "%Y-%m-%dT%H:%M:%S.%fZ",
        )
        logging.debug(f"parsing (vsc) spec last_git_commit:{last_git_commit}")
        return ColorSpec(
            handle,
            github_stars,
            last_git_commit=last_git_commit,
            priority=0,
            source=source,
        )
    except Exception as e:
        # Deliberately broad: lookup, int() and strptime can each fail on a
        # malformed or not-yet-rendered card. Make the skip visible at the
        # default log level and keep the traceback for --debug runs.
        logging.warning(f"skip unparsable (vsc) card from {source}: {e!r}")
        logging.debug("(vsc) card parse traceback", exc_info=True)
        return None
392412

393413
def fetch(self) -> None:
    """Walk the pages from ``_pages()`` and save every qualifying spec.

    A card qualifies when it parses, its handle has exactly two
    "/"-separated parts, and its star count is at least ``GITHUB_STARS``.
    Scanning stops after the first page on which no card qualifies.
    """
    card_xpath = "//article[@class='card']"
    with make_driver() as browser:
        for page_url in self._pages():
            browser.get(page_url)
            # Scroll down by the full document height before reading cards
            # (presumably so lazily rendered cards are present).
            browser.execute_script("window.scrollBy(0,document.body.scrollHeight)")
            saved_on_page = False
            for card in find_elements(browser, card_xpath):
                parsed = self._parse_spec(card, page_url)
                logging.debug(f"vsc repo:{parsed}")
                if parsed is None:
                    continue
                if parsed.handle.count("/") != 1:
                    logging.debug(f"skip for invalid handle - (vcs) spec:{parsed}")
                elif parsed.github_stars < GITHUB_STARS:
                    logging.debug(f"skip for lower stars - (vcs) spec:{parsed}")
                else:
                    logging.info(f"fetch (vcs) spec-{self.counter}:{parsed}")
                    saved_on_page = True
                    parsed.save()
                    self.counter += 1
            if saved_on_page:
                continue
            # Nothing on this page qualified, so stop paging.
            logging.debug("no more enough github stars, exit...")
            break
413437

414438

415439
# https://www.trackawesomelist.com/rockerBOO/awesome-neovim/readme/#colorscheme
416440
class AwesomeNeovimColorScheme:
417-
def _parse_spec(self, element: WebElement) -> ColorSpec:
441+
def __init__(self) -> None:
    """Initialize the running counter at 1."""
    # 1-based sequence number; _parse_colors_list() prints it in its
    # "fetch (asn) repo-N" log line and increments it per appended spec.
    self.counter = 1
443+
444+
def _parse_spec(self, element: WebElement, source: str) -> ColorSpec:
418445
a = element.find_element(By.XPATH, "./a").text
419446
a_splits = a.split("(")
420447
handle = a_splits[0]
@@ -424,7 +451,7 @@ def _parse_spec(self, element: WebElement) -> ColorSpec:
424451
github_stars,
425452
last_git_commit=None,
426453
priority=100,
427-
source="awesome-neovim",
454+
source=source,
428455
)
429456

430457
def _parse_colors_list(self, driver: Chrome, tag_id: str) -> list[ColorSpec]:
@@ -434,22 +461,27 @@ def _parse_colors_list(self, driver: Chrome, tag_id: str) -> list[ColorSpec]:
434461
f"//h3[@id='{tag_id}']/following-sibling::p/following-sibling::ul",
435462
)
436463
for e in colors_group.find_elements(By.XPATH, "./li"):
437-
spec = self._parse_spec(e)
464+
spec = self._parse_spec(
465+
e,
466+
f"https://www.trackawesomelist.com/rockerBOO/awesome-neovim/readme#{tag_id}",
467+
)
438468
if len(spec.handle.split("/")) != 2:
439469
logging.debug(f"skip for invalid handle - (asn) spec:{spec}")
440470
continue
441471
if spec.github_stars < GITHUB_STARS:
442472
logging.debug(f"skip for lower stars - (asn) spec:{spec}")
443473
continue
444-
logging.debug(f"get (asn) repo:{spec}")
474+
logging.info(f"fetch (asn) repo-{self.counter}:{spec}")
445475
repos.append(spec)
476+
self.counter = self.counter + 1
446477
return repos
447478

448479
def fetch(self) -> None:
449480
with make_driver() as driver:
450481
driver.get(
451482
"https://www.trackawesomelist.com/rockerBOO/awesome-neovim/readme"
452483
)
484+
driver.execute_script("window.scrollBy(0,document.body.scrollHeight)")
453485
treesitter_specs = self._parse_colors_list(
454486
driver, "tree-sitter-supported-colorscheme"
455487
)
@@ -565,7 +597,8 @@ def build(self) -> None:
565597
total += 1
566598

567599
md = MdUtils(file_name="COLORSCHEMES", title=f"ColorSchemes List ({total})")
568-
for spec in ColorSpec.all():
600+
all_specs = sorted(ColorSpec.all(), key=lambda s: s.github_stars, reverse=True)
601+
for spec in all_specs:
569602
logging.info(f"collect spec:{spec}")
570603
color_names = spec.get_vim_color_names()
571604
color_names = sorted(color_names)
@@ -587,12 +620,17 @@ def build(self) -> None:
587620
)
588621
@click.option("--no-headless", "no_headless_opt", is_flag=True, help="disable headless")
589622
@click.option("--skip-fetch", "skip_fetch_opt", is_flag=True, help="skip fetching")
590-
def collect(debug_opt, no_headless_opt, skip_fetch_opt):
623+
@click.option(
624+
"--skip-remove-db", "skip_remove_db_opt", is_flag=True, help="skip removing db.json"
625+
)
626+
def collect(debug_opt, no_headless_opt, skip_fetch_opt, skip_remove_db_opt):
591627
global WEBDRIVER_HEADLESS
592628
init_logging(logging.DEBUG if debug_opt else logging.INFO)
593629
logging.debug(f"debug_opt:{debug_opt}, no_headless_opt:{no_headless_opt}")
594630
if no_headless_opt:
595631
WEBDRIVER_HEADLESS = False
632+
if not skip_remove_db_opt:
633+
os.remove("db.json")
596634
if not skip_fetch_opt:
597635
vcs = VimColorSchemes()
598636
vcs.fetch()

0 commit comments

Comments
 (0)