Skip to content

Commit c4abb50

Browse files
authored
Merge pull request #120 from cs50/feat/issue-78-word-lexer
Added custom WordLexer for .txt files
2 parents 5357786 + c32279a commit c4abb50

File tree

2 files changed

+23
-3
lines changed

2 files changed

+23
-3
lines changed

compare50/_data.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import abc
22
from collections.abc import Mapping, Sequence
3-
import os
43
import pathlib
54
import numbers
65

76
import attr
87
import pygments
98
import pygments.lexers
9+
from .lexers import WordLexer
1010

1111

1212
__all__ = ["Pass", "Comparator", "File", "Submission",
@@ -191,14 +191,18 @@ def lexer(self):
191191

192192
# get lexer for this file type
193193
try:
194-
lexer = pygments.lexers.get_lexer_for_filename(self.name.name)
194+
if ext == ".txt":
195+
lexer = WordLexer()
196+
else:
197+
lexer = pygments.lexers.get_lexer_for_filename(self.name.name)
198+
195199
self._lexer_cache[ext] = lexer
196200
return lexer
197201
except pygments.util.ClassNotFound:
198202
try:
199203
return pygments.lexers.guess_lexer(self.read())
200204
except pygments.util.ClassNotFound:
201-
return pygments.lexers.special.TextLexer()
205+
return WordLexer()
202206

203207
@classmethod
204208
def get(cls, id):

compare50/lexers.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from pygments.lexer import RegexLexer
2+
from pygments.token import Text, Name
3+
4+
class WordLexer(RegexLexer):
    """Custom compare50 lexer that creates a token based on each 'word'.

    A run of word characters is emitted as one ``Name`` token, while runs
    of whitespace and each individual non-word character are emitted as
    ``Text`` tokens.
    """

    name = "WordLexer"
    aliases = ["word"]
    filenames = ["*.txt"]

    # Single "root" state; its rules are listed whitespace first, then
    # words, then a one-character catch-all for everything else.
    tokens = dict(
        root=[
            (r"\s+", Text),  # run of whitespace
            (r"\w+", Name),  # one word (run of word characters)
            (r"\W", Text),  # a single punctuation or other character
        ],
    )

0 commit comments

Comments
 (0)