Skip to content

Commit bb58807

Browse files
Fix parser bug where "type" was misinterpreted as a keyword inside a match (#3950)
Fixes #3790. Slightly hacky, but I think this is correct and it should also improve performance somewhat.
1 parent 722735d commit bb58807

File tree

4 files changed

+33
-1
lines changed

4 files changed

+33
-1
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737

3838
<!-- Changes to the parser or to version autodetection -->
3939

40+
- Fix bug where attributes named `type` were not accepted inside `match` statements
41+
(#3950)
4042
- Add support for PEP 695 type aliases containing lambdas and other unusual expressions
4143
(#3949)
4244

src/blib2to3/pgen2/parse.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
211211
# See note in docstring above. TL;DR this is ignored.
212212
self.convert = convert or lam_sub
213213
self.is_backtracking = False
214+
self.last_token: Optional[int] = None
214215

215216
def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
216217
"""Prepare for parsing.
@@ -236,6 +237,7 @@ def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
236237
self.rootnode: Optional[NL] = None
237238
self.used_names: Set[str] = set()
238239
self.proxy = proxy
240+
self.last_token = None
239241

240242
def addtoken(self, type: int, value: str, context: Context) -> bool:
241243
"""Add a token; return True iff this is the end of the program."""
@@ -317,6 +319,7 @@ def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> boo
317319
dfa, state, node = self.stack[-1]
318320
states, first = dfa
319321
# Done with this token
322+
self.last_token = type
320323
return False
321324

322325
else:
@@ -343,9 +346,23 @@ def classify(self, type: int, value: str, context: Context) -> List[int]:
343346
return [self.grammar.keywords[value]]
344347
elif value in self.grammar.soft_keywords:
345348
assert type in self.grammar.tokens
349+
# Current soft keywords (match, case, type) can only appear at the
350+
# beginning of a statement. So as a shortcut, don't try to treat them
351+
# like keywords in any other context.
352+
# ('_' is also a soft keyword in the real grammar, but for our grammar
353+
# it's just an expression, so we don't need to treat it specially.)
354+
if self.last_token not in (
355+
None,
356+
token.INDENT,
357+
token.DEDENT,
358+
token.NEWLINE,
359+
token.SEMI,
360+
token.COLON,
361+
):
362+
return [self.grammar.tokens[type]]
346363
return [
347-
self.grammar.soft_keywords[value],
348364
self.grammar.tokens[type],
365+
self.grammar.soft_keywords[value],
349366
]
350367

351368
ilabel = self.grammar.tokens.get(type)

tests/data/cases/pattern_matching_complex.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,7 @@
143143
y = 1
144144
case []:
145145
y = 2
146+
# issue 3790
147+
match (X.type, Y):
148+
case _:
149+
pass

tests/data/cases/type_aliases.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
type Alias[T]=lambda: T
66
type And[T]=T and T
77
type IfElse[T]=T if T else T
8+
type One = int; type Another = str
9+
class X: type InClass = int
810

911
type = aliased
1012
print(type(42))
@@ -16,6 +18,13 @@
1618
type Alias[T] = lambda: T
1719
type And[T] = T and T
1820
type IfElse[T] = T if T else T
21+
type One = int
22+
type Another = str
23+
24+
25+
class X:
26+
type InClass = int
27+
1928

2029
type = aliased
2130
print(type(42))

0 commit comments

Comments
 (0)