Skip to content

Commit a371b79

Browse files
committed
bpo-40334: Refactor peg_generator to receive a Tokens file when building c code
1 parent 5d1f32d commit a371b79

File tree

8 files changed

+218
-90
lines changed

8 files changed

+218
-90
lines changed

Makefile.pre.in

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,9 @@ regen-grammar: regen-token
823823
.PHONY: regen-pegen
824824
regen-pegen:
825825
@$(MKDIR_P) $(srcdir)/Parser/pegen
826-
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \
826+
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
827+
$(srcdir)/Grammar/python.gram \
828+
$(srcdir)/Grammar/Tokens \
827829
-o $(srcdir)/Parser/pegen/parse.new.c
828830
$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c
829831

PCbuild/regen.vcxproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@
168168
</Target>
169169
<Target Name="_RegenPegen" BeforeTargets="Build">
170170
<!-- Regenerate Parser/pegen/parse.c -->
171-
<Exec Command="&quot;$PYTHONPATH=$(srcdir)/Tools/peg_generator&quot; &quot;$(PythonExe)&quot; -m pegen -c -q &quot;$(PySourcePath)Grammar\python.gram&quot; -o &quot;$(IntDir)parse.c&quot;" />
171+
<!-- PYTHONPATH is handed to the child process through the Exec task's EnvironmentVariables parameter. A leading quoted "$PYTHONPATH=..." token would be treated by the command interpreter as the program to run, not as an assignment. The path is built from $(PySourcePath), like the grammar and tokens paths in the command. -->
<Exec Command="&quot;$(PythonExe)&quot; -m pegen -q c &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; -o &quot;$(IntDir)parse.c&quot;" EnvironmentVariables="PYTHONPATH=$(PySourcePath)Tools\peg_generator" />
172172
<Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
173173
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
174174
</Copy>

Tools/peg_generator/Makefile

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ CPYTHON ?= ../../Lib
1010
MYPY ?= mypy
1111

1212
GRAMMAR = ../../Grammar/python.gram
13+
TOKENS = ../../Grammar/Tokens
1314
TESTFILE = data/cprog.py
1415
TIMEFILE = data/xxl.py
1516
TESTDIR = .
@@ -20,8 +21,8 @@ data/xxl.py:
2021

2122
build: peg_extension/parse.c
2223

23-
peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
24-
$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
24+
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
25+
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
2526

2627
clean:
2728
-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
@@ -79,7 +80,8 @@ time_stdlib_parse: data/xxl.py
7980

8081
test_local:
8182
$(PYTHON) scripts/test_parse_directory.py \
82-
-g $(GRAMMAR) \
83+
--grammar-file $(GRAMMAR) \
84+
--tokens-file $(TOKENS) \
8385
-d $(TESTDIR) \
8486
$(TESTFLAGS) \
8587
--exclude "*/failset/*" \
@@ -88,7 +90,8 @@ test_local:
8890

8991
test_global: $(CPYTHON)
9092
$(PYTHON) scripts/test_parse_directory.py \
91-
-g $(GRAMMAR) \
93+
--grammar-file $(GRAMMAR) \
94+
--tokens-file $(TOKENS) \
9295
-d $(CPYTHON) \
9396
$(TESTFLAGS) \
9497
--exclude "*/test2to3/*" \

Tools/peg_generator/pegen/__main__.py

Lines changed: 87 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,64 @@
1111
import token
1212
import traceback
1313

14+
from typing import Tuple
15+
16+
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
17+
18+
19+
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build for the ``c`` sub-command.

    Reads ``grammar_filename``, ``tokens_filename``, ``output``,
    ``compile_extension``, ``optimized``, ``skip_actions`` and ``verbose``
    from *args* and forwards them to
    ``pegen.build.build_c_parser_and_generator``.

    Returns the ``(grammar, parser, tokenizer, gen)`` tuple produced by the
    builder.

    If the build raises and ``args.verbose`` is falsy, a traceback-free error
    report is printed, a hint is written to stderr and the process exits with
    status 1.  With a truthy ``args.verbose`` the exception propagates.
    """
    # Imported lazily, as in the rest of this module's entry points.
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Two booleans derived from the -v count; they are forwarded positionally
    # to the builder (one for the tokenizer, one for the parser).
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            verbosity,
            # Asserts are kept unless --optimized was requested.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        # Passing None as the traceback prints only the exception itself.
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)

    return grammar, parser, tokenizer, gen
46+
47+
48+
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build for the ``python`` sub-command.

    Reads ``grammar_filename``, ``output``, ``skip_actions`` and ``verbose``
    from *args* and forwards them to
    ``pegen.build.build_python_parser_and_generator``.

    Returns the ``(grammar, parser, tokenizer, gen)`` tuple produced by the
    builder.

    If the build raises and ``args.verbose`` is falsy, a traceback-free error
    report is printed, a hint is written to stderr and the process exits with
    status 1.  With a truthy ``args.verbose`` the exception propagates.
    """
    # Imported lazily, as in the rest of this module's entry points.
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Two booleans derived from the -v count; they are forwarded positionally
    # to the builder (one for the tokenizer, one for the parser).
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        # Passing None as the traceback prints only the exception itself.
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)

    return grammar, parser, tokenizer, gen
71+
1472

1573
argparser = argparse.ArgumentParser(
1674
prog="pegen", description="Experimental PEG-like parser generator"
@@ -23,64 +81,54 @@
2381
default=0,
2482
help="Print timing stats; repeat for more debug output",
2583
)
26-
argparser.add_argument(
27-
"-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
84+
subparsers = argparser.add_subparsers(help="target language for the generated code")
85+
86+
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
87+
c_parser.set_defaults(func=generate_c_code)
88+
c_parser.add_argument("grammar_filename", help="Grammar description")
89+
c_parser.add_argument("tokens_filename", help="Tokens description")
90+
c_parser.add_argument(
91+
"-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
2892
)
29-
argparser.add_argument(
93+
c_parser.add_argument(
3094
"--compile-extension",
3195
action="store_true",
3296
help="Compile generated C code into an extension module",
3397
)
34-
argparser.add_argument(
98+
c_parser.add_argument(
99+
"--optimized", action="store_true", help="Compile the extension in optimized mode"
100+
)
101+
c_parser.add_argument(
102+
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
103+
)
104+
105+
python_parser = subparsers.add_parser("python", help="Generate Python code")
106+
python_parser.set_defaults(func=generate_python_code)
107+
python_parser.add_argument("grammar_filename", help="Grammar description")
108+
python_parser.add_argument(
35109
"-o",
36110
"--output",
37111
metavar="OUT",
38-
help="Where to write the generated parser (default parse.py or parse.c)",
112+
default="parse.py",
113+
help="Where to write the generated parser",
39114
)
40-
argparser.add_argument("filename", help="Grammar description")
41-
argparser.add_argument(
42-
"--optimized", action="store_true", help="Compile the extension in optimized mode"
43-
)
44-
argparser.add_argument(
115+
python_parser.add_argument(
45116
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
46117
)
47118

48119

49120
def main() -> None:
50-
from pegen.build import build_parser_and_generator
51121
from pegen.testutil import print_memstats
52122

53123
args = argparser.parse_args()
54-
verbose = args.verbose
55-
verbose_tokenizer = verbose >= 3
56-
verbose_parser = verbose == 2 or verbose >= 4
57-
t0 = time.time()
58-
59-
output_file = args.output
60-
if not output_file:
61-
if args.cpython:
62-
output_file = "parse.c"
63-
else:
64-
output_file = "parse.py"
65-
66-
try:
67-
grammar, parser, tokenizer, gen = build_parser_and_generator(
68-
args.filename,
69-
output_file,
70-
args.compile_extension,
71-
verbose_tokenizer,
72-
verbose_parser,
73-
args.verbose,
74-
keep_asserts_in_extension=False if args.optimized else True,
75-
skip_actions=args.skip_actions,
76-
)
77-
except Exception as err:
78-
if args.verbose:
79-
raise # Show traceback
80-
traceback.print_exception(err.__class__, err, None)
81-
sys.stderr.write("For full traceback, use -v\n")
124+
if "func" not in args:
125+
argparser.print_help()
82126
sys.exit(1)
83127

128+
t0 = time.time()
129+
grammar, parser, tokenizer, gen = args.func(args)
130+
t1 = time.time()
131+
84132
if not args.quiet:
85133
if args.verbose:
86134
print("Raw Grammar:")
@@ -110,8 +158,6 @@ def main() -> None:
110158
else:
111159
print()
112160

113-
t1 = time.time()
114-
115161
if args.verbose:
116162
dt = t1 - t0
117163
diag = tokenizer.diagnose()

0 commit comments

Comments
 (0)