Skip to content

bpo-40334: Refactor peg_generator to receive a Tokens file when building c code #19745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,9 @@ regen-grammar: regen-token
.PHONY: regen-pegen
regen-pegen:
@$(MKDIR_P) $(srcdir)/Parser/pegen
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
$(srcdir)/Grammar/python.gram \
$(srcdir)/Grammar/Tokens \
-o $(srcdir)/Parser/pegen/parse.new.c
$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c

Expand Down
2 changes: 1 addition & 1 deletion PCbuild/regen.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
</Target>
<Target Name="_RegenPegen" BeforeTargets="Build">
<!-- Regenerate Parser/pegen/parse.c -->
<Exec Command="&quot;$PYTHONPATH=$(srcdir)/Tools/peg_generator&quot; &quot;$(PythonExe)&quot; -m pegen -c -q &quot;$(PySourcePath)Grammar\python.gram&quot; -o &quot;$(IntDir)parse.c&quot;" />
<Exec Command="&quot;$PYTHONPATH=$(srcdir)/Tools/peg_generator&quot; &quot;$(PythonExe)&quot; -m pegen -q c &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; -o &quot;$(IntDir)parse.c&quot;" />
<Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
</Copy>
Expand Down
11 changes: 7 additions & 4 deletions Tools/peg_generator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ CPYTHON ?= ../../Lib
# Type checker command; `?=` only assigns when MYPY is not already set,
# so it can be overridden from the environment or the command line.
MYPY ?= mypy

# Inputs handed to the parser generator (`-m pegen ... c`) and to
# scripts/test_parse_directory.py: the PEG grammar and the token definitions.
GRAMMAR = ../../Grammar/python.gram
TOKENS = ../../Grammar/Tokens
# Sample input files; presumably consumed by the test/timing targets
# elsewhere in this Makefile (not visible here -- confirm).
TESTFILE = data/cprog.py
TIMEFILE = data/xxl.py
# Directory passed as `-d` to scripts/test_parse_directory.py by test_local.
TESTDIR = .
Expand All @@ -20,8 +21,8 @@ data/xxl.py:

build: peg_extension/parse.c

peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension

clean:
-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
Expand Down Expand Up @@ -79,7 +80,8 @@ time_stdlib_parse: data/xxl.py

test_local:
$(PYTHON) scripts/test_parse_directory.py \
-g $(GRAMMAR) \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(TESTDIR) \
$(TESTFLAGS) \
--exclude "*/failset/*" \
Expand All @@ -88,7 +90,8 @@ test_local:

test_global: $(CPYTHON)
$(PYTHON) scripts/test_parse_directory.py \
-g $(GRAMMAR) \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(CPYTHON) \
$(TESTFLAGS) \
--exclude "*/test2to3/*" \
Expand Down
127 changes: 86 additions & 41 deletions Tools/peg_generator/pegen/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,64 @@
import token
import traceback

from typing import Tuple

from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build described by the parsed command-line arguments.

    Reads ``grammar_filename``, ``tokens_filename``, ``output``,
    ``compile_extension``, ``verbose``, ``optimized`` and ``skip_actions``
    from ``args`` and forwards them to
    ``pegen.build.build_c_parser_and_generator``.

    Returns the ``(grammar, parser, tokenizer, generator)`` tuple produced by
    the builder.

    If the build raises, the exception is re-raised when ``args.verbose`` is
    truthy; otherwise a traceback-free summary is printed and the process
    exits with status 1.
    """
    # Imported lazily, mirroring the original; pegen.build is only needed
    # once a target language has actually been selected.
    from pegen.build import build_c_parser_and_generator

    level = args.verbose
    # Thresholds for the two flags handed to the builder (presumably its
    # tokenizer/parser debug switches): the first is on from level 3, the
    # second at exactly level 2 or from level 4 upwards.
    trace_tokenizer = level >= 3
    trace_parser = level == 2 or level >= 4
    try:
        built = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            level,
            # Asserts are kept in the extension unless --optimized was given.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        # Unpack inside the try so a malformed result is reported the same
        # way as any other build failure.
        grammar, parser, tokenizer, gen = built
    except Exception as err:
        if level:
            raise  # Show traceback
        # Passing None as the traceback prints only the exception line.
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)
    return grammar, parser, tokenizer, gen


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build described by the parsed command-line arguments.

    Reads ``grammar_filename``, ``output``, ``verbose`` and ``skip_actions``
    from ``args`` and forwards them to
    ``pegen.build.build_python_parser_and_generator``.

    Returns the ``(grammar, parser, tokenizer, generator)`` tuple produced by
    the builder.

    If the build raises, the exception is re-raised when ``args.verbose`` is
    truthy; otherwise a traceback-free summary is printed and the process
    exits with status 1.
    """
    # Imported lazily, mirroring the original; pegen.build is only needed
    # once a target language has actually been selected.
    from pegen.build import build_python_parser_and_generator

    level = args.verbose
    # Thresholds for the two flags handed to the builder (presumably its
    # tokenizer/parser debug switches): the first is on from level 3, the
    # second at exactly level 2 or from level 4 upwards.
    trace_tokenizer = level >= 3
    trace_parser = level == 2 or level >= 4
    try:
        built = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
        # Unpack inside the try so a malformed result is reported the same
        # way as any other build failure.
        grammar, parser, tokenizer, gen = built
    except Exception as err:
        if level:
            raise  # Show traceback
        # Passing None as the traceback prints only the exception line.
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)
    return grammar, parser, tokenizer, gen


argparser = argparse.ArgumentParser(
prog="pegen", description="Experimental PEG-like parser generator"
Expand All @@ -23,63 +81,52 @@
default=0,
help="Print timing stats; repeat for more debug output",
)
argparser.add_argument(
"-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
"-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
argparser.add_argument(
c_parser.add_argument(
"--compile-extension",
action="store_true",
help="Compile generated C code into an extension module",
)
argparser.add_argument(
c_parser.add_argument(
"--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
"-o",
"--output",
metavar="OUT",
help="Where to write the generated parser (default parse.py or parse.c)",
default="parse.py",
help="Where to write the generated parser",
)
argparser.add_argument("filename", help="Grammar description")
argparser.add_argument(
"--optimized", action="store_true", help="Compile the extension in optimized mode"
)
argparser.add_argument(
python_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)


def main() -> None:
from pegen.build import build_parser_and_generator
from pegen.testutil import print_memstats

args = argparser.parse_args()
verbose = args.verbose
verbose_tokenizer = verbose >= 3
verbose_parser = verbose == 2 or verbose >= 4
t0 = time.time()

output_file = args.output
if not output_file:
if args.cpython:
output_file = "parse.c"
else:
output_file = "parse.py"
if "func" not in args:
argparser.error("Must specify the target language mode ('c' or 'python')")

try:
grammar, parser, tokenizer, gen = build_parser_and_generator(
args.filename,
output_file,
args.compile_extension,
verbose_tokenizer,
verbose_parser,
args.verbose,
keep_asserts_in_extension=False if args.optimized else True,
skip_actions=args.skip_actions,
)
except Exception as err:
if args.verbose:
raise # Show traceback
traceback.print_exception(err.__class__, err, None)
sys.stderr.write("For full traceback, use -v\n")
sys.exit(1)
t0 = time.time()
grammar, parser, tokenizer, gen = args.func(args)
t1 = time.time()

if not args.quiet:
if args.verbose:
Expand Down Expand Up @@ -110,8 +157,6 @@ def main() -> None:
else:
print()

t1 = time.time()

if args.verbose:
dt = t1 - t0
diag = tokenizer.diagnose()
Expand Down
Loading