diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
index 09dde7a2af..34d1d94ae5 100644
--- a/.github/workflows/build-test-linux-x86_64.yml
+++ b/.github/workflows/build-test-linux-x86_64.yml
@@ -138,6 +138,15 @@ jobs:
         pushd .
         cd tests/py
         python -m pip install -r requirements.txt
+        major=${PYTHON_VERSION%%.*}
+        minor=${PYTHON_VERSION#*.}
+        minor=${minor%%.*}
+        if (( major > 3 || (major == 3 && minor >= 13) )); then
+          echo "flashinfer-python is not supported for python version 3.13 or higher"
+        else
+          echo "Installing flashinfer-python"
+          python -m pip install flashinfer-python --no-deps
+        fi
         cd dynamo
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
diff --git a/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py b/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
index dcfc70373f..f13d9a2375 100644
--- a/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
+++ b/py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any
+from typing import Any, Set

 import torch
 from torch_tensorrt._utils import sanitized_torch_version
@@ -100,12 +100,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)

     def is_impure(self, node: torch.fx.node.Node) -> bool:
-        # Set of known quantization ops to be excluded from constant folding.
+        # Set of known quantization ops to be excluded from constant folding.
         # Currently, we exclude all quantization ops coming from modelopt library.
-        quantization_ops = {}
+        quantization_ops: Set[torch._ops.OpOverload] = set()
         try:
-            # modelopt import ensures torch.ops.tensorrt.quantize_op.default is registered
-            import modelopt.torch.quantization as mtq
+            # modelopt import ensures torch.ops.tensorrt.quantize_op.default is registered
+            import modelopt.torch.quantization as mtq  # noqa: F401
+
             assert torch.ops.tensorrt.quantize_op.default
             quantization_ops.add(torch.ops.tensorrt.quantize_op.default)
         except Exception as e:
diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt
index 13aa40fe44..dd217bfd5c 100644
--- a/tests/py/requirements.txt
+++ b/tests/py/requirements.txt
@@ -8,7 +8,6 @@ pytest>=8.2.1
 pytest-xdist>=3.6.1
 pyyaml
 timm>=1.0.3
-flashinfer-python; python_version < "3.13"
 transformers==4.49.0
 nvidia-modelopt[all]~=0.27.0; python_version >'3.9' and python_version <'3.13'
 --extra-index-url https://pypi.nvidia.com
\ No newline at end of file
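
Note (reviewer sketch, not part of the patch): the workflow change moves the flashinfer-python dependency out of tests/py/requirements.txt and into an explicit install step gated on the interpreter version, skipping it on Python 3.13 and newer, where the package is unsupported. The gate parses the major and minor components out of $PYTHON_VERSION with bash parameter expansion. An illustrative Python equivalent of that predicate, assuming version strings like "3.12.4" or "3.13":

    def flashinfer_supported(python_version: str) -> bool:
        # Parse "major.minor[.patch]" into integers, mirroring the
        # ${PYTHON_VERSION%%.*} / ${minor%%.*} expansions in the workflow.
        major, minor = (int(part) for part in python_version.split(".")[:2])
        # Mirrors: (( major > 3 || (major == 3 && minor >= 13) )) -> skip install
        return not (major > 3 or (major == 3 and minor >= 13))

    assert flashinfer_supported("3.12.4")
    assert not flashinfer_supported("3.13.0")
    assert not flashinfer_supported("4.0")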
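
Note (reviewer sketch, not part of the patch): the constant_folding.py change fixes a genuine bug, not just a typing nit. `{}` constructs an empty dict, not an empty set, so the original `quantization_ops.add(...)` raised AttributeError; and because that call sits inside `except Exception`, judging from the diff the failure was silent and quantize ops were never actually excluded from constant folding. The added `# noqa: F401` marks the mtq import as intentionally "unused" (it exists only for its op-registration side effect, which the new assert then verifies). A minimal standalone repro with illustrative op names:

    from typing import Set

    ops = {}  # bug: {} is an empty dict, not an empty set
    try:
        ops.add("tensorrt.quantize_op.default")
    except AttributeError as exc:
        print(exc)  # 'dict' object has no attribute 'add'

    fixed_ops: Set[str] = set()  # fix: a real set, explicitly annotated
    fixed_ops.add("tensorrt.quantize_op.default")
    print(fixed_ops)  # {'tensorrt.quantize_op.default'}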