diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
new file mode 100644
index 0000000000..7f8df34c38
--- /dev/null
+++ b/.github/workflows/_e2e_test.yaml
@@ -0,0 +1,192 @@
+name: 'e2e test'
+
+on:
+  workflow_call:
+    inputs:
+      vllm:
+        required: true
+        type: string
+      runner:
+        required: true
+        type: string
+      image:
+        required: true
+        type: string
+      type:
+        required: true
+        type: string
+
+jobs:
+  e2e:
+    name: singlecard
+    runs-on: ${{ inputs.runner }}-1
+    container:
+      image: ${{ inputs.image }}
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'light' }}
+        run: |
+          pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_quantization.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
+
+      - name: Run e2e test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'full' }}
+        run: |
+          # We found that running the aclgraph tests in one batch triggers an AclmdlRICaptureBegin
+          # error, so we run each test file separately.
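+          # Each pytest invocation below starts a fresh process, so graph capture state is reset between files.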
+
+          pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_chunked.py
+          pytest -sv tests/e2e/singlecard/test_embedding.py
+          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
+          pytest -sv tests/e2e/singlecard/test_quantization.py
+          pytest -sv tests/e2e/singlecard/test_sampler.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py
+
+          # ------------------------------------ v1 spec decode test ------------------------------------ #
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+
+          pytest -sv tests/e2e/singlecard/ops/
+
+  e2e-2-cards:
+    name: multicard
+    runs-on: ${{ inputs.runner }}-2
+    container:
+      image: ${{ inputs.image }}
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test (light)
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'light' }}
+        run: |
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
+
+      - name: Run vllm-project/vllm-ascend test (full)
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        if: ${{ inputs.type == 'full' }}
+        run: |
+          pytest -sv tests/e2e/multicard/test_data_parallel.py
+          pytest -sv tests/e2e/multicard/test_expert_parallel.py
+          # external_launcher test is not stable enough. Fix it later
+          # pytest -sv tests/e2e/multicard/test_external_launcher.py
+          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
+          #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+
+          # To avoid OOM, we run each test case in its own process.
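+          # Selecting individual test cases with pytest's ::node-id syntax keeps peak NPU memory per run bounded.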
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
+
+          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
+          pytest -sv tests/e2e/multicard/test_prefix_caching.py
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
index c2d42504a9..2ba1fc7490 100644
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@@ -19,7 +19,7 @@
 # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
 # 2. workflow_dispatch with models input
 # See detail rule in strategy.matrix note
-name: Benchmarks / accuracy
+name: ascend test / accuracy
 
 on:
   schedule:
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index 0b2e84cbf8..993530be89 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-name: 'Benchmarks / Performance'
+name: 'ascend test / performance'
 # This workflow runs nightly benchmarks for vllm-ascend.
 
 on:
diff --git a/.github/workflows/vllm_ascend_doctest.yaml b/.github/workflows/vllm_ascend_doctest.yaml
index 1b4faeacba..641c048564 100644
--- a/.github/workflows/vllm_ascend_doctest.yaml
+++ b/.github/workflows/vllm_ascend_doctest.yaml
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
 
-name: 'e2e test / doctest'
+name: 'ascend test / doctest'
 
 on:
   workflow_dispatch:
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index c406907ebd..51b136bf0f 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
 
-name: 'test'
+name: 'ascend test'
 
 on:
   push:
@@ -43,7 +43,6 @@ jobs:
     uses: ./.github/workflows/pre-commit.yml
 
   changes:
-    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
@@ -123,7 +122,8 @@
            --ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py
 
       - name: Upload coverage to Codecov
-        if: ${{ matrix.vllm_version != 'v0.10.2' }}
+        # only upload coverage when commits are merged
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: codecov/codecov-action@v5
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -133,132 +133,17 @@ jobs:
          verbose: true
 
   e2e-light:
-    needs: [lint, changes]
-    # only trigger e2e test after lint passed and the change is e2e related with pull request.
-    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
+    name: e2e-light
    strategy:
-      max-parallel: 2
      matrix:
-        os: [linux-aarch64-a2-1]
        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
-    name: singlecard e2e test - light
-    runs-on: ${{ matrix.os }}
-    container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-          fetch-depth: 1
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run e2e test
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
-
-  e2e-2-cards-light:
-    needs: [e2e-light]
-    if: ${{ needs.e2e-light.result == 'success' }}
-    strategy:
-      max-parallel: 2
-      matrix:
-        os: [linux-aarch64-a2-2]
-        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
-    name: multicard e2e test - light
-    runs-on: ${{ matrix.os }}
-    container:
+    # Note (yikun): If CI resources are limited, we can split this job into two chained jobs
+    needs: [lint, changes]
+    # only trigger the e2e test after lint has passed and the pull request touches e2e-related files.
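+    # (PRs already labelled 'ready' skip this light run.)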
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      vllm: v0.10.2
+      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-          fetch-depth: 1
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run vllm-project/vllm-ascend test
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
\ No newline at end of file
+      type: light
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index 0c389a58a1..e23f6407e6 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -14,7 +14,7 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
-name: 'test-full'
+name: 'ascend test / full'
 
 on:
   pull_request:
@@ -64,170 +64,16 @@ jobs:
            ut_tracker:
              - 'tests/ut/**'
 
-  e2e-full:
-    # only trigger full test when pull request is approved
-    needs: [changes]
-    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
+  e2e-test:
+    name: e2e-full
    strategy:
-      max-parallel: 2
      matrix:
-        os: [linux-aarch64-a2-1]
        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
-    name: singlecard e2e test - full
-    runs-on: ${{ matrix.os }}
-    container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run e2e test
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
-          # the test separately.
-
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_chunked.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py
-
-          # ------------------------------------ v1 spec decode test ------------------------------------ #
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-
-          pytest -sv tests/e2e/singlecard/ops/
-
-  e2e-2-cards-full:
-    # only trigger full test when pull request is approved
    needs: [changes]
    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
-    strategy:
-      max-parallel: 2
-      matrix:
-        os: [linux-aarch64-a2-2]
-        vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2]
-    name: multicard e2e test - full
-    runs-on: ${{ matrix.os }}
-    container:
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      vllm: ${{ matrix.vllm_version }}
+      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
-          apt-get update -y
-          apt install git -y
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run vllm-project/vllm-ascend test
-        env:
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          #pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          # external_launcher test is not stable enough. Fix it later
-          # pytest -sv tests/e2e/multicard/test_external_launcher.py
-          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
-          #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
-
-          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
+      type: full
diff --git a/.github/workflows/vllm_ascend_test_full_vllm_main.yaml b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml
new file mode 100644
index 0000000000..48dc695f8c
--- /dev/null
+++ b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+name: 'ascend test / vllm main'
+
+on:
+  # Run 1-card and 2-card e2e tests every 2 hours
+  schedule:
+    - cron: '0 */2 * * *'
+  workflow_dispatch:
+
+# Bash shells do not use ~/.profile or ~/.bashrc, so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# only cancel in-progress runs of the same workflow
+# on the same ref
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e-test:
+    uses: ./.github/workflows/_e2e_test.yaml
+    with:
+      vllm: main
+      runner: linux-aarch64-a2
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+      type: full