diff --git a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml index f61a9aad..929ff124 100644 --- a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml +++ b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml @@ -17,9 +17,9 @@ spec: kubectl rollout status --watch statefulset/airflow-webserver-default && kubectl rollout status --watch statefulset/airflow-scheduler-default && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD http://airflow-webserver-default:8080/api/v1/dags/sparkapp_dag - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD -H 'Content-Type:application/json' -XPATCH http://airflow-webserver-default:8080/api/v1/dags/sparkapp_dag -d '{\"is_paused\": false}' - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD -H 'Content-Type:application/json' -XPOST http://airflow-webserver-default:8080/api/v1/dags/sparkapp_dag/dagRuns -d '{}' + && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default:8080/auth/token -H 'Content-Type: application/json' -d '{\"username\": \"admin\", \"password\": \"'$AIRFLOW_ADMIN_PASSWORD'\"}' | jq '.access_token' | tr -d '\"') + && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default:8080/api/v2/dags/sparkapp_dag -d '{\"is_paused\": false}' | jq + && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default:8080/api/v2/dags/sparkapp_dag/dagRuns -d '{\"logical_date\": null}' | jq "] volumeMounts: - name: airflow-credentials diff --git a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml index d72fe62c..087d7c9c 100644 --- a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml +++ b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml 
@@ -17,9 +17,9 @@ spec: kubectl rollout status --watch statefulset/airflow-webserver-default && kubectl rollout status --watch statefulset/airflow-scheduler-default && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD http://airflow-webserver-default:8080/api/v1/dags/date_demo - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD -H 'Content-Type:application/json' -XPATCH http://airflow-webserver-default:8080/api/v1/dags/date_demo -d '{\"is_paused\": false}' - && curl -i -s --user admin:$AIRFLOW_ADMIN_PASSWORD -H 'Content-Type:application/json' -XPOST http://airflow-webserver-default:8080/api/v1/dags/date_demo/dagRuns -d '{}' + && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default:8080/auth/token -H 'Content-Type: application/json' -d '{\"username\": \"admin\", \"password\": \"'$AIRFLOW_ADMIN_PASSWORD'\"}' | jq '.access_token' | tr -d '\"') + && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default:8080/api/v2/dags/date_demo -d '{\"is_paused\": false}' | jq + && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default:8080/api/v2/dags/date_demo/dagRuns -d '{\"logical_date\": null}' | jq "] volumeMounts: - name: airflow-credentials diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_1.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_1.png index 2b6210ac..73c35611 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_1.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_1.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_10.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_10.png index 8b0b9268..a3facd8f 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_10.png and 
b/docs/modules/demos/images/airflow-scheduled-job/airflow_10.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_11.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_11.png index 6ee5e388..7a59b1aa 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_11.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_11.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_12.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_12.png deleted file mode 100644 index 599808aa..00000000 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_12.png and /dev/null differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_2.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_2.png index f505d283..d9ed51ba 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_2.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_2.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_3.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_3.png index dd95db6b..d4060b4e 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_3.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_3.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_4.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_4.png index b0f12f84..3dff1b3b 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_4.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_4.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_5.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_5.png index 190190f6..b8126718 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_5.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_5.png differ diff --git 
a/docs/modules/demos/images/airflow-scheduled-job/airflow_6.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_6.png index 78152f0c..4eb46bc2 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_6.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_6.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_7.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_7.png index 871811f4..4729f420 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_7.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_7.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_8.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_8.png new file mode 100644 index 00000000..61500d75 Binary files /dev/null and b/docs/modules/demos/images/airflow-scheduled-job/airflow_8.png differ diff --git a/docs/modules/demos/images/airflow-scheduled-job/airflow_9.png b/docs/modules/demos/images/airflow-scheduled-job/airflow_9.png index 4c8a4b70..32d9b89a 100644 Binary files a/docs/modules/demos/images/airflow-scheduled-job/airflow_9.png and b/docs/modules/demos/images/airflow-scheduled-job/airflow_9.png differ diff --git a/docs/modules/demos/pages/airflow-scheduled-job.adoc b/docs/modules/demos/pages/airflow-scheduled-job.adoc index c9eb155e..40d31fc9 100644 --- a/docs/modules/demos/pages/airflow-scheduled-job.adoc +++ b/docs/modules/demos/pages/airflow-scheduled-job.adoc @@ -46,11 +46,11 @@ To list the installed Stackable services run the following command: [source,console] ---- $ stackablectl stacklet list -┌──────────┬───────────────┬───────────┬───────────────────────────────────────────┬─────────────────────────────────┐ -│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │ -╞══════════╪═══════════════╪═══════════╪═══════════════════════════════════════════╪═════════════════════════════════╡ -│ airflow ┆ airflow ┆ default ┆ webserver-airflow 
http://172.18.0.2:31979 ┆ Available, Reconciling, Running │ -└──────────┴───────────────┴───────────┴───────────────────────────────────────────┴─────────────────────────────────┘ +┌─────────┬─────────┬───────────┬─────────────────────────────────────────────────┬─────────────────────────────────┐ +│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │ +╞═════════╪═════════╪═══════════╪═════════════════════════════════════════════════╪═════════════════════════════════╡ +│ airflow ┆ airflow ┆ default ┆ webserver-default-http http://172.19.0.5:30913 ┆ Available, Reconciling, Running │ +└─────────┴─────────┴───────────┴─────────────────────────────────────────────────┴─────────────────────────────────┘ ---- include::partial$instance-hint.adoc[] @@ -58,47 +58,47 @@ include::partial$instance-hint.adoc[] == Airflow Webserver UI The Airflow webserver UI lets you monitor, trigger and inspect DAGs and their runs. Open the `airflow` endpoint `webserver-default-http` -in your browser (`http://172.18.0.2:31979` in this case). +in your browser (`http://172.19.0.5:30913` in this case). image::airflow-scheduled-job/airflow_1.png[] -Log in with the username `admin` and password `adminadmin`. The overview screen shows the DAGs mounted during the demo -setup (`date_demo`). +Log in with the username `admin` and password `adminadmin`. +Click on 'Active DAGs' at the top and you will see an overview showing the DAGs mounted during the demo +setup (`date_demo` and `sparkapp_dag`). image::airflow-scheduled-job/airflow_2.png[] -There are two things to notice here. Both DAGs have been enabled, as shown by the slider to the left of the DAG name -(DAGs are all `paused` initially and can be activated manually in the UI or via a REST call, as done in the setup for -this demo): +There are two things to notice here.
+Both DAGs have been enabled, as shown by the slider on the far right of the screen for each DAG +(DAGs are all `paused` initially and can be activated manually in the UI or via a REST call, as done in the setup for this demo): image::airflow-scheduled-job/airflow_3.png[] -Secondly, the `date_demo` job has been busy, with several runs already logged. The `sparkapp_dag` has only been run -once because they have been defined with different schedules. +Secondly, the `date_demo` job has been busy, with several runs already logged. +The `sparkapp_dag` has only been run once because the two DAGs have been defined with different schedules. image::airflow-scheduled-job/airflow_4.png[] -Clicking on the number under `Runs` will display the individual job runs: +Clicking on the DAG name and then on `Runs` will display the individual job runs: image::airflow-scheduled-job/airflow_5.png[] -The `demo_date` job is running every minute. With Airflow, DAGs can be started manually or scheduled to run when certain -conditions are fulfilled- In this case, the DAG has been set up to run using a cron table, which is part of the DAG -definition. +The `date_demo` job is running every minute. +With Airflow, DAGs can be started manually or scheduled to run when certain conditions are fulfilled - in this case, the DAG has been set up to run using a cron table, which is part of the DAG definition. === `date_demo` DAG -Let's drill down a bit deeper into this DAG. Click on one of the job runs shown in the previous step to display the -details. The DAG is displayed as a graph (this job is so simple that it only has one step, called `run_every_minute`). +Let's drill down a bit deeper into this DAG.
+At the top under the DAG name there is some scheduling information, which tells us that this job will run every minute continuously: image::airflow-scheduled-job/airflow_6.png[] -In the top right-hand corner there is some scheduling information, which tells us that this job will run every minute -continuously: +Click on one of the job runs in the list to display the details for the task instances. +In the left-side pane the DAG is displayed either as a graph (this job is so simple that it only has one step, called `run_every_minute`), or as a "bar chart" showing each run. image::airflow-scheduled-job/airflow_7.png[] -Click on the `run_every_minute` box in the centre of the page and then select `Logs`: +Click on the `run_every_minute` box in the centre of the page to select the logs: [WARNING] ==== @@ -108,26 +108,27 @@ See the https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/exec If you are interested in persisting the logs, take a look at the xref:logging.adoc[] demo. ==== -image::airflow-scheduled-job/airflow_9.png[] +image::airflow-scheduled-job/airflow_8.png[] -To look at the actual DAG code click on `Code`. Here we can see the crontab information used to schedule the job as well -the `bash` command that provides the output: +To look at the actual DAG code click on `Code`. +Here we can see the crontab information used to schedule the job as well as the `bash` command that provides the output: -image::airflow-scheduled-job/airflow_10.png[] +image::airflow-scheduled-job/airflow_9.png[] === `sparkapp_dag` DAG -Go back to DAG overview screen. The `sparkapp_dag` job has a scheduled entry of `None` and a last-execution time -(`2022-09-19, 07:36:55`). This allows a DAG to be executed exactly once, with neither schedule-based runs nor any -https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dag-run.html#backfill[backfill]. The DAG can -always be triggered manually again via REST or from within the Webserver UI.
+Go back to the DAG overview screen. +The `sparkapp_dag` job has a scheduled entry of `None` and a last-execution time. +This allows a DAG to be executed exactly once, with neither schedule-based runs nor any +https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dag-run.html#backfill[backfill]. +The DAG can always be triggered manually again via REST or from within the Webserver UI. -image::airflow-scheduled-job/airflow_11.png[] +image::airflow-scheduled-job/airflow_10.png[] By navigating to the graphical overview of the job we can see that the DAG has two steps, one to start the job - which runs asynchronously - and another to poll the running job to report on its status. -image::airflow-scheduled-job/airflow_12.png[] +image::airflow-scheduled-job/airflow_11.png[] == Summary diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index 36aeaa51..fb0ac7eb 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -6,9 +6,8 @@ metadata: name: airflow spec: image: - productVersion: 2.10.4 + productVersion: 3.0.1 clusterConfig: - listenerClass: external-unstable loadExamples: false exposeConfig: false credentialsSecret: airflow-credentials @@ -35,6 +34,7 @@ spec: memory: limit: 2Gi gracefulShutdownTimeout: 30s + listenerClass: external-unstable roleGroups: default: envOverrides: @@ -75,7 +75,7 @@ data: with DAG( dag_id='date_demo', - schedule_interval='0-59 * * * *', + schedule='0-59 * * * *', start_date=datetime(2021, 1, 1), catchup=False, dagrun_timeout=timedelta(minutes=5), @@ -222,7 +222,7 @@ data: with DAG( dag_id='sparkapp_dag', - schedule_interval=None, + schedule=None, start_date=datetime(2022, 1, 1), catchup=False, dagrun_timeout=timedelta(minutes=60),