From 58506fa41dc901047beb564d8149f2c3b36a0385 Mon Sep 17 00:00:00 2001 From: andreas kollert Date: Fri, 27 Jun 2025 09:00:56 +0200 Subject: [PATCH 1/2] updated notebook with proper example and new exercise --- advanced/map_blocks/simple_map_blocks.ipynb | 2118 ++++++++++++++++++- 1 file changed, 2095 insertions(+), 23 deletions(-) diff --git a/advanced/map_blocks/simple_map_blocks.ipynb b/advanced/map_blocks/simple_map_blocks.ipynb index 0fe06b16..46a1464f 100644 --- a/advanced/map_blocks/simple_map_blocks.ipynb +++ b/advanced/map_blocks/simple_map_blocks.ipynb @@ -12,7 +12,7 @@ "`map_blocks` is inspired by the `dask.array` function of the same name and lets\n", "you map a function on blocks of the xarray object (including Datasets!).\n", "\n", - "At _compute_ time, your function will receive an xarray object with concrete\n", + "At _compute_ time, your function will receive a chunk of an xarray object with concrete\n", "(computed) values along with appropriate metadata. This function should return\n", "an xarray object.\n" ] @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "2", "metadata": {}, "outputs": [], @@ -51,10 +51,327 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + "

Client

\n", + "

Client-49f20140-5324-11f0-8aa9-4851c5bfb2f6

\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "

Cluster Info

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

LocalCluster

\n", + "

f688ce09

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + "
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + " \n", + " Workers: 4\n", + "
\n", + " Total threads: 16\n", + " \n", + " Total memory: 13.47 GiB\n", + "
Status: runningUsing processes: True
\n", + "\n", + "
\n", + " \n", + "

Scheduler Info

\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

Scheduler

\n", + "

Scheduler-8345fbce-73d6-4290-83e1-71b17ddebc74

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Comm: tcp://127.0.0.1:40545\n", + " \n", + " Workers: 0 \n", + "
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + " \n", + " Total threads: 0\n", + "
\n", + " Started: Just now\n", + " \n", + " Total memory: 0 B\n", + "
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "

Workers

\n", + "
\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 0

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:40439\n", + " \n", + " Total threads: 4\n", + "
\n", + " Dashboard: http://127.0.0.1:33441/status\n", + " \n", + " Memory: 3.37 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:42223\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-lvqbcxiz\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 1

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:36369\n", + " \n", + " Total threads: 4\n", + "
\n", + " Dashboard: http://127.0.0.1:39065/status\n", + " \n", + " Memory: 3.37 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:40887\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-921rtl_x\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 2

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:36751\n", + " \n", + " Total threads: 4\n", + "
\n", + " Dashboard: http://127.0.0.1:45787/status\n", + " \n", + " Memory: 3.37 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:34475\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-fijvprlt\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 3

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:44337\n", + " \n", + " Total threads: 4\n", + "
\n", + " Dashboard: http://127.0.0.1:42319/status\n", + " \n", + " Memory: 3.37 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:45909\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-1u00a5mu\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from dask.distributed import Client\n", "\n", @@ -74,10 +391,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " ...,\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]], shape=(1000, 4))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import dask.array\n", "\n", @@ -89,17 +423,629 @@ "id": "7", "metadata": {}, "source": [ - "Let's open a dataset. We specify `chunks` so that we create a dask arrays for the DataArrays" + "Let's open a dataset. We specify `chunks` so that we create dask arrays for the DataArrays.\n", + "\n", + "Depending on the desired function to be applied on the chunks, it is vital to set the chunks correctly. Our goal is to compute the mean along the time dimension. Therefore we do not chunk the time dimension at all (indicated by `\"time\": -1`). We deliberately set `lat` and `lon` chunks to something smaller than the size of their respective dimension (otherwise we would potentially end up with a single big chunk for the entire `ds`)." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 31MB\n",
+       "Dimensions:  (lat: 25, time: 2920, lon: 53)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
+       "Data variables:\n",
+       "    air      (time, lat, lon) float64 31MB dask.array<chunksize=(2920, 5, 10), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  COARDS\n",
+       "    title:        4x daily NMC reanalysis (1948)\n",
+       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
+       "    platform:     Model\n",
+       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
" + ], + "text/plain": [ + " Size: 31MB\n", + "Dimensions: (lat: 25, time: 2920, lon: 53)\n", + "Coordinates:\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float64 31MB dask.array\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ds = xr.tutorial.open_dataset(\"air_temperature\", chunks={\"time\": 100})\n", + "ds = xr.tutorial.open_dataset(\"air_temperature\", chunks={\"time\": -1, \"lat\": 5, \"lon\": 10})\n", "ds" ] }, @@ -115,30 +1061,1097 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "10", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 11kB\n",
+       "Dimensions:  (lon: 53, lat: 25)\n",
+       "Coordinates:\n",
+       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
+       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
+       "Data variables:\n",
+       "    air      (lat, lon) float64 11kB dask.array<chunksize=(5, 10), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 11kB\n", + "Dimensions: (lon: 53, lat: 25)\n", + "Coordinates:\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + "Data variables:\n", + " air (lat, lon) float64 11kB dask.array" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def time_mean(obj):\n", + "def time_mean(obj: xr.Dataset):\n", " # use xarray's convenient API here\n", " # you could convert to a pandas dataframe and use pandas' extensive API\n", " # or use .plot() and plt.savefig to save visualizations to disk in parallel.\n", - " return obj.mean(\"lat\")\n", - "\n", + " return obj.mean(\"time\")\n", "\n", "ds.map_blocks(time_mean) # this is lazy!" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "438bbcd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 11kB\n",
+       "Dimensions:  (lon: 53, lat: 25)\n",
+       "Coordinates:\n",
+       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
+       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
+       "Data variables:\n",
+       "    air      (lat, lon) float64 11kB 260.4 260.2 259.9 ... 297.3 297.3 297.3
" + ], + "text/plain": [ + " Size: 11kB\n", + "Dimensions: (lon: 53, lat: 25)\n", + "Coordinates:\n", + " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", + " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", + "Data variables:\n", + " air (lat, lon) float64 11kB 260.4 260.2 259.9 ... 297.3 297.3 297.3" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this triggers the actual computation\n", + "ds.map_blocks(time_mean).compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "id": "11", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# this will calculate values and will return True if the computation works as expected\n", - "ds.map_blocks(time_mean).identical(ds.mean(\"lat\"))" + "ds.map_blocks(time_mean).equals(ds.mean(\"time\"))" ] }, { @@ -146,7 +2159,58 @@ "id": "12", "metadata": {}, "source": [ - "### Exercise\n", + "### Exercises\n" + ] + }, + { + "cell_type": "markdown", + "id": "d34d40d5", + "metadata": {}, + "source": [ + "::::{admonition} Exercise 1\n", + ":class: tip\n", + "\n", + "When opening the dataset, set the chunks for the time dimension to anything smaller than the size of the time dimension (< 2920), e.g., `\"time\": 100`, and keep the size of the other chunks the same:\n", + "\n", + "```python\n", + "ds = xr.tutorial.open_dataset(\n", + " \"air_temperature\",\n", + " chunks={\"time\": 100, \"lat\": 5, \"lon\": 10},\n", + ")\n", + "```\n", + "\n", + "Now run the notebook again. The result of `ds.map_blocks(time_mean)` is no longer equivalent to `ds.mean(\"time\")`. 
Why does `ds.map_blocks(time_mean)` return a different result this time?\n", + "\n", + ":::{admonition} Solution\n", + ":class: dropdown\n", + "\n", + "Quoting from the documentation of `map_blocks`: _The function will receive a subset or ‘block’ of obj (see below), corresponding to one chunk along each chunked dimension._\n", + "\n", + "`ds.mean(\"time\")` computes the mean over the entire time dimension. In our example `ds.map_blocks(time_mean)` passes individual chunks of `ds` to `time_mean`. Once the time dimension is chunked, each call to `time_mean` receives only a single chunk along the time dimension, so it computes the mean over that chunk rather than over the entire time dimension. Therefore we do not receive an identical result.\n", + "\n", + "You can also modify the function to show the shape of the chunks passed to `time_mean`. Compare the output of the modified function with `ds.chunks` to find out how they relate to each other!\n", + "\n", + "```python\n", + "def time_mean(obj: xr.Dataset):\n", + " print(f\"received obj of type {type(obj)}\")\n", + " print(\"obj contains the following data variables:\")\n", + " for data_var in obj.data_vars:\n", + " print(f\"'{data_var}' with shape {obj[data_var].shape}\")\n", + "\n", + " return obj.mean(\"time\")\n", + "```\n", + "\n", + ":::\n", + "::::\n" + ] + }, + { + "cell_type": "markdown", + "id": "811a065d", + "metadata": {}, + "source": [ + "::::{admonition} Exercise 2\n", + ":class: tip \n", "\n", "Try applying the following function with `map_blocks`. 
Specify `scale` as an\n", "argument and `offset` as a kwarg.\n", @@ -154,10 +2218,12 @@ "The docstring should help:\n", "https://docs.xarray.dev/en/stable/generated/xarray.map_blocks.html\n", "\n", - "```\n", + "```python\n", "def time_mean_scaled(obj, scale, offset):\n", " return obj.mean(\"lat\") * scale + offset\n", - "```\n" + "```\n", + "\n", + "::::" ] }, { @@ -176,7 +2242,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "14", "metadata": {}, "outputs": [], @@ -186,6 +2252,11 @@ } ], "metadata": { + "kernelspec": { + "display_name": "xarray-tutorial", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -195,7 +2266,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.12.11" } }, "nbformat": 4, From cfe2be0d97c00fd020cdcced60e4053408fc5325 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Jun 2025 07:11:20 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- advanced/map_blocks/simple_map_blocks.ipynb | 2052 +------------------ 1 file changed, 23 insertions(+), 2029 deletions(-) diff --git a/advanced/map_blocks/simple_map_blocks.ipynb b/advanced/map_blocks/simple_map_blocks.ipynb index 46a1464f..af46f405 100644 --- a/advanced/map_blocks/simple_map_blocks.ipynb +++ b/advanced/map_blocks/simple_map_blocks.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "2", "metadata": {}, "outputs": [], @@ -51,327 +51,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
\n", - "

Client

\n", - "

Client-49f20140-5324-11f0-8aa9-4851c5bfb2f6

\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - "
\n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "

Cluster Info

\n", - "
\n", - "
\n", - "
\n", - "
\n", - "

LocalCluster

\n", - "

f688ce09

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - " \n", - "
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - " \n", - " Workers: 4\n", - "
\n", - " Total threads: 16\n", - " \n", - " Total memory: 13.47 GiB\n", - "
Status: runningUsing processes: True
\n", - "\n", - "
\n", - " \n", - "

Scheduler Info

\n", - "
\n", - "\n", - "
\n", - "
\n", - "
\n", - "
\n", - "

Scheduler

\n", - "

Scheduler-8345fbce-73d6-4290-83e1-71b17ddebc74

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " Comm: tcp://127.0.0.1:40545\n", - " \n", - " Workers: 0 \n", - "
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - " \n", - " Total threads: 0\n", - "
\n", - " Started: Just now\n", - " \n", - " Total memory: 0 B\n", - "
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "

Workers

\n", - "
\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 0

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:40439\n", - " \n", - " Total threads: 4\n", - "
\n", - " Dashboard: http://127.0.0.1:33441/status\n", - " \n", - " Memory: 3.37 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:42223\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-lvqbcxiz\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 1

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:36369\n", - " \n", - " Total threads: 4\n", - "
\n", - " Dashboard: http://127.0.0.1:39065/status\n", - " \n", - " Memory: 3.37 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:40887\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-921rtl_x\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 2

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:36751\n", - " \n", - " Total threads: 4\n", - "
\n", - " Dashboard: http://127.0.0.1:45787/status\n", - " \n", - " Memory: 3.37 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:34475\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-fijvprlt\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 3

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:44337\n", - " \n", - " Total threads: 4\n", - "
\n", - " Dashboard: http://127.0.0.1:42319/status\n", - " \n", - " Memory: 3.37 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:45909\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-1u00a5mu\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "\n", - "
\n", - "
\n", - "\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "\n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from dask.distributed import Client\n", "\n", @@ -391,27 +74,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1., 1., 1., 1.],\n", - " [1., 1., 1., 1.],\n", - " [1., 1., 1., 1.],\n", - " ...,\n", - " [1., 1., 1., 1.],\n", - " [1., 1., 1., 1.],\n", - " [1., 1., 1., 1.]], shape=(1000, 4))" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import dask.array\n", "\n", @@ -430,620 +96,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 31MB\n",
-       "Dimensions:  (lat: 25, time: 2920, lon: 53)\n",
-       "Coordinates:\n",
-       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
-       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
-       "  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n",
-       "Data variables:\n",
-       "    air      (time, lat, lon) float64 31MB dask.array<chunksize=(2920, 5, 10), meta=np.ndarray>\n",
-       "Attributes:\n",
-       "    Conventions:  COARDS\n",
-       "    title:        4x daily NMC reanalysis (1948)\n",
-       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
-       "    platform:     Model\n",
-       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
" - ], - "text/plain": [ - " Size: 31MB\n", - "Dimensions: (lat: 25, time: 2920, lon: 53)\n", - "Coordinates:\n", - " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float64 31MB dask.array\n", - "Attributes:\n", - " Conventions: COARDS\n", - " title: 4x daily NMC reanalysis (1948)\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ds = xr.tutorial.open_dataset(\"air_temperature\", chunks={\"time\": -1, \"lat\": 5, \"lon\": 10})\n", "ds" @@ -1061,559 +117,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "10", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 11kB\n",
-       "Dimensions:  (lon: 53, lat: 25)\n",
-       "Coordinates:\n",
-       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
-       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
-       "Data variables:\n",
-       "    air      (lat, lon) float64 11kB dask.array<chunksize=(5, 10), meta=np.ndarray>
" - ], - "text/plain": [ - " Size: 11kB\n", - "Dimensions: (lon: 53, lat: 25)\n", - "Coordinates:\n", - " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", - " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", - "Data variables:\n", - " air (lat, lon) float64 11kB dask.array" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def time_mean(obj: xr.Dataset):\n", " # use xarray's convenient API here\n", @@ -1621,512 +128,16 @@ " # or use .plot() and plt.savefig to save visualizations to disk in parallel.\n", " return obj.mean(\"time\")\n", "\n", + "\n", "ds.map_blocks(time_mean) # this is lazy!" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "438bbcd4", + "execution_count": null, + "id": "11", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 11kB\n",
-       "Dimensions:  (lon: 53, lat: 25)\n",
-       "Coordinates:\n",
-       "  * lon      (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n",
-       "  * lat      (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n",
-       "Data variables:\n",
-       "    air      (lat, lon) float64 11kB 260.4 260.2 259.9 ... 297.3 297.3 297.3
" - ], - "text/plain": [ - " Size: 11kB\n", - "Dimensions: (lon: 53, lat: 25)\n", - "Coordinates:\n", - " * lon (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0\n", - " * lat (lat) float32 100B 75.0 72.5 70.0 67.5 65.0 ... 22.5 20.0 17.5 15.0\n", - "Data variables:\n", - " air (lat, lon) float64 11kB 260.4 260.2 259.9 ... 297.3 297.3 297.3" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# this triggers the actual computation\n", "ds.map_blocks(time_mean).compute()" @@ -2134,21 +145,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "11", + "execution_count": null, + "id": "12", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# this will calculate values and will return True if the computation works as expected\n", "ds.map_blocks(time_mean).equals(ds.mean(\"time\"))" @@ -2156,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": {}, "source": [ "### Exercises\n" @@ -2164,7 +164,7 @@ }, { "cell_type": "markdown", - "id": "d34d40d5", + "id": "14", "metadata": {}, "source": [ "::::{admonition} Exercise 1\n", @@ -2206,7 +206,7 @@ }, { "cell_type": "markdown", - "id": "811a065d", + "id": "15", "metadata": {}, "source": [ "::::{admonition} Exercise 2\n", @@ -2228,7 +228,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "16", "metadata": {}, "source": [ "### More advanced functions\n", @@ -2242,8 +242,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "14", + "execution_count": null, + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -2252,11 +252,6 @@ } ], "metadata": { - "kernelspec": { - "display_name": "xarray-tutorial", - "language": "python", - "name": "python3" - }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -2266,8 +261,7 @@ "mimetype": 
"text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" + "pygments_lexer": "ipython3" } }, "nbformat": 4,