From d93352224dad8a6c9551ed1f4ac4cf8a9555090d Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Fri, 1 Jun 2018 10:55:13 +0200 Subject: [PATCH 1/8] Fixes #2198: Drop chunksizes when original_shape is different Before this fix chunksizes was dropped even when original_shape was not found in encoding --- xarray/backends/netCDF4_.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d26b2b5321e..8350183d5da 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -204,7 +204,9 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, chunks_too_big = any( c > d and dim not in unlimited_dims for c, d, dim in zip(chunksizes, variable.shape, variable.dims)) - changed_shape = encoding.get('original_shape') != variable.shape + has_original_shape = encoding.get('original_shape') is not None + changed_shape = (has_original_shape and + encoding.get('original_shape') != variable.shape) if chunks_too_big or changed_shape: del encoding['chunksizes'] From f62d0fc1165a736275dbd30aa18c10e5ae918b33 Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Sun, 17 Jun 2018 13:12:25 +0200 Subject: [PATCH 2/8] More direct has_original_shape check --- xarray/backends/netCDF4_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8350183d5da..7f3c3317d69 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -204,7 +204,7 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, chunks_too_big = any( c > d and dim not in unlimited_dims for c, d, dim in zip(chunksizes, variable.shape, variable.dims)) - has_original_shape = encoding.get('original_shape') is not None + has_original_shape = original_shape in encoding changed_shape = (has_original_shape and encoding.get('original_shape') != variable.shape) if chunks_too_big or changed_shape: From 
3ec141a1addda0b16751a944472571620512d885 Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Sun, 17 Jun 2018 13:49:13 +0200 Subject: [PATCH 3/8] Fixed typo --- xarray/backends/netCDF4_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7f3c3317d69..75651f2f3cf 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -204,7 +204,7 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, chunks_too_big = any( c > d and dim not in unlimited_dims for c, d, dim in zip(chunksizes, variable.shape, variable.dims)) - has_original_shape = original_shape in encoding + has_original_shape = 'original_shape' in encoding changed_shape = (has_original_shape and encoding.get('original_shape') != variable.shape) if chunks_too_big or changed_shape: From 06dd7ebb745deab09bf4e064ab7cfb66964f3e3d Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Sun, 17 Jun 2018 13:56:00 +0200 Subject: [PATCH 4/8] Added test if chunksizes is kept when no original shape --- xarray/tests/test_backends.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0e6151b2db5..4d0cbec5f18 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1103,6 +1103,23 @@ def test_encoding_kwarg_compression(self): self.assertEqual(ds.x.encoding, {}) + def test_keep_chunksizes_if_no_orignal_shape(self): + orig_ds = Dataset({'x': [1, 2, 3]}) + chunksizes = (2, ) + orig_ds.variables['x'].encoding = { + 'chunksizes': chunksizes + } + + with create_tmp_file() as tmp_file: + orig_ds.to_netcdf(path=tmp_file) + + nc = nc4.Dataset(tmp_file, mode='r') + with backends.NetCDF4DataStore(nc, autoclose=False) as store: + with open_dataset(store) as ds: + assert_identical(orig_ds, ds) + assert_array_equal(orig_ds['x'].encoding['chunksizes'], + ds['x'].encoding['chunksizes']) + def 
test_encoding_chunksizes_unlimited(self): # regression test for GH1225 ds = Dataset({'x': [1, 2, 3], 'y': ('x', [2, 3, 4])}) From 99f70a886c7a316c2ae9d520e93cd137c8abfcf4 Mon Sep 17 00:00:00 2001 From: Karel van de Plassche Date: Wed, 29 May 2019 20:11:55 +0200 Subject: [PATCH 5/8] Fix typo in test name Co-Authored-By: Deepak Cherian --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3e53402214a..43503df1360 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1134,7 +1134,7 @@ def test_encoding_kwarg_compression(self): assert ds.x.encoding == {} - def test_keep_chunksizes_if_no_orignal_shape(self): + def test_keep_chunksizes_if_no_original_shape(self): orig_ds = Dataset({'x': [1, 2, 3]}) chunksizes = (2, ) orig_ds.variables['x'].encoding = { From f4b4ea04e5837b532268cb8d012a95136b3c828c Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Thu, 30 May 2019 09:09:09 +0200 Subject: [PATCH 6/8] Fix keep_chunksizes_if_no_original_shape test by using native open_dataset --- xarray/tests/test_backends.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 43503df1360..c621a0a0d31 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1145,11 +1145,10 @@ def test_keep_chunksizes_if_no_original_shape(self): orig_ds.to_netcdf(path=tmp_file) nc = nc4.Dataset(tmp_file, mode='r') - with backends.NetCDF4DataStore(nc, autoclose=False) as store: - with open_dataset(store) as ds: - assert_identical(orig_ds, ds) - assert_array_equal(orig_ds['x'].encoding['chunksizes'], - ds['x'].encoding['chunksizes']) + with open_dataset(tmp_file) as ds: + assert_identical(orig_ds, ds) + assert_array_equal(orig_ds['x'].encoding['chunksizes'], + ds['x'].encoding['chunksizes']) def test_encoding_chunksizes_unlimited(self): # 
regression test for GH1225 From 80962c17a9f63713947d6890ca739f7092677f0b Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Thu, 30 May 2019 09:14:07 +0200 Subject: [PATCH 7/8] Added entry in whats-new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dfdca55d218..92258c1543d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,9 @@ v0.12.2 (unreleased) Enhancements ~~~~~~~~~~~~ +- netCDF chunksizes are now only dropped when original_shape is different, + not when it isn't found. (:issue:`2198`) + By `Karel van de Plassche `_. - Add ``fill_value`` argument for reindex, align, and merge operations to enable custom fill values. (:issue:`2876`) By `Zach Griffith `_. From 3dd051310a6cd6c17df9e392a4dc3d4b6fc6a479 Mon Sep 17 00:00:00 2001 From: Karel-van-de-Plassche Date: Thu, 30 May 2019 09:59:31 +0200 Subject: [PATCH 8/8] Use roundtrip mechanism in chunksizes conservation test --- xarray/tests/test_backends.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c621a0a0d31..e36a9adef78 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1135,20 +1135,16 @@ def test_encoding_kwarg_compression(self): assert ds.x.encoding == {} def test_keep_chunksizes_if_no_original_shape(self): - orig_ds = Dataset({'x': [1, 2, 3]}) + ds = Dataset({'x': [1, 2, 3]}) chunksizes = (2, ) - orig_ds.variables['x'].encoding = { + ds.variables['x'].encoding = { 'chunksizes': chunksizes } - with create_tmp_file() as tmp_file: - orig_ds.to_netcdf(path=tmp_file) - - nc = nc4.Dataset(tmp_file, mode='r') - with open_dataset(tmp_file) as ds: - assert_identical(orig_ds, ds) - assert_array_equal(orig_ds['x'].encoding['chunksizes'], - ds['x'].encoding['chunksizes']) + with self.roundtrip(ds) as actual: + assert_identical(ds, actual) + assert_array_equal(ds['x'].encoding['chunksizes'], 
+ actual['x'].encoding['chunksizes']) def test_encoding_chunksizes_unlimited(self): # regression test for GH1225