Skip to content

Commit fb847e0

Browse files
authored
Merge branch 'master' into kms/drop_dims
2 parents 6a0f04b + 612d390 commit fb847e0

File tree

11 files changed

+467
-84
lines changed

11 files changed

+467
-84
lines changed

doc/whats-new.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,17 @@ Enhancements
6868
- :py:meth:`pandas.Series.dropna` is now supported for a
6969
:py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex`
7070
(:issue:`2688`). By `Spencer Clark <https://github.com/spencerkclark>`_.
71+
- :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which
72+
can be used to require that ``cftime.datetime`` objects are always used, or
73+
never used when decoding dates encoded with a standard calendar. This can be
74+
used to ensure consistent date types are returned when using
75+
:py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence
76+
serialization warnings raised if dates from a standard calendar are found to
77+
be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By
78+
`Spencer Clark <https://github.com/spencerkclark>`_.
7179
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
7280
By `Kevin Squire <https://github.com/kmsquire>`_.
73-
81+
7482
Bug fixes
7583
~~~~~~~~~
7684

@@ -96,6 +104,16 @@ Bug fixes
96104
- Masking data arrays with :py:meth:`xarray.DataArray.where` now returns an
97105
array with the name of the original masked array (:issue:`2748` and :issue:`2457`).
98106
By `Yohai Bar-Sinai <https://github.com/yohai>`_.
107+
- Fixed error when trying to reduce a DataArray using a function which does not
108+
require an axis argument. (:issue:`2768`)
109+
By `Tom Nicholas <https://github.com/TomNicholas>`_.
110+
111+
- Per `CF conventions
112+
<http://cfconventions.org/cf-conventions/cf-conventions.html#calendar>`_,
113+
specifying ``'standard'`` as the calendar type in
114+
:py:meth:`~xarray.cftime_range` now correctly refers to the ``'gregorian'``
115+
calendar instead of the ``'proleptic_gregorian'`` calendar (:issue:`2761`).
116+
99117
.. _whats-new.0.11.3:
100118

101119
v0.11.3 (26 January 2019)

xarray/backends/api.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
161161
mask_and_scale=None, decode_times=True, autoclose=None,
162162
concat_characters=True, decode_coords=True, engine=None,
163163
chunks=None, lock=None, cache=None, drop_variables=None,
164-
backend_kwargs=None):
164+
backend_kwargs=None, use_cftime=None):
165165
"""Load and decode a dataset from a file or file-like object.
166166
167167
Parameters
@@ -231,6 +231,16 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
231231
A dictionary of keyword arguments to pass on to the backend. This
232232
may be useful when backend options would improve performance or
233233
allow user control of dataset processing.
234+
use_cftime: bool, optional
235+
Only relevant if encoded dates come from a standard calendar
236+
(e.g. 'gregorian', 'proleptic_gregorian', 'standard', or not
237+
specified). If None (default), attempt to decode times to
238+
``np.datetime64[ns]`` objects; if this is not possible, decode times to
239+
``cftime.datetime`` objects. If True, always decode times to
240+
``cftime.datetime`` objects, regardless of whether or not they can be
241+
represented using ``np.datetime64[ns]`` objects. If False, always
242+
decode times to ``np.datetime64[ns]`` objects; if this is not possible
243+
raise an error.
234244
235245
Returns
236246
-------
@@ -269,7 +279,7 @@ def maybe_decode_store(store, lock=False):
269279
ds = conventions.decode_cf(
270280
store, mask_and_scale=mask_and_scale, decode_times=decode_times,
271281
concat_characters=concat_characters, decode_coords=decode_coords,
272-
drop_variables=drop_variables)
282+
drop_variables=drop_variables, use_cftime=use_cftime)
273283

274284
_protect_dataset_variables_inplace(ds, cache)
275285

@@ -284,7 +294,8 @@ def maybe_decode_store(store, lock=False):
284294
mtime = None
285295
token = tokenize(filename_or_obj, mtime, group, decode_cf,
286296
mask_and_scale, decode_times, concat_characters,
287-
decode_coords, engine, chunks, drop_variables)
297+
decode_coords, engine, chunks, drop_variables,
298+
use_cftime)
288299
name_prefix = 'open_dataset-%s' % token
289300
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
290301
ds2._file_obj = ds._file_obj
@@ -360,7 +371,7 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
360371
mask_and_scale=None, decode_times=True, autoclose=None,
361372
concat_characters=True, decode_coords=True, engine=None,
362373
chunks=None, lock=None, cache=None, drop_variables=None,
363-
backend_kwargs=None):
374+
backend_kwargs=None, use_cftime=None):
364375
"""Open a DataArray from a netCDF file containing a single data variable.
365376
366377
This is designed to read netCDF files with only one data variable. If
@@ -428,6 +439,16 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
428439
A dictionary of keyword arguments to pass on to the backend. This
429440
may be useful when backend options would improve performance or
430441
allow user control of dataset processing.
442+
use_cftime: bool, optional
443+
Only relevant if encoded dates come from a standard calendar
444+
(e.g. 'gregorian', 'proleptic_gregorian', 'standard', or not
445+
specified). If None (default), attempt to decode times to
446+
``np.datetime64[ns]`` objects; if this is not possible, decode times to
447+
``cftime.datetime`` objects. If True, always decode times to
448+
``cftime.datetime`` objects, regardless of whether or not they can be
449+
represented using ``np.datetime64[ns]`` objects. If False, always
450+
decode times to ``np.datetime64[ns]`` objects; if this is not possible
451+
raise an error.
431452
432453
Notes
433454
-----
@@ -450,7 +471,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
450471
decode_coords=decode_coords, engine=engine,
451472
chunks=chunks, lock=lock, cache=cache,
452473
drop_variables=drop_variables,
453-
backend_kwargs=backend_kwargs)
474+
backend_kwargs=backend_kwargs,
475+
use_cftime=use_cftime)
454476

455477
if len(dataset.data_vars) != 1:
456478
raise ValueError('Given file dataset contains more than one data '

xarray/coding/cftime_offsets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def get_date_type(calendar):
6868
'proleptic_gregorian': cftime.DatetimeProlepticGregorian,
6969
'julian': cftime.DatetimeJulian,
7070
'all_leap': cftime.DatetimeAllLeap,
71-
'standard': cftime.DatetimeProlepticGregorian
71+
'standard': cftime.DatetimeGregorian
7272
}
7373
return calendars[calendar]
7474

@@ -679,9 +679,9 @@ def cftime_range(start=None, end=None, periods=None, freq='D',
679679
+--------------------------------+---------------------------------------+
680680
| Alias | Date type |
681681
+================================+=======================================+
682-
| standard, proleptic_gregorian | ``cftime.DatetimeProlepticGregorian`` |
682+
| standard, gregorian | ``cftime.DatetimeGregorian`` |
683683
+--------------------------------+---------------------------------------+
684-
| gregorian | ``cftime.DatetimeGregorian`` |
684+
| proleptic_gregorian | ``cftime.DatetimeProlepticGregorian`` |
685685
+--------------------------------+---------------------------------------+
686686
| noleap, 365_day | ``cftime.DatetimeNoLeap`` |
687687
+--------------------------------+---------------------------------------+

xarray/coding/times.py

Lines changed: 76 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -80,32 +80,7 @@ def _unpack_netcdf_time_units(units):
8080
return delta_units, ref_date
8181

8282

83-
def _decode_datetime_with_cftime(num_dates, units, calendar):
84-
cftime = _import_cftime()
85-
86-
if cftime.__name__ == 'cftime':
87-
dates = np.asarray(cftime.num2date(num_dates, units, calendar,
88-
only_use_cftime_datetimes=True))
89-
else:
90-
# Must be using num2date from an old version of netCDF4 which
91-
# does not have the only_use_cftime_datetimes option.
92-
dates = np.asarray(cftime.num2date(num_dates, units, calendar))
93-
94-
if (dates[np.nanargmin(num_dates)].year < 1678 or
95-
dates[np.nanargmax(num_dates)].year >= 2262):
96-
if calendar in _STANDARD_CALENDARS:
97-
warnings.warn(
98-
'Unable to decode time axis into full '
99-
'numpy.datetime64 objects, continuing using dummy '
100-
'cftime.datetime objects instead, reason: dates out '
101-
'of range', SerializationWarning, stacklevel=3)
102-
else:
103-
if calendar in _STANDARD_CALENDARS:
104-
dates = cftime_to_nptime(dates)
105-
return dates
106-
107-
108-
def _decode_cf_datetime_dtype(data, units, calendar):
83+
def _decode_cf_datetime_dtype(data, units, calendar, use_cftime):
10984
# Verify that at least the first and last date can be decoded
11085
# successfully. Otherwise, tracebacks end up swallowed by
11186
# Dataset.__repr__ when users try to view their lazily decoded array.
@@ -115,7 +90,8 @@ def _decode_cf_datetime_dtype(data, units, calendar):
11590
last_item(values) or [0]])
11691

11792
try:
118-
result = decode_cf_datetime(example_value, units, calendar)
93+
result = decode_cf_datetime(example_value, units, calendar,
94+
use_cftime)
11995
except Exception:
12096
calendar_msg = ('the default calendar' if calendar is None
12197
else 'calendar %r' % calendar)
@@ -129,7 +105,52 @@ def _decode_cf_datetime_dtype(data, units, calendar):
129105
return dtype
130106

131107

132-
def decode_cf_datetime(num_dates, units, calendar=None):
108+
def _decode_datetime_with_cftime(num_dates, units, calendar):
109+
cftime = _import_cftime()
110+
111+
if cftime.__name__ == 'cftime':
112+
return np.asarray(cftime.num2date(num_dates, units, calendar,
113+
only_use_cftime_datetimes=True))
114+
else:
115+
# Must be using num2date from an old version of netCDF4 which
116+
# does not have the only_use_cftime_datetimes option.
117+
return np.asarray(cftime.num2date(num_dates, units, calendar))
118+
119+
120+
def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
121+
if calendar not in _STANDARD_CALENDARS:
122+
raise OutOfBoundsDatetime(
123+
'Cannot decode times from a non-standard calendar, {!r}, using '
124+
'pandas.'.format(calendar))
125+
126+
delta, ref_date = _unpack_netcdf_time_units(units)
127+
delta = _netcdf_to_numpy_timeunit(delta)
128+
try:
129+
ref_date = pd.Timestamp(ref_date)
130+
except ValueError:
131+
# ValueError is raised by pd.Timestamp for non-ISO timestamp
132+
# strings, in which case we fall back to using cftime
133+
raise OutOfBoundsDatetime
134+
135+
# fixes: https://github.com/pydata/pandas/issues/14068
136+
# these lines check if the lowest or the highest value in dates
137+
# causes an OutOfBoundsDatetime (Overflow) error
138+
with warnings.catch_warnings():
139+
warnings.filterwarnings('ignore', 'invalid value encountered',
140+
RuntimeWarning)
141+
pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
142+
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
143+
144+
# Cast input dates to integers of nanoseconds because `pd.to_datetime`
145+
# works much faster when dealing with integers
146+
# make _NS_PER_TIME_DELTA an array to ensure type upcasting
147+
flat_num_dates_ns_int = (flat_num_dates.astype(np.float64) *
148+
_NS_PER_TIME_DELTA[delta]).astype(np.int64)
149+
150+
return (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + ref_date).values
151+
152+
153+
def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
133154
"""Given an array of numeric dates in netCDF format, convert it into a
134155
numpy array of date time objects.
135156
@@ -149,41 +170,30 @@ def decode_cf_datetime(num_dates, units, calendar=None):
149170
if calendar is None:
150171
calendar = 'standard'
151172

152-
delta, ref_date = _unpack_netcdf_time_units(units)
153-
154-
try:
155-
if calendar not in _STANDARD_CALENDARS:
156-
raise OutOfBoundsDatetime
157-
158-
delta = _netcdf_to_numpy_timeunit(delta)
173+
if use_cftime is None:
159174
try:
160-
ref_date = pd.Timestamp(ref_date)
161-
except ValueError:
162-
# ValueError is raised by pd.Timestamp for non-ISO timestamp
163-
# strings, in which case we fall back to using cftime
164-
raise OutOfBoundsDatetime
165-
166-
# fixes: https://github.com/pydata/pandas/issues/14068
167-
# these lines check if the the lowest or the highest value in dates
168-
# cause an OutOfBoundsDatetime (Overflow) error
169-
with warnings.catch_warnings():
170-
warnings.filterwarnings('ignore', 'invalid value encountered',
171-
RuntimeWarning)
172-
pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
173-
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
174-
175-
# Cast input dates to integers of nanoseconds because `pd.to_datetime`
176-
# works much faster when dealing with integers
177-
# make _NS_PER_TIME_DELTA an array to ensure type upcasting
178-
flat_num_dates_ns_int = (flat_num_dates.astype(np.float64) *
179-
_NS_PER_TIME_DELTA[delta]).astype(np.int64)
180-
181-
dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') +
182-
ref_date).values
183-
184-
except (OutOfBoundsDatetime, OverflowError):
175+
dates = _decode_datetime_with_pandas(flat_num_dates, units,
176+
calendar)
177+
except (OutOfBoundsDatetime, OverflowError):
178+
dates = _decode_datetime_with_cftime(
179+
flat_num_dates.astype(np.float), units, calendar)
180+
181+
if (dates[np.nanargmin(num_dates)].year < 1678 or
182+
dates[np.nanargmax(num_dates)].year >= 2262):
183+
if calendar in _STANDARD_CALENDARS:
184+
warnings.warn(
185+
'Unable to decode time axis into full '
186+
'numpy.datetime64 objects, continuing using '
187+
'cftime.datetime objects instead, reason: dates out '
188+
'of range', SerializationWarning, stacklevel=3)
189+
else:
190+
if calendar in _STANDARD_CALENDARS:
191+
dates = cftime_to_nptime(dates)
192+
elif use_cftime:
185193
dates = _decode_datetime_with_cftime(
186194
flat_num_dates.astype(np.float), units, calendar)
195+
else:
196+
dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar)
187197

188198
return dates.reshape(num_dates.shape)
189199

@@ -383,6 +393,8 @@ def encode_cf_timedelta(timedeltas, units=None):
383393

384394

385395
class CFDatetimeCoder(VariableCoder):
396+
def __init__(self, use_cftime=None):
397+
self.use_cftime = use_cftime
386398

387399
def encode(self, variable, name=None):
388400
dims, data, attrs, encoding = unpack_for_encoding(variable)
@@ -403,9 +415,11 @@ def decode(self, variable, name=None):
403415
if 'units' in attrs and 'since' in attrs['units']:
404416
units = pop_to(attrs, encoding, 'units')
405417
calendar = pop_to(attrs, encoding, 'calendar')
406-
dtype = _decode_cf_datetime_dtype(data, units, calendar)
418+
dtype = _decode_cf_datetime_dtype(data, units, calendar,
419+
self.use_cftime)
407420
transform = partial(
408-
decode_cf_datetime, units=units, calendar=calendar)
421+
decode_cf_datetime, units=units, calendar=calendar,
422+
use_cftime=self.use_cftime)
409423
data = lazy_elemwise_func(data, transform, dtype)
410424

411425
return Variable(dims, data, attrs, encoding)

0 commit comments

Comments
 (0)