-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
sparse=True option for from_dataframe and from_series #3210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -727,7 +727,7 @@ def reset_coords( | |
else: | ||
if self.name is None: | ||
raise ValueError( | ||
"cannot reset_coords with drop=False " "on an unnamed DataArrray" | ||
"cannot reset_coords with drop=False on an unnamed DataArrray" | ||
) | ||
dataset[self.name] = self.variable | ||
return dataset | ||
|
@@ -1468,9 +1468,7 @@ def expand_dims( | |
This object, but with an additional dimension(s). | ||
""" | ||
if isinstance(dim, int): | ||
raise TypeError( | ||
"dim should be hashable or sequence/mapping of " "hashables" | ||
) | ||
raise TypeError("dim should be hashable or sequence/mapping of hashables") | ||
elif isinstance(dim, Sequence) and not isinstance(dim, str): | ||
if len(dim) != len(set(dim)): | ||
raise ValueError("dims should not contain duplicate values.") | ||
|
@@ -2295,19 +2293,27 @@ def from_dict(cls, d: dict) -> "DataArray": | |
return obj | ||
|
||
@classmethod | ||
def from_series(cls, series: pd.Series) -> "DataArray": | ||
def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": | ||
"""Convert a pandas.Series into an xarray.DataArray. | ||
|
||
If the series's index is a MultiIndex, it will be expanded into a | ||
tensor product of one-dimensional coordinates (filling in missing | ||
values with NaN). Thus this operation should be the inverse of the | ||
`to_series` method. | ||
|
||
If sparse=True, creates a sparse array instead of a dense NumPy array. | ||
Requires the pydata/sparse package. | ||
|
||
See also | ||
-------- | ||
xarray.Dataset.from_dataframe | ||
""" | ||
# TODO: add a 'name' parameter | ||
name = series.name | ||
df = pd.DataFrame({name: series}) | ||
ds = Dataset.from_dataframe(df) | ||
return ds[name] | ||
temp_name = "__temporary_name" | ||
df = pd.DataFrame({temp_name: series}) | ||
ds = Dataset.from_dataframe(df, sparse=sparse) | ||
result = cast(DataArray, ds[temp_name]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I think I can get rid of the cast by changing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
... or not. Unlike
@overload
def __getitem__(self, key: Mapping) -> "Dataset":
...
@overload
def __getitem__(self, key: Hashable) -> "DataArray":
...
@overload
def __getitem__(self, key: Any) -> "Dataset":
...
def __getitem__(self, key):
mypy complains:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I tried something very similar. This is why I wrote the "TODO" note above mentioning python/mypy#7328. |
||
result.name = series.name | ||
return result | ||
|
||
def to_cdms2(self) -> "cdms2_Variable": | ||
"""Convert this array into a cdms2.Variable | ||
|
@@ -2722,7 +2728,7 @@ def dot( | |
""" | ||
if isinstance(other, Dataset): | ||
raise NotImplementedError( | ||
"dot products are not yet supported " "with Dataset objects." | ||
"dot products are not yet supported with Dataset objects." | ||
) | ||
if not isinstance(other, DataArray): | ||
raise TypeError("dot only operates on DataArrays.") | ||
|
Uh oh!
There was an error while loading. Please reload this page.