diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fa63c2d4..3ec2cc4e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,36 +24,52 @@ jobs:
     steps:
       - name: Checkout Repo
         uses: actions/checkout@v4
-      - name: Cache conda
-        uses: actions/cache@v4
-        env:
-          # Increase this value to reset cache if ci/environment.yml has not changed
-          CACHE_NUMBER: 0
+      # - name: Cache conda
+      #   uses: actions/cache@v4
+      #   env:
+      #     # Increase this value to reset cache if ci/environment.yml has not changed
+      #     CACHE_NUMBER: 0
+      #   with:
+      #     path: ~/conda_pkgs_dir
+      #     key:
+      #       test-${{ matrix.os }}-conda-py${{ matrix.python }}-${{ env.CACHE_NUMBER }}-${{
+      #       hashFiles('ci/environment.yml') }}
+      # - uses: conda-incubator/setup-miniconda@v3
+      #   with:
+      #     activate-environment: sparse-dev
+      #     allow-softlinks: true
+      #     environment-file: ci/environment.yml
+      #     python-version: ${{ matrix.python }}
+      #     miniforge-version: latest
+      # - name: Install julia and numba
+      #   run: |
+      #     conda install conda-forge::numba
+      #     conda install conda-forge::julia
+      # - uses: julia-actions/setup-julia@v1.9
+      #   with:
+      #     version: '1.10'
+      - uses: actions/setup-python@v5
         with:
-          path: ~/conda_pkgs_dir
-          key:
-            test-${{ matrix.os }}-conda-py${{ matrix.python }}-${{ env.CACHE_NUMBER }}-${{
-            hashFiles('ci/environment.yml') }}
-      - uses: conda-incubator/setup-miniconda@v3
-        with:
-          activate-environment: sparse-dev
-          allow-softlinks: true
-          environment-file: ci/environment.yml
           python-version: ${{ matrix.python }}
-          miniforge-version: latest
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
       - name: Install package
         run: |
-          pip install -e .[tests]
+          poetry install --with tests,finch
+      # - name: Install package
+      #   run: |
+      #     pip install -e .[tests,finch]
       - name: Run tests
         run: |
-          pytest --pyargs sparse
+          poetry run pytest tests/test_backends.py
+          # pytest --pyargs sparse/tests/test_backends.py
       - uses: codecov/codecov-action@v4
         if: always()
-      - name: Publish Test Results
-        uses: EnricoMi/publish-unit-test-result-action/composite@v2
-        if: always()
-        with:
-          files: "**/test-*.xml"
+      # - name: Publish Test Results
+      #   uses: EnricoMi/publish-unit-test-result-action/composite@v2
+      #   if: always()
+      #   with:
+      #     files: "**/test-*.xml"

   docs:
     defaults:
       run:
diff --git a/benchmarks/benchmark_gcxs.py b/benchmarks/benchmark_gcxs.py
index 348f1e21..580f8c06 100644
--- a/benchmarks/benchmark_gcxs.py
+++ b/benchmarks/benchmark_gcxs.py
@@ -6,8 +6,12 @@ class MatrixMultiplySuite:
     def setup(self):
         rng = np.random.default_rng(0)

-        self.x = sparse.random((100, 100), density=0.01, format="gcxs", random_state=rng)
-        self.y = sparse.random((100, 100), density=0.01, format="gcxs", random_state=rng)
+        self.x = sparse.random(
+            (100, 100), density=0.01, format="gcxs", random_state=rng
+        )
+        self.y = sparse.random(
+            (100, 100), density=0.01, format="gcxs", random_state=rng
+        )

         self.x @ self.y  # Numba compilation

@@ -18,8 +22,12 @@ def time_matmul(self):
 class ElemwiseSuite:
     def setup(self):
         rng = np.random.default_rng(0)
-        self.x = sparse.random((100, 100, 100), density=0.01, format="gcxs", random_state=rng)
-        self.y = sparse.random((100, 100, 100), density=0.01, format="gcxs", random_state=rng)
+        self.x = sparse.random(
+            (100, 100, 100), density=0.01, format="gcxs", random_state=rng
+        )
+        self.y = sparse.random(
+            (100, 100, 100), density=0.01, format="gcxs", random_state=rng
+        )

         self.x + self.y  # Numba compilation

@@ -33,8 +41,12 @@ def time_mul(self):
 class ElemwiseBroadcastingSuite:
     def setup(self):
         rng = np.random.default_rng(0)
-        self.x = sparse.random((100, 1, 100), density=0.01, format="gcxs", random_state=rng)
-        self.y = sparse.random((100, 100), density=0.01, format="gcxs", random_state=rng)
+        self.x = sparse.random(
+            (100, 1, 100), density=0.01, format="gcxs", random_state=rng
+        )
+        self.y = sparse.random(
+            (100, 100), density=0.01, format="gcxs", random_state=rng
+        )

     def time_add(self):
         self.x + self.y
@@ -47,7 +59,9 @@ class IndexingSuite:
     def setup(self):
         rng = np.random.default_rng(0)
         self.index = rng.integers(0, 100, 50)
-        self.x = sparse.random((100, 100, 100), density=0.01, format="gcxs", random_state=rng)
+        self.x = sparse.random(
+            (100, 100, 100), density=0.01, format="gcxs", random_state=rng
+        )

         # Numba compilation
         self.x[5]
@@ -76,9 +90,9 @@ class DenseMultiplySuite:
     def setup(self, compressed_axis, n_vecs):
         rng = np.random.default_rng(1337)
         n = 10000
-        x = sparse.random((n, n), density=0.001, format="gcxs", random_state=rng).change_compressed_axes(
-            (compressed_axis,)
-        )
+        x = sparse.random(
+            (n, n), density=0.001, format="gcxs", random_state=rng
+        ).change_compressed_axes((compressed_axis,))
         self.x = x
         self.t = rng.random((n, n_vecs))
         self.u = rng.random((n_vecs, n))
diff --git a/benchmarks/benchmark_tensordot.py b/benchmarks/benchmark_tensordot.py
index f406118e..2f8db379 100644
--- a/benchmarks/benchmark_tensordot.py
+++ b/benchmarks/benchmark_tensordot.py
@@ -33,7 +33,9 @@ def setup(self):
         self.s2 = sparse.random((100, 100, 100, 100), density=0.01, random_state=rng)

     def time_dense(self):
-        sparse.tensordot(self.s1, self.s2, axes=([0, 1], [0, 2]), return_type=np.ndarray)
+        sparse.tensordot(
+            self.s1, self.s2, axes=([0, 1], [0, 2]), return_type=np.ndarray
+        )

     def time_sparse(self):
         sparse.tensordot(self.s1, self.s2, axes=([0, 1], [0, 2]))
diff --git a/docs/conf.py b/docs/conf.py
index 7725d6b0..1e50f8a2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -48,7 +48,9 @@
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]

-mathjax_path = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+mathjax_path = (
+    "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+)

 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
@@ -151,7 +153,9 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
-latex_documents = [(root_doc, "sparse.tex", "sparse Documentation", "Sparse Developers", "manual")]
+latex_documents = [
+    (root_doc, "sparse.tex", "sparse Documentation", "Sparse Developers", "manual")
+]

 # -- Options for manual page output ---------------------------------------
diff --git a/docs/gen_logo.py b/docs/gen_logo.py
index e216921a..d1c2bfdd 100644
--- a/docs/gen_logo.py
+++ b/docs/gen_logo.py
@@ -63,7 +63,9 @@ def fill(rs):
             root,
             "rect",
             style=f"{colors['orange']};{fill(rs)};",
-            transform=transform(1, b, 0, 1, (i + 5) * s + offset_x, (i * b + j) * s + offset_y),
+            transform=transform(
+                1, b, 0, 1, (i + 5) * s + offset_x, (i * b + j) * s + offset_y
+            ),
             **kwargs,
         )

@@ -103,7 +105,9 @@ def fill(rs):
             root,
             "rect",
             style=f"{colors['blue']};{fill(rs)};",
-            transform=transform(1, b, 0, 1, i * s + offset_x, (i * b + j + y2) * s + offset_y),
+            transform=transform(
+                1, b, 0, 1, i * s + offset_x, (i * b + j + y2) * s + offset_y
+            ),
             **kwargs,
         )

@@ -119,7 +123,9 @@ def fill(rs):
             root,
             "rect",
             style=f"{colors['grey']};{fill(rs)};",
-            transform=transform(1, -b, 0, 1, (i + 5) * s + offset_x, ((10 - i) * b + j + 5) * s + offset_y),
+            transform=transform(
+                1, -b, 0, 1, (i + 5) * s + offset_x, ((10 - i) * b + j + 5) * s + offset_y
+            ),
             **kwargs,
         )
diff --git a/pyproject.toml b/pyproject.toml
index a57849a8..6cd47eae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,81 +1,110 @@
-[build-system]
-requires = ["setuptools>=64", "setuptools_scm>=8"]
-build-backend = "setuptools.build_meta"
+# [build-system]
+# requires = ["setuptools>=64", "setuptools_scm>=8"]
+# build-backend = "setuptools.build_meta"
+
+# [project]
+# name = "sparse"
+# dynamic = ["version"]
+# description = "Sparse n-dimensional arrays for the PyData ecosystem"
+# readme = "README.rst"
+# dependencies = ["numpy>=1.17"]
+# maintainers = [{ name = "Hameer Abbasi", email = "hameerabbasi@yahoo.com" }]
+# requires-python = ">=3.9"
+# license = { file = "LICENSE" }
+# keywords = ["sparse", "numpy", "scipy", "dask"]
+# classifiers = [
+#     "Development Status :: 2 - Pre-Alpha",
+#     "Operating System :: OS Independent",
+#     "License :: OSI Approved :: BSD License",
+#     "Programming Language :: Python",
+#     "Programming Language :: Python :: 3",
+#     "Programming Language :: Python :: 3.9",
+#     "Programming Language :: Python :: 3.10",
+#     "Programming Language :: Python :: 3.11",
+#     "Programming Language :: Python :: 3 :: Only",
+#     "Intended Audience :: Developers",
+#     "Intended Audience :: Science/Research",
+# ]
+
+# [project.optional-dependencies]
+# docs = ["sphinx", "sphinx_rtd_theme", "scipy"]
+# tests = [
+#     "dask[array]",
+#     "pytest>=3.5",
+#     "pytest-cov",
+#     "pre-commit",
+#     "scipy",
+# ]
+# tox = ["sparse[tests]", "tox"]
+# all = ["sparse[docs,tox]", "matrepr"]
+# finch = ["finch-tensor==0.1.6"]
+
+# [project.urls]
+# Documentation = "https://sparse.pydata.org/"
+# Source = "https://github.com/pydata/sparse/"
+# Repository = "https://github.com/pydata/sparse.git"
+# "Issue Tracker" = "https://github.com/pydata/sparse/issues"
+# Discussions = "https://github.com/pydata/sparse/discussions"
+
+# # [project.entry-points.numba_extensions]
+# # init = "sparse.pydata_backend._numba_extension:_init_extension"
+
+# [tool.setuptools.packages.find]
+# where = ["."]
+# include = ["sparse", "sparse.*"]
+
+# [tool.setuptools_scm]
+# version_file = "sparse/_version.py"
+
+# [tool.ruff]
+# exclude = ["sparse/_version.py"]
+# line-length = 120
+
+# [tool.ruff.lint]
+# select = ["F", "E", "W", "I", "B", "UP", "YTT", "BLE", "C4", "T10", "ISC", "ICN", "PIE", "PYI", "RSE", "RET", "SIM", "PGH", "FLY", "NPY", "PERF"]

-[project]
-name = "sparse"
-dynamic = ["version"]
-description = "Sparse n-dimensional arrays for the PyData ecosystem"
+# [tool.ruff.lint.isort.sections]
+# numpy = ["numpy", "numpy.*", "scipy", "scipy.*"]
+
+# [tool.ruff.format]
+# quote-style = "double"
+# docstring-code-format = true
+
+# [tool.ruff.lint.isort]
+# section-order = [
+#     "future",
+#     "standard-library",
+#     "first-party",
+#     "third-party",
+#     "numpy",
+#     "local-folder",
+# ]
+
+[tool.poetry]
+name = "sparse-copy"
+version = "1.1.1"
+description = ""
+authors = ["Hameer Abbasi"]
 readme = "README.rst"
-dependencies = ["numpy>=1.17", "numba>=0.49"]
-maintainers = [{ name = "Hameer Abbasi", email = "hameerabbasi@yahoo.com" }]
-requires-python = ">=3.9"
-license = { file = "LICENSE" }
-keywords = ["sparse", "numpy", "scipy", "dask"]
-classifiers = [
-    "Development Status :: 2 - Pre-Alpha",
-    "Operating System :: OS Independent",
-    "License :: OSI Approved :: BSD License",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3 :: Only",
-    "Intended Audience :: Developers",
-    "Intended Audience :: Science/Research",
-]
-
-[project.optional-dependencies]
-docs = ["sphinx", "sphinx_rtd_theme", "scipy"]
-tests = [
-    "dask[array]",
-    "pytest>=3.5",
-    "pytest-cov",
-    "pre-commit",
-    "scipy",
-]
-tox = ["sparse[tests]", "tox"]
-all = ["sparse[docs,tox]", "matrepr"]
-finch = ["finch-tensor"]
-
-[project.urls]
-Documentation = "https://sparse.pydata.org/"
-Source = "https://github.com/pydata/sparse/"
-Repository = "https://github.com/pydata/sparse.git"
-"Issue Tracker" = "https://github.com/pydata/sparse/issues"
-Discussions = "https://github.com/pydata/sparse/discussions"
-
-[project.entry-points.numba_extensions]
-init = "sparse.pydata_backend._numba_extension:_init_extension"
-
-[tool.setuptools.packages.find]
-where = ["."]
-include = ["sparse", "sparse.*"]
-
-[tool.setuptools_scm]
-version_file = "sparse/_version.py"
-
-[tool.ruff]
-exclude = ["sparse/_version.py"]
-line-length = 120
-
-[tool.ruff.lint]
-select = ["F", "E", "W", "I", "B", "UP", "YTT", "BLE", "C4", "T10", "ISC", "ICN", "PIE", "PYI", "RSE", "RET", "SIM", "PGH", "FLY", "NPY", "PERF"]
-
-[tool.ruff.lint.isort.sections]
-numpy = ["numpy", "numpy.*", "scipy", "scipy.*"]
-
-[tool.ruff.format]
-quote-style = "double"
-docstring-code-format = true
-
-[tool.ruff.lint.isort]
-section-order = [
-    "future",
-    "standard-library",
-    "first-party",
-    "third-party",
-    "numpy",
-    "local-folder",
-]
+packages = [{include = "sparse"}]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+juliapkg = "^0.1.10"
+juliacall = "^0.9.15"
+numpy = "^1.19"
+llvmlite = "0.42.0"
+numba = "^0.59"
+
+[tool.poetry.group.tests.dependencies]
+pytest = "^7.4.4"
+pre-commit = "^3.6.0"
+pytest-cov = "^4.1.0"
+scipy = "^1.7"
+
+[tool.poetry.group.finch.dependencies]
+finch-tensor = "0.1.6"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/sparse/__init__.py b/sparse/__init__.py
index 06400ca0..807fe52d 100644
--- a/sparse/__init__.py
+++ b/sparse/__init__.py
@@ -2,8 +2,6 @@
 from contextvars import ContextVar
 from enum import Enum

-from ._version import __version__, __version_tuple__  # noqa: F401
-
 __array_api_version__ = "2022.12"

@@ -36,9 +34,7 @@ def __exit__(self, exc_type, exc_value, traceback):
     @staticmethod
     def get_backend_module():
         backend = backend_var.get()
-        if backend == BackendType.PyData:
-            import sparse.pydata_backend as backend_module
-        elif backend == BackendType.Finch:
+        if backend == BackendType.PyData or backend == BackendType.Finch:
             import sparse.finch_backend as backend_module
         else:
             raise ValueError(f"Invalid backend identifier: {backend}")
@@ -47,7 +43,7 @@ def get_backend_module():

 def __getattr__(attr):
     if attr == "pydata_backend":
-        import sparse.pydata_backend as backend_module
+        import sparse.finch_backend as backend_module

         return backend_module
     if attr == "finch_backend":
diff --git a/sparse/finch_backend/__init__.py b/sparse/finch_backend/__init__.py
index f4599979..9328a2bf 100644
--- a/sparse/finch_backend/__init__.py
+++ b/sparse/finch_backend/__init__.py
@@ -1,4 +1,22 @@
-Tensor = ()
+import juliapkg
+
+juliapkg.add("Finch", "9177782c-1635-4eb9-9bfb-d9dfa25e6bce", version="0.6.16")
+juliapkg.resolve()
+
+from juliacall import Main as jl  # noqa: E402
+
+jl.seval("using Finch")
+
+try:
+    import finch  # noqa: F401
+except ModuleNotFoundError:
+    raise ImportError(
+        "Finch not installed. Run `pip install sparse[finch]` to enable Finch backend"
+    ) from None
+
+from finch import Tensor, astype, permute_dims  # noqa: E402
+
+__all__ = ["Tensor", "permute_dims", "astype"]


 class COO:
diff --git a/sparse/pydata_backend/_common.py b/sparse/pydata_backend/_common.py
index cb2288d4..0ca3bb14 100644
--- a/sparse/pydata_backend/_common.py
+++ b/sparse/pydata_backend/_common.py
@@ -220,7 +220,11 @@ def matmul(a, b):
         raise TypeError(f"Cannot perform dot product on types {type(a)}, {type(b)}")

     if check_class_nan(a) or check_class_nan(b):
-        warnings.warn("Nan will not be propagated in matrix multiplication", RuntimeWarning, stacklevel=1)
+        warnings.warn(
+            "Nan will not be propagated in matrix multiplication",
+            RuntimeWarning,
+            stacklevel=1,
+        )

     # When b is 2-d, it is equivalent to dot
     if b.ndim <= 2:
@@ -363,7 +367,9 @@ def _dot(a, b, return_type=None):
     if isinstance(a, GCXS) and isinstance(b, np.ndarray):
         if a.compressed_axes == (0,):  # csr @ ndarray
             if return_type is None or return_type == np.ndarray:
-                return _dot_csr_ndarray_type(a.dtype, b.dtype)(out_shape, a.data, a.indices, a.indptr, b)
+                return _dot_csr_ndarray_type(a.dtype, b.dtype)(
+                    out_shape, a.data, a.indices, a.indptr, b
+                )
             data, indices, indptr = _dot_csr_ndarray_type_sparse(a.dtype, b.dtype)(
                 out_shape, a.data, a.indices, a.indptr, b
             )
@@ -377,7 +383,9 @@ def _dot(a, b, return_type=None):
                 return out.tocoo()
             return out
         if return_type is None or return_type == np.ndarray:  # csc @ ndarray
-            return _dot_csc_ndarray_type(a.dtype, b.dtype)(a.shape, b.shape, a.data, a.indices, a.indptr, b)
+            return _dot_csc_ndarray_type(a.dtype, b.dtype)(
+                a.shape, b.shape, a.data, a.indices, a.indptr, b
+            )
         data, indices, indptr = _dot_csc_ndarray_type_sparse(a.dtype, b.dtype)(
             a.shape, b.shape, a.data, a.indices, a.indptr, b
         )
@@ -397,7 +405,9 @@ def _dot(a, b, return_type=None):
         bt = b.T  # constant-time transpose
         if b.compressed_axes == (0,):
             if return_type is None or return_type == np.ndarray:
-                out = _dot_csc_ndarray_type(bt.dtype, at.dtype)(bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at)
+                out = _dot_csc_ndarray_type(bt.dtype, at.dtype)(
+                    bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at
+                )
                 return out.T
             data, indices, indptr = _dot_csc_ndarray_type_sparse(bt.dtype, at.dtype)(
                 bt.shape, at.shape, bt.data, b.indices, b.indptr, at
@@ -414,12 +424,16 @@ def _dot(a, b, return_type=None):

         # compressed_axes == (1,)
         if return_type is None or return_type == np.ndarray:
-            out = _dot_csr_ndarray_type(bt.dtype, at.dtype)(out_shape[::-1], bt.data, bt.indices, bt.indptr, at)
+            out = _dot_csr_ndarray_type(bt.dtype, at.dtype)(
+                out_shape[::-1], bt.data, bt.indices, bt.indptr, at
+            )
             return out.T
         data, indices, indptr = _dot_csr_ndarray_type_sparse(bt.dtype, at.dtype)(
             out_shape[::-1], bt.data, bt.indices, bt.indptr, at
         )
-        out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(1,), prune=True)
+        out = GCXS(
+            (data, indices, indptr), shape=out_shape, compressed_axes=(1,), prune=True
+        )
         if return_type == COO:
             return out.tocoo()
         return out
@@ -455,8 +469,12 @@ def _dot(a, b, return_type=None):
         b = b.view(type=np.ndarray).T

         if return_type is None or return_type == np.ndarray:
-            return _dot_coo_ndarray_type(a.dtype, b.dtype)(a.coords, a.data, b, out_shape)
-        coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)(a.coords, a.data, b, out_shape)
+            return _dot_coo_ndarray_type(a.dtype, b.dtype)(
+                a.coords, a.data, b, out_shape
+            )
+        coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)(
+            a.coords, a.data, b, out_shape
+        )
         out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
         if return_type == GCXS:
             return out.asformat("gcxs")
@@ -466,10 +484,16 @@ def _dot(a, b, return_type=None):
         a = a.view(type=np.ndarray)

         if return_type is None or return_type == np.ndarray:
-            return _dot_ndarray_coo_type(a.dtype, b.dtype)(a, b.coords, b.data, out_shape)
+            return _dot_ndarray_coo_type(a.dtype, b.dtype)(
+                a, b.coords, b.data, out_shape
+            )
         b = b.T
-        coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)(a, b.coords, b.data, out_shape)
-        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True, prune=True)
+        coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)(
+            a, b.coords, b.data, out_shape
+        )
+        out = COO(
+            coords, data, shape=out_shape, has_duplicates=False, sorted=True, prune=True
+        )
         if return_type == GCXS:
             return out.asformat("gcxs")
         return out
@@ -518,7 +542,9 @@ def wrapped(*args):


 @numba.jit(nopython=True, nogil=True)
-def _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr):  # pragma: no cover
+def _csr_csr_count_nnz(
+    out_shape, a_indices, b_indices, a_indptr, b_indptr
+):  # pragma: no cover
     """
     A function for computing the number of nonzero values in the resulting
     array from multiplying an array with compressed rows with an array
@@ -548,7 +574,9 @@


 @numba.jit(nopython=True, nogil=True)
-def _csr_ndarray_count_nnz(out_shape, indptr, a_indices, a_indptr, b):  # pragma: no cover
+def _csr_ndarray_count_nnz(
+    out_shape, indptr, a_indices, a_indptr, b
+):  # pragma: no cover
     """
     A function for computing the number of nonzero values in the resulting
     array from multiplying an array with compressed rows with a dense
@@ -578,7 +606,9 @@


 @numba.jit(nopython=True, nogil=True)
-def _csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b):  # pragma: no cover
+def _csc_ndarray_count_nnz(
+    a_shape, b_shape, indptr, a_indices, a_indptr, b
+):  # pragma: no cover
     """
     A function for computing the number of nonzero values in the resulting
     array from multiplying an array with compressed columns with a dense
@@ -622,7 +652,9 @@ def _dot_csr_csr_type(dt1, dt2):
         nogil=True,
         locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
     )
-    def _dot_csr_csr(out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr):  # pragma: no cover
+    def _dot_csr_csr(
+        out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr
+    ):  # pragma: no cover
         """
         Utility function taking in two ``GCXS`` objects and calculating
         their dot product: a @ b for a and b with compressed rows.
@@ -741,7 +773,9 @@ def _dot_csr_ndarray_type_sparse(dt1, dt2):
         nogil=True,
         locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
     )
-    def _dot_csr_ndarray_sparse(out_shape, a_data, a_indices, a_indptr, b):  # pragma: no cover
+    def _dot_csr_ndarray_sparse(
+        out_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
         """
         Utility function taking in one `GCXS` and one ``ndarray`` and
         calculating their dot product: a @ b for a with compressed rows.
@@ -790,7 +824,9 @@ def _dot_csc_ndarray_type_sparse(dt1, dt2):
         nogil=True,
         locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
     )
-    def _dot_csc_ndarray_sparse(a_shape, b_shape, a_data, a_indices, a_indptr, b):  # pragma: no cover
+    def _dot_csc_ndarray_sparse(
+        a_shape, b_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
         """
         Utility function taking in one `GCXS` and one ``ndarray`` and
         calculating their dot product: a @ b for a with compressed columns.
@@ -852,7 +888,9 @@ def _dot_csc_ndarray_type(dt1, dt2):
         nogil=True,
         locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
     )
-    def _dot_csc_ndarray(a_shape, b_shape, a_data, a_indices, a_indptr, b):  # pragma: no cover
+    def _dot_csc_ndarray(
+        a_shape, b_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
         """
         Utility function taking in one `GCXS` and one ``ndarray`` and
         calculating their dot product: a @ b for a with compressed columns.
@@ -890,7 +928,9 @@ def _dot_coo_coo_type(dt1, dt2):
         nogil=True,
         locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
     )
-    def _dot_coo_coo(out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr):  # pragma: no cover
+    def _dot_coo_coo(
+        out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr
+    ):  # pragma: no cover
         """
         Utility function taking in two ``COO`` objects and calculating
         their dot product: a @ b.
@@ -910,7 +950,9 @@ def _dot_coo_coo(out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indp
         n_row, n_col = out_shape

         # calculate nnz before multiplying so we can use static arrays
-        nnz = _csr_csr_count_nnz(out_shape, a_coords[1], b_coords[1], a_indptr, b_indptr)
+        nnz = _csr_csr_count_nnz(
+            out_shape, a_coords[1], b_coords[1], a_indptr, b_indptr
+        )
         coords = np.empty((2, nnz), dtype=np.intp)
         data = np.empty(nnz, dtype=dtr)
         next_ = np.full(n_col, -1)
@@ -1196,7 +1238,9 @@ def _parse_einsum_input(operands):
                 try:
                     s = index(s)
                 except TypeError as e:
-                    raise TypeError("For this input type lists must contain either int or Ellipsis") from e
+                    raise TypeError(
+                        "For this input type lists must contain either int or Ellipsis"
+                    ) from e
                 subscripts += np.core.einsumfunc.einsum_symbols[s]
             if num != last:
                 subscripts += ","
@@ -1210,7 +1254,9 @@
                 try:
                     s = index(s)
                 except TypeError as e:
-                    raise TypeError("For this input type lists must contain either int or Ellipsis") from e
+                    raise TypeError(
+                        "For this input type lists must contain either int or Ellipsis"
+                    ) from e
                 subscripts += np.core.einsumfunc.einsum_symbols[s]
         # Check for proper "->"
         if ("-" in subscripts) or (">" in subscripts):
@@ -1295,7 +1341,9 @@ def _parse_einsum_input(operands):

     # Make sure number operands is equivalent to the number of terms
     if len(input_subscripts.split(",")) != len(operands):
-        raise ValueError("Number of einsum subscripts must be equal to the number of operands.")
+        raise ValueError(
+            "Number of einsum subscripts must be equal to the number of operands."
+        )

     return (input_subscripts, output_subscript, operands)
@@ -1370,7 +1418,9 @@ def _einsum_single(lhs, rhs, operand):
         # scalar output - match numpy behaviour by not wrapping as array
         return new_data.sum()

-    return to_output_format(COO(new_coords, new_data, shape=new_shape, has_duplicates=True))
+    return to_output_format(
+        COO(new_coords, new_data, shape=new_shape, has_duplicates=True)
+    )


 def einsum(*operands, **kwargs):
@@ -1444,7 +1494,9 @@ def einsum(*operands, **kwargs):
         # perform necessary transpose and reductions
         array = _einsum_single(term, pterm, array)
         # calc broadcastable shape
-        shape = tuple(array.shape[pterm.index(ix)] if ix in pterm else 1 for ix in aligned_term)
+        shape = tuple(
+            array.shape[pterm.index(ix)] if ix in pterm else 1 for ix in aligned_term
+        )
         parrays.append(array.reshape(shape) if array.shape != shape else array)

     aligned_array = reduce(mul, parrays)
@@ -1592,7 +1644,9 @@ def eye(N, M=None, k=0, dtype=float, format="coo", **kwargs):
     coords = np.stack([n_coords, m_coords])
     data = np.array(1, dtype=dtype)

-    return COO(coords, data=data, shape=(N, M), has_duplicates=False, sorted=True).asformat(format, **kwargs)
+    return COO(
+        coords, data=data, shape=(N, M), has_duplicates=False, sorted=True
+    ).asformat(format, **kwargs)


 def full(shape, fill_value, dtype=None, format="coo", order="C", **kwargs):
@@ -1920,7 +1974,9 @@ def moveaxis(a, source, destination):
     destination = normalize_axis(destination, a.ndim)

     if len(source) != len(destination):
-        raise ValueError("`source` and `destination` arguments must have the same number of elements")
+        raise ValueError(
+            "`source` and `destination` arguments must have the same number of elements"
+        )

     order = [n for n in range(a.ndim) if n not in source]
@@ -1975,7 +2031,9 @@ def pad(array, pad_width, mode="constant", **kwargs):
     if mode.lower() != "constant":
         raise NotImplementedError(f"Mode '{mode}' is not yet supported.")

-    if not equivalent(kwargs.pop("constant_values", _zero_of_dtype(array.dtype)), array.fill_value):
+    if not equivalent(
+        kwargs.pop("constant_values", _zero_of_dtype(array.dtype)), array.fill_value
+    ):
         raise ValueError("constant_values can only be equal to fill value.")

     if kwargs:
@@ -1987,7 +2045,12 @@ def pad(array, pad_width, mode="constant", **kwargs):
     pad_width = np.broadcast_to(pad_width, (len(array.shape), 2))
     new_coords = array.coords + pad_width[:, 0:1]
-    new_shape = tuple([array.shape[i] + pad_width[i, 0] + pad_width[i, 1] for i in range(len(array.shape))])
+    new_shape = tuple(
+        [
+            array.shape[i] + pad_width[i, 0] + pad_width[i, 1]
+            for i in range(len(array.shape))
+        ]
+    )
     new_data = array.data
     return COO(new_coords, new_data, new_shape, fill_value=array.fill_value)
@@ -2004,7 +2067,9 @@ def format_to_string(format):
     raise ValueError(f"invalid format: {format}")


-def asarray(obj, /, *, dtype=None, format="coo", backend="pydata", device=None, copy=False):
+def asarray(
+    obj, /, *, dtype=None, format="coo", backend="pydata", device=None, copy=False
+):
     """
     Convert the input to a sparse array.
diff --git a/sparse/pydata_backend/_compressed/common.py b/sparse/pydata_backend/_compressed/common.py
index bd87d256..bafe3f79 100644
--- a/sparse/pydata_backend/_compressed/common.py
+++ b/sparse/pydata_backend/_compressed/common.py
@@ -7,12 +7,19 @@ def concatenate(arrays, axis=0, compressed_axes=None):
     from .compressed import GCXS

     check_consistent_fill_value(arrays)
-    arrays = [arr if isinstance(arr, GCXS) else GCXS(arr, compressed_axes=(axis,)) for arr in arrays]
+    arrays = [
+        arr if isinstance(arr, GCXS) else GCXS(arr, compressed_axes=(axis,))
+        for arr in arrays
+    ]
     axis = normalize_axis(axis, arrays[0].ndim)
     dim = sum(x.shape[axis] for x in arrays)
     shape = list(arrays[0].shape)
     shape[axis] = dim
-    assert all(x.shape[ax] == arrays[0].shape[ax] for x in arrays for ax in set(range(arrays[0].ndim)) - {axis})
+    assert all(
+        x.shape[ax] == arrays[0].shape[ax]
+        for x in arrays
+        for ax in set(range(arrays[0].ndim)) - {axis}
+    )
     if compressed_axes is None:
         compressed_axes = (axis,)
     if arrays[0].ndim == 1:
@@ -53,9 +60,16 @@ def stack(arrays, axis=0, compressed_axes=None):
     from .compressed import GCXS

     check_consistent_fill_value(arrays)
-    arrays = [arr if isinstance(arr, GCXS) else GCXS(arr, compressed_axes=(axis,)) for arr in arrays]
+    arrays = [
+        arr if isinstance(arr, GCXS) else GCXS(arr, compressed_axes=(axis,))
+        for arr in arrays
+    ]
     axis = normalize_axis(axis, arrays[0].ndim + 1)
-    assert all(x.shape[ax] == arrays[0].shape[ax] for x in arrays for ax in set(range(arrays[0].ndim)) - {axis})
+    assert all(
+        x.shape[ax] == arrays[0].shape[ax]
+        for x in arrays
+        for ax in set(range(arrays[0].ndim)) - {axis}
+    )
     if compressed_axes is None:
         compressed_axes = (axis,)
     if arrays[0].ndim == 1:
diff --git a/sparse/pydata_backend/_compressed/compressed.py b/sparse/pydata_backend/_compressed/compressed.py
index bbba0997..c4a2280f 100644
--- a/sparse/pydata_backend/_compressed/compressed.py
+++ b/sparse/pydata_backend/_compressed/compressed.py
@@ -145,10 +145,14 @@ def __init__(
             arg = self.from_scipy_sparse(arg)

         if isinstance(arg, np.ndarray):
-            (arg, shape, compressed_axes, fill_value) = _from_coo(COO(arg), compressed_axes)
+            (arg, shape, compressed_axes, fill_value) = _from_coo(
+                COO(arg), compressed_axes
+            )

         elif isinstance(arg, COO):
-            (arg, shape, compressed_axes, fill_value) = _from_coo(arg, compressed_axes, idx_dtype)
+            (arg, shape, compressed_axes, fill_value) = _from_coo(
+                arg, compressed_axes, idx_dtype
+            )

         elif isinstance(arg, GCXS):
             if compressed_axes is not None and arg.compressed_axes != compressed_axes:
@@ -175,7 +179,9 @@ def __init__(

         self.shape = shape
-        self._compressed_axes = tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
+        self._compressed_axes = (
+            tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
+        )
         self.fill_value = fill_value

         if prune:
@@ -199,19 +205,27 @@ def from_numpy(cls, x, compressed_axes=None, fill_value=0, idx_dtype=None):

     @classmethod
     def from_coo(cls, x, compressed_axes=None, idx_dtype=None):
-        (arg, shape, compressed_axes, fill_value) = _from_coo(x, compressed_axes, idx_dtype)
-        return cls(arg, shape=shape, compressed_axes=compressed_axes, fill_value=fill_value)
+        (arg, shape, compressed_axes, fill_value) = _from_coo(
+            x, compressed_axes, idx_dtype
+        )
+        return cls(
+            arg, shape=shape, compressed_axes=compressed_axes, fill_value=fill_value
+        )

     @classmethod
     def from_scipy_sparse(cls, x):
         if x.format == "csc":
-            return cls((x.data, x.indices, x.indptr), shape=x.shape, compressed_axes=(1,))
+            return cls(
+                (x.data, x.indices, x.indptr), shape=x.shape, compressed_axes=(1,)
+            )
         x = x.asformat("csr")
         return cls((x.data, x.indices, x.indptr), shape=x.shape, compressed_axes=(0,))

     @classmethod
-    def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None, idx_dtype=None):
+    def from_iter(
+        cls, x, shape=None, compressed_axes=None, fill_value=None, idx_dtype=None
+    ):
         return cls.from_coo(
             COO.from_iter(x, shape, fill_value),
             compressed_axes,
@@ -297,7 +311,9 @@ def nbytes(self):
     @property
     def _axis_order(self):
         axis_order = list(self.compressed_axes)
-        axis_order.extend(np.setdiff1d(np.arange(len(self.shape)), self.compressed_axes))
+        axis_order.extend(
+            np.setdiff1d(np.arange(len(self.shape)), self.compressed_axes)
+        )
         return axis_order

     @property
@@ -381,7 +397,8 @@ def change_compressed_axes(self, new_compressed_axes):
             raise NotImplementedError("no axes to compress for 1d array")

         new_compressed_axes = tuple(
-            normalize_axis(new_compressed_axes[i], self.ndim) for i in range(len(new_compressed_axes))
+            normalize_axis(new_compressed_axes[i], self.ndim)
+            for i in range(len(new_compressed_axes))
         )

         if new_compressed_axes == self.compressed_axes:
@@ -489,12 +506,18 @@ def to_scipy_sparse(self):
         check_zero_fill_value(self)

         if self.ndim != 2:
-            raise ValueError("Can only convert a 2-dimensional array to a Scipy sparse matrix.")
+            raise ValueError(
+                "Can only convert a 2-dimensional array to a Scipy sparse matrix."
+            )

         if 0 in self.compressed_axes:
-            return scipy.sparse.csr_matrix((self.data, self.indices, self.indptr), shape=self.shape)
+            return scipy.sparse.csr_matrix(
+                (self.data, self.indices, self.indptr), shape=self.shape
+            )

-        return scipy.sparse.csc_matrix((self.data, self.indices, self.indptr), shape=self.shape)
+        return scipy.sparse.csc_matrix(
+            (self.data, self.indices, self.indptr), shape=self.shape
+        )

     def asformat(self, format, **kwargs):
         """
@@ -564,7 +587,9 @@ def maybe_densify(self, max_size=1000, min_density=0.25):
         """

         if self.size > max_size and self.density < min_density:
-            raise ValueError("Operation would require converting large sparse array to dense")
+            raise ValueError(
+                "Operation would require converting large sparse array to dense"
+            )

         return self.todense()
@@ -626,7 +651,9 @@ def reshape(self, shape, order="C", compressed_axes=None):
             return self

         if self.size != reduce(operator.mul, shape, 1):
-            raise ValueError(f"cannot reshape array of size {self.size} into shape {shape}")
+            raise ValueError(
+                f"cannot reshape array of size {self.size} into shape {shape}"
+            )

         if len(shape) == 0:
             return self.tocoo().reshape(shape).asformat("gcxs")
@@ -724,7 +751,9 @@ def _2d_transpose(self):
         numpy.ndarray.transpose : Numpy equivalent function.
         """
         if self.ndim != 2:
-            raise ValueError(f"cannot perform 2d transpose on array with dimension {self.ndim}")
+            raise ValueError(
+                f"cannot perform 2d transpose on array with dimension {self.ndim}"
+            )

         compressed_axes = [(self.compressed_axes[0] + 1) % 2]
         shape = self.shape[::-1]
@@ -810,14 +839,20 @@ def _prune(self):
         self.indices = self.indices[mask]

     def isinf(self):
-        return self.tocoo().isinf().asformat("gcxs", compressed_axes=self.compressed_axes)
+        return (
+            self.tocoo().isinf().asformat("gcxs", compressed_axes=self.compressed_axes)
+        )

     def isnan(self):
-        return self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes)
+        return (
+            self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes)
+        )


 class _Compressed2d(GCXS):
-    def __init__(self, arg, shape=None, compressed_axes=None, prune=False, fill_value=0):
+    def __init__(
+        self, arg, shape=None, compressed_axes=None, prune=False, fill_value=0
+    ):
         if not hasattr(arg, "shape") and shape is None:
             raise ValueError("missing `shape` argument")
         if shape is not None and hasattr(arg, "shape"):
diff --git a/sparse/pydata_backend/_compressed/convert.py b/sparse/pydata_backend/_compressed/convert.py
index c15155cd..3303d62f 100644
--- a/sparse/pydata_backend/_compressed/convert.py
+++ b/sparse/pydata_backend/_compressed/convert.py
@@ -200,7 +200,9 @@ def _resize(x, shape, compressed_axes):


 @numba.jit(nopython=True, nogil=True)
-def _c_ordering(linear, c_linear, reordered_shape, sorted_axis_order, shape):  # pragma: no cover
+def _c_ordering(
+    linear, c_linear, reordered_shape, sorted_axis_order, shape
+):  # pragma: no cover
     for i, n in enumerate(linear):
         # c ordering
         current_coords = unravel_index(n, reordered_shape)[sorted_axis_order]
diff --git a/sparse/pydata_backend/_compressed/indexing.py b/sparse/pydata_backend/_compressed/indexing.py
index c78a03b0..e6b99784 100644
--- a/sparse/pydata_backend/_compressed/indexing.py
+++ b/sparse/pydata_backend/_compressed/indexing.py
@@ -29,7 +29,10 @@ def getitem(x, key):
     key = list(normalize_index(key, x.shape))

     # zip_longest so things like x[..., None] are picked up.
-    if len(key) != 0 and all(isinstance(k, slice) and k == slice(0, dim, 1) for k, dim in zip_longest(key, x.shape)):
+    if len(key) != 0 and all(
+        isinstance(k, slice) and k == slice(0, dim, 1)
+        for k, dim in zip_longest(key, x.shape)
+    ):
         return x

     # return a single element
@@ -114,7 +117,11 @@ def getitem(x, key):

     if np.any(compressed_inds):
         compressed_axes = shape_key[compressed_inds]
-        row_size = shape[compressed_axes] if len(compressed_axes) == 1 else np.prod(shape[compressed_axes])
+        row_size = (
+            shape[compressed_axes]
+            if len(compressed_axes) == 1
+            else np.prod(shape[compressed_axes])
+        )

     # if only indexing through uncompressed axes
     else:
@@ -144,7 +151,9 @@ def getitem(x, key):
             indices = uncompressed % size
             indptr = np.empty(shape[0] + 1, dtype=x.indptr.dtype)
             indptr[0] = 0
-            np.cumsum(np.bincount(uncompressed // size, minlength=shape[0]), out=indptr[1:])
+            np.cumsum(
+                np.bincount(uncompressed // size, minlength=shape[0]), out=indptr[1:]
+            )
         if not np.any(compressed_inds):
             if len(shape) == 1:
                 indptr = None
@@ -171,11 +180,15 @@ def getitem(x, key):
     if len(shape) == 1:
         compressed_axes = None

-    return GCXS(arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value)
+    return GCXS(
+        arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value
+    )


 @numba.jit(nopython=True, nogil=True)
-def get_slicing_selection(arr_data, arr_indices, indptr, starts, ends, col):  # pragma: no cover
+def get_slicing_selection(
+    arr_data, arr_indices, indptr, starts, ends, col
+):  # pragma: no cover
     """
     When the requested elements come in a strictly ascending order, as is the case with
     acsending slices, we can iteratively reduce the search space,
@@ -212,7 +225,9 @@ def get_slicing_selection(arr_data, arr_indices, indptr, starts, ends, col):  #
             col_count = 0
             while col_count < len(col):
                 while (
-                    col_count < len(col) and size < len(current_row) and col[col_count] < current_row[size]
+                    col_count < len(col)
+                    and size < len(current_row)
+                    and col[col_count] < current_row[size]
                 ):  # skip needless searches
                     col_count += 1
                 if col_count >= len(col):  # check again because of previous loop
@@ -238,7 +253,9 @@


 @numba.jit(nopython=True, nogil=True)
-def get_array_selection(arr_data, arr_indices, indptr, starts, ends, col):  # pragma: no cover
+def get_array_selection(
+    arr_data, arr_indices, indptr, starts, ends, col
+):  # pragma: no cover
     """
     This is a very general algorithm to be used when more optimized methods don't apply.
     It performs a binary search for each of the requested elements.
diff --git a/sparse/pydata_backend/_coo/common.py b/sparse/pydata_backend/_coo/common.py
index 286eaf9b..eb446ed6 100644
--- a/sparse/pydata_backend/_coo/common.py
+++ b/sparse/pydata_backend/_coo/common.py
@@ -46,7 +46,9 @@ def asCOO(x, name="asCOO", check=True):
     from .core import COO

     if check and not _is_sparse(x):
-        raise ValueError(f"Performing this operation would produce a dense result: {name}")
+        raise ValueError(
+            f"Performing this operation would produce a dense result: {name}"
+        )

     if not isinstance(x, COO):
         x = COO(x)
@@ -164,7 +166,11 @@ def concatenate(arrays, axis=0):
     arrays = [x if isinstance(x, COO) else COO(x) for x in arrays]
     axis = normalize_axis(axis, arrays[0].ndim)
-    assert all(x.shape[ax] == arrays[0].shape[ax] for x in arrays for ax in set(range(arrays[0].ndim)) - {axis})
+    assert all(
+        x.shape[ax] == arrays[0].shape[ax]
+        for x in arrays
+        for ax in set(range(arrays[0].ndim)) - {axis}
+    )
     nnz = 0
     dim = sum(x.shape[axis] for x in arrays)
     shape = list(arrays[0].shape)
@@ -280,7 +286,9 @@ def triu(x, k=0):
     check_zero_fill_value(x)

     if not x.ndim >= 2:
-        raise NotImplementedError("sparse.triu is not implemented for scalars or 1-D arrays.")
+        raise NotImplementedError(
+            "sparse.triu is not implemented for scalars or 1-D arrays."
+        )

     mask = x.coords[-2] + k <= x.coords[-1]
@@ -321,7 +329,9 @@ def tril(x, k=0):
     check_zero_fill_value(x)

     if not x.ndim >= 2:
-        raise NotImplementedError("sparse.tril is not implemented for scalars or 1-D arrays.")
+        raise NotImplementedError(
+            "sparse.tril is not implemented for scalars or 1-D arrays."
+        )

     mask = x.coords[-2] + k >= x.coords[-1]
@@ -389,7 +399,10 @@ def nanmean(x, axis=None, keepdims=False, dtype=None, out=None):
     assert out is None
     x = asCOO(x, name="nanmean")

-    if not (np.issubdtype(x.dtype, np.floating) or np.issubdtype(x.dtype, np.complexfloating)):
+    if not (
+        np.issubdtype(x.dtype, np.floating)
+        or np.issubdtype(x.dtype, np.complexfloating)
+    ):
         return x.mean(axis=axis, keepdims=keepdims, dtype=dtype)

     mask = np.isnan(x)
@@ -855,7 +868,9 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
     if a.shape[axis1] != a.shape[axis2]:
         raise ValueError("a.shape[axis1] != a.shape[axis2]")

-    diag_axes = [axis for axis in range(len(a.shape)) if axis != axis1 and axis != axis2] + [axis1]
+    diag_axes = [
+        axis for axis in range(len(a.shape)) if axis != axis1 and axis != axis2
+    ] + [axis1]
     diag_shape = [a.shape[axis] for axis in diag_axes]
     diag_shape[-1] -= abs(offset)
@@ -1009,7 +1024,13 @@ def _diagonal_idx(coordlist, axis1, axis2, offset):
     offset : int
         Offset of the diagonal from the main diagonal. Defaults to main diagonal (0).
     """
-    return np.array([i for i in range(len(coordlist[axis1])) if coordlist[axis1][i] + offset == coordlist[axis2][i]])
+    return np.array(
+        [
+            i
+            for i in range(len(coordlist[axis1]))
+            if coordlist[axis1][i] + offset == coordlist[axis2][i]
+        ]
+    )


 def clip(a, a_min=None, a_max=None, out=None):
@@ -1097,7 +1118,9 @@ def expand_dims(x, /, *, axis=0):

     axis = normalize_axis(axis, x.ndim + 1)

-    new_coords = np.insert(x.coords, obj=axis, values=np.zeros(x.nnz, dtype=np.intp), axis=0)
+    new_coords = np.insert(
+        x.coords, obj=axis, values=np.zeros(x.nnz, dtype=np.intp), axis=0
+    )
     new_shape = list(x.shape)
     new_shape.insert(axis, 1)
     new_shape = tuple(new_shape)
@@ -1296,9 +1319,18 @@ def sort(x, /, *, axis=-1, descending=False):
     x_shape = x.shape
     x = x.reshape((-1, x_shape[-1]))

-    new_coords, new_data = _sort_coo(x.coords, x.data, x.fill_value, sort_axis_len=x_shape[-1], descending=descending)
+    new_coords, new_data = _sort_coo(
+        x.coords, x.data, x.fill_value, sort_axis_len=x_shape[-1], descending=descending
+    )

-    x = COO(new_coords, new_data, x.shape, has_duplicates=False, sorted=True, fill_value=x.fill_value)
+    x = COO(
+        new_coords,
+        new_data,
+        x.shape,
+        has_duplicates=False,
+        sorted=True,
+        fill_value=x.fill_value,
+    )
     x = x.reshape(x_shape[:-1] + (x_shape[-1],))
     x = moveaxis(x, source=-1, destination=axis)
@@ -1350,7 +1382,9 @@ def _validate_coo_input(x: Any):
     if _is_scipy_sparse_obj(x):
         x = COO.from_scipy_sparse(x)
     elif not isinstance(x, SparseArray):
-        raise ValueError(f"Input must be an instance of SparseArray, but it's {type(x)}.")
+        raise ValueError(
+            f"Input must be an instance of SparseArray, but it's {type(x)}."
+        )
     elif not isinstance(x, COO):
         x = x.asformat(COO)
@@ -1436,11 +1470,17 @@ def _compute_minmax_args(
         masked_reduce_coords = reduce_coords[mask]
         masked_data = data[mask]

-        compared_data = operator.gt(masked_data, fill_value) if max_mode_flag else operator.lt(masked_data, fill_value)
+        compared_data = (
+            operator.gt(masked_data, fill_value)
+            if max_mode_flag
+            else operator.lt(masked_data, fill_value)
+        )

         if np.any(compared_data) or len(masked_data) == reduce_size:
             # best value is a non-fill value
-            best_arg = np.argmax(masked_data) if max_mode_flag else np.argmin(masked_data)
+            best_arg = (
+                np.argmax(masked_data) if max_mode_flag else np.argmin(masked_data)
+            )
             result_data.append(masked_reduce_coords[best_arg])
         else:
             # best value is a fill value, find the first occurrence of it
@@ -1477,7 +1517,9 @@ def _arg_minmax_common(
     if not isinstance(axis, (int, type(None))):
         raise ValueError(f"`axis` must be `int` or `None`, but it's: {type(axis)}.")
     if isinstance(axis, int) and axis >= x.ndim:
-        raise ValueError(f"`axis={axis}` is out of bounds for array of dimension {x.ndim}.")
+        raise ValueError(
+            f"`axis={axis}` is out of bounds for array of dimension {x.ndim}."
+        )
     if x.ndim == 0:
         raise ValueError("Input array must be at least 1-D, but it's 0-D.")
@@ -1517,7 +1559,9 @@ def _arg_minmax_common(

     from .core import COO

-    result = COO(result_indices, result_data, shape=(x.shape[1],), fill_value=0, prune=True)
+    result = COO(
+        result_indices, result_data, shape=(x.shape[1],), fill_value=0, prune=True
+    )

     # Let's reshape the result to the original shape.
     result = result.reshape((1, *new_shape[1:]))
diff --git a/sparse/pydata_backend/_coo/core.py b/sparse/pydata_backend/_coo/core.py
index 340e64d8..ea150447 100644
--- a/sparse/pydata_backend/_coo/core.py
+++ b/sparse/pydata_backend/_coo/core.py
@@ -218,7 +218,9 @@ def __init__(
         )

         if data is None:
-            arr = as_coo(coords, shape=shape, fill_value=fill_value, idx_dtype=idx_dtype)
+            arr = as_coo(
+                coords, shape=shape, fill_value=fill_value, idx_dtype=idx_dtype
+            )
             self._make_shallow_copy_of(arr)
             if cache:
                 self.enable_caching()
@@ -254,12 +256,16 @@ def __init__(
             shape = tuple(shape)

         if shape and not self.coords.size:
-            self.coords = np.zeros((len(shape) if isinstance(shape, Iterable) else 1, 0), dtype=np.intp)
+            self.coords = np.zeros(
+                (len(shape) if isinstance(shape, Iterable) else 1, 0), dtype=np.intp
+            )
         super().__init__(shape, fill_value=fill_value)
         if idx_dtype:
             if not can_store(idx_dtype, max(shape)):
-                raise ValueError(f"cannot cast array with shape {shape} to dtype {idx_dtype}.")
+                raise ValueError(
+                    f"cannot cast array with shape {shape} to dtype {idx_dtype}."
+                )
             self.coords = self.coords.astype(idx_dtype)

         if self.shape:
@@ -272,7 +278,9 @@ def __init__(
                     "shape of `coords`; len(shape)={} != coords.shape[0]={}"
                     "(and coords.shape={})"
                 )
-                raise ValueError(msg.format(len(shape), self.coords.shape[0], self.coords.shape))
+                raise ValueError(
+                    msg.format(len(shape), self.coords.shape[0], self.coords.shape)
+                )

         from .._settings import WARN_ON_TOO_DENSE
@@ -305,7 +313,9 @@ def __dask_tokenize__(self):
         "Produce a deterministic, content-based hash for dask."
         from dask.base import normalize_token

-        return normalize_token((type(self), self.coords, self.data, self.shape, self.fill_value))
+        return normalize_token(
+            (type(self), self.coords, self.data, self.shape, self.fill_value)
+        )

     def copy(self, deep=True):
         """Return a copy of the array.
@@ -538,7 +548,10 @@ def from_iter(cls, x, shape=None, fill_value=None, dtype=None):
         data = np.array([item[1] for item in x], dtype=dtype)

         if not (
-            coords.ndim == 2 and data.ndim == 1 and np.issubdtype(coords.dtype, np.integer) and np.all(coords >= 0)
+            coords.ndim == 2
+            and data.ndim == 1
+            and np.issubdtype(coords.dtype, np.integer)
+            and np.all(coords >= 0)
         ):
             raise ValueError("Invalid iterable to convert to COO.")
@@ -867,7 +880,9 @@ def swapaxes(self, axis1, axis2):
         """
         # Normalize all axis1, axis2 to positive values
-        axis1, axis2 = normalize_axis((axis1, axis2), self.ndim)  # checks if axis1,2 are in range + raises ValueError
+        axis1, axis2 = normalize_axis(
+            (axis1, axis2), self.ndim
+        )  # checks if axis1,2 are in range + raises ValueError
         axes = list(range(self.ndim))
         axes[axis1], axes[axis2] = axes[axis2], axes[axis1]
         return self.transpose(axes)
@@ -1025,7 +1040,9 @@ def reshape(self, shape, order="C"):
             shape = tuple([d if d != -1 else extra for d in shape])

         if self.size != reduce(operator.mul, shape, 1):
-            raise ValueError(f"cannot reshape array of size {self.size} into shape {shape}")
+            raise ValueError(
+                f"cannot reshape array of size {self.size} into shape {shape}"
+            )

         if self._cache is not None:
             for sh, value in self._cache["reshape"]:
@@ -1092,7 +1109,9 @@ def squeeze(self, axis=None):

         for d in axis:
             if d not in squeezable_dims:
-                raise ValueError(f"Specified axis `{d}` has a size greater than one: {self.shape[d]}")
+                raise ValueError(
+                    f"Specified axis `{d}` has a size greater than one: {self.shape[d]}"
+                )

         retained_dims = [d for d in range(self.ndim) if d not in axis]
@@ -1122,7 +1141,11 @@ def resize(self, *args, refcheck=True, coords_dtype=np.intp):
         numpy.ndarray.resize : The equivalent Numpy function.

         """
-        warnings.warn("resize is deprecated on all SpraseArray objects.", DeprecationWarning, stacklevel=1)
+        warnings.warn(
+            "resize is deprecated on all SpraseArray objects.",
+            DeprecationWarning,
+            stacklevel=1,
+        )
         if len(args) == 1 and isinstance(args[0], tuple):
             shape = args[0]
         elif all(isinstance(arg, int) for arg in args):
@@ -1181,9 +1204,13 @@ def to_scipy_sparse(self):
         check_zero_fill_value(self)

         if self.ndim != 2:
-            raise ValueError("Can only convert a 2-dimensional array to a Scipy sparse matrix.")
+            raise ValueError(
+                "Can only convert a 2-dimensional array to a Scipy sparse matrix."
+            )

-        result = scipy.sparse.coo_matrix((self.data, (self.coords[0], self.coords[1])), shape=self.shape)
+        result = scipy.sparse.coo_matrix(
+            (self.data, (self.coords[0], self.coords[1])), shape=self.shape
+        )
         result.has_canonical_format = True
         return result
@@ -1191,7 +1218,9 @@ def _tocsr(self):
         import scipy.sparse

         if self.ndim != 2:
-            raise ValueError("This array must be two-dimensional for this conversion to work.")
+            raise ValueError(
+                "This array must be two-dimensional for this conversion to work."
+            )
         row, col = self.coords

         # Pass 3: count nonzeros in each row
@@ -1430,7 +1459,9 @@ def maybe_densify(self, max_size=1000, min_density=0.25):
         ValueError: Operation would require converting large sparse array to dense
         """
         if self.size > max_size and self.density < min_density:
-            raise ValueError("Operation would require converting large sparse array to dense")
+            raise ValueError(
+                "Operation would require converting large sparse array to dense"
+            )

         return self.todense()
@@ -1563,10 +1594,14 @@ def as_coo(x, shape=None, fill_value=None, idx_dtype=None):
     from .._common import _is_scipy_sparse_obj

     if hasattr(x, "shape") and shape is not None:
-        raise ValueError("Cannot provide a shape in combination with something that already has a shape.")
+        raise ValueError(
+            "Cannot provide a shape in combination with something that already has a shape."
+        )

     if hasattr(x, "fill_value") and fill_value is not None:
-        raise ValueError("Cannot provide a fill-value in combination with something that already has a fill-value.")
+        raise ValueError(
+            "Cannot provide a fill-value in combination with something that already has a fill-value."
+        )

     if isinstance(x, SparseArray):
         return x.asformat("coo")
diff --git a/sparse/pydata_backend/_coo/indexing.py b/sparse/pydata_backend/_coo/indexing.py
index be525c74..0759b5a7 100644
--- a/sparse/pydata_backend/_coo/indexing.py
+++ b/sparse/pydata_backend/_coo/indexing.py
@@ -40,7 +40,9 @@ def getitem(x, index):
             coords.extend(idx[1:])

         fill_value_idx = np.asarray(x.fill_value[index]).flatten()
-        fill_value = fill_value_idx[0] if fill_value_idx.size else _zero_of_dtype(data.dtype)[()]
+        fill_value = (
+            fill_value_idx[0] if fill_value_idx.size else _zero_of_dtype(data.dtype)[()]
+        )

         if not equivalent(fill_value, fill_value_idx).all():
             raise ValueError("Fill-values in the array are inconsistent.")
@@ -66,7 +68,8 @@ def getitem(x, index):

     # zip_longest so things like x[..., None] are picked up.
     if len(index) != 0 and all(
-        isinstance(ind, slice) and ind == slice(0, dim, 1) for ind, dim in zip_longest(index, x.shape)
+        isinstance(ind, slice) and ind == slice(0, dim, 1)
+        for ind, dim in zip_longest(index, x.shape)
     ):
         return x
@@ -74,7 +77,11 @@ def getitem(x, index):
     mask, adv_idx = _mask(x.coords, index, x.shape)

     # Get the length of the mask
-    n = len(range(mask.start, mask.stop, mask.step)) if isinstance(mask, slice) else len(mask)
+    n = (
+        len(range(mask.start, mask.stop, mask.step))
+        if isinstance(mask, slice)
+        else len(mask)
+    )

     coords = []
     shape = []
@@ -165,7 +172,9 @@ def _mask(coords, indices, shape):
         if adv_idx.ndim != 1:
             raise IndexError("Only one-dimensional iterable indices supported.")

-        mask, aidxs = _compute_multi_mask(coords, _ind_ar_from_indices(indices), adv_idx, adv_idx_pos)
+        mask, aidxs = _compute_multi_mask(
+            coords, _ind_ar_from_indices(indices), adv_idx, adv_idx_pos
+        )
         return mask, _AdvIdxInfo(aidxs, adv_idx_pos, len(adv_idx))

     mask, is_slice = _compute_mask(coords, _ind_ar_from_indices(indices))
@@ -289,7 +298,9 @@ def _separate_adv_indices(indices):


 @numba.jit(nopython=True, nogil=True)
-def _compute_multi_axis_multi_mask(coords, indices, adv_idx, adv_idx_pos):  # pragma: no cover
+def _compute_multi_axis_multi_mask(
+    coords, indices, adv_idx, adv_idx_pos
+):  # pragma: no cover
     """
     Computes a mask with the advanced index, and also returns the advanced index
     dimension.
@@ -466,8 +477,13 @@ def _compute_mask(coords, indices):  # pragma: no cover
         # (n_searches * log(avg_length))
         # The other is an estimated time of a linear filter for the mask.
n_pairs = len(starts) - n_current_slices = len(range(indices[i, 0], indices[i, 1], indices[i, 2])) * n_pairs + 2 - if n_current_slices * np.log(n_current_slices / max(n_pairs, 1)) > n_matches + n_pairs: + n_current_slices = ( + len(range(indices[i, 0], indices[i, 1], indices[i, 2])) * n_pairs + 2 + ) + if ( + n_current_slices * np.log(n_current_slices / max(n_pairs, 1)) + > n_matches + n_pairs + ): break # For each of the pairs, search inside the coordinates for other @@ -539,8 +555,14 @@ def _get_mask_pairs(starts_old, stops_old, c, idx): # pragma: no cover # For each matching "integer" in the slice, search within the "sub-coords" # Using binary search. for p_match in range(idx[0], idx[1], idx[2]): - start = np.searchsorted(c[starts_old[j] : stops_old[j]], p_match, side="left") + starts_old[j] - stop = np.searchsorted(c[starts_old[j] : stops_old[j]], p_match, side="right") + starts_old[j] + start = ( + np.searchsorted(c[starts_old[j] : stops_old[j]], p_match, side="left") + + starts_old[j] + ) + stop = ( + np.searchsorted(c[starts_old[j] : stops_old[j]], p_match, side="right") + + starts_old[j] + ) if start != stop: starts.append(start) @@ -597,7 +619,8 @@ def _filter_pairs(starts, stops, coords, indices): # pragma: no cover elem = coords[k, j] match &= (elem - idx[0]) % idx[2] == 0 and ( - (idx[2] > 0 and idx[0] <= elem < idx[1]) or (idx[2] < 0 and idx[0] >= elem > idx[1]) + (idx[2] > 0 and idx[0] <= elem < idx[1]) + or (idx[2] < 0 and idx[0] >= elem > idx[1]) ) # and append to the mask if so. diff --git a/sparse/pydata_backend/_coo/numba_extension.py b/sparse/pydata_backend/_coo/numba_extension.py index 6e1ec835..e5f726a1 100644 --- a/sparse/pydata_backend/_coo/numba_extension.py +++ b/sparse/pydata_backend/_coo/numba_extension.py @@ -63,7 +63,9 @@ def fill_value_type(self): @typeof_impl.register(COO) def _typeof_COO(val: COO, c) -> COOType: - return COOType(data_dtype=val.data.dtype, coords_dtype=val.coords.dtype, ndim=val.ndim) + return COOType( + data_dtype=val.data.dtype, coords_dtype=val.coords.dtype, ndim=val.ndim + ) @register_model(COOType) @@ -99,7 +101,9 @@ def impl_COO(context, builder, sig, args): coo.coords = coords coo.data = data coo.shape = shape - coo.fill_value = context.get_constant_generic(builder, typ.fill_value_type, _zero_of_dtype(typ.data_dtype)) + coo.fill_value = context.get_constant_generic( + builder, typ.fill_value_type, _zero_of_dtype(typ.data_dtype) + ) return impl_ret_borrowed(context, builder, sig.return_type, coo._getvalue()) @@ -108,7 +112,9 @@ def lower_constant_COO(context, builder, typ, pyval): coords = context.get_constant_generic(builder, typ.coords_type, pyval.coords) data = context.get_constant_generic(builder, typ.data_type, pyval.data) shape = context.get_constant_generic(builder, typ.shape_type, pyval.shape) - fill_value = context.get_constant_generic(builder, typ.fill_value_type, pyval.fill_value) + fill_value = context.get_constant_generic( + builder, typ.fill_value_type, pyval.fill_value + ) return impl_ret_borrowed( context, builder, diff --git a/sparse/pydata_backend/_dok.py b/sparse/pydata_backend/_dok.py index 95f9b29d..e2b1a94b 100644 --- a/sparse/pydata_backend/_dok.py +++ b/sparse/pydata_backend/_dok.py @@ -123,7 +123,9 @@ def __init__(self, shape, data=None, dtype=None, fill_value=None): if not len(data): self.dtype = np.dtype("float64") else: - self.dtype = np.result_type(*(np.asarray(x).dtype for x in data.values())) + self.dtype = np.result_type( + *(np.asarray(x).dtype for x in data.values()) + ) for c, d in data.items(): 
self[c] = d @@ -322,7 +324,9 @@ def __getitem__(self, key): if all(isinstance(k, Iterable) for k in key): if len(key) != self.ndim: - raise NotImplementedError(f"Index sequences for all {self.ndim} array dimensions needed!") + raise NotImplementedError( + f"Index sequences for all {self.ndim} array dimensions needed!" + ) if not all(len(key[0]) == len(k) for k in key): raise IndexError("Unequal length of index sequences!") return self._fancy_getitem(key) @@ -352,12 +356,18 @@ def __setitem__(self, key, value): value = np.asarray(value, dtype=self.dtype) # 1D fancy indexing - if self.ndim == 1 and isinstance(key, Iterable) and all(isinstance(i, (int, np.integer)) for i in key): + if ( + self.ndim == 1 + and isinstance(key, Iterable) + and all(isinstance(i, (int, np.integer)) for i in key) + ): key = (key,) if isinstance(key, tuple) and all(isinstance(k, Iterable) for k in key): if len(key) != self.ndim: - raise NotImplementedError(f"Index sequences for all {self.ndim} array dimensions needed!") + raise NotImplementedError( + f"Index sequences for all {self.ndim} array dimensions needed!" + ) if not all(len(key[0]) == len(k) for k in key): raise IndexError("Unequal length of index sequences!") self._fancy_setitem(key, value) @@ -380,7 +390,9 @@ def _fancy_setitem(self, idxs, values): elif values.ndim > 1: raise ValueError(f"Dimension of values ({values.ndim}) must be 0 or 1!") if not idxs[0].shape == values.shape: - raise ValueError(f"Shape mismatch of indices ({idxs[0].shape}) and values ({values.shape})!") + raise ValueError( + f"Shape mismatch of indices ({idxs[0].shape}) and values ({values.shape})!" + ) fill_value = self.fill_value data = self.data for idx, value in zip(zip(*idxs), values): @@ -390,7 +402,9 @@ def _fancy_setitem(self, idxs, values): del data[idx] def _setitem(self, key_list, value): - value_missing_dims = len([ind for ind in key_list if isinstance(ind, slice)]) - value.ndim + value_missing_dims = ( + len([ind for ind in key_list if isinstance(ind, slice)]) - value.ndim + ) if value_missing_dims < 0: raise ValueError("setting an array element with a sequence.") @@ -416,12 +430,18 @@ def _setitem(self, key_list, value): key_list_temp = key_list[:] for v_idx, ki in enumerate(range(start, stop, step)): key_list_temp[i] = ki - vi = value if value_missing_dims > 0 else (value[0] if value.shape[0] == 1 else value[v_idx]) + vi = ( + value + if value_missing_dims > 0 + else (value[0] if value.shape[0] == 1 else value[v_idx]) + ) self._setitem(key_list_temp, vi) return if not isinstance(ind, Integral): - raise IndexError("All indices must be slices or integers when setting an item.") + raise IndexError( + "All indices must be slices or integers when setting an item." 
+                )
 
         key = tuple(key_list)
         if not equivalent(value, self.fill_value):
diff --git a/sparse/pydata_backend/_io.py b/sparse/pydata_backend/_io.py
index dcabdc3a..d16f5692 100644
--- a/sparse/pydata_backend/_io.py
+++ b/sparse/pydata_backend/_io.py
@@ -129,4 +129,6 @@ def load_npz(filename):
             compressed_axes=comp_axes,
         )
     except KeyError as e:
-        raise RuntimeError(f"The file {filename!s} does not contain a valid sparse matrix") from e
+        raise RuntimeError(
+            f"The file {filename!s} does not contain a valid sparse matrix"
+        ) from e
diff --git a/sparse/pydata_backend/_slicing.py b/sparse/pydata_backend/_slicing.py
index 622000f7..bc3928c5 100644
--- a/sparse/pydata_backend/_slicing.py
+++ b/sparse/pydata_backend/_slicing.py
@@ -78,7 +78,9 @@ def replace_ellipsis(n, index):
 
     loc = isellipsis[0]
     extra_dimensions = n - (len(index) - sum(i is None for i in index) - 1)
-    return index[:loc] + (slice(None, None, None),) * extra_dimensions + index[loc + 1 :]
+    return (
+        index[:loc] + (slice(None, None, None),) * extra_dimensions + index[loc + 1 :]
+    )
 
 
 def check_index(ind, dimension):
@@ -109,7 +111,10 @@ def check_index(ind, dimension):
     # unknown dimension, assumed to be in bounds
     if isinstance(ind, Iterable):
         x = np.asanyarray(ind)
-        if np.issubdtype(x.dtype, np.integer) and ((x >= dimension) | (x < -dimension)).any():
+        if (
+            np.issubdtype(x.dtype, np.integer)
+            and ((x >= dimension) | (x < -dimension)).any()
+        ):
             raise IndexError(f"Index out of bounds for dimension {dimension:d}")
         if x.dtype == np.bool_ and len(x) != dimension:
             raise IndexError(
@@ -125,7 +130,9 @@ def check_index(ind, dimension):
             )
 
     elif ind >= dimension:
-        raise IndexError(f"Index is not smaller than dimension {ind:d} >= {dimension:d}")
+        raise IndexError(
+            f"Index is not smaller than dimension {ind:d} >= {dimension:d}"
+        )
 
     elif ind < -dimension:
         msg = "Negative index is not greater than negative dimension {:d} <= -{:d}"
diff --git a/sparse/pydata_backend/_sparse_array.py b/sparse/pydata_backend/_sparse_array.py
index 3a954abf..b46637ee 100644
--- a/sparse/pydata_backend/_sparse_array.py
+++ b/sparse/pydata_backend/_sparse_array.py
@@ -34,7 +34,9 @@ def __init__(self, shape, fill_value=None):
             shape = (shape,)
 
         if not all(isinstance(sh, Integral) and int(sh) >= 0 for sh in shape):
-            raise ValueError("shape must be an non-negative integer or a tuple of non-negative integers.")
+            raise ValueError(
+                "shape must be a non-negative integer or a tuple of non-negative integers."
+            )
 
         self.shape = tuple(int(sh) for sh in shape)
@@ -284,7 +286,11 @@ def __array_function__(self, func, types, args, kwargs):
 
         with contextlib.suppress(AttributeError):
             sparse_func = getattr(type(self), func.__name__)
-        if not isinstance(sparse_func, Callable) and len(args) == 1 and len(kwargs) == 0:
+        if (
+            not isinstance(sparse_func, Callable)
+            and len(args) == 1
+            and len(kwargs) == 0
+        ):
             try:
                 return getattr(self, func.__name__)
             except AttributeError:
@@ -313,10 +319,15 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             return NotImplemented
 
         if getattr(ufunc, "signature", None) is not None:
-            return self.__array_function__(ufunc, (np.ndarray, type(self)), inputs, kwargs)
+            return self.__array_function__(
+                ufunc, (np.ndarray, type(self)), inputs, kwargs
+            )
 
         if out is not None:
-            test_args = [np.empty(1, dtype=a.dtype) if hasattr(a, "dtype") else [a] for a in inputs]
+            test_args = [
+                np.empty(1, dtype=a.dtype) if hasattr(a, "dtype") else [a]
+                for a in inputs
+            ]
             test_kwargs = kwargs.copy()
             if method == "reduce":
                 test_kwargs["axis"] = None
@@ -386,7 +397,9 @@ def reduce(self, method, axis=(0,), keepdims=False, **kwargs):
             reduce_super_ufunc = _reduce_super_ufunc.get(method)
 
             if reduce_super_ufunc is None:
-                raise ValueError(f"Performing this reduction operation would produce a dense result: {method!s}")
+                raise ValueError(
+                    f"Performing this reduction operation would produce a dense result: {method!s}"
+                )
 
         if not isinstance(axis, tuple):
             axis = (axis,)
@@ -397,7 +410,9 @@ def reduce(self, method, axis=(0,), keepdims=False, **kwargs):
         result_fill_value = self.fill_value
         if reduce_super_ufunc is None:
             missing_counts = counts != n_cols
-            data[missing_counts] = method(data[missing_counts], self.fill_value, **kwargs)
+            data[missing_counts] = method(
+                data[missing_counts], self.fill_value, **kwargs
+            )
         else:
             data = method(
                 data,
@@ -569,7 +584,9 @@ def prod(self, axis=None, keepdims=False, dtype=None, out=None):
         --------
        :obj:`numpy.prod` : Equivalent numpy function.
""" - return np.multiply.reduce(self, out=out, axis=axis, keepdims=keepdims, dtype=dtype) + return np.multiply.reduce( + self, out=out, axis=axis, keepdims=keepdims, dtype=dtype + ) def round(self, decimals=0, out=None): """ @@ -585,7 +602,9 @@ def round(self, decimals=0, out=None): """ if out is not None and not isinstance(out, tuple): out = (out,) - return self.__array_ufunc__(np.round, "__call__", self, decimals=decimals, out=out) + return self.__array_ufunc__( + np.round, "__call__", self, decimals=decimals, out=out + ) round_ = round @@ -605,7 +624,9 @@ def clip(self, min=None, max=None, out=None): raise ValueError("One of max or min must be given.") if out is not None and not isinstance(out, tuple): out = (out,) - return self.__array_ufunc__(np.clip, "__call__", self, a_min=min, a_max=max, out=out) + return self.__array_ufunc__( + np.clip, "__call__", self, a_min=min, a_max=max, out=out + ) def astype(self, dtype, casting="unsafe", copy=True): """ @@ -624,7 +645,9 @@ def astype(self, dtype, casting="unsafe", copy=True): # this matches numpy's behavior if self.dtype == dtype and not copy: return self - return self.__array_ufunc__(np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting) + return self.__array_ufunc__( + np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting + ) def mean(self, axis=None, keepdims=False, dtype=None, out=None): """ @@ -699,7 +722,9 @@ def mean(self, axis=None, keepdims=False, dtype=None, out=None): dtype = inter_dtype = np.dtype("f8") else: dtype = self.dtype - inter_dtype = np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype + inter_dtype = ( + np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype + ) else: inter_dtype = dtype @@ -783,7 +808,9 @@ def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1) # Make this warning show up on top. if ddof >= rcount: - warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=1) + warnings.warn( + "Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=1 + ) # Cast bool, unsigned int, and int to float64 by default if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)): diff --git a/sparse/pydata_backend/_umath.py b/sparse/pydata_backend/_umath.py index 1fbf191b..b575c13d 100644 --- a/sparse/pydata_backend/_umath.py +++ b/sparse/pydata_backend/_umath.py @@ -118,7 +118,9 @@ def _get_nary_broadcast_shape(*shapes): result_shape = _get_broadcast_shape(shape, result_shape) except ValueError as e: # noqa: PERF203 shapes_str = ", ".join(str(shape) for shape in shapes) - raise ValueError(f"operands could not be broadcast together with shapes {shapes_str}") from e + raise ValueError( + f"operands could not be broadcast together with shapes {shapes_str}" + ) from e return result_shape @@ -145,10 +147,18 @@ def _get_broadcast_shape(shape1, shape2, is_result=False): If the two shapes cannot be broadcast together. 
""" # https://stackoverflow.com/a/47244284/774273 - if not all((l1 == l2) or (l1 == 1) or ((l2 == 1) and not is_result) for l1, l2 in zip(shape1[::-1], shape2[::-1])): - raise ValueError(f"operands could not be broadcast together with shapes {shape1}, {shape2}") + if not all( + (l1 == l2) or (l1 == 1) or ((l2 == 1) and not is_result) + for l1, l2 in zip(shape1[::-1], shape2[::-1]) + ): + raise ValueError( + f"operands could not be broadcast together with shapes {shape1}, {shape2}" + ) - return tuple(l1 if l1 != 1 else l2 for l1, l2 in zip_longest(shape1[::-1], shape2[::-1], fillvalue=1))[::-1] + return tuple( + l1 if l1 != 1 else l2 + for l1, l2 in zip_longest(shape1[::-1], shape2[::-1], fillvalue=1) + )[::-1] def _get_broadcast_parameters(shape, broadcast_shape): @@ -169,7 +179,8 @@ def _get_broadcast_parameters(shape, broadcast_shape): it needs to be broadcast, and True if it doesn't. """ return [ - None if l1 is None else l1 == l2 for l1, l2 in zip_longest(shape[::-1], broadcast_shape[::-1], fillvalue=None) + None if l1 is None else l1 == l2 + for l1, l2 in zip_longest(shape[::-1], broadcast_shape[::-1], fillvalue=None) ][::-1] @@ -257,7 +268,9 @@ def _get_expanded_coords_data(coords, data, params, broadcast_shape): if first_dim != -1: expanded_data = data[all_idx[first_dim]] else: - expanded_coords = all_idx if len(data) else np.empty((0, all_idx.shape[1]), dtype=np.intp) + expanded_coords = ( + all_idx if len(data) else np.empty((0, all_idx.shape[1]), dtype=np.intp) + ) expanded_data = np.repeat(data, reduce(operator.mul, broadcast_shape, 1)) return np.asarray(expanded_coords), np.asarray(expanded_data) @@ -373,7 +386,9 @@ def broadcast_to(x, shape): # Check if all the non-broadcast axes are next to each other nonbroadcast_idx = [idx for idx, p in enumerate(params) if p] - diff_nonbroadcast_idx = [a - b for a, b in zip(nonbroadcast_idx[1:], nonbroadcast_idx[:-1])] + diff_nonbroadcast_idx = [ + a - b for a, b in zip(nonbroadcast_idx[1:], nonbroadcast_idx[:-1]) + ] sorted = all(d == 1 for d in diff_nonbroadcast_idx) return COO( @@ -475,7 +490,9 @@ def get_result(self): data_list = [] coords_list = [] - for mask in itertools.product(*[[True, False] if isinstance(arg, COO) else [None] for arg in self.args]): + for mask in itertools.product( + *[[True, False] if isinstance(arg, COO) else [None] for arg in self.args] + ): if not any(mask): continue @@ -486,9 +503,15 @@ def get_result(self): data_list.append(r[1]) # Concatenate matches and mismatches - data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=self.fill_value.dtype) + data = ( + np.concatenate(data_list) + if len(data_list) + else np.empty((0,), dtype=self.fill_value.dtype) + ) coords = ( - np.concatenate(coords_list, axis=1) if len(coords_list) else np.empty((0, len(self.shape)), dtype=np.intp) + np.concatenate(coords_list, axis=1) + if len(coords_list) + else np.empty((0, len(self.shape)), dtype=np.intp) ) return COO( @@ -511,20 +534,26 @@ def _get_fill_value(self): from ._coo import COO zero_args = tuple( - np.asarray(arg.fill_value, like=arg.data) if isinstance(arg, COO) else arg for arg in self.args + np.asarray(arg.fill_value, like=arg.data) if isinstance(arg, COO) else arg + for arg in self.args ) # Some elemwise functions require a dtype argument, some abhorr it. 
         try:
-            fill_value_array = self.func(*np.broadcast_arrays(*zero_args), dtype=self.dtype, **self.kwargs)
+            fill_value_array = self.func(
+                *np.broadcast_arrays(*zero_args), dtype=self.dtype, **self.kwargs
+            )
         except TypeError:
-            fill_value_array = self.func(*np.broadcast_arrays(*zero_args), **self.kwargs)
+            fill_value_array = self.func(
+                *np.broadcast_arrays(*zero_args), **self.kwargs
+            )
 
         try:
             fill_value = fill_value_array[(0,) * fill_value_array.ndim]
         except IndexError:
             zero_args = tuple(
-                arg.fill_value if isinstance(arg, COO) else _zero_of_dtype(arg.dtype) for arg in self.args
+                arg.fill_value if isinstance(arg, COO) else _zero_of_dtype(arg.dtype)
+                for arg in self.args
             )
             fill_value = self.func(*zero_args, **self.kwargs)[()]
@@ -556,8 +585,12 @@ def _check_broadcast(self):
         from ._coo import COO
 
         full_shape = _get_nary_broadcast_shape(*tuple(arg.shape for arg in self.args))
-        non_ndarray_shape = _get_nary_broadcast_shape(*tuple(arg.shape for arg in self.args if isinstance(arg, COO)))
-        ndarray_shape = _get_nary_broadcast_shape(*tuple(arg.shape for arg in self.args if isinstance(arg, np.ndarray)))
+        non_ndarray_shape = _get_nary_broadcast_shape(
+            *tuple(arg.shape for arg in self.args if isinstance(arg, COO))
+        )
+        ndarray_shape = _get_nary_broadcast_shape(
+            *tuple(arg.shape for arg in self.args if isinstance(arg, np.ndarray))
+        )
 
         self.shape = full_shape
         self.ndarray_shape = ndarray_shape
@@ -580,21 +613,29 @@ def _get_func_coords_data(self, mask):
         from ._coo import COO
 
         matched_args = [arg for arg, m in zip(self.args, mask) if m is not None and m]
-        unmatched_args = [arg for arg, m in zip(self.args, mask) if m is not None and not m]
+        unmatched_args = [
+            arg for arg, m in zip(self.args, mask) if m is not None and not m
+        ]
         ndarray_args = [arg for arg, m in zip(self.args, mask) if m is None]
 
         matched_broadcast_shape = _get_nary_broadcast_shape(
             *tuple(arg.shape for arg in itertools.chain(matched_args, ndarray_args))
         )
-        matched_arrays = self._match_coo(*matched_args, cache=self.cache, broadcast_shape=matched_broadcast_shape)
+        matched_arrays = self._match_coo(
+            *matched_args, cache=self.cache, broadcast_shape=matched_broadcast_shape
+        )
 
         func_args = []
 
         m_arg = 0
         for arg, m in zip(self.args, mask):
             if m is None:
-                func_args.append(np.broadcast_to(arg, matched_broadcast_shape)[tuple(matched_arrays[0].coords)])
+                func_args.append(
+                    np.broadcast_to(arg, matched_broadcast_shape)[
+                        tuple(matched_arrays[0].coords)
+                    ]
+                )
                 continue
 
             if m:
@@ -624,13 +665,17 @@ def _get_func_coords_data(self, mask):
 
         if matched_arrays[0].shape != self.shape:
             params = _get_broadcast_parameters(matched_arrays[0].shape, self.shape)
-            func_coords, func_data = _get_expanded_coords_data(func_coords, func_data, params, self.shape)
+            func_coords, func_data = _get_expanded_coords_data(
+                func_coords, func_data, params, self.shape
+            )
 
         if all(m is None or m for m in mask):
             return func_coords, func_data
 
         # Not really sorted but we need the sortedness.
-        func_array = COO(func_coords, func_data, self.shape, has_duplicates=False, sorted=True)
+        func_array = COO(
+            func_coords, func_data, self.shape, has_duplicates=False, sorted=True
+        )
 
         unmatched_mask = np.ones(func_array.nnz, dtype=np.bool_)
 
@@ -679,7 +724,9 @@ def _match_coo(*args, **kwargs):
         raise ValueError(f"Unknown kwargs: {kwargs.keys()}")
 
     if return_midx and (len(args) != 2 or cache is not None):
-        raise NotImplementedError("Matching indices only supported for two args, and no cache.")
+        raise NotImplementedError(
+            "Matching indices only supported for two args, and no cache."
+        )
 
     matched_arrays = [args[0]]
     cache_key = [id(args[0])]
@@ -692,11 +739,18 @@ def _match_coo(*args, **kwargs):
 
         cargs = [matched_arrays[0], arg2]
         current_shape = _get_broadcast_shape(matched_arrays[0].shape, arg2.shape)
-        params = [_get_broadcast_parameters(arg.shape, current_shape) for arg in cargs]
+        params = [
+            _get_broadcast_parameters(arg.shape, current_shape) for arg in cargs
+        ]
         reduced_params = [all(p) for p in zip(*params)]
-        reduced_shape = _get_reduced_shape(arg2.shape, _rev_idx(reduced_params, arg2.ndim))
+        reduced_shape = _get_reduced_shape(
+            arg2.shape, _rev_idx(reduced_params, arg2.ndim)
+        )
 
-        reduced_coords = [_get_reduced_coords(arg.coords, _rev_idx(reduced_params, arg.ndim)) for arg in cargs]
+        reduced_coords = [
+            _get_reduced_coords(arg.coords, _rev_idx(reduced_params, arg.ndim))
+            for arg in cargs
+        ]
 
         linear = [linear_loc(rc, reduced_shape) for rc in reduced_coords]
         sorted_idx = [np.argsort(idx) for idx in linear]
@@ -713,7 +767,10 @@ def _match_coo(*args, **kwargs):
             mdata.append(arg2.data[sorted_idx[1]][matched_idx[1]])
         # The coords aren't truly sorted, but we don't need them, so it's
        # best to avoid the extra cost.
-        matched_arrays = [COO(mcoords, md, shape=current_shape, sorted=True, has_duplicates=False) for md in mdata]
+        matched_arrays = [
+            COO(mcoords, md, shape=current_shape, sorted=True, has_duplicates=False)
+            for md in mdata
+        ]
 
         if cache is not None:
             cache[key] = matched_arrays
diff --git a/sparse/pydata_backend/_utils.py b/sparse/pydata_backend/_utils.py
index 50089aa0..80b1324e 100644
--- a/sparse/pydata_backend/_utils.py
+++ b/sparse/pydata_backend/_utils.py
@@ -81,7 +81,10 @@ def assert_nnz(s, x):
 
 
 def is_canonical(x):
-    return not x.shape or ((np.diff(x.linear_loc()) > 0).all() and not equivalent(x.data, x.fill_value).any())
+    return not x.shape or (
+        (np.diff(x.linear_loc()) > 0).all()
+        and not equivalent(x.data, x.fill_value).any()
+    )
 
 
 def _zero_of_dtype(dtype):
@@ -300,7 +303,9 @@ def random(
     if nnz is None:
        nnz = int(elements * density)
    if not (0 <= nnz <= elements):
-        raise ValueError(f"cannot generate {nnz} nonzero elements for an array with {elements} total elements")
+        raise ValueError(
+            f"cannot generate {nnz} nonzero elements for an array with {elements} total elements"
+        )
 
     if random_state is None:
         random_state = default_rng
@@ -330,7 +335,11 @@ def random(
             elements,
         )
     else:
-        ind = algD(nnz, elements, random_state) if elements > 10 * nnz else algA(nnz, elements, random_state)
+        ind = (
+            algD(nnz, elements, random_state)
+            if elements > 10 * nnz
+            else algA(nnz, elements, random_state)
+        )
     data = data_rvs(nnz)
 
     ar = COO(
@@ -344,7 +353,9 @@ def random(
         if can_store(idx_dtype, max(shape)):
             ar.coords = ar.coords.astype(idx_dtype)
         else:
-            raise ValueError(f"cannot cast array with shape {shape} to dtype {idx_dtype}.")
+            raise ValueError(
+                f"cannot cast array with shape {shape} to dtype {idx_dtype}."
+            )
 
     return ar.asformat(format, **kwargs)
 
@@ -493,7 +504,9 @@ def html_table(arr):
         info.append(str(arr.compressed_axes))
 
     for h, i in zip(headings, info):
-        table.append(f'<tr><th style="text-align: left">{h}</th><td style="text-align: left">{i}</td></tr>')
+        table.append(
+            f'<tr><th style="text-align: left">{h}</th><td style="text-align: left">{i}</td></tr>'
+        )
     table.append("</tbody></table>")
     return "".join(table)
 
@@ -557,7 +570,9 @@ def check_zero_fill_value(*args):
     ValueError: This operation requires zero fill values, but argument 1 had a fill value of 0.5.
     """
     for i, arg in enumerate(args):
-        if hasattr(arg, "fill_value") and not equivalent(arg.fill_value, _zero_of_dtype(arg.dtype)):
+        if hasattr(arg, "fill_value") and not equivalent(
+            arg.fill_value, _zero_of_dtype(arg.dtype)
+        ):
             raise ValueError(
                 "This operation requires zero fill values, "
                 f"but argument {i:d} had a fill value of {arg.fill_value!s}."
diff --git a/sparse/pydata_backend/tests/test_compressed.py b/sparse/pydata_backend/tests/test_compressed.py
index 1ddc9912..174d572c 100644
--- a/sparse/pydata_backend/tests/test_compressed.py
+++ b/sparse/pydata_backend/tests/test_compressed.py
@@ -18,7 +18,9 @@ def data_rvs(n):
     else:
         data_rvs = None
 
-    return sparse.random((20, 30, 40), density=0.25, format="gcxs", data_rvs=data_rvs, random_state=rng).astype(dtype)
+    return sparse.random(
+        (20, 30, 40), density=0.25, format="gcxs", data_rvs=data_rvs, random_state=rng
+    ).astype(dtype)
 
 
 @pytest.fixture(scope="module", params=["f8", "f4", "i8", "i4"])
@@ -31,7 +33,9 @@ def data_rvs(n):
     else:
         data_rvs = None
 
-    return sparse.random((20, 30, 40), density=0.25, format="gcxs", data_rvs=data_rvs, random_state=rng).astype(dtype)
+    return sparse.random(
+        (20, 30, 40), density=0.25, format="gcxs", data_rvs=data_rvs, random_state=rng
+    ).astype(dtype)
 
 
 @pytest.mark.parametrize(
@@ -58,7 +62,9 @@ def test_reductions(reduction, random_sparse, axis, keepdims, kwargs):
     assert_eq(xx, yy)
 
 
-@pytest.mark.xfail(reason=("Setting output dtype=float16 produces results inconsistent with numpy"))
+@pytest.mark.xfail(
+    reason=("Setting output dtype=float16 produces results inconsistent with numpy")
+)
 @pytest.mark.filterwarnings("ignore:overflow")
 @pytest.mark.parametrize(
     "reduction, kwargs",
@@ -265,7 +271,9 @@ def test_complex_methods(complex):
 )
 @pytest.mark.parametrize("compressed_axes", [(0,), (1,), (2,), (0, 1), (0, 2), (1, 2)])
 def test_slicing(index, compressed_axes):
-    s = sparse.random((2, 3, 4), density=0.5, format="gcxs", compressed_axes=compressed_axes)
+    s = sparse.random(
+        (2, 3, 4), density=0.5, format="gcxs", compressed_axes=compressed_axes
+    )
     x = s.todense()
     assert_eq(x[index], s[index])
 
@@ -287,7 +295,9 @@ def test_slicing(index, compressed_axes):
 )
 @pytest.mark.parametrize("compressed_axes", [(0,), (1,), (2,), (0, 1), (0, 2), (1, 2)])
 def test_advanced_indexing(index, compressed_axes):
-    s = sparse.random((2, 3, 4), density=0.5, format="gcxs", compressed_axes=compressed_axes)
+    s = sparse.random(
+        (2, 3, 4), density=0.5, format="gcxs", compressed_axes=compressed_axes
+    )
     x = s.todense()
     assert_eq(x[index], s[index])
 
@@ -333,7 +343,9 @@ def test_concatenate():
     zz = sparse.random((4, 3, 4), density=0.5, format="gcxs")
     z = zz.todense()
 
-    assert_eq(np.concatenate([x, y, z], axis=0), sparse.concatenate([xx, yy, zz], axis=0))
+    assert_eq(
+        np.concatenate([x, y, z], axis=0), sparse.concatenate([xx, yy, zz], axis=0)
+    )
 
     xx = sparse.random((5, 3, 1), density=0.5, format="gcxs")
     x = xx.todense()
@@ -342,9 +354,13 @@ def test_concatenate():
     zz = sparse.random((5, 3, 2), density=0.5, format="gcxs")
     z = zz.todense()
 
-    assert_eq(np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2))
+    assert_eq(
+        np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2)
+    )
 
-    assert_eq(np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1))
+    assert_eq(
+        np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1)
+    )
 
 
 @pytest.mark.parametrize("axis", [0, 1])
@@ -426,7 +442,9 @@ def test_from_coo_valerr():
 )
 @pytest.mark.parametrize("constant_values", [0, 1, 150, np.nan])
 def test_pad_valid(pad_width, constant_values):
-    y = sparse.random((50, 50, 3), density=0.15, fill_value=constant_values, format="gcxs")
+    y = sparse.random(
+        (50, 50, 3), density=0.15, fill_value=constant_values, format="gcxs"
+    )
     x = y.todense()
     xx = np.pad(x, pad_width=pad_width, constant_values=constant_values)
     yy = np.pad(y, pad_width=pad_width, constant_values=constant_values)
diff --git a/sparse/pydata_backend/tests/test_compressed_2d.py b/sparse/pydata_backend/tests/test_compressed_2d.py
index 176dafe1..43d0f5d8 100644
--- a/sparse/pydata_backend/tests/test_compressed_2d.py
+++ b/sparse/pydata_backend/tests/test_compressed_2d.py
@@ -41,7 +41,9 @@ def data_rvs(n):
     else:
         data_rvs = None
 
-    return cls(sparse.random((20, 30, 40), density=0.25, data_rvs=data_rvs).astype(dtype))
+    return cls(
+        sparse.random((20, 30, 40), density=0.25, data_rvs=data_rvs).astype(dtype)
+    )
 
 
 def test_repr(random_sparse):
diff --git a/sparse/pydata_backend/tests/test_compressed_convert.py b/sparse/pydata_backend/tests/test_compressed_convert.py
index e50b1d63..32cd4d23 100644
--- a/sparse/pydata_backend/tests/test_compressed_convert.py
+++ b/sparse/pydata_backend/tests/test_compressed_convert.py
@@ -83,4 +83,6 @@ def test_compute_flat(shape, expected_subsample, subsample):
     ],
 )
 def test_transform_shape(shape, expected_shape):
-    assert_eq(convert.transform_shape(np.asarray(shape)), expected_shape, compare_dtype=False)
+    assert_eq(
+        convert.transform_shape(np.asarray(shape)), expected_shape, compare_dtype=False
+    )
diff --git a/sparse/pydata_backend/tests/test_coo.py b/sparse/pydata_backend/tests/test_coo.py
index a43761bc..c3ee2c90 100644
--- a/sparse/pydata_backend/tests/test_coo.py
+++ b/sparse/pydata_backend/tests/test_coo.py
@@ -41,7 +41,9 @@ def data_rvs(n):
     return sparse.random((20, 30, 40), density=0.25, data_rvs=data_rvs).astype(dtype)
 
 
-@pytest.mark.parametrize("reduction, kwargs", [("sum", {}), ("sum", {"dtype": np.float32}), ("prod", {})])
+@pytest.mark.parametrize(
+    "reduction, kwargs", [("sum", {}), ("sum", {"dtype": np.float32}), ("prod", {})]
+)
 @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 2), -3, (1, -1)])
 @pytest.mark.parametrize("keepdims", [True, False])
 def test_reductions_fv(reduction, random_sparse_small, axis, keepdims, kwargs, rng):
@@ -76,7 +78,9 @@ def test_reductions(reduction, random_sparse, axis, keepdims, kwargs):
     assert_eq(xx, yy)
 
 
-@pytest.mark.xfail(reason=("Setting output dtype=float16 produces results inconsistent with numpy"))
+@pytest.mark.xfail(
+    reason=("Setting output dtype=float16 produces results inconsistent with numpy")
+)
 @pytest.mark.filterwarnings("ignore:overflow")
 @pytest.mark.parametrize(
     "reduction, kwargs",
@@ -151,14 +155,18 @@ def test_ufunc_reductions_kwargs(reduction, kwargs):
     assert isinstance(xx, COO)
 
 
-@pytest.mark.parametrize("reduction", ["nansum", "nanmean", "nanprod", "nanmax", "nanmin"])
+@pytest.mark.parametrize(
+    "reduction", ["nansum", "nanmean", "nanprod", "nanmax", "nanmin"]
+)
 @pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("keepdims", [False])
@pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0]) @pytest.mark.filterwarnings("ignore:All-NaN") @pytest.mark.filterwarnings("ignore:Mean of empty slice") def test_nan_reductions(reduction, axis, keepdims, fraction): - s = sparse.random((2, 3, 4), data_rvs=random_value_array(np.nan, fraction), density=0.25) + s = sparse.random( + (2, 3, 4), data_rvs=random_value_array(np.nan, fraction), density=0.25 + ) x = s.todense() expected = getattr(np, reduction)(x, axis=axis, keepdims=keepdims) actual = getattr(sparse, reduction)(s, axis=axis, keepdims=keepdims) @@ -281,7 +289,9 @@ def test_moveaxis(source, destination): assert_eq(xx, yy) -@pytest.mark.parametrize("source, destination", [[0, -4], [(0, 5), (1, 2)], [(0, 1, 2), (2, 1)]]) +@pytest.mark.parametrize( + "source, destination", [[0, -4], [(0, 5), (1, 2)], [(0, 1, 2), (2, 1)]] +) def test_moveaxis_error(source, destination): x = sparse.random((2, 3, 4), density=0.25) @@ -342,7 +352,9 @@ def test_reshape(a, b, format): def test_large_reshape(): n = 100 m = 10 - row = np.arange(n, dtype=np.uint16) # np.random.randint(0, n, size=n, dtype=np.uint16) + row = np.arange( + n, dtype=np.uint16 + ) # np.random.randint(0, n, size=n, dtype=np.uint16) col = row % m # np.random.randint(0, m, size=n, dtype=np.uint16) data = np.ones(n, dtype=np.uint8) @@ -408,7 +420,9 @@ def test_kron(a_ndim, b_ndim): assert_eq(sparse.kron(a, b), sol) -@pytest.mark.parametrize("a_spmatrix, b_spmatrix", [(True, True), (True, False), (False, True)]) +@pytest.mark.parametrize( + "a_spmatrix, b_spmatrix", [(True, True), (True, False), (False, True)] +) def test_kron_spmatrix(a_spmatrix, b_spmatrix): sa = sparse.random((3, 4), density=0.5) a = sa.todense() @@ -550,7 +564,9 @@ def test_advanced_indexing(index): def test_custom_dtype_slicing(): - dt = np.dtype([("part1", np.float64), ("part2", np.int64, (2,)), ("part3", np.int64, (2, 2))]) + dt = np.dtype( + [("part1", np.float64), ("part2", np.int64, (2,)), ("part3", np.int64, (2, 2))] + ) x = np.zeros((2, 3, 4), dtype=dt) x[1, 1, 1] = (0.64, [4, 2], [[1, 2], [3, 0]]) @@ -596,7 +612,9 @@ def test_concatenate(): zz = sparse.random((4, 3, 4), density=0.5) z = zz.todense() - assert_eq(np.concatenate([x, y, z], axis=0), sparse.concatenate([xx, yy, zz], axis=0)) + assert_eq( + np.concatenate([x, y, z], axis=0), sparse.concatenate([xx, yy, zz], axis=0) + ) xx = sparse.random((5, 3, 1), density=0.5) x = xx.todense() @@ -605,9 +623,13 @@ def test_concatenate(): zz = sparse.random((5, 3, 2), density=0.5) z = zz.todense() - assert_eq(np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2)) + assert_eq( + np.concatenate([x, y, z], axis=2), sparse.concatenate([xx, yy, zz], axis=2) + ) - assert_eq(np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1)) + assert_eq( + np.concatenate([x, y, z], axis=-1), sparse.concatenate([xx, yy, zz], axis=-1) + ) @pytest.mark.parametrize("axis", [0, 1]) @@ -832,10 +854,16 @@ def test_add_many_sparse_arrays(): def test_caching(): x = COO({(9, 9, 9): 1}) - assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() + assert ( + x[:].reshape((100, 10)).transpose().tocsr() + is not x[:].reshape((100, 10)).transpose().tocsr() + ) x = COO({(9, 9, 9): 1}, cache=True) - assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() + assert ( + x[:].reshape((100, 10)).transpose().tocsr() + is x[:].reshape((100, 10)).transpose().tocsr() + ) x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, 
@@ -903,7 +931,9 @@ def test_random_nnz(shape, nnz):
     assert s.nnz == nnz
 
 
-@pytest.mark.parametrize("density, nnz", [(1, 1), (1.01, None), (-0.01, None), (None, 2)])
+@pytest.mark.parametrize(
+    "density, nnz", [(1, 1), (1.01, None), (-0.01, None), (None, 2)]
+)
 def test_random_invalid_density_and_nnz(density, nnz):
     with pytest.raises(ValueError):
         sparse.random((1,), density, nnz=nnz)
@@ -1076,7 +1106,9 @@ def test_asformat(format):
     assert_eq(s, s2)
 
 
-@pytest.mark.parametrize("format", [sparse.COO, sparse.DOK, scipy.sparse.csr_matrix, np.asarray])
+@pytest.mark.parametrize(
+    "format", [sparse.COO, sparse.DOK, scipy.sparse.csr_matrix, np.asarray]
+)
 def test_as_coo(format):
     x = format(sparse.random((3, 4), density=0.5, format="coo").todense())
 
@@ -1321,7 +1353,9 @@ def test_full_like():
     x = np.zeros((5, 5), dtype="i8")
     assert_eq(sparse.full_like(x, 9.5), np.full_like(x, 9.5))
     assert_eq(sparse.full_like(x, 9.5, dtype="f8"), np.full_like(x, 9.5, dtype="f8"))
-    assert_eq(sparse.full_like(x, 9.5, shape=(2, 2)), np.full_like(x, 9.5, shape=(2, 2)))
+    assert_eq(
+        sparse.full_like(x, 9.5, shape=(2, 2)), np.full_like(x, 9.5, shape=(2, 2))
+    )
 
 
 @pytest.mark.parametrize(
@@ -1352,9 +1386,12 @@ def test_out_dtype():
     a = sparse.eye(5, dtype="float32")
     b = sparse.eye(5, dtype="float64")
 
-    assert np.positive(a, out=b).dtype == np.positive(a.todense(), out=b.todense()).dtype
     assert (
-        np.positive(a, out=b, dtype="float64").dtype == np.positive(a.todense(), out=b.todense(), dtype="float64").dtype
+        np.positive(a, out=b).dtype == np.positive(a.todense(), out=b.todense()).dtype
+    )
+    assert (
+        np.positive(a, out=b, dtype="float64").dtype
+        == np.positive(a.todense(), out=b.todense(), dtype="float64").dtype
     )
 
 
@@ -1525,7 +1562,9 @@ def test_flatten(in_shape):
 def test_asnumpy():
     s = sparse.COO(data=[1], coords=[2], shape=(5,))
     assert_eq(sparse.asnumpy(s), s.todense())
-    assert_eq(sparse.asnumpy(s, dtype=np.float64), np.asarray(s.todense(), dtype=np.float64))
+    assert_eq(
+        sparse.asnumpy(s, dtype=np.float64), np.asarray(s.todense(), dtype=np.float64)
+    )
     a = np.array([1, 2, 3])
     # Array passes through with no copying.
     assert sparse.asnumpy(a) is a
@@ -1659,7 +1698,9 @@ def test_array_as_shape():
 )
 @pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("keepdims", [True, False])
-@pytest.mark.parametrize("mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)])
+@pytest.mark.parametrize(
+    "mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)]
+)
 def test_argmax_argmin(arr, axis, keepdims, mode):
     sparse_func, np_func = mode
 
@@ -1672,7 +1713,9 @@ def test_argmax_argmin(arr, axis, keepdims, mode):
 
 
 @pytest.mark.parametrize("axis", [None, 0, 1, 2])
-@pytest.mark.parametrize("mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)])
+@pytest.mark.parametrize(
+    "mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)]
+)
 def test_argmax_argmin_3D(axis, mode):
     sparse_func, np_func = mode
 
@@ -1692,7 +1735,9 @@ def test_argmax_argmin_3D(axis, mode):
 def test_argmax_argmin_constraint(func):
     s = sparse.COO.from_numpy(np.full((2, 2), 2), fill_value=2)
 
-    with pytest.raises(ValueError, match="`axis=2` is out of bounds for array of dimension 2."):
+    with pytest.raises(
+        ValueError, match="`axis=2` is out of bounds for array of dimension 2."
+    ):
         func(s, axis=2)
 
 
@@ -1743,12 +1788,16 @@ def test_squeeze_validation(self):
         with pytest.raises(ValueError, match="Invalid axis parameter: `1.1`."):
             s_arr.squeeze(1.1)
 
-        with pytest.raises(ValueError, match="Specified axis `0` has a size greater than one: 3"):
+        with pytest.raises(
+            ValueError, match="Specified axis `0` has a size greater than one: 3"
+        ):
             s_arr.squeeze(0)
 
 
 class TestUnique:
-    arr = np.array([[0, 0, 1, 5, 3, 0], [1, 0, 4, 0, 3, 0], [0, 1, 0, 1, 1, 0]], dtype=np.int64)
+    arr = np.array(
+        [[0, 0, 1, 5, 3, 0], [1, 0, 4, 0, 3, 0], [0, 1, 0, 1, 1, 0]], dtype=np.int64
+    )
     arr_empty = np.zeros((5, 5))
     arr_full = np.arange(1, 10)
 
@@ -1775,7 +1824,9 @@ def test_unique_values(self, arr, fill_value):
 
     @pytest.mark.parametrize("func", [sparse.unique_counts, sparse.unique_values])
     def test_input_validation(self, func):
-        with pytest.raises(ValueError, match="Input must be an instance of SparseArray"):
+        with pytest.raises(
+            ValueError, match="Input must be an instance of SparseArray"
+        ):
             func(self.arr)
 
 
@@ -1793,8 +1844,12 @@ def test_expand_dims(axis):
 @pytest.mark.parametrize(
     "arr",
     [
-        np.array([[0, 0, 1, 5, 3, 0], [1, 0, 4, 0, 3, 0], [0, 1, 0, 1, 1, 0]], dtype=np.int64),
-        np.array([[[2, 0], [0, 5]], [[1, 0], [4, 0]], [[0, 1], [0, -1]]], dtype=np.float64),
+        np.array(
+            [[0, 0, 1, 5, 3, 0], [1, 0, 4, 0, 3, 0], [0, 1, 0, 1, 1, 0]], dtype=np.int64
+        ),
+        np.array(
+            [[[2, 0], [0, 5]], [[1, 0], [4, 0]], [[0, 1], [0, -1]]], dtype=np.float64
+        ),
         np.arange(3, 10),
     ],
 )
diff --git a/sparse/pydata_backend/tests/test_dok.py b/sparse/pydata_backend/tests/test_dok.py
index 8731f0c0..c132f2d9 100644
--- a/sparse/pydata_backend/tests/test_dok.py
+++ b/sparse/pydata_backend/tests/test_dok.py
@@ -302,7 +302,9 @@ def test_zeros_like():
 )
 @pytest.mark.parametrize("constant_values", [0, 1, 150, np.nan])
 def test_pad_valid(pad_width, constant_values):
-    y = sparse.random((50, 50, 3), density=0.15, fill_value=constant_values, format="dok")
+    y = sparse.random(
+        (50, 50, 3), density=0.15, fill_value=constant_values, format="dok"
+    )
     x = y.todense()
     xx = np.pad(x, pad_width=pad_width, constant_values=constant_values)
     yy = np.pad(y, pad_width=pad_width, constant_values=constant_values)
diff --git a/sparse/pydata_backend/tests/test_dot.py b/sparse/pydata_backend/tests/test_dot.py
index 4cf0b6b6..4da502b2 100644
--- a/sparse/pydata_backend/tests/test_dot.py
+++ b/sparse/pydata_backend/tests/test_dot.py
@@ -174,15 +174,27 @@ def test_matmul_errors():
     "a, b",
     [
         (
-            sparse.GCXS.from_numpy(default_rng.choice([0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009])),
+            sparse.GCXS.from_numpy(
+                default_rng.choice(
+                    [0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009]
+                )
+            ),
             sparse.random((100, 100), density=0.01),
         ),
         (
-            sparse.COO.from_numpy(default_rng.choice([0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009])),
+            sparse.COO.from_numpy(
+                default_rng.choice(
+                    [0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009]
+                )
+            ),
             sparse.random((100, 100), density=0.01),
         ),
         (
-            sparse.GCXS.from_numpy(default_rng.choice([0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009])),
+            sparse.GCXS.from_numpy(
+                default_rng.choice(
+                    [0, np.nan, 2], size=[100, 100], p=[0.99, 0.001, 0.009]
+                )
+            ),
             scipy.sparse.random(100, 100),
         ),
         (
@@ -290,7 +302,9 @@ def test_dot_nocoercion():
 @pytest.mark.parametrize("format2", dot_formats)
 def test_small_values(format1, format2):
     s1 = format1(sparse.COO(coords=[[0, 10]], data=[3.6e-100, 7.2e-009], shape=(20,)))
-    s2 = format2(sparse.COO(coords=[[0, 0], [4, 28]], data=[3.8e-25, 4.5e-225], shape=(20, 50)))
+    s2 = format2(
+        sparse.COO(coords=[[0, 0], [4, 28]], data=[3.8e-25, 4.5e-225], shape=(20, 50))
+    )
 
     def dense_convertor(x):
         return x.todense() if isinstance(x, sparse.SparseArray) else x
diff --git a/sparse/pydata_backend/tests/test_einsum.py b/sparse/pydata_backend/tests/test_einsum.py
index 6df039c4..f366ca7b 100644
--- a/sparse/pydata_backend/tests/test_einsum.py
+++ b/sparse/pydata_backend/tests/test_einsum.py
@@ -99,7 +99,9 @@ def test_einsum(subscripts, density):
     assert np.allclose(numpy_out, sparse_out.todense())
 
 
-@pytest.mark.parametrize("input", [[[0, 0]], [[0, Ellipsis]], [[Ellipsis, 1], [Ellipsis]], [[0, 1], [0]]])
+@pytest.mark.parametrize(
+    "input", [[[0, 0]], [[0, Ellipsis]], [[Ellipsis, 1], [Ellipsis]], [[0, 1], [0]]]
+)
 @pytest.mark.parametrize("density", [0.1, 1.0])
 def test_einsum_nosubscript(input, density):
     d = 4
@@ -126,7 +128,9 @@ def test_einsum_no_input():
         sparse.einsum()
 
 
-@pytest.mark.parametrize("subscript", ["a+b->c", "i->&", "i->ij", "ij->jij", "a..,a...", ".i...", "a,a->->"])
+@pytest.mark.parametrize(
+    "subscript", ["a+b->c", "i->&", "i->ij", "ij->jij", "a..,a...", ".i...", "a,a->->"]
+)
 def test_einsum_invalid_input(subscript):
     x = sparse.random(shape=(2,), density=0.5, format="coo")
     y = sparse.random(shape=(2,), density=0.5, format="coo")
@@ -164,7 +168,9 @@ def test_einsum_type_error(subscript):
 @pytest.mark.parametrize("formats,expected", format_test_cases)
 def test_einsum_format(formats, expected, rng):
     inputs = [
-        rng.standard_normal((2, 2, 2)) if format == "dense" else sparse.random((2, 2, 2), density=0.5, format=format)
+        rng.standard_normal((2, 2, 2))
+        if format == "dense"
+        else sparse.random((2, 2, 2), density=0.5, format=format)
         for format in formats
     ]
     if len(inputs) == 1:
diff --git a/sparse/pydata_backend/tests/test_elemwise.py b/sparse/pydata_backend/tests/test_elemwise.py
index 2184f0c2..ccf9ea09 100644
--- a/sparse/pydata_backend/tests/test_elemwise.py
+++ b/sparse/pydata_backend/tests/test_elemwise.py
@@ -296,7 +296,10 @@ def test_trinary_broadcasting(shapes, func):
 @pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0])
 @pytest.mark.filterwarnings("ignore:invalid value")
 def test_trinary_broadcasting_pathological(shapes, func, value, fraction):
-    args = [sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction)) for s in shapes]
+    args = [
+        sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction))
+        for s in shapes
+    ]
     dense_args = [arg.todense() for arg in args]
 
     fs = sparse.elemwise(func, *args)
@@ -319,7 +322,11 @@ def mock_unmatch_coo(*args, **kwargs):
         state["num_matches"] += 1
         return result
 
-    monkeypatch.setattr(sparse.pydata_backend._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo)
+    monkeypatch.setattr(
+        sparse.pydata_backend._umath._Elemwise,
+        "_get_func_coords_data",
+        mock_unmatch_coo,
+    )
 
     xs * ys
 
@@ -341,7 +348,11 @@ def mock_unmatch_coo(*args, **kwargs):
         state["num_matches"] += 1
         return result
 
-    monkeypatch.setattr(sparse.pydata_backend._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo)
+    monkeypatch.setattr(
+        sparse.pydata_backend._umath._Elemwise,
+        "_get_func_coords_data",
+        mock_unmatch_coo,
+    )
 
     xs + ys
diff --git a/sparse/tests/test_backends.py b/sparse/tests/test_backends.py
deleted file mode 100644
index 9cc8b5df..00000000
--- a/sparse/tests/test_backends.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import sparse
-
-import pytest
-
-import numpy as np
-
-
-def test_backend_contex_manager(backend):
-    if backend == sparse.BackendType.Finch:
-        with pytest.raises(NotImplementedError):
-            sparse.COO.from_numpy(np.eye(5))
-    else:
-        sparse.COO.from_numpy(np.eye(5))
diff --git a/sparse/tests/conftest.py b/tests/conftest.py
similarity index 56%
rename from sparse/tests/conftest.py
rename to tests/conftest.py
index 8dd080ce..72139a6e 100644
--- a/sparse/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,9 @@
 import pytest
 
 
-@pytest.fixture(scope="session", params=[sparse.BackendType.PyData, sparse.BackendType.Finch])
+@pytest.fixture(
+    scope="session", params=[sparse.BackendType.PyData, sparse.BackendType.Finch]
+)
 def backend(request):
     with sparse.Backend(backend=request.param):
         yield request.param
diff --git a/tests/test_backends.py b/tests/test_backends.py
new file mode 100644
index 00000000..c32f3ca5
--- /dev/null
+++ b/tests/test_backends.py
@@ -0,0 +1,29 @@
+import sparse
+
+import numpy as np
+import scipy.sparse as sp
+
+# def test_backend_contex_manager(backend):
+#     if backend == sparse.BackendType.Finch:
+#         with pytest.raises(NotImplementedError):
+#             sparse.COO.from_numpy(np.eye(5))
+#     else:
+#         sparse.COO.from_numpy(np.eye(5))
+
+
+def test_finch_backend():
+    np_eye = np.eye(5)
+    sp_arr = sp.csr_matrix(np_eye)
+
+    with sparse.Backend(backend=sparse.BackendType.Finch):
+        finch_dense = sparse.Tensor(np_eye)
+
+        assert np.shares_memory(finch_dense.todense(), np_eye)
+
+        finch_arr = sparse.Tensor(sp_arr)
+
+        np.testing.assert_equal(finch_arr.todense(), np_eye)
+
+        transposed = sparse.permute_dims(finch_arr, (1, 0))
+
+        np.testing.assert_equal(transposed.todense(), np_eye.T)