diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index 733f0ba53c..0e66d7d95c 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -5,7 +5,7 @@ import re import shutil import subprocess -from typing import Any, Callable, Dict, List, Tuple, Optional, Type, Union +from typing import Any, Callable, Dict, List, Tuple, Optional, Type, Union, Sequence import warnings import tempfile import pickle @@ -151,8 +151,28 @@ def __exit__(self, *args, **kwargs): class CompiledSDFG(object): """ A compiled SDFG object that can be called through Python. - Todo: - Scalar return values are not handled properly, this is a code gen issue. + Essentially this class makes an SDFG callable. Normally a user will not create it + directly but instead it is generated by some utilities such as `SDFG.compile()`. + + The class performs the following tasks: + - It ensures that the SDFG object is properly initialized, either by a direct + call to `initialize()` or the first time it is called. Furthermore, it will + also take care of the finalization if it goes out of scope. + - It transforms Python arguments into C arguments. + + Technically there are two ways in which the SDFG can be called; the first is using + `__call__()`, i.e. as a normal function. However, this will always process + the arguments and do some error checking and is thus slow. The second way + is the advanced interface, which allows decomposing the call into separate + steps. For more information see `construct_arguments()`, `fast_call()` and + `convert_return_values()`. + + :note: In previous versions the arrays used as return values were sometimes reused. + However, this was changed and every time `construct_arguments()` is called + new arrays are allocated. + :note: It is not possible to return scalars. Note that currently using scalars + as return values is a validation error. The only exception is (probably) + Python objects. 
""" def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None): @@ -161,9 +181,14 @@ def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None): self._lib = lib self._initialized = False self._libhandle = ctypes.c_void_p(0) - self._lastargs = () self.do_not_execute = False + # Contains the pointer arguments that where used to call the SDFG, `__call__()` + # was used. It is also used by `get_workspace_size()`. + # NOTE: Using its content might be dangerous as only the pointers to arrays are + # stored. It is the users responsibility to ensure that they are valid. + self._lastargs = None + lib.load() # Explicitly load the library self._init = lib.get_symbol('__dace_init_{}'.format(sdfg.name)) self._init.restype = ctypes.c_void_p @@ -172,17 +197,27 @@ def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None): self._cfunc = lib.get_symbol('__program_{}'.format(sdfg.name)) # Cache SDFG return values - self._create_new_arrays: bool = True self._return_syms: Dict[str, Any] = None + # It will contain the shape of the array or the name if the return array is passed as argument. self._retarray_shapes: List[Tuple[str, np.dtype, dtypes.StorageType, Tuple[int], Tuple[int], int]] = [] - self._retarray_is_scalar: List[bool] = [] + # Is only `True` if teh return value is a scalar _and_ a `pyobject`. + self._retarray_is_pyobject: List[bool] = [] self._return_arrays: List[np.ndarray] = [] self._callback_retval_references: List[Any] = [] # Avoids garbage-collecting callback return values + # If there are return values then this is `True` it is is a single value. Note that + # `False` either means that a tuple is returned or there are no return values. + # NOTE: Needed to handle the case of a tuple with one element. 
+ self._is_single_value_ret: bool = False + if '__return' in self._sdfg.arrays: + assert not any(aname.startswith('__return_') for aname in self._sdfg.arrays.keys()) + self._is_single_value_ret = True + # Cache SDFG argument properties self._typedict = self._sdfg.arglist() self._sig = self._sdfg.signature_arglist(with_types=False, arglist=self._typedict) self._free_symbols = self._sdfg.free_symbols + self._constants = self._sdfg.constants self.argnames = argnames if self.argnames is None and len(sdfg.arg_names) != 0: @@ -269,12 +304,21 @@ def get_workspace_sizes(self) -> Dict[dtypes.StorageType, int]: """ Returns the total external memory size to be allocated for this SDFG. + Note that the function queries the sizes of the last call that was made by + `__call__()` or `initialize()`. Calls made by `fast_call()` or `safe_call()` + will not be considered. + :return: A dictionary mapping storage types to the number of bytes necessary to allocate for the SDFG to work properly. + :note: It is the users responsibility that all arguments, especially the array + arguments, remain valid between the call to `__call__()` or `initialize()` + and the call to this function. """ if not self._initialized: raise ValueError('Compiled SDFG is uninitialized, please call ``initialize`` prior to ' 'querying external memory size.') + if self._lastargs is None: + raise ValueError('To use `get_workspace_sizes()` `__call__()` or `initialize()` must be called before.') result: Dict[dtypes.StorageType, int] = {} for storage in self.external_memory_types: @@ -288,15 +332,24 @@ def set_workspace(self, storage: dtypes.StorageType, workspace: Any): """ Sets the workspace for the given storage type to the given buffer. + Note that the function queries the sizes of the last call that was made by + `__call__()` or `initialize()`. Calls made by `fast_call()` or `safe_call()` + will not be considered. + :param storage: The storage type to fill. 
:param workspace: An array-convertible object (through ``__[cuda_]array_interface__``, see ``array_interface_ptr``) to use for the workspace. + :note: It is the user's responsibility that all arguments, especially the array + arguments, remain valid between the call to `__call__()` or `initialize()` + and the call to this function. """ if not self._initialized: raise ValueError('Compiled SDFG is uninitialized, please call ``initialize`` prior to ' 'setting external memory.') if storage not in self.external_memory_types: raise ValueError(f'Compiled SDFG does not specify external memory of {storage}') + if self._lastargs is None: + raise ValueError('To use `get_workspace_sizes()` `__call__()` or `initialize()` must be called before.') func = self._lib.get_symbol(f'__dace_set_external_memory_{storage.name}', None) ptr = dtypes.array_interface_ptr(workspace, storage) @@ -331,12 +384,13 @@ def initialize(self, *args, **kwargs): if self._initialized: return - if len(args) > 0 and self.argnames is not None: - kwargs.update({aname: arg for aname, arg in zip(self.argnames, args)}) - # Construct arguments in the exported C function order - _, initargtuple = self._construct_args(kwargs) + callargtuple, initargtuple = self.construct_arguments(*args, **kwargs) self._initialize(initargtuple) + + # The main reason for setting `_lastargs` here is to allow calls to `get_workspace_sizes()`. + self._lastargs = (callargtuple, initargtuple) + return self._libhandle def finalize(self): @@ -361,38 +415,34 @@ def __call__(self, *args, **kwargs): """ Forwards the Python call to the compiled ``SDFG``. - The order of the positional arguments is expected to be the same as in - the ``argnames`` member. The function will roughly perform the - following tasks: - - Change the order of the Python arguments into the one required by - the binary. - - Performing some basic sanity checks. - - Transforming the Python arguments into their ``C`` equivalents. - - Allocate the memory for the return values. 
- - Call the ``C` function. + The order of the positional arguments is expected to be the same as in the + ``argnames`` member. The function will perform the following tasks: + - Calling ``construct_arguments()`` and creating the argument vector and + allocating the memory for the return values. + - Performing the actual call by means of ``fast_call()``, with enabled error + checks. + - Then it will convert the return value into the expected format by means of + ``convert_return_values()`` and return that value. :note: The memory for the return values is only allocated the first time this function is called. Thus, this function will always return the same objects. To force the allocation of new memory you can call ``clear_return_values()`` in advance. """ - if self.argnames is None and len(args) != 0: - raise KeyError(f"Passed positional arguments to an SDFG that does not accept them.") - elif len(args) > 0 and self.argnames is not None: - kwargs.update( - # `_construct_args` will handle all of its arguments as kwargs. - { - aname: arg - for aname, arg in zip(self.argnames, args) - }) - argtuple, initargtuple = self._construct_args(kwargs) # Missing arguments will be detected here. - # Return values are cached in `self._lastargs`. - return self.fast_call(argtuple, initargtuple, do_gpu_check=True) + argtuple, initargtuple = self.construct_arguments(*args, **kwargs) # Missing arguments will be detected here. + self._lastargs = (argtuple, initargtuple) + self.fast_call(argtuple, initargtuple, do_gpu_check=True) + return self.convert_return_values() def safe_call(self, *args, **kwargs): """ Forwards the Python call to the compiled ``SDFG`` in a separate process to avoid crashes in the main process. Raises an exception if the SDFG execution fails. + + Note the current implementation lacks the proper handling of return values. + Thus output can only be transmitted through inout arguments. 
""" + if any(aname == '__return' or aname.startswith('__return_') for aname in self.sdfg.arrays.keys()): + raise NotImplementedError('`CompiledSDFG.safe_call()` does not support return values.') # Pickle the SDFG and arguments with tempfile.NamedTemporaryFile(mode='wb', delete=False) as f: @@ -444,24 +494,25 @@ def safe_call(self, *args, **kwargs): def fast_call( self, - callargs: Tuple[Any, ...], - initargs: Tuple[Any, ...], + callargs: Sequence[Any], + initargs: Sequence[Any], do_gpu_check: bool = False, - ) -> Union[Tuple[Any, ...], Any]: + ) -> None: """ - Calls the underlying binary functions directly and bypassing - argument sanitation. + Calls the underlying binary functions directly and bypassing argument sanitation. - This is a faster, but less user friendly version of ``__call__()``. - While ``__call__()`` will transforms its Python arguments such that - they can be forwarded, this function assumes that this processing - was already done by the user. + This is a faster, but less user friendly version of ``__call__()``. While + ``__call__()`` will transforms its Python arguments such that they can be + forwarded and allocate memory for the return values, this function assumes + that this processing was already done by the user. + To build the argument vectors you should use `self.construct_arguments()`. :param callargs: Arguments passed to the actual computation. :param initargs: Arguments passed to the initialization function. :param do_gpu_check: Check if errors happened on the GPU. - :note: You may use `_construct_args()` to generate the processed arguments. + :note: This is an advanced interface. + :note: In previous versions this function also called `convert_return_values()`. 
""" try: # Call initializer function if necessary, then SDFG @@ -485,8 +536,7 @@ def fast_call( if lasterror is not None: raise RuntimeError( f'An error was detected when calling "{self._sdfg.name}": {self._get_error_text(lasterror)}') - - return self._convert_return_values() + return except (RuntimeError, TypeError, UnboundLocalError, KeyError, cgx.DuplicateDLLError, ReferenceError): self._lib.unload() raise @@ -498,18 +548,40 @@ def __del__(self): self._libhandle = ctypes.c_void_p(0) self._lib.unload() - def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: - """ - Main function that controls argument construction for calling - the C prototype of the SDFG. + def construct_arguments(self, *args: Any, **kwargs: Any) -> Tuple[Tuple[Any], Tuple[Any]]: + """Construct the argument vectors suitable for from its argument. - Organizes arguments first by ``sdfg.arglist``, then data descriptors - by alphabetical order, then symbols by alphabetical order. + The function returns a pair of tuple, that are suitable for `fast_call()`. + The first element of is `callargs`, i.e. the full arguments, while the + second element is `initargs`, which is only used/needed the first time + an SDFG is called. - :note: If not initialized this function will initialize the memory for - the return values, however, it might also reallocate said memory. - :note: This function will also update the internal argument cache. + It is important that this function will also allocate new return values. + The array objects are managed by `self` and remain valid until this + function is called again. However, they are also returned by `self.__call__()`. + + It is also possible to pass the array, that should be used to return a value, + directly as argument. In that case the allocation for that return value will + be skipped. + + :note: In case of arrays, the returned argument vectors only contains the + pointers to the underlying memory. 
Thus it is the user's responsibility + to ensure that the memory remains allocated until the argument vector + is used. + :note: This is an advanced interface. """ + if self.argnames is None and len(args) != 0: + raise KeyError(f"Passed positional arguments to an SDFG that does not accept them.") + elif len(args) > 0 and self.argnames is not None: + positional_arguments = {aname: avalue for aname, avalue in zip(self.argnames, args)} + if not positional_arguments.keys().isdisjoint(kwargs.keys()): + raise ValueError( + f'The arguments where passed once as positional and named arguments: {set(positional_arguments.keys()).intersection(kwargs.keys())}' + ) + kwargs.update(positional_arguments) + + # NOTE: This might invalidate the elements associated to the return values of + # all argument vectors that were created before. self._initialize_return_values(kwargs) # Add the return values to the arguments, since they are part of the C signature. @@ -539,31 +611,51 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: argnames = [] sig = [] - # Type checking - cargs = [] no_view_arguments = not Config.get_bool('compiler', 'allow_view_arguments') - for i, (a, arg, atype) in enumerate(zip(argnames, arglist, argtypes)): - carg = dt.make_ctypes_argument(arg, - atype, - a, - allow_views=not no_view_arguments, - symbols=kwargs, - callback_retval_references=self._callback_retval_references) - cargs.append(carg) - - constants = self.sdfg.constants + cargs = tuple( + dt.make_ctypes_argument(aval, + atype, + aname, + allow_views=not no_view_arguments, + symbols=kwargs, + callback_retval_references=self._callback_retval_references) + for aval, atype, aname in zip(arglist, argtypes, argnames)) + symbols = self._free_symbols callparams = tuple((carg, aname) for arg, carg, aname in zip(arglist, cargs, argnames) - if not (symbolic.issymbolic(arg) and (hasattr(arg, 'name') and arg.name in constants))) - - newargs = tuple(carg for carg, aname in callparams) + if not 
((hasattr(arg, 'name') and arg.name in self._constants) and symbolic.issymbolic(arg))) + newargs = tuple(carg for carg, _aname in callparams) initargs = tuple(carg for carg, aname in callparams if aname in symbols) - self._lastargs = newargs, initargs - return self._lastargs + return (newargs, initargs) + + def convert_return_values(self) -> Union[Any, Tuple[Any, ...]]: + """Convert the stored return arrays into the Python return value(s). + + Returns `None`, a single value or a tuple. This function should only be called + after `fast_call()` has been run. + Keep in mind that it is not possible to return scalars (with the exception of + `pyobject`s); they will always be returned as an array with shape `(1,)`. + + :note: This is an advanced interface. + :note: After `fast_call()` returns, this function may only be called once. + """ + # TODO: Make sure that the function is called only once by checking it. + # NOTE: Currently it is not possible to return a scalar value, see `tests/sdfg/scalar_return.py` + if not self._return_arrays: + return None + elif self._is_single_value_ret: + assert len(self._return_arrays) == 1 + return self._return_arrays[0].item() if self._retarray_is_pyobject[0] else self._return_arrays[0] + else: + return tuple(r.item() if is_pyobj else r + for r, is_pyobj in zip(self._return_arrays, self._retarray_is_pyobject)) def clear_return_values(self): - self._create_new_arrays = True + warnings.warn( + 'The "CompiledSDFG.clear_return_values" API is deprecated, as this behaviour has' + ' become the new default, and is a noops.', DeprecationWarning) + pass def _create_array(self, _: str, dtype: np.dtype, storage: dtypes.StorageType, shape: Tuple[int], strides: Tuple[int], total_size: int): @@ -599,52 +691,76 @@ def _initialize_return_values(self, kwargs): # Clear references from last call (allow garbage collection) self._callback_retval_references.clear() - if self._initialized: - if self._return_syms == syms: - if not self._create_new_arrays: - return - else: - 
self._create_new_arrays = False - # Use stored sizes to recreate arrays (fast path) - self._return_arrays = tuple(kwargs[desc[0]] if desc[0] in kwargs else self._create_array(*desc) - for desc in self._retarray_shapes) - return + if self._initialized and self._return_syms == syms: + # Use stored sizes to recreate arrays (fast path) + self._return_arrays = tuple(kwargs[desc[0]] if desc[0] in kwargs else self._create_array(*desc) + for desc in self._retarray_shapes) + return self._return_syms = syms - self._create_new_arrays = False - - # Initialize return values with numpy arrays - self._retarray_shapes = [] self._return_arrays = [] + self._retarray_shapes = [] + self._retarray_is_pyobject = [] for arrname, arr in sorted(self.sdfg.arrays.items()): - if arrname.startswith('__return') and not arr.transient: - if arrname in kwargs: + if arrname.startswith('__return'): + if arr.transient: + raise ValueError(f'Used the special array name "{arrname}" as transient.') + + elif arrname in kwargs: + # The return value is passed as an argument, in that case store the name in `self._retarray_shapes`. 
+ warnings.warn(f'Return value "{arrname}" is passed as a regular argument.', stacklevel=2) self._return_arrays.append(kwargs[arrname]) - self._retarray_is_scalar.append(isinstance(arr, dt.Scalar)) self._retarray_shapes.append((arrname, )) - continue - if isinstance(arr, dt.Stream): + elif isinstance(arr, dt.Stream): raise NotImplementedError('Return streams are unsupported') - shape = tuple(symbolic.evaluate(s, syms) for s in arr.shape) - dtype = arr.dtype.as_numpy_dtype() - total_size = int(symbolic.evaluate(arr.total_size, syms)) - strides = tuple(symbolic.evaluate(s, syms) * arr.dtype.bytes for s in arr.strides) - shape_desc = (arrname, dtype, arr.storage, shape, strides, total_size) - self._retarray_is_scalar.append(isinstance(arr, dt.Scalar) or isinstance(arr.dtype, dtypes.pyobject)) - self._retarray_shapes.append(shape_desc) - - # Create an array with the properties of the SDFG array - arr = self._create_array(*shape_desc) - self._return_arrays.append(arr) + else: + shape = tuple(symbolic.evaluate(s, syms) for s in arr.shape) + dtype = arr.dtype.as_numpy_dtype() + total_size = int(symbolic.evaluate(arr.total_size, syms)) + strides = tuple(symbolic.evaluate(s, syms) * arr.dtype.bytes for s in arr.strides) + shape_desc = (arrname, dtype, arr.storage, shape, strides, total_size) + self._retarray_shapes.append(shape_desc) + + # Create an array with the properties of the SDFG array + return_array = self._create_array(*shape_desc) + self._return_arrays.append(return_array) + + # BUG COMPATIBILITY(PR#2206): + # In the original version `_retarray_is_pyobject` was named `_retarray_is_scalar`, however + # since scalars could not be returned on an [implementation level](https://github.com/spcl/dace/pull/1609) + # it was essentially useless. But it was used for `pyobject` in _some_ cases. And indeed, + # since `pyobject`s are essentially `void` pointers it was, in principle, possible to return/pass + # them as "scalars", read "not inside an array". 
+ # However, if the return value was passed as argument, i.e. the first `elif`, then it + # was ignored if `arr` was a `pyobject`. Only if the return value was managed by `self`, + # i.e. the `else` case, then it was considered, in a way at least. The problem was, that it was + # done using the following check: + # `isinstance(arr, dt.Scalar) or isinstance(arr.dtype, dtypes.pyobject)` + # Because of the `or` that is used, _everything_ whose `dtype` is `pyobject` was classified + # as a scalar `pyobject`, i.e. one element, even if it was in fact an array of millions of `pyobject`s. + # The correct behaviour would be to change the `or` to an `and` but then several unit + # tests (`test_pyobject_return`, `test_pyobject_return_tuple` and `test_nested_autoparse[False]` + # in `tests/python_frontend/callee_autodetect_test.py`) will fail. + # The following code is bug compatible and also allows to pass a `pyobject` directly, i.e. + # through `kwargs`. + if isinstance(arr.dtype, dtypes.pyobject): + if isinstance(arr, dt.Scalar): + # Proper scalar. + self._retarray_is_pyobject.append(True) + elif isinstance(arr, dt.Array): + # An array, let's check if it is just a wrapper for a single value. 
+ if not (len(arr.shape) == 1 and arr.shape[0] == 1): + warnings.warn(f'Decay an array of `pyobject`s with shape {arr.shape} to a single one.', + stacklevel=2) + self._retarray_is_pyobject.append(True) + else: + raise ValueError( + f'Does not know how to handle "{arrname}", which is a {type(arr).__name__} of `pyobject`.') + else: + self._retarray_is_pyobject.append(False) - def _convert_return_values(self): - # Return the values as they would be from a Python function - # NOTE: Currently it is not possible to return a scalar value, see `tests/sdfg/scalar_return.py` - if not self._return_arrays: - return None - elif len(self._return_arrays) == 1: - return self._return_arrays[0].item() if self._retarray_is_scalar[0] else self._return_arrays[0] - else: - return tuple(r.item() if scalar else r for r, scalar in zip(self._return_arrays, self._retarray_is_scalar)) + assert (not self._is_single_value_ret) or (len(self._return_arrays) == 1) + assert len(self._return_arrays) == len(self._retarray_shapes) == len(self._retarray_is_pyobject) + self._return_arrays = tuple(self._return_arrays) diff --git a/tests/codegen/external_memory_test.py b/tests/codegen/external_memory_test.py index 169e050914..47eac55ff3 100644 --- a/tests/codegen/external_memory_test.py +++ b/tests/codegen/external_memory_test.py @@ -30,7 +30,7 @@ def tester(a: dace.float64[N]): a = np.random.rand(20) if symbolic: - extra_args = dict(a=a, N=20) + extra_args = dict(N=20) else: extra_args = {} diff --git a/tests/python_frontend/return_value_test.py b/tests/python_frontend/return_value_test.py index 4a845bea0b..4e704287bc 100644 --- a/tests/python_frontend/return_value_test.py +++ b/tests/python_frontend/return_value_test.py @@ -9,7 +9,15 @@ def test_return_scalar(): def return_scalar(): return 5 - assert return_scalar() == 5 + res = return_scalar() + assert res == 5 + + # Don't be fooled by the test above the return value is an array. If you would + # add the return value annotation to the program, i.e. 
`-> dace.int32` you would + # get a validation error. + assert isinstance(res, np.ndarray) + assert res.shape == (1, ) + assert res.dtype == np.int64 def test_return_scalar_in_nested_function(): @@ -22,7 +30,15 @@ def nested_function() -> dace.int32: def return_scalar(): return nested_function() - assert return_scalar() == 5 + res = return_scalar() + assert res == 5 + + # Don't be fooled by the test above the return value is an array. If you would + # add the return value annotation to the program, i.e. `-> dace.int32` you would + # get a validation error. + assert isinstance(res, np.ndarray) + assert res.shape == (1, ) + assert res.dtype == np.int32 def test_return_array(): @@ -42,6 +58,8 @@ def return_tuple(): return 5, 6 res = return_tuple() + assert isinstance(res, tuple) + assert len(res) == 2 assert res == (5, 6) @@ -52,6 +70,8 @@ def return_array_tuple(): return 5 * np.ones(5), 6 * np.ones(6) res = return_array_tuple() + assert isinstance(res, tuple) + assert len(res) == 2 assert np.allclose(res[0], 5 * np.ones(5)) assert np.allclose(res[1], 6 * np.ones(6)) @@ -66,10 +86,25 @@ def return_void(a: dace.float64[20]): a = np.random.rand(20) ref = a + 1 - return_void(a) + res = return_void(a) + assert res is None assert np.allclose(a, ref) +def test_return_tuple_1_element(): + + @dace.program + def return_one_element_tuple(a: dace.float64[20]): + return (a + 3.5, ) + + a = np.random.rand(20) + ref = a + 3.5 + res = return_one_element_tuple(a) + assert isinstance(res, tuple) + assert len(res) == 1 + assert np.allclose(res[0], ref) + + def test_return_void_in_if(): @dace.program