diff --git a/doc/dpctl.rst b/doc/dpctl.rst index 2df63e0ce499..3916d3c8b01a 100644 --- a/doc/dpctl.rst +++ b/doc/dpctl.rst @@ -10,20 +10,23 @@ Interplay with the Data Parallel Control Library An example below demonstrates how the Data Parallel Extension for NumPy* can be easily combined with the device management interface provided by dpctl package. -Literally, the SYCL* queue manager interface from the dpctl package allows -to set an input queue as the currently usable queue inside the context -manager's scope. This way an array creation function from the dpnp package -which is defined inside the context will allocate the data using that queue. - .. code-block:: python :linenos: - import dpctl - import dpnp as np + import dpctl + import dpnp + + d = dpctl.select_cpu_device() + x = dpnp.array([1, 2, 3], device=d) + s = dpnp.sum(x) + + y = dpnp.linspace(0, dpnp.pi, num=10**6, device="gpu") + f = 1 + y * dpnp.sin(y) + + # locate argument where function attains global maximum + max_arg = x[dpnp.argmax(f)] + max_val = dpnp.max(f) - with dpctl.device_context("opencl:gpu"): - x = np.array([1, 2, 3]) - s = np.sum(x) For more information please refer to `Data Parallel Control Library`_ documentation. diff --git a/dpnp/backend/examples/example10.cpp b/dpnp/backend/examples/example10.cpp index 279854a57cef..b09ea9b335dc 100644 --- a/dpnp/backend/examples/example10.cpp +++ b/dpnp/backend/examples/example10.cpp @@ -50,8 +50,6 @@ void test_dpnp_random_normal(const size_t size, double dev_time_used = 0.0; double sum_dev_time_used = 0.0; - dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR); - double *result = (double *)dpnp_memory_alloc_c(size * sizeof(double)); dpnp_rng_srand_c(seed); // TODO: will move diff --git a/dpnp/backend/examples/example3.cpp b/dpnp/backend/examples/example3.cpp index 48d8acb514cc..2d516dc0b8de 100644 --- a/dpnp/backend/examples/example3.cpp +++ b/dpnp/backend/examples/example3.cpp @@ -44,7 +44,6 @@ int main(int, char **) { const size_t size = 256; - dpnp_queue_initialize_c(); std::cout << "SYCL queue is CPU: " << dpnp_queue_is_cpu_c() << std::endl; int *array1 = (int *)dpnp_memory_alloc_c(size * sizeof(int)); diff --git a/dpnp/backend/examples/example5.cpp b/dpnp/backend/examples/example5.cpp index 368f5fe441bb..1bb09e445a63 100644 --- a/dpnp/backend/examples/example5.cpp +++ b/dpnp/backend/examples/example5.cpp @@ -52,8 +52,6 @@ int main(int, char **) { const size_t size = 256; - dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR); - double *result = (double *)dpnp_memory_alloc_c(size * sizeof(double)); size_t seed = 10; diff --git a/dpnp/backend/examples/example7.cpp b/dpnp/backend/examples/example7.cpp index df4c7eacf85d..49c12c5dd51c 100644 --- a/dpnp/backend/examples/example7.cpp +++ b/dpnp/backend/examples/example7.cpp @@ -45,8 +45,6 @@ int main(int, char **) const size_t size = 2; size_t len = size * size; - dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR); - float *array = (float *)dpnp_memory_alloc_c(len * sizeof(float)); float *result1 = (float *)dpnp_memory_alloc_c(size * sizeof(float)); float *result2 = (float *)dpnp_memory_alloc_c(len * sizeof(float)); diff --git a/dpnp/backend/examples/example8.cpp b/dpnp/backend/examples/example8.cpp index 9970e9803b72..19074c5c8fc3 100644 --- a/dpnp/backend/examples/example8.cpp +++ b/dpnp/backend/examples/example8.cpp @@ -42,8 +42,6 @@ int main(int, char **) { const size_t size = 16; - dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR); - double *array = (double *)dpnp_memory_alloc_c(size * sizeof(double)); long *result = (long *)dpnp_memory_alloc_c(size * sizeof(long)); diff --git a/dpnp/backend/examples/example9.cpp b/dpnp/backend/examples/example9.cpp index 7aa7bf30737e..fedcb909a7af 100644 --- a/dpnp/backend/examples/example9.cpp +++ b/dpnp/backend/examples/example9.cpp @@ -46,8 +46,6 @@ int main(int, char **) long result = 0; long result_verification = 0; - dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR); - long *array = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(long))); diff --git a/dpnp/backend/examples/example_bs.cpp b/dpnp/backend/examples/example_bs.cpp index 847877c98228..6fcbd9b142f8 100644 --- a/dpnp/backend/examples/example_bs.cpp +++ b/dpnp/backend/examples/example_bs.cpp @@ -229,7 +229,6 @@ int main(int, char **) const double RISK_FREE = 0.1; const double VOLATILITY = 0.2; - dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR); std::cout << "SYCL queue is CPU: " << dpnp_queue_is_cpu_c() << std::endl; double *price = (double *)dpnp_memory_alloc_c(SIZE * sizeof(double)); diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index c3dfceef21a2..ccbf6fa85361 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -68,33 +68,6 @@ typedef ssize_t shape_elem_type; * @} */ -/** - * @ingroup BACKEND_API - * @brief SYCL queue initialization selector. - * - * The structure defines the parameters that are used for the library - * initialization by @ref dpnp_queue_initialize_c "dpnp_queue_initialize". - */ -enum class QueueOptions : uint32_t -{ - CPU_SELECTOR, /**< CPU side execution mode */ - GPU_SELECTOR, /**< Intel GPU side execution mode */ - AUTO_SELECTOR /**< Automatic selection based on environment variable with - @ref CPU_SELECTOR default */ -}; - -/** - * @ingroup BACKEND_API - * @brief SYCL queue initialization. - * - * Global SYCL queue initialization. - * - * @param [in] selector Select type @ref QueueOptions of the SYCL queue. - * Default @ref AUTO_SELECTOR - */ -INP_DLLEXPORT void dpnp_queue_initialize_c( - QueueOptions selector = QueueOptions::AUTO_SELECTOR); - /** * @ingroup BACKEND_API * @brief SYCL queue device status. @@ -112,8 +85,7 @@ INP_DLLEXPORT size_t dpnp_queue_is_cpu_c(); * @param [in] size_in_bytes Number of bytes for requested memory allocation. * @param [in] q_ref Reference to SYCL queue. * - * @return A pointer to newly created memory on @ref dpnp_queue_initialize_c - * "initialized SYCL device". + * @return A pointer to newly created memory on SYCL device. */ INP_DLLEXPORT char *dpnp_memory_alloc_c(DPCTLSyclQueueRef q_ref, size_t size_in_bytes); diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 53f37ee5c3c4..e7e05323ac8d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -70,7 +70,8 @@ static VSLStreamStatePtr get_rng_stream() void dpnp_rng_srand_c(size_t seed) { - backend_sycl::backend_sycl_rng_engine_init(seed); + auto &be = backend_sycl::get(); + be.set_rng_engines_seed(seed); set_rng_stream(seed); } diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp index b24cab1da72e..5e6df29d21d2 100644 --- a/dpnp/backend/src/queue_sycl.cpp +++ b/dpnp/backend/src/queue_sycl.cpp @@ -31,13 +31,7 @@ #include "dpnp_utils.hpp" #include "queue_sycl.hpp" -#if defined(DPNP_LOCAL_QUEUE) -sycl::queue *backend_sycl::queue = nullptr; -#endif -mkl_rng::mt19937 *backend_sycl::rng_engine = nullptr; -mkl_rng::mcg59 *backend_sycl::rng_mcg59_engine = nullptr; - -static void dpnpc_show_mathlib_version() +[[maybe_unused]] static void dpnpc_show_mathlib_version() { #if 1 const int len = 256; @@ -61,8 +55,8 @@ static void dpnpc_show_mathlib_version() #endif } -#if (not defined(NDEBUG)) && defined(DPNP_LOCAL_QUEUE) -static void show_available_sycl_devices() +#if (not defined(NDEBUG)) +[[maybe_unused]] static void show_available_sycl_devices() { const std::vector devices = sycl::device::get_devices(); @@ -86,25 +80,6 @@ static void show_available_sycl_devices() } #endif -#if defined(DPNP_LOCAL_QUEUE) -static sycl::device get_default_sycl_device() -{ - int dpnpc_queue_gpu = 0; - sycl::device dev = sycl::device(sycl::cpu_selector()); - - const char *dpnpc_queue_gpu_var = getenv("DPNPC_QUEUE_GPU"); - if (dpnpc_queue_gpu_var != NULL) { - dpnpc_queue_gpu = atoi(dpnpc_queue_gpu_var); - } - - if (dpnpc_queue_gpu) { - dev = sycl::device(sycl::gpu_selector()); - } - - return dev; -} -#endif - #if defined(DPNPC_TOUCH_KERNEL_TO_LINK) /** * Function push the SYCL kernels to be linked (final stage of the compilation) @@ -135,117 +110,8 @@ static long dpnp_kernels_link() } #endif -#if defined(DPNP_LOCAL_QUEUE) -// Catch asynchronous exceptions -static void exception_handler(sycl::exception_list exceptions) -{ - for (std::exception_ptr const &e : exceptions) { - try { - std::rethrow_exception(e); - } catch (sycl::exception const &e) { - std::cout << "DPNP. Caught asynchronous SYCL exception:\n" - << e.what() << std::endl; - } - } -}; -#endif - -void backend_sycl::backend_sycl_queue_init(QueueOptions selector) -{ -#if defined(DPNP_LOCAL_QUEUE) - std::chrono::high_resolution_clock::time_point t1 = - std::chrono::high_resolution_clock::now(); - - if (queue) { - backend_sycl::destroy(); - } - - sycl::device dev; - -#if not defined(NDEBUG) - show_available_sycl_devices(); -#endif - - if (QueueOptions::CPU_SELECTOR == selector) { - dev = sycl::device(sycl::cpu_selector()); - } - else if (QueueOptions::GPU_SELECTOR == selector) { - dev = sycl::device(sycl::gpu_selector()); - } - else { - dev = get_default_sycl_device(); - } - - if (is_verbose_mode()) { - sycl::property_list properties{ - sycl::property::queue::enable_profiling()}; - queue = new sycl::queue(dev, exception_handler, properties); - } - else { - queue = new sycl::queue(dev, exception_handler); - } - - std::chrono::high_resolution_clock::time_point t2 = - std::chrono::high_resolution_clock::now(); - std::chrono::duration time_queue_init = - std::chrono::duration_cast>(t2 - t1); -#else - (void)selector; -#endif - - std::chrono::high_resolution_clock::time_point t3 = - std::chrono::high_resolution_clock::now(); -#if defined(DPNPC_TOUCH_KERNEL_TO_LINK) - // Remove pre-link kernel library at startup time - dpnp_kernels_link(); -#endif - std::chrono::high_resolution_clock::time_point t4 = - std::chrono::high_resolution_clock::now(); - std::chrono::duration time_kernels_link = - std::chrono::duration_cast>(t4 - t3); - - std::cout << "Running on: " - << DPNP_QUEUE.get_device().get_info() - << "\n"; -#if defined(DPNP_LOCAL_QUEUE) - std::cout << "queue initialization time: " << time_queue_init.count() - << " (sec.)\n"; -#else - std::cout << "DPCtrl SYCL queue used\n"; -#endif - std::cout << "SYCL kernels link time: " << time_kernels_link.count() - << " (sec.)\n"; - dpnpc_show_mathlib_version(); - - std::cout << std::endl; -} - -bool backend_sycl::backend_sycl_is_cpu() -{ - sycl::queue &qptr = get_queue(); - - if (qptr.get_device().is_cpu()) { - return true; - } - - return false; -} - -void backend_sycl::backend_sycl_rng_engine_init(size_t seed) -{ - if (rng_engine) { - backend_sycl::destroy_rng_engine(); - } - rng_engine = new mkl_rng::mt19937(DPNP_QUEUE, seed); - rng_mcg59_engine = new mkl_rng::mcg59(DPNP_QUEUE, seed); -} - -void dpnp_queue_initialize_c(QueueOptions selector) -{ - backend_sycl::backend_sycl_queue_init(selector); -} - size_t dpnp_queue_is_cpu_c() { - return backend_sycl::backend_sycl_is_cpu(); + const auto &be = backend_sycl::get(); + return be.backend_sycl_is_cpu(); } diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index b59c028e1043..a50da2e539a7 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -29,7 +29,7 @@ //#pragma clang diagnostic push //#pragma clang diagnostic ignored "-Wpass-failed" -#include +#include //#pragma clang diagnostic pop #pragma clang diagnostic push @@ -38,17 +38,7 @@ #include #pragma clang diagnostic pop -#include - -#if !defined(DPNP_LOCAL_QUEUE) -#if defined __has_include -#if __has_include() -#include -#else -#include -#endif -#endif -#endif +#include #include "dpnp_pstl.hpp" // this header must be included after @@ -69,120 +59,71 @@ namespace mkl_rng = oneapi::mkl::rng; */ class backend_sycl { -#if defined(DPNP_LOCAL_QUEUE) - static sycl::queue *queue; /**< contains SYCL queue pointer initialized in - @ref backend_sycl_queue_init */ -#endif - static mkl_rng::mt19937 - *rng_engine; /**< RNG MT19937 engine ptr. initialized in @ref - backend_sycl_rng_engine_init */ - static mkl_rng::mcg59 - *rng_mcg59_engine; /**< RNG MCG59 engine ptr. initialized in @ref - backend_sycl_rng_engine_init */ - - static void destroy() +public: + ~backend_sycl() {} + + static backend_sycl &get() { - backend_sycl::destroy_rng_engine(); -#if defined(DPNP_LOCAL_QUEUE) - delete queue; - queue = nullptr; -#endif + static backend_sycl backend{}; + return backend; } - static void destroy_rng_engine() + static sycl::queue &get_queue() { - delete rng_engine; - delete rng_mcg59_engine; - - rng_engine = nullptr; - rng_mcg59_engine = nullptr; + auto &be = backend_sycl::get(); + return be.queue_; } -public: - backend_sycl() + static mkl_rng::mt19937 &get_rng_engine() { -#if defined(DPNP_LOCAL_QUEUE) - queue = nullptr; - rng_engine = nullptr; -#endif + auto &be = backend_sycl::get(); + return be.rng_mt19937_engine_; } - virtual ~backend_sycl() + static mkl_rng::mcg59 &get_rng_mcg59_engine() { - backend_sycl::destroy(); + auto &be = backend_sycl::get(); + return be.rng_mcg59_engine_; } - /** - * Explicitly disallow copying - */ - backend_sycl(const backend_sycl &) = delete; - backend_sycl &operator=(const backend_sycl &) = delete; - - /** - * Initialize @ref queue - */ - static void backend_sycl_queue_init( - QueueOptions selector = QueueOptions::CPU_SELECTOR); - - /** - * Return True if current @ref queue is related to cpu device - */ - static bool backend_sycl_is_cpu(); - - /** - * Initialize @ref rng_engine and @ref rng_mcg59_engine - */ - static void backend_sycl_rng_engine_init(size_t seed = 1); - - /** - * Return the @ref queue to the user - */ - static sycl::queue &get_queue() + template + void set_rng_engines_seed(const SeedT &seed) { -#if defined(DPNP_LOCAL_QUEUE) - if (!queue) { - backend_sycl_queue_init(); - } - - return *queue; -#else - // temporal solution. Started from Sept-2020 - DPCTLSyclQueueRef DPCtrl_queue = DPCTLQueueMgr_GetCurrentQueue(); - if (DPCtrl_queue == nullptr) { - std::string reason = - (DPCTLQueueMgr_GetQueueStackSize() == static_cast(-1)) - ? ": the queue stack is empty, probably no device is " - "available." - : "."; - throw std::runtime_error( - "Failed to create a copy of SYCL queue with default device" + - reason); - } - return *(reinterpret_cast(DPCtrl_queue)); -#endif + mkl_rng::mt19937 rng_eng_mt19937(queue_, seed); + mkl_rng::mcg59 rng_eng_mcg59(queue_, seed); + + // now that instances are created, let's move them + rng_mt19937_engine_ = std::move(rng_eng_mt19937); + rng_mcg59_engine_ = std::move(rng_eng_mcg59); } - /** - * Return the @ref rng_engine to the user - */ - static mkl_rng::mt19937 &get_rng_engine() + bool backend_sycl_is_cpu() const { - if (!rng_engine) { - backend_sycl_rng_engine_init(); - } - return *rng_engine; + const auto &dev = queue_.get_device(); + return dev.is_cpu(); } - /** - * Return the @ref rng_mcg59_engine to the user - */ - static mkl_rng::mcg59 &get_rng_mcg59_engine() +private: + static constexpr std::size_t default_seed = 1; + + backend_sycl() + : queue_{sycl::default_selector_v, + (is_verbose_mode()) + ? sycl::property_list{sycl::property::queue:: + enable_profiling()} + : sycl::property_list{}}, + rng_mt19937_engine_{queue_, default_seed}, rng_mcg59_engine_{ + queue_, default_seed} { - if (!rng_engine) { - backend_sycl_rng_engine_init(); - } - return *rng_mcg59_engine; } + + backend_sycl(backend_sycl const &) = default; + backend_sycl &operator=(backend_sycl const &) = default; + backend_sycl &operator=(backend_sycl &&) = default; + + sycl::queue queue_; + mkl_rng::mt19937 rng_mt19937_engine_; + mkl_rng::mcg59 rng_mcg59_engine_; }; #endif // QUEUE_SYCL_H diff --git a/dpnp/backend/tests/test_broadcast_iterator.cpp b/dpnp/backend/tests/test_broadcast_iterator.cpp index 58587b1774ff..0fa4a0cf2ab1 100644 --- a/dpnp/backend/tests/test_broadcast_iterator.cpp +++ b/dpnp/backend/tests/test_broadcast_iterator.cpp @@ -30,8 +30,6 @@ #include "dpnp_iterator.hpp" #include "dpnp_test_utils.hpp" -// TODO need to fix build procedure and remove this workaround. Issue #551 -#define DPNP_LOCAL_QUEUE 1 #include "queue_sycl.hpp" struct IteratorParameters diff --git a/dpnp/backend/tests/test_utils_iterator.cpp b/dpnp/backend/tests/test_utils_iterator.cpp index f8f6748f8eef..4b20408b10c2 100644 --- a/dpnp/backend/tests/test_utils_iterator.cpp +++ b/dpnp/backend/tests/test_utils_iterator.cpp @@ -30,8 +30,6 @@ #include "dpnp_iterator.hpp" #include "dpnp_test_utils.hpp" -// TODO need to fix build procedure and remove this workaround. Issue #551 -#define DPNP_LOCAL_QUEUE 1 #include "queue_sycl.hpp" using namespace std; diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 528b03e3b583..d2c454d62ec4 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -194,17 +194,10 @@ cdef extern from "dpnp_iface_fptr.hpp": DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + -cdef extern from "dpnp_iface.hpp" namespace "QueueOptions": # need this namespace for Enum import - cdef enum QueueOptions "QueueOptions": - CPU_SELECTOR - GPU_SELECTOR - AUTO_SELECTOR - cdef extern from "constants.hpp": void dpnp_python_constants_initialize_c(void * py_none, void * py_nan) cdef extern from "dpnp_iface.hpp": - void dpnp_queue_initialize_c(QueueOptions selector) char * dpnp_memory_alloc_c(size_t size_in_bytes) except + void dpnp_memory_free_c(void * ptr) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 3013cd76094f..e8012e74bbaf 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -83,12 +83,7 @@ cpdef dpnp_queue_initialize(): It takes visible time and needs to be done in the module loading procedure. """ cdef time_t seed_from_time - cdef QueueOptions queue_type = CPU_SELECTOR - if (config.__DPNP_QUEUE_GPU__): - queue_type = GPU_SELECTOR - - dpnp_queue_initialize_c(queue_type) dpnp_python_constants_initialize_c(< void*> None, < void * > dpnp.nan) diff --git a/examples/example10.py b/examples/example10.py index 655d5d41c00b..19a4dff99cb2 100644 --- a/examples/example10.py +++ b/examples/example10.py @@ -87,12 +87,7 @@ def example(): if __name__ == "__main__": - try: - import dpctl + import dpctl - with dpctl.device_context("opencl:gpu") as gpu_queue: - gpu_queue.get_sycl_device().print_device_info() - example() - - except ImportError: - example() + dpctl.select_default_device().print_device_info() + example() diff --git a/tests/third_party/intel/zero-copy-test1.py b/tests/third_party/intel/zero-copy-test1.py index 44d2d776e9b6..f99565df5e68 100644 --- a/tests/third_party/intel/zero-copy-test1.py +++ b/tests/third_party/intel/zero-copy-test1.py @@ -62,12 +62,7 @@ def dppy_f(array_like_obj): hb = np.arange(0, global_size, dtype="i4") da = DuckUSMArray(hb.shape, dtype=hb.dtype, host_buffer=hb) - if dpctl.has_gpu_queues(dpctl.backend_type.level_zero): - print("\nScheduling on OpenCL GPU\n") - with dpctl.device_context("opencl:gpu") as gpu_queue: - dppy_f[global_size, dppy.DEFAULT_LOCAL_SIZE](da) - else: - print("\nSkip scheduling on OpenCL GPU\n") + dppy_f[global_size, dppy.DEFAULT_LOCAL_SIZE](da) assert da[0] == 10