diff --git a/include/blas_helper.cuh b/include/blas_helper.cuh index 647290ee98..4dad754964 100644 --- a/include/blas_helper.cuh +++ b/include/blas_helper.cuh @@ -79,6 +79,18 @@ namespace quda template <> struct VectorType { using type = array; }; + template <> struct VectorType { + using type = array; + }; + template <> struct VectorType { + using type = array; + }; + template <> struct VectorType { + using type = array; + }; + template <> struct VectorType { + using type = array; + }; template <> struct VectorType { using type = array; }; @@ -343,37 +355,49 @@ namespace quda // native ordering template <> constexpr int n_vector() { return 2; } + template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 2; } + template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 4; } - template <> constexpr int n_vector() { return 4; } + template <> constexpr int n_vector() { return 4; } + template <> constexpr int n_vector() { return 4; } // TODO: correct? template <> constexpr int n_vector() { return 4; } + template <> constexpr int n_vector() { return QUDA_ORDER_SP_MG; } template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return QUDA_ORDER_FP; } + template <> constexpr int n_vector() { return QUDA_ORDER_FP_MG; } template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return QUDA_ORDER_FP; } + template <> constexpr int n_vector() { return QUDA_ORDER_FP_MG; } template <> constexpr int n_vector() { return 2; } // Just use float-2/float-4 ordering on CPU when not site unrolling template <> constexpr int n_vector() { return 2; } + template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 2; } template <> constexpr int n_vector() { return 4; } + template <> constexpr int n_vector() { return 4; } template <> constexpr int n_vector() { return 4; } // AoS ordering is used on CPU uses when we are site unrolling template <> constexpr int n_vector() { return 24; } + template <> constexpr int n_vector() { return 12; } template <> constexpr int n_vector() { return 6; } template <> constexpr int n_vector() { return 24; } + template <> constexpr int n_vector() { return 12; } template <> constexpr int n_vector() { return 6; } template <> constexpr int n_vector() { return 24; } + template <> constexpr int n_vector() { return 12; } template <> constexpr int n_vector() { return 6; } template <> constexpr int n_vector() { return 24; } + template <> constexpr int n_vector() { return 12; } template <> constexpr int n_vector() { return 6; } template