-
Notifications
You must be signed in to change notification settings - Fork 12
Add more multiplication primitives #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 14 commits
b4db29e
1261006
f976ae4
f0720bd
f928811
51f2987
35fffa7
b87b408
07be9f9
93e4bac
05468a2
ac45f1b
2302504
2214ac8
61a7506
7b41db0
5cf88df
3a65ed2
2b613ee
fa0667b
dea354a
81237a6
b792e30
5d41262
cfb1072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,8 @@ constexpr __uint128_t lsbIndex(__uint128_t v) noexcept { | |
} | ||
#endif | ||
|
||
|
||
|
||
/// Core abstraction around SIMD Within A Register (SWAR). Specifies 'lanes' | ||
/// of NBits width against a type T, and provides an abstraction for performing | ||
/// SIMD operations against that primitive type T treated as a SIMD register. | ||
|
@@ -108,6 +110,17 @@ struct SWAR { | |
return result; | ||
} | ||
|
||
constexpr static auto evenLaneMask() { | ||
using S = SWAR<NBits, T>; | ||
static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); | ||
using D = SWAR<NBits * 2, T>; | ||
return S{(D::LeastSignificantBit << S::NBits) - D::LeastSignificantBit}; | ||
} | ||
|
||
constexpr static auto oddLaneMask() { | ||
return SWAR<NBits, T>{static_cast<T>(~evenLaneMask().value())}; | ||
} | ||
|
||
template <typename Range> | ||
constexpr static auto from(const Range &values) noexcept { | ||
using std::begin; using std::end; | ||
|
@@ -245,6 +258,12 @@ constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) { | |
return left.m_v == right.m_v; | ||
} | ||
|
||
template <int NBits, typename T> | ||
constexpr static auto consumeMSB(SWAR<NBits, T> s) noexcept { | ||
using S = SWAR<NBits, T>; | ||
auto msbCleared = s & ~S{S::MostSignificantBit}; | ||
return S{static_cast<T>(msbCleared.value() << 1)}; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sold on promoting this to the main header of swar. |
||
|
||
#if ZOO_USE_LEASTNBITSMASK | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,10 @@ | ||
#ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H | ||
#define ZOO_SWAR_ASSOCIATIVE_ITERATION_H | ||
|
||
#include "SWAR.h" | ||
#include "zoo/swar/SWAR.h" | ||
#include <assert.h> | ||
#include <cstdint> | ||
|
||
//#define ZOO_DEVELOPMENT_DEBUGGING | ||
#ifdef ZOO_DEVELOPMENT_DEBUGGING | ||
|
@@ -393,8 +396,7 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( | |
}; | ||
|
||
auto halver = [](auto counts) { | ||
auto msbCleared = counts & ~S{S::MostSignificantBit}; | ||
return S{msbCleared.value() << 1}; | ||
return swar::consumeMSB(counts); | ||
}; | ||
|
||
auto shifted = S{multiplier.value() << (NB - ActualBits)}; | ||
|
@@ -426,38 +428,6 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated( | |
return product; | ||
} | ||
|
||
// TODO(Jamie): Add tests from other PR. | ||
template<int ActualBits, int NB, typename T> | ||
constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( | ||
SWAR<NB, T> x, | ||
SWAR<NB, T> exponent | ||
) { | ||
using S = SWAR<NB, T>; | ||
|
||
auto operation = [](auto left, auto right, auto counts) { | ||
const auto mask = makeLaneMaskFromMSB(counts); | ||
const auto product = | ||
multiplication_OverflowUnsafe_SpecificBitCount<ActualBits>(left, right); | ||
return (product & mask) | (left & ~mask); | ||
}; | ||
|
||
// halver should work same as multiplication... i think... | ||
auto halver = [](auto counts) { | ||
auto msbCleared = counts & ~S{S::MostSignificantBit}; | ||
return S{static_cast<T>(msbCleared.value() << 1)}; | ||
}; | ||
|
||
exponent = S{static_cast<T>(exponent.value() << (NB - ActualBits))}; | ||
return associativeOperatorIterated_regressive( | ||
x, | ||
S{meta::BitmaskMaker<T, 1, NB>().value}, // neutral is lane wise.. | ||
exponent, | ||
S{S::MostSignificantBit}, | ||
operation, | ||
ActualBits, | ||
halver | ||
); | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto multiplication_OverflowUnsafe( | ||
|
@@ -475,14 +445,6 @@ struct SWAR_Pair{ | |
SWAR<NB, T> even, odd; | ||
}; | ||
|
||
template<int NB, typename T> | ||
constexpr SWAR<NB, T> doublingMask() { | ||
using S = SWAR<NB, T>; | ||
static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); | ||
using D = SWAR<NB * 2, T>; | ||
return S{(D::LeastSignificantBit << NB) - D::LeastSignificantBit}; | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto doublePrecision(SWAR<NB, T> input) { | ||
using S = SWAR<NB, T>; | ||
|
@@ -491,7 +453,7 @@ constexpr auto doublePrecision(SWAR<NB, T> input) { | |
"Precision can only be doubled for SWARs of even element count" | ||
); | ||
using RV = SWAR<NB * 2, T>; | ||
constexpr auto DM = doublingMask<NB, T>(); | ||
constexpr auto DM = SWAR<NB, T>::evenLaneMask(); | ||
return SWAR_Pair<NB * 2, T>{ | ||
RV{(input & DM).value()}, | ||
RV{(input.value() >> NB) & DM.value()} | ||
|
@@ -503,13 +465,93 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) { | |
using S = SWAR<NB, T>; | ||
static_assert(0 == NB % 2, "Only even lane-bitcounts supported"); | ||
using RV = SWAR<NB/2, T>; | ||
constexpr auto HalvingMask = doublingMask<NB/2, T>(); | ||
constexpr auto HalvingMask = SWAR<NB/2, T>::evenLaneMask(); | ||
auto | ||
evenHalf = RV{even.value()} & HalvingMask, | ||
oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2}; | ||
return evenHalf | oddHalf; | ||
} | ||
|
||
|
||
/// Pair of same-shaped SWARs returned by wideningMultiplication. | ||
/// `result` is filled from the halved-precision products and `overflow` from | ||
/// the upper halves of those products. | ||
template <int NB, typename T> struct MultiplicationResult { | ||
SWAR<NB, T> result; | ||
SWAR<NB, T> overflow; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not overflow. |
||
}; | ||
|
||
/// Multiplies two SWARs at doubled lane width. | ||
/// Both operands are split with doublePrecision into an even and an odd SWAR | ||
/// of 2*NB-bit lanes; the even parts and the odd parts are multiplied | ||
/// separately with multiplication_OverflowUnsafe. | ||
/// \return SWAR_Pair<NB * 2, T> holding {even products, odd products} | ||
template <int NB, typename T> | ||
constexpr auto | ||
doublingMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
// NOTE(review): the aliases S and D are not referenced in this body. | ||
using S = SWAR<NB, T>; using D = SWAR<NB * 2, T>; | ||
auto [l_even, l_odd] = doublePrecision(multiplicand); | ||
auto [r_even, r_odd] = doublePrecision(multiplier); | ||
auto | ||
res_even = multiplication_OverflowUnsafe(l_even, r_even), | ||
res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); | ||
return SWAR_Pair<NB * 2, T>{res_even, res_odd}; | ||
} | ||
|
||
/// Lane-wise multiplication that returns two SWARs of the input shape. | ||
/// The products are computed at doubled lane width by doublingMultiplication. | ||
/// `result` is halvePrecision of those products; `overflow` is halvePrecision | ||
/// of their upper halves after those are shifted down by NB bits. | ||
/// \return MultiplicationResult<NB, T>{result, upper halves} | ||
template <int NB, typename T> | ||
constexpr MultiplicationResult<NB, T> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the explicit return type? |
||
wideningMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
using S = SWAR<NB, T>; using D = SWAR<NB * 2, T>; | ||
// The odd-lane mask at width NB is the complement of evenLaneMask(), i.e. | ||
// the bits above the low NB bits of each 2*NB-bit lane. | ||
constexpr auto | ||
HalfLane = S::NBits, | ||
UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask().value(); | ||
auto [res_even, res_odd] = doublingMultiplication(multiplicand, multiplier); | ||
auto result = halvePrecision(res_even, res_odd); | ||
// Isolate the upper half of each wide product and move it to the low half | ||
// so that halvePrecision can pack it back into NB-bit lanes. | ||
auto | ||
over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}, | ||
over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; | ||
auto upper_lanes_overflow = halvePrecision(over_even, over_odd); | ||
return {result, upper_lanes_overflow}; | ||
} | ||
|
||
template <int NB, typename T> | ||
constexpr | ||
auto saturatedMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
using S = SWAR<NB, T>; | ||
constexpr auto One = S{S::LeastSignificantBit}; | ||
auto [result, overflow] = wideningMultiplication(multiplicand, multiplier); | ||
auto did_overflow = zoo::swar::greaterEqual(overflow, One); | ||
auto lane_mask = did_overflow.MSBtoLaneMask(); | ||
auto saturated = result | lane_mask; | ||
return S{saturated}; | ||
} | ||
|
||
|
||
// TODO(Jamie): Add tests from other PR. | ||
/// Lane-wise exponentiation built on saturatedMultiplication and driven by | ||
/// associativeOperatorIterated_regressive. | ||
/// The iteration receives S{S::LeastSignificantBit} as its second argument, | ||
/// S{S::MostSignificantBit} as its fourth, and the lane width S::NBits as the | ||
/// iteration count; the exponent is passed through unshifted. | ||
/// NOTE(review): the meaning of each of those arguments is defined by | ||
/// associativeOperatorIterated_regressive, which is not visible here -- confirm | ||
/// against its declaration. | ||
template<int NB, typename T> | ||
constexpr auto saturatingExponentiation( | ||
SWAR<NB, T> x, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good call |
||
SWAR<NB, T> exponent | ||
) { | ||
using S = SWAR<NB, T>; | ||
constexpr auto NumBitsPerLane = S::NBits; | ||
constexpr auto | ||
MSB = S{S::MostSignificantBit}, | ||
LSB = S{S::LeastSignificantBit}; | ||
|
||
// Keep the saturated product where the mask derived from the counts selects | ||
// it, and the untouched left operand everywhere else. | ||
auto operation = [](auto left, auto right, auto counts) { | ||
auto mask = makeLaneMaskFromMSB(counts); | ||
auto product = saturatedMultiplication(left, right); | ||
return (product & mask) | (left & ~mask); | ||
}; | ||
|
||
// Advance the counts by delegating to swar::consumeMSB. | ||
auto halver = [](auto counts) { | ||
return swar::consumeMSB(counts); | ||
}; | ||
|
||
return associativeOperatorIterated_regressive( | ||
x, | ||
LSB, | ||
exponent, | ||
MSB, | ||
operation, | ||
NumBitsPerLane, | ||
halver | ||
); | ||
} | ||
|
||
} | ||
|
||
#endif |
Uh oh!
There was an error while loading. Please reload this page.