-
Notifications
You must be signed in to change notification settings - Fork 11
Add more multiplication primitives #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
jamierpond
wants to merge
25
commits into
master
Choose a base branch
from
jp/overflow-multi-safe-clean
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
b4db29e
add main logic
jamierpond 1261006
add even lane mask
jamierpond f976ae4
even/odd lane mask
jamierpond f0720bd
clean up a little
jamierpond f928811
clean some more
jamierpond 51f2987
just return overflow
jamierpond 35fffa7
wow seems to be working
jamierpond b87b408
tidy a little
jamierpond 07be9f9
rm spurious tests
jamierpond 93e4bac
rename
jamierpond 05468a2
start test refactor
jamierpond ac45f1b
tidy tests
jamierpond 2302504
rm tests
jamierpond 2214ac8
add consume msb
jamierpond 61a7506
rename lower/upper
jamierpond 7b41db0
make doubling multi nicer
jamierpond 5cf88df
consolidate exponentation and make naming consistent
jamierpond 3a65ed2
works
jamierpond 2b613ee
mv tests
jamierpond fa0667b
oops
jamierpond dea354a
tidy
jamierpond 81237a6
tidy
jamierpond b792e30
tidy
jamierpond 5d41262
make pair for generatlity
jamierpond cfb1072
tidy
jamierpond File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,13 @@ std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR<NB, B> s) { | |
|
||
namespace zoo::swar { | ||
|
||
template <int NBits, typename T> | ||
constexpr static auto consumeMSB(SWAR<NBits, T> s) noexcept { | ||
using S = SWAR<NBits, T>; | ||
auto msbCleared = s & ~S{S::MostSignificantBit}; | ||
return S{msbCleared.value() << 1}; | ||
} | ||
|
||
template<typename S> | ||
constexpr auto parallelSuffix(S input) { | ||
auto | ||
|
@@ -393,8 +400,7 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( | |
}; | ||
|
||
auto halver = [](auto counts) { | ||
auto msbCleared = counts & ~S{S::MostSignificantBit}; | ||
return S{msbCleared.value() << 1}; | ||
return swar::consumeMSB(counts); | ||
}; | ||
|
||
auto shifted = S{multiplier.value() << (NB - ActualBits)}; | ||
|
@@ -426,38 +432,6 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated( | |
return product; | ||
} | ||
|
||
// TODO(Jamie): Add tests from other PR. | ||
template<int ActualBits, int NB, typename T> | ||
constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( | ||
SWAR<NB, T> x, | ||
SWAR<NB, T> exponent | ||
) { | ||
using S = SWAR<NB, T>; | ||
|
||
auto operation = [](auto left, auto right, auto counts) { | ||
const auto mask = makeLaneMaskFromMSB(counts); | ||
const auto product = | ||
multiplication_OverflowUnsafe_SpecificBitCount<ActualBits>(left, right); | ||
return (product & mask) | (left & ~mask); | ||
}; | ||
|
||
// halver should work same as multiplication... i think... | ||
auto halver = [](auto counts) { | ||
auto msbCleared = counts & ~S{S::MostSignificantBit}; | ||
return S{static_cast<T>(msbCleared.value() << 1)}; | ||
}; | ||
|
||
exponent = S{static_cast<T>(exponent.value() << (NB - ActualBits))}; | ||
return associativeOperatorIterated_regressive( | ||
x, | ||
S{meta::BitmaskMaker<T, 1, NB>().value}, // neutral is lane wise.. | ||
exponent, | ||
S{S::MostSignificantBit}, | ||
operation, | ||
ActualBits, | ||
halver | ||
); | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto multiplication_OverflowUnsafe( | ||
|
@@ -475,14 +449,6 @@ struct SWAR_Pair{ | |
SWAR<NB, T> even, odd; | ||
}; | ||
|
||
template<int NB, typename T> | ||
constexpr SWAR<NB, T> doublingMask() { | ||
using S = SWAR<NB, T>; | ||
static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); | ||
using D = SWAR<NB * 2, T>; | ||
return S{(D::LeastSignificantBit << NB) - D::LeastSignificantBit}; | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto doublePrecision(SWAR<NB, T> input) { | ||
using S = SWAR<NB, T>; | ||
|
@@ -491,7 +457,7 @@ constexpr auto doublePrecision(SWAR<NB, T> input) { | |
"Precision can only be doubled for SWARs of even element count" | ||
); | ||
using RV = SWAR<NB * 2, T>; | ||
constexpr auto DM = doublingMask<NB, T>(); | ||
constexpr auto DM = SWAR<NB, T>::evenLaneMask(); | ||
return SWAR_Pair<NB * 2, T>{ | ||
RV{(input & DM).value()}, | ||
RV{(input.value() >> NB) & DM.value()} | ||
|
@@ -503,13 +469,125 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) { | |
using S = SWAR<NB, T>; | ||
static_assert(0 == NB % 2, "Only even lane-bitcounts supported"); | ||
using RV = SWAR<NB/2, T>; | ||
constexpr auto HalvingMask = doublingMask<NB/2, T>(); | ||
constexpr auto HalvingMask = SWAR<NB/2, T>::evenLaneMask(); | ||
auto | ||
evenHalf = RV{even.value()} & HalvingMask, | ||
oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2}; | ||
|
||
return evenHalf | oddHalf; | ||
} | ||
|
||
|
||
template <int NB, typename T> struct MultiplicationResult { | ||
SWAR<NB, T> lower; | ||
SWAR<NB, T> upper; | ||
Comment on lines
+482
to
+483
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. merge |
||
}; | ||
|
||
template <int NB, typename T> | ||
constexpr | ||
auto | ||
doublePrecisionMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
auto | ||
icand = doublePrecision(multiplicand), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! never thought about omitting the prefix |
||
plier = doublePrecision(multiplier); | ||
auto | ||
lower = multiplication_OverflowUnsafe(icand.even, plier.even), | ||
upper = multiplication_OverflowUnsafe(icand.odd, plier.odd); | ||
return std::make_pair(lower, upper); | ||
} | ||
|
||
template <int NB, typename T> | ||
constexpr auto deinterleaveLanesOfPair = [](auto even, auto odd) { | ||
using S = SWAR<NB, T>; | ||
using H = SWAR<NB / 2, T>; | ||
constexpr auto | ||
HalfLane = H::NBits, | ||
UpperHalfOfLanes = H::oddLaneMask().value(); | ||
auto | ||
upper_even = even.shiftIntraLaneRight(HalfLane, S{UpperHalfOfLanes}), | ||
upper_odd = odd.shiftIntraLaneRight(HalfLane, S{UpperHalfOfLanes}); | ||
auto | ||
lower = halvePrecision(even, odd), // throws away the upper bits | ||
upper = halvePrecision(upper_even, upper_odd); // preserve the upper bits | ||
return std::make_pair(lower, upper); | ||
}; | ||
|
||
template <int NB, typename T> | ||
constexpr auto | ||
wideningMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
auto [even, odd] = doublePrecisionMultiplication(multiplicand, multiplier); | ||
auto [lower, upper] = deinterleaveLanesOfPair<NB * 2, T>(even, odd); | ||
return std::make_pair(lower, upper); | ||
} | ||
|
||
template <int NB, typename T> | ||
constexpr | ||
auto saturatingMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
using S = SWAR<NB, T>; | ||
constexpr auto One = S{S::LeastSignificantBit}; | ||
auto [result, overflow] = wideningMultiplication(multiplicand, multiplier); | ||
auto did_overflow = zoo::swar::greaterEqual(overflow, One); | ||
auto lane_mask = did_overflow.MSBtoLaneMask(); | ||
return S{result | lane_mask}; | ||
} | ||
|
||
template<int NB, typename T, typename MultiplicationFn> | ||
constexpr auto exponentiation ( | ||
SWAR<NB, T> x, | ||
SWAR<NB, T> exponent, | ||
MultiplicationFn&& multiplicationFn | ||
) { | ||
using S = SWAR<NB, T>; | ||
constexpr auto NumBitsPerLane = S::NBits; | ||
constexpr auto | ||
MSB = S{S::MostSignificantBit}, | ||
LSB = S{S::LeastSignificantBit}; | ||
|
||
auto operation = [&multiplicationFn](auto left, auto right, auto counts) { | ||
auto mask = makeLaneMaskFromMSB(counts); | ||
auto product = multiplicationFn(left, right); | ||
return (product & mask) | (left & ~mask); | ||
}; | ||
|
||
auto halver = [](auto counts) { | ||
return swar::consumeMSB(counts); | ||
}; | ||
|
||
return associativeOperatorIterated_regressive( | ||
x, | ||
LSB, | ||
exponent, | ||
MSB, | ||
operation, | ||
NumBitsPerLane, | ||
halver | ||
); | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto saturatingExponentation( | ||
SWAR<NB, T> x, | ||
SWAR<NB, T> exponent | ||
) { | ||
return exponentiation( | ||
x, | ||
exponent, | ||
saturatingMultiplication<NB, T> | ||
); | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto exponentiation_OverflowUnsafe( | ||
SWAR<NB, T> x, | ||
SWAR<NB, T> exponent | ||
) { | ||
return exponentiation( | ||
x, | ||
exponent, | ||
multiplication_OverflowUnsafe<NB, T> | ||
); | ||
} | ||
|
||
} | ||
|
||
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.