Skip to content

Commit 66fa2ae

Browse files
committed
improve speed of the Gamma loss function
1 parent 2b5c09f commit 66fa2ae

File tree

4 files changed: 54 additions and 55 deletions.

shared/libebm/compute/avx2_ebm/avx2_32.cpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME {
3434
#error DEFINED_ZONE_NAME must be defined
3535
#endif // DEFINED_ZONE_NAME
3636

37-
// this is super-special and included inside the zone namespace
38-
#include "objective_registrations.hpp"
39-
4037
static constexpr size_t k_cAlignment = 32;
41-
4238
struct alignas(k_cAlignment) Avx2_32_Float;
39+
struct alignas(k_cAlignment) Avx2_32_Int;
40+
41+
template<bool bNegateInput = false,
42+
bool bNaNPossible = true,
43+
bool bUnderflowPossible = true,
44+
bool bOverflowPossible = true>
45+
inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;
46+
template<bool bNegateOutput = false,
47+
bool bNaNPossible = true,
48+
bool bNegativePossible = true,
49+
bool bZeroPossible = true,
50+
bool bPositiveInfinityPossible = true>
51+
inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept;
52+
53+
// this is super-special and included inside the zone namespace
54+
#include "objective_registrations.hpp"
4355

4456
struct alignas(k_cAlignment) Avx2_32_Int final {
4557
friend Avx2_32_Float;
@@ -138,18 +150,6 @@ struct alignas(k_cAlignment) Avx2_32_Int final {
138150
static_assert(std::is_standard_layout<Avx2_32_Int>::value && std::is_trivially_copyable<Avx2_32_Int>::value,
139151
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");
140152

141-
template<bool bNegateInput = false,
142-
bool bNaNPossible = true,
143-
bool bUnderflowPossible = true,
144-
bool bOverflowPossible = true>
145-
inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;
146-
template<bool bNegateOutput = false,
147-
bool bNaNPossible = true,
148-
bool bNegativePossible = true,
149-
bool bZeroPossible = true,
150-
bool bPositiveInfinityPossible = true>
151-
inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept;
152-
153153
struct alignas(k_cAlignment) Avx2_32_Float final {
154154
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
155155
friend Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;

shared/libebm/compute/avx512f_ebm/avx512f_32.cpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME {
3434
#error DEFINED_ZONE_NAME must be defined
3535
#endif // DEFINED_ZONE_NAME
3636

37-
// this is super-special and included inside the zone namespace
38-
#include "objective_registrations.hpp"
39-
4037
static constexpr size_t k_cAlignment = 64;
41-
4238
struct alignas(k_cAlignment) Avx512f_32_Float;
39+
struct alignas(k_cAlignment) Avx512f_32_Int;
40+
41+
template<bool bNegateInput = false,
42+
bool bNaNPossible = true,
43+
bool bUnderflowPossible = true,
44+
bool bOverflowPossible = true>
45+
inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;
46+
template<bool bNegateOutput = false,
47+
bool bNaNPossible = true,
48+
bool bNegativePossible = true,
49+
bool bZeroPossible = true,
50+
bool bPositiveInfinityPossible = true>
51+
inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept;
52+
53+
// this is super-special and included inside the zone namespace
54+
#include "objective_registrations.hpp"
4355

4456
struct alignas(k_cAlignment) Avx512f_32_Int final {
4557
friend Avx512f_32_Float;
@@ -152,18 +164,6 @@ struct alignas(k_cAlignment) Avx512f_32_Int final {
152164
static_assert(std::is_standard_layout<Avx512f_32_Int>::value && std::is_trivially_copyable<Avx512f_32_Int>::value,
153165
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");
154166

155-
template<bool bNegateInput = false,
156-
bool bNaNPossible = true,
157-
bool bUnderflowPossible = true,
158-
bool bOverflowPossible = true>
159-
inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;
160-
template<bool bNegateOutput = false,
161-
bool bNaNPossible = true,
162-
bool bNegativePossible = true,
163-
bool bZeroPossible = true,
164-
bool bPositiveInfinityPossible = true>
165-
inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept;
166-
167167
struct alignas(k_cAlignment) Avx512f_32_Float final {
168168
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
169169
friend Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;

shared/libebm/compute/cpu_ebm/cpu_64.cpp

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,24 @@ namespace DEFINED_ZONE_NAME {
3131
#error DEFINED_ZONE_NAME must be defined
3232
#endif // DEFINED_ZONE_NAME
3333

34+
struct Cpu_64_Float;
35+
struct Cpu_64_Int;
36+
37+
template<bool bNegateInput = false,
38+
bool bNaNPossible = true,
39+
bool bUnderflowPossible = true,
40+
bool bOverflowPossible = true>
41+
inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;
42+
template<bool bNegateOutput = false,
43+
bool bNaNPossible = true,
44+
bool bNegativePossible = true,
45+
bool bZeroPossible = true,
46+
bool bPositiveInfinityPossible = true>
47+
inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept;
48+
3449
// this is super-special and included inside the zone namespace
3550
#include "objective_registrations.hpp"
3651

37-
struct Cpu_64_Float;
38-
3952
struct Cpu_64_Int final {
4053
friend Cpu_64_Float;
4154
friend inline Cpu_64_Float IfEqual(const Cpu_64_Int& cmp1,
@@ -96,18 +109,6 @@ struct Cpu_64_Int final {
96109
static_assert(std::is_standard_layout<Cpu_64_Int>::value && std::is_trivially_copyable<Cpu_64_Int>::value,
97110
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");
98111

99-
template<bool bNegateInput = false,
100-
bool bNaNPossible = true,
101-
bool bUnderflowPossible = true,
102-
bool bOverflowPossible = true>
103-
inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;
104-
template<bool bNegateOutput = false,
105-
bool bNaNPossible = true,
106-
bool bNegativePossible = true,
107-
bool bZeroPossible = true,
108-
bool bPositiveInfinityPossible = true>
109-
inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept;
110-
111112
struct Cpu_64_Float final {
112113
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
113114
friend Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;

shared/libebm/compute/objectives/GammaDevianceRegressionObjective.hpp

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -52,25 +52,23 @@ template<typename TFloat> struct GammaDevianceRegressionObjective : RegressionOb
5252
inline double FinishMetric(const double metricSum) const noexcept { return 2.0 * metricSum; }
5353

5454
GPU_DEVICE inline TFloat CalcMetric(const TFloat& score, const TFloat& target) const noexcept {
55-
const TFloat prediction = Exp(score); // log link function
56-
const TFloat frac = target / prediction;
55+
const TFloat invPrediction = Exp<true>(score); // log link function
56+
const TFloat frac = target * invPrediction;
5757
const TFloat metric = frac - 1.0 - Log(frac);
5858
return metric;
5959
}
6060

6161
GPU_DEVICE inline TFloat CalcGradient(const TFloat& score, const TFloat& target) const noexcept {
62-
const TFloat prediction = Exp(score); // log link function
63-
const TFloat frac = target / prediction;
64-
const TFloat gradient = 1.0 - frac;
62+
const TFloat invPrediction = Exp<true>(score); // log link function
63+
const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0);
6564
return gradient;
6665
}
6766

6867
GPU_DEVICE inline GradientHessian<TFloat> CalcGradientHessian(
6968
const TFloat& score, const TFloat& target) const noexcept {
70-
const TFloat prediction = Exp(score); // log link function
71-
const TFloat frac = target / prediction;
72-
const TFloat gradient = 1.0 - frac;
73-
const TFloat hessian = frac;
69+
const TFloat invPrediction = Exp<true>(score); // log link function
70+
const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0);
71+
const TFloat hessian = target * invPrediction;
7472
return MakeGradientHessian(gradient, hessian);
7573
}
7674
};

0 commit comments

Comments
 (0)