16
16
17
17
// ================================================================================
18
18
// this file has been auto-generated, do not modify its contents!
19
- // date: 2024-11-18 13:40:03.668017
20
- // git hash: ae0e6b16ac2d626e69bb08554044a77671f408ab
19
+ // date: 2024-11-18 13:50:24.614671
20
+ // git hash: f89cf98f79e78ab6013063dea4b4b516ce163855
21
21
// ================================================================================
22
22
23
23
#ifndef KERNEL_FLOAT_MACROS_H
@@ -1950,8 +1950,7 @@ struct multiply<bool> {
1950
1950
namespace detail {
1951
1951
template <typename Policy, typename T, size_t N>
1952
1952
struct apply_impl <Policy, ops::divide<T>, N, T, T, T> {
1953
- KERNEL_FLOAT_INLINE static void
1954
- call (ops::divide<T> fun, T* result, const T* lhs, const T* rhs) {
1953
+ KERNEL_FLOAT_INLINE static void call (ops::divide<T>, T* result, const T* lhs, const T* rhs) {
1955
1954
T rhs_rcp[N];
1956
1955
1957
1956
// Fast way to perform division is to multiply by the reciprocal
@@ -1968,13 +1967,33 @@ struct apply_impl<accurate_policy, ops::divide<T>, N, T, T, T>:
1968
1967
template <>
1969
1968
struct apply_impl <fast_policy, ops::divide<float >, 1 , float , float , float > {
1970
1969
KERNEL_FLOAT_INLINE static void
1971
- call (ops::divide<float > fun , float * result, const float * lhs, const float * rhs) {
1970
+ call (ops::divide<float >, float * result, const float * lhs, const float * rhs) {
1972
1971
*result = __fdividef (*lhs, *rhs);
1973
1972
}
1974
1973
};
1975
1974
#endif
1976
1975
} // namespace detail
1977
1976
1977
+ namespace detail {
1978
+ // Override `pow` using `log2` and `exp2`
1979
+ template <typename Policy, typename T, size_t N>
1980
+ struct apply_impl <Policy, ops::pow<T>, N, T, T, T> {
1981
+ KERNEL_FLOAT_INLINE static void call (ops::divide<T>, T* result, const T* lhs, const T* rhs) {
1982
+ T lhs_log[N];
1983
+ T result_log[N];
1984
+
1985
+ // Fast way to perform power function is using log2 and exp2
1986
+ apply_impl<Policy, ops::log2 <T>, N, T, T>::call ({}, lhs_log, lhs);
1987
+ apply_impl<Policy, ops::multiply<T>, N, T, T, T>::call ({}, result_log, lhs_log, rhs);
1988
+ apply_impl<Policy, ops::exp2 <T>, N, T, T, T>::call ({}, result, result_log);
1989
+ }
1990
+ };
1991
+
1992
+ template <typename T, size_t N>
1993
+ struct apply_impl <accurate_policy, ops::pow<T>, N, T, T, T>:
1994
+ apply_base_impl<accurate_policy, ops::pow<T>, N, T, T, T> {};
1995
+ } // namespace detail
1996
+
1978
1997
template <typename L, typename R, typename T = promoted_vector_value_type<L, R>>
1979
1998
KERNEL_FLOAT_INLINE zip_common_type<ops::divide<T>, T, T>
1980
1999
fast_divide (const L& left, const R& right) {
0 commit comments