HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_bf16.h
Go to the documentation of this file.
1
85#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
86#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
87
88#if !defined(__HIPCC_RTC__)
89#include <hip/amd_detail/amd_hip_common.h>
90#endif // !defined(__HIPCC_RTC__)
91
92#include "amd_hip_vector_types.h" // float2 etc
93#include "device_library_decls.h" // ocml conversion functions
94#include "math_fwd.h" // ocml device functions
95
96#if defined(__HIPCC_RTC__)
97#define __HOST_DEVICE__ __device__ static
98#else
99#include <algorithm>
100#include <climits>
101#include <cmath>
102#define __HOST_DEVICE__ __host__ __device__ static inline
103#endif
104
105#define HIPRT_ONE_BF16 __float2bfloat16(1.0f)
106#define HIPRT_ZERO_BF16 __float2bfloat16(0.0f)
107#define HIPRT_INF_BF16 __ushort_as_bfloat16((unsigned short)0x7F80U)
108#define HIPRT_MAX_NORMAL_BF16 __ushort_as_bfloat16((unsigned short)0x7F7FU)
109#define HIPRT_MIN_DENORM_BF16 __ushort_as_bfloat16((unsigned short)0x0001U)
110#define HIPRT_NAN_BF16 __ushort_as_bfloat16((unsigned short)0x7FFFU)
111#define HIPRT_NEG_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x8000U)
112
113// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
114// different machines. These naive checks should prevent some undefined behavior on systems which
115// have different sizes for basic types.
116#if !defined(__HIPCC_RTC__)
117static_assert(CHAR_BIT == 8, "byte size should be of 8 bits");
118#endif
119static_assert(sizeof(unsigned short) == 2, "size of unsigned short should be 2 bytes");
120
123 unsigned short data;
124};
125
130};
131
136__HOST_DEVICE__ inline float __bfloat162float(__hip_bfloat16 a) {
137 unsigned int uval = 0;
138 uval = a.data << 16;
139 union {
140 unsigned int u32;
141 float fp32;
142 } u = {uval};
143 return u.fp32;
144}
145
150__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f) {
151 __hip_bfloat16 ret;
152 union {
153 float fp32;
154 unsigned int u32;
155 } u = {f};
156 if (~u.u32 & 0x7f800000) {
157 // When the exponent bits are not all 1s, then the value is zero, normal,
158 // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
159 // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
160 // This causes the bfloat16's mantissa to be incremented by 1 if the 16
161 // least significant bits of the float mantissa are greater than 0x8000,
162 // or if they are equal to 0x8000 and the least significant bit of the
163 // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
164 // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
165 // has the value 0x7f, then incrementing it causes it to become 0x00 and
166 // the exponent is incremented by one, which is the next higher FP value
167 // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
168 // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
169 // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
170 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
171 // incrementing it causes it to become an exponent of 0xFF and a mantissa
172 // of 0x00, which is Inf, the next higher value to the unrounded value.
173 u.u32 += 0x7fff + ((u.u32 >> 16) & 1); // Round to nearest, round to even
174 } else if (u.u32 & 0xffff) {
175 // When all of the exponent bits are 1, the value is Inf or NaN.
176 // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
177 // mantissa bit. Quiet NaN is indicated by the most significant mantissa
178 // bit being 1. Signaling NaN is indicated by the most significant
179 // mantissa bit being 0 but some other bit(s) being 1. If any of the
180 // lower 16 bits of the mantissa are 1, we set the least significant bit
181 // of the bfloat16 mantissa, in order to preserve signaling NaN in case
182 // the bloat16's mantissa bits are all 0.
183 u.u32 |= 0x10000; // Preserve signaling NaN
184 }
185
186 ret.data = (u.u32 >> 16);
187 return ret;
188}
189
194__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
195 return float2{__bfloat162float(a.x), __bfloat162float(a.y)};
196}
197
203 return __hip_bfloat162{a, a};
204}
205
210__HOST_DEVICE__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
211
216__HOST_DEVICE__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
217
222__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a) {
223 return __float2bfloat16((float)a);
224}
225
232}
233
239 return __hip_bfloat162{a, b};
240}
241
246__HOST_DEVICE__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
247
253 return __hip_bfloat162{a.y, a.y};
254}
255
260__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162float(a.y); }
261
267 const __hip_bfloat162 b) {
268 return __hip_bfloat162{a.y, b.y};
269}
270
275__HOST_DEVICE__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
276
282 return __hip_bfloat162{a.x, a.x};
283}
284
289__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162float(a.x); }
290
296 return __hip_bfloat162{a.y, a.x};
297}
298
304 return __hip_bfloat162{a.x, b.x};
305}
306
311__HOST_DEVICE__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
312 return __hip_bfloat16{(unsigned short)a};
313}
314
319__HOST_DEVICE__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
320 return __hip_bfloat16{a};
321}
322
323
328__HOST_DEVICE__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
330}
331
336__HOST_DEVICE__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
338}
339
344__HOST_DEVICE__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
346}
347
353 const __hip_bfloat16 c) {
354 return __float2bfloat16(
355 __ocml_fma_f32(__bfloat162float(a), __bfloat162float(b), __bfloat162float(c)));
356}
357
362__HOST_DEVICE__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
364}
365
370__HOST_DEVICE__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
371 auto ret = a;
372 ret.data ^= 0x8000;
373 return ret;
374}
375
380__HOST_DEVICE__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
381 auto ret = a;
382 ret.data &= 0x7FFF;
383 return ret;
384}
385
394
399__HOST_DEVICE__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
400 return __hip_bfloat162{__habs(a.x), __habs(a.y)};
401}
402
407__HOST_DEVICE__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
408 return __hip_bfloat162{__hadd(a.x, b.x), __hadd(a.y, b.y)};
409}
410
416 const __hip_bfloat162 c) {
417 return __hip_bfloat162{__hfma(a.x, b.x, c.x), __hfma(a.y, b.y, c.y)};
418}
419
424__HOST_DEVICE__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
425 return __hip_bfloat162{__hmul(a.x, b.x), __hmul(a.y, b.y)};
426}
427
432__HOST_DEVICE__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
433 return __hip_bfloat162{__hneg(a.x), __hneg(a.y)};
434}
435
440__HOST_DEVICE__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
441 return __hip_bfloat162{__hsub(a.x, b.x), __hsub(a.y, b.y)};
442}
443
448__HOST_DEVICE__ __hip_bfloat16 operator*(const __hip_bfloat16& l, const __hip_bfloat16& r) {
449 return __hmul(l, r);
450}
451
457 l = __hmul(l, r);
458 return l;
459}
460
465__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l) { return l; }
466
471__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l, const __hip_bfloat16& r) {
472 return __hadd(l, r);
473}
474
479__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l) { return __hneg(l); }
480
485__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l, const __hip_bfloat16& r) {
486 return __hsub(l, r);
487}
488
493__HOST_DEVICE__ __hip_bfloat16 operator++(__hip_bfloat16& l, const int) {
494 auto ret = l;
495 l = __hadd(l, HIPRT_ONE_BF16);
496 return ret;
497}
498
504 l = __hadd(l, HIPRT_ONE_BF16);
505 return l;
506}
507
512__HOST_DEVICE__ __hip_bfloat16 operator--(__hip_bfloat16& l, const int) {
513 auto ret = l;
514 l = __hsub(l, HIPRT_ONE_BF16);
515 return ret;
516}
517
523 l = __hsub(l, HIPRT_ONE_BF16);
524 return l;
525}
526
532 l = __hadd(l, r);
533 return l;
534}
535
541 l = __hsub(l, r);
542 return l;
543}
544
549__HOST_DEVICE__ __hip_bfloat16 operator/(const __hip_bfloat16& l, const __hip_bfloat16& r) {
550 return __hdiv(l, r);
551}
552
558 l = __hdiv(l, r);
559 return l;
560}
561
566__HOST_DEVICE__ __hip_bfloat162 operator*(const __hip_bfloat162& l, const __hip_bfloat162& r) {
567 return __hmul2(l, r);
568}
569
575 l = __hmul2(l, r);
576 return l;
577}
578
583__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l) { return l; }
584
589__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l, const __hip_bfloat162& r) {
590 return __hadd2(l, r);
591}
592
597__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l) { return __hneg2(l); }
598
603__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l, const __hip_bfloat162& r) {
604 return __hsub2(l, r);
605}
606
611__HOST_DEVICE__ __hip_bfloat162 operator++(__hip_bfloat162& l, const int) {
612 auto ret = l;
613 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
614 return ret;
615}
616
622 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
623 return l;
624}
625
630__HOST_DEVICE__ __hip_bfloat162 operator--(__hip_bfloat162& l, const int) {
631 auto ret = l;
632 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
633 return ret;
634}
635
641 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
642 return l;
643}
644
650 l = __hadd2(l, r);
651 return l;
652}
653
659 l = __hsub2(l, r);
660 return l;
661}
662
667__HOST_DEVICE__ __hip_bfloat162 operator/(const __hip_bfloat162& l, const __hip_bfloat162& r) {
668 return __h2div(l, r);
669}
670
676 l = __h2div(l, r);
677 return l;
678}
679
684__HOST_DEVICE__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
685 return __bfloat162float(a) == __bfloat162float(b);
686}
687
692__HOST_DEVICE__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
693 return !(__bfloat162float(a) < __bfloat162float(b)) &&
695}
696
701__HOST_DEVICE__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
702 return __bfloat162float(a) > __bfloat162float(b);
703}
704
709__HOST_DEVICE__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
710 return !(__bfloat162float(a) <= __bfloat162float(b));
711}
712
717__HOST_DEVICE__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
718 return __bfloat162float(a) >= __bfloat162float(b);
719}
720
725__HOST_DEVICE__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
726 return !(__bfloat162float(a) < __bfloat162float(b));
727}
728
733__HOST_DEVICE__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
734 return __bfloat162float(a) != __bfloat162float(b);
735}
736
741__HOST_DEVICE__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
742 return !(__bfloat162float(a) == __bfloat162float(b));
743}
744
749__HOST_DEVICE__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
750#if __HIP_DEVICE_COMPILE__
751 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
752#else
753 return __float2bfloat16(std::max(__bfloat162float(a), __bfloat162float(b)));
754#endif
755}
756
761__HOST_DEVICE__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
762#if __HIP_DEVICE_COMPILE__
763 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
764#else
765 return __float2bfloat16(std::min(__bfloat162float(a), __bfloat162float(b)));
766#endif
767}
768
773__HOST_DEVICE__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
774 return __bfloat162float(a) < __bfloat162float(b);
775}
776
781__HOST_DEVICE__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
782 return !(__bfloat162float(a) >= __bfloat162float(b));
783}
784
789__HOST_DEVICE__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
790 return __bfloat162float(a) <= __bfloat162float(b);
791}
792
797__HOST_DEVICE__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
798 return !(__bfloat162float(a) > __bfloat162float(b));
799}
800
805__HOST_DEVICE__ int __hisinf(const __hip_bfloat16 a) {
806 unsigned short sign = a.data & 0x8000U;
807#if __HIP_DEVICE_COMPILE__
808 int res = __ocml_isinf_f32(__bfloat162float(a));
809#else
810 int res = std::isinf(__bfloat162float(a)) ? 1 : 0;
811#endif
812 return (res == 0) ? res : ((sign != 0U) ? -res : res);
813}
814
819__HOST_DEVICE__ bool __hisnan(const __hip_bfloat16 a) {
820#if __HIP_DEVICE_COMPILE__
821 return __ocml_isnan_f32(__bfloat162float(a));
822#else
823 return std::isnan(__bfloat162float(a));
824#endif
825}
826
831__HOST_DEVICE__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
832 return __heq(a.x, b.x) && __heq(a.y, b.y);
833}
834
839__HOST_DEVICE__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
840 return __hequ(a.x, b.x) && __hequ(a.y, b.y);
841}
842
847__HOST_DEVICE__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
848 return __hge(a.x, b.x) && __hge(a.y, b.y);
849}
850
855__HOST_DEVICE__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
856 return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
857}
858
863__HOST_DEVICE__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
864 return __hgt(a.x, b.x) && __hgt(a.y, b.y);
865}
866
871__HOST_DEVICE__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
872 return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
873}
874
879__HOST_DEVICE__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
880 return __hle(a.x, b.x) && __hle(a.y, b.y);
881}
882
887__HOST_DEVICE__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
888 return __hleu(a.x, b.x) && __hleu(a.y, b.y);
889}
890
895__HOST_DEVICE__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
896 return __hlt(a.x, b.x) && __hlt(a.y, b.y);
897}
898
903__HOST_DEVICE__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
904 return __hltu(a.x, b.x) && __hltu(a.y, b.y);
905}
906
911__HOST_DEVICE__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
912 return __hne(a.x, b.x) && __hne(a.y, b.y);
913}
914
919__HOST_DEVICE__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
920 return __hneu(a.x, b.x) && __hneu(a.y, b.y);
921}
922
927__HOST_DEVICE__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
928 return __hip_bfloat162{{__heq(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
929 {__heq(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
930}
931
936__HOST_DEVICE__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
937 return __hip_bfloat162{{__hge(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
938 {__hge(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
939}
940
945__HOST_DEVICE__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
946 return __hip_bfloat162{{__hgt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
947 {__hgt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
948}
949
954__HOST_DEVICE__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
955 return __hip_bfloat162{{__hisnan(a.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
956 {__hisnan(a.y) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
957}
958
963__HOST_DEVICE__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
964 return __hip_bfloat162{{__hle(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
965 {__hle(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
966}
967
972__HOST_DEVICE__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
973 return __hip_bfloat162{{__hlt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
974 {__hlt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
975}
976
981__HOST_DEVICE__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
982 return __hip_bfloat162{__hmax(a.x, b.x), __hmax(a.y, b.y)};
983}
984
989__HOST_DEVICE__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
990 return __hip_bfloat162{__hmin(a.x, b.x), __hmin(a.y, b.y)};
991}
992
997__HOST_DEVICE__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
998 return __hip_bfloat162{{__hne(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
999 {__hne(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
1000}
1001
1006__HOST_DEVICE__ bool operator==(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1007 return __heq(l, r);
1008}
1009
1014__HOST_DEVICE__ bool operator!=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1015 return __hne(l, r);
1016}
1017
1022__HOST_DEVICE__ bool operator<(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1023 return __hlt(l, r);
1024}
1025
1030__HOST_DEVICE__ bool operator<=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1031 return __hle(l, r);
1032}
1033
1038__HOST_DEVICE__ bool operator>(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1039 return __hgt(l, r);
1040}
1041
1046__HOST_DEVICE__ bool operator>=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1047 return __hge(l, r);
1048}
1049
1054__HOST_DEVICE__ bool operator==(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1055 return __heq(l.x, r.x) && __heq(l.y, r.y);
1056}
1057
1062__HOST_DEVICE__ bool operator!=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1063 return __hne(l.x, r.x) || __hne(l.y, r.y);
1064}
1065
1070__HOST_DEVICE__ bool operator<(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1071 return __hlt(l.x, r.x) && __hlt(l.y, r.y);
1072}
1073
1078__HOST_DEVICE__ bool operator<=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1079 return __hle(l.x, r.x) && __hle(l.y, r.y);
1080}
1081
1086__HOST_DEVICE__ bool operator>(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1087 return __hgt(l.x, r.x) && __hgt(l.y, r.y);
1088}
1089
1094__HOST_DEVICE__ bool operator>=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1095 return __hge(l.x, r.x) && __hge(l.y, r.y);
1096}
1097
1103 return __float2bfloat16(__ocml_ceil_f32(__bfloat162float(h)));
1104}
1105
1111 return __float2bfloat16(__ocml_cos_f32(__bfloat162float(h)));
1112}
1113
1119 return __float2bfloat16(__ocml_exp_f32(__bfloat162float(h)));
1120}
1121
1127 return __float2bfloat16(__ocml_exp10_f32(__bfloat162float(h)));
1128}
1129
1135 return __float2bfloat16(__ocml_exp2_f32(__bfloat162float(h)));
1136}
1137
1143 return __float2bfloat16(__ocml_floor_f32(__bfloat162float(h)));
1144}
1145
1151 return __float2bfloat16(__ocml_log_f32(__bfloat162float(h)));
1152}
1153
1159 return __float2bfloat16(__ocml_log10_f32(__bfloat162float(h)));
1160}
1161
1167 return __float2bfloat16(__ocml_log2_f32(__bfloat162float(h)));
1168}
1169
1175 return __float2bfloat16(1.0f / (__bfloat162float(h)));
1176}
1177
1183 return __float2bfloat16(__ocml_rint_f32(__bfloat162float(h)));
1184}
1185
1191 return __float2bfloat16(__ocml_rsqrt_f32(__bfloat162float(h)));
1192}
1193
1199 return __float2bfloat16(__ocml_sin_f32(__bfloat162float(h)));
1200}
1201
1207 return __float2bfloat16(__ocml_sqrt_f32(__bfloat162float(h)));
1208}
1209
1215 return __float2bfloat16(__ocml_trunc_f32(__bfloat162float(h)));
1216}
1217
1223 return __hip_bfloat162{hceil(h.x), hceil(h.y)};
1224}
1225
1231 return __hip_bfloat162{hcos(h.x), hcos(h.y)};
1232}
1233
1239 return __hip_bfloat162{hexp(h.x), hexp(h.y)};
1240}
1241
1247 return __hip_bfloat162{hexp10(h.x), hexp10(h.y)};
1248}
1249
1255 return __hip_bfloat162{hexp2(h.x), hexp2(h.y)};
1256}
1257
1263 return __hip_bfloat162{hfloor(h.x), hfloor(h.y)};
1264}
1265
1271 return __hip_bfloat162{hlog(h.x), hlog(h.y)};
1272}
1273
1279 return __hip_bfloat162{hlog10(h.x), hlog10(h.y)};
1280}
1281
1287 return __hip_bfloat162{hlog2(h.x), hlog2(h.y)};
1288}
1289
1295 return __hip_bfloat162{hrcp(h.x), hrcp(h.y)};
1296}
1297
1303 return __hip_bfloat162{hrint(h.x), hrint(h.y)};
1304}
1305
1311 return __hip_bfloat162{hrsqrt(h.x), hrsqrt(h.y)};
1312}
1313
1319 return __hip_bfloat162{hsin(h.x), hsin(h.y)};
1320}
1321
1327 return __hip_bfloat162{hsqrt(h.x), hsqrt(h.y)};
1328}
1329
1335 return __hip_bfloat162{htrunc(h.x), htrunc(h.y)};
1336}
1337#endif
Contains declarations for types and functions in device library. Uses int64_t and uint64_t instead of...
__HOST_DEVICE__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:540
__HOST_DEVICE__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:557
__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:465
__HOST_DEVICE__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:370
__HOST_DEVICE__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:336
__HOST_DEVICE__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:448
__HOST_DEVICE__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:493
__HOST_DEVICE__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:531
__HOST_DEVICE__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:456
__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:479
__HOST_DEVICE__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:380
__device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:352
__HOST_DEVICE__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:362
__HOST_DEVICE__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:344
__HOST_DEVICE__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:512
__HOST_DEVICE__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:549
__HOST_DEVICE__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:328
__HOST_DEVICE__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:701
__HOST_DEVICE__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:797
__HOST_DEVICE__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:781
__HOST_DEVICE__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:717
__HOST_DEVICE__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:733
__HOST_DEVICE__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:773
__HOST_DEVICE__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:741
__HOST_DEVICE__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:819
__HOST_DEVICE__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:709
__HOST_DEVICE__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1030
__HOST_DEVICE__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:749
__HOST_DEVICE__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:725
__HOST_DEVICE__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:805
__HOST_DEVICE__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:761
__HOST_DEVICE__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1006
__HOST_DEVICE__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1022
__HOST_DEVICE__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:684
__HOST_DEVICE__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1014
__HOST_DEVICE__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:692
__HOST_DEVICE__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1046
__HOST_DEVICE__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition amd_hip_bf16.h:789
__HOST_DEVICE__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1038
__HOST_DEVICE__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:911
__HOST_DEVICE__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:954
__HOST_DEVICE__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:903
__HOST_DEVICE__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:831
__HOST_DEVICE__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:839
__HOST_DEVICE__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:963
__HOST_DEVICE__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:887
__HOST_DEVICE__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:871
__HOST_DEVICE__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:972
__HOST_DEVICE__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:927
__HOST_DEVICE__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:936
__HOST_DEVICE__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:847
__HOST_DEVICE__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns min of two elements.
Definition amd_hip_bf16.h:989
__HOST_DEVICE__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:997
__HOST_DEVICE__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:879
__HOST_DEVICE__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:919
__HOST_DEVICE__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns max of two elements.
Definition amd_hip_bf16.h:981
__HOST_DEVICE__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:855
__HOST_DEVICE__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:863
__HOST_DEVICE__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:945
__HOST_DEVICE__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:895
__HOST_DEVICE__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:407
__device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:415
__HOST_DEVICE__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:390
__HOST_DEVICE__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:432
__HOST_DEVICE__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:440
__HOST_DEVICE__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:424
__HOST_DEVICE__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:399
__HOST_DEVICE__ float __bfloat162float(__hip_bfloat16 a)
Converts bfloat16 to float.
Definition amd_hip_bf16.h:136
__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f)
Converts float to bfloat16.
Definition amd_hip_bf16.h:150
__HOST_DEVICE__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a)
Moves bfloat16 value to bfloat162.
Definition amd_hip_bf16.h:202
__HOST_DEVICE__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts high 16 bits from each and combines them.
Definition amd_hip_bf16.h:266
__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a)
Convert double to __hip_bfloat16.
Definition amd_hip_bf16.h:222
__HOST_DEVICE__ __hip_bfloat16 __short_as_bfloat16(const short int a)
Reinterprets short int into a bfloat16.
Definition amd_hip_bf16.h:311
__HOST_DEVICE__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b)
Combine two __hip_bfloat16 to __hip_bfloat162.
Definition amd_hip_bf16.h:238
__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:260
__HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a)
Convert float2 to __hip_bfloat162.
Definition amd_hip_bf16.h:230
__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:289
__HOST_DEVICE__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:246
__HOST_DEVICE__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts low 16 bits from each and combines them.
Definition amd_hip_bf16.h:303
__HOST_DEVICE__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:281
__HOST_DEVICE__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:295
__HOST_DEVICE__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:275
__HOST_DEVICE__ short int __bfloat16_as_short(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as a signed short integer.
Definition amd_hip_bf16.h:210
__HOST_DEVICE__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:252
__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a)
Converts and moves bfloat162 to float2.
Definition amd_hip_bf16.h:194
__HOST_DEVICE__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer.
Definition amd_hip_bf16.h:216
__HOST_DEVICE__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a)
Reinterprets unsigned short int into a bfloat16.
Definition amd_hip_bf16.h:319
__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1118
__device__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1182
__device__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1190
__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1110
__device__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1214
__device__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1158
__device__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1126
__device__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1102
__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1174
__device__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1206
__device__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1142
__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1198
__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1150
__device__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1166
__device__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1134
__device__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1318
__device__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1270
__device__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1286
__device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1326
__device__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1278
__device__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1222
__device__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1302
__device__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1294
__device__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1310
__device__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1230
__device__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1262
__device__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1246
__device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1334
__device__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1254
__device__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1238
Struct to represent a 16 bit brain floating point number.
Definition amd_hip_bf16.h:122
Struct to represent two 16 bit brain floating point numbers.
Definition amd_hip_bf16.h:127
Definition amd_hip_vector_types.h:2035