Skip to main content

core/stdarch/crates/core_arch/src/x86/
avxneconvert.rs

1use crate::core_arch::x86::*;
2
3#[cfg(test)]
4use stdarch_test::assert_instr;
5
6/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location
7/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit)
8/// floating-point elements, and store the results in dst.
9///
10/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps)
11#[inline]
12#[target_feature(enable = "avxneconvert")]
13#[cfg_attr(test, assert_instr(vbcstnebf162ps))]
14#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
15pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 {
16    bcstnebf162ps_128(a)
17}
18
19/// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location
20/// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) floating-point
21/// elements, and store the results in dst.
22///
23/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps)
24#[inline]
25#[target_feature(enable = "avxneconvert")]
26#[cfg_attr(test, assert_instr(vbcstnebf162ps))]
27#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
28pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 {
29    bcstnebf162ps_256(a)
30}
31
32/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting
33/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision
34/// (32-bit) floating-point elements, and store the results in dst.
35///
36/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps)
37#[inline]
38#[target_feature(enable = "avxneconvert")]
39#[cfg_attr(test, assert_instr(vbcstnesh2ps))]
40#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
41pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 {
42    bcstnesh2ps_128(a)
43}
44
45/// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting
46/// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision
47/// (32-bit) floating-point elements, and store the results in dst.
48///
49/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps)
50#[inline]
51#[target_feature(enable = "avxneconvert")]
52#[cfg_attr(test, assert_instr(vbcstnesh2ps))]
53#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
54pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 {
55    bcstnesh2ps_256(a)
56}
57
58/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at
59/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
60///
61/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps)
62#[inline]
63#[target_feature(enable = "avxneconvert")]
64#[cfg_attr(test, assert_instr(vcvtneebf162ps))]
65#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
66pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 {
67    transmute(cvtneebf162ps_128(a))
68}
69
70/// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at
71/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
72///
73/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps)
74#[inline]
75#[target_feature(enable = "avxneconvert")]
76#[cfg_attr(test, assert_instr(vcvtneebf162ps))]
77#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
78pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 {
79    transmute(cvtneebf162ps_256(a))
80}
81
82/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at
83/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
84///
85/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps)
86#[inline]
87#[target_feature(enable = "avxneconvert")]
88#[cfg_attr(test, assert_instr(vcvtneeph2ps))]
89#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
90pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
91    transmute(cvtneeph2ps_128(a))
92}
93
94/// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at
95/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
96///
97/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps)
98#[inline]
99#[target_feature(enable = "avxneconvert")]
100#[cfg_attr(test, assert_instr(vcvtneeph2ps))]
101#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
102pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 {
103    transmute(cvtneeph2ps_256(a))
104}
105
106/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at
107/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
108///
109/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps)
110#[inline]
111#[target_feature(enable = "avxneconvert")]
112#[cfg_attr(test, assert_instr(vcvtneobf162ps))]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 {
115    transmute(cvtneobf162ps_128(a))
116}
117
118/// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at
119/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
120///
121/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps)
122#[inline]
123#[target_feature(enable = "avxneconvert")]
124#[cfg_attr(test, assert_instr(vcvtneobf162ps))]
125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
126pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 {
127    transmute(cvtneobf162ps_256(a))
128}
129
130/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at
131/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
132///
133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps)
134#[inline]
135#[target_feature(enable = "avxneconvert")]
136#[cfg_attr(test, assert_instr(vcvtneoph2ps))]
137#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
138pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
139    transmute(cvtneoph2ps_128(a))
140}
141
142/// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at
143/// location a to single precision (32-bit) floating-point elements, and store the results in dst.
144///
145/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps)
146#[inline]
147#[target_feature(enable = "avxneconvert")]
148#[cfg_attr(test, assert_instr(vcvtneoph2ps))]
149#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
150pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
151    transmute(cvtneoph2ps_256(a))
152}
153
154/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
155/// elements, and store the results in dst.
156///
157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh)
158#[inline]
159#[target_feature(enable = "avxneconvert")]
160#[cfg_attr(test, assert_instr(vcvtneps2bf16))]
161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
162pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
163    unsafe { vcvtneps2bf16_128(a) }
164}
165
166/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
167/// elements, and store the results in dst.
168///
169/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh)
170#[inline]
171#[target_feature(enable = "avxneconvert")]
172#[cfg_attr(test, assert_instr(vcvtneps2bf16))]
173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
174pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
175    unsafe { vcvtneps2bf16_256(a) }
176}
177
178#[allow(improper_ctypes)]
179unsafe extern "C" {
180    #[link_name = "llvm.x86.vbcstnebf162ps128"]
181    fn bcstnebf162ps_128(a: *const bf16) -> __m128;
182    #[link_name = "llvm.x86.vbcstnebf162ps256"]
183    fn bcstnebf162ps_256(a: *const bf16) -> __m256;
184    #[link_name = "llvm.x86.vbcstnesh2ps128"]
185    fn bcstnesh2ps_128(a: *const f16) -> __m128;
186    #[link_name = "llvm.x86.vbcstnesh2ps256"]
187    fn bcstnesh2ps_256(a: *const f16) -> __m256;
188
189    #[link_name = "llvm.x86.vcvtneebf162ps128"]
190    fn cvtneebf162ps_128(a: *const __m128bh) -> __m128;
191    #[link_name = "llvm.x86.vcvtneebf162ps256"]
192    fn cvtneebf162ps_256(a: *const __m256bh) -> __m256;
193    #[link_name = "llvm.x86.vcvtneeph2ps128"]
194    fn cvtneeph2ps_128(a: *const __m128h) -> __m128;
195    #[link_name = "llvm.x86.vcvtneeph2ps256"]
196    fn cvtneeph2ps_256(a: *const __m256h) -> __m256;
197
198    #[link_name = "llvm.x86.vcvtneobf162ps128"]
199    fn cvtneobf162ps_128(a: *const __m128bh) -> __m128;
200    #[link_name = "llvm.x86.vcvtneobf162ps256"]
201    fn cvtneobf162ps_256(a: *const __m256bh) -> __m256;
202    #[link_name = "llvm.x86.vcvtneoph2ps128"]
203    fn cvtneoph2ps_128(a: *const __m128h) -> __m128;
204    #[link_name = "llvm.x86.vcvtneoph2ps256"]
205    fn cvtneoph2ps_256(a: *const __m256h) -> __m256;
206
207    #[link_name = "llvm.x86.vcvtneps2bf16128"]
208    fn vcvtneps2bf16_128(a: __m128) -> __m128bh;
209    #[link_name = "llvm.x86.vcvtneps2bf16256"]
210    fn vcvtneps2bf16_256(a: __m256) -> __m128bh;
211}
212
// One test per public intrinsic in this file. Every test is gated on the `avxneconvert`
// feature through `simd_test`.
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // BF16 bit patterns for 1.0 through 8.0, written as
    // sign (1 bit) _ exponent (8 bits) _ mantissa (7 bits).
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    // --- scalar load + broadcast: every output lane must equal the single input ---

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = unsafe { _mm_bcstnebf16_ps(addr_of!(a)) };
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = unsafe { _mm256_bcstnebf16_ps(addr_of!(a)) };
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = unsafe { _mm_bcstnesh_ps(addr_of!(a)) };
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = unsafe { _mm256_bcstnesh_ps(addr_of!(a)) };
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    // --- even-indexed elements: inputs 1..=8 in order, so positions 0, 2, 4, 6 hold 1, 3, 5, 7 ---

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneebf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm_cvtneebf16_ps(addr_of!(a)) };
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneebf16_ps() {
        // The 1..=8 pattern is repeated twice to fill all sixteen input lanes.
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm256_cvtneebf16_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneeph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = unsafe { _mm_cvtneeph_ps(addr_of!(a)) };
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneeph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = unsafe { _mm256_cvtneeph_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(r, e);
    }

    // --- odd-indexed elements: same inputs, so positions 1, 3, 5, 7 hold 2, 4, 6, 8 ---

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneobf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm_cvtneobf16_ps(addr_of!(a)) };
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneobf16_ps() {
        // The 1..=8 pattern is repeated twice to fill all sixteen input lanes.
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm256_cvtneobf16_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneoph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = unsafe { _mm_cvtneoph_ps(addr_of!(a)) };
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneoph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = unsafe { _mm256_cvtneoph_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(r, e);
    }

    // --- f32 -> BF16 narrowing: results are compared as raw 16-bit patterns ---

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneps_avx_pbh() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        // SAFETY: `transmute_copy` reads only as many bytes as the destination type holds,
        // starting at the front of the source. `u16x4` (four lanes) is smaller than the
        // eight-lane `__m128bh`, so the read stays inside the source value and picks up
        // exactly the four converted elements.
        let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_avx_pbh(a)) };
        let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneps_avx_pbh() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        // Eight inputs fill all eight 16-bit lanes, so the whole result is compared.
        let r: u16x8 = _mm256_cvtneps_avx_pbh(a).as_u16x8();
        let e = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        assert_eq!(r, e);
    }
}