1use crate::core_arch::x86::*;
2
3#[cfg(test)]
4use stdarch_test::assert_instr;
5
/// Convert the scalar BF16 (16-bit) floating-point element stored at memory location `a` to
/// single-precision (32-bit) floating-point and broadcast it to all four lanes of the
/// returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bcstnebf16_ps)
///
/// # Safety
///
/// `a` is passed unchecked to the underlying intrinsic, which loads through it; it must
/// point to a readable `bf16` value.
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vbcstnebf162ps))]
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 {
    bcstnebf162ps_128(a)
}
18
/// Convert the scalar BF16 (16-bit) floating-point element stored at memory location `a` to
/// single-precision (32-bit) floating-point and broadcast it to all eight lanes of the
/// returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bcstnebf16_ps)
///
/// # Safety
///
/// `a` is passed unchecked to the underlying intrinsic, which loads through it; it must
/// point to a readable `bf16` value.
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vbcstnebf162ps))]
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 {
    bcstnebf162ps_256(a)
}
31
/// Convert the scalar half-precision (16-bit) floating-point element stored at memory
/// location `a` to single-precision (32-bit) floating-point and broadcast it to all four
/// lanes of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bcstnesh_ps)
///
/// # Safety
///
/// `a` is passed unchecked to the underlying intrinsic, which loads through it; it must
/// point to a readable `f16` value.
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vbcstnesh2ps))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 {
    bcstnesh2ps_128(a)
}
44
/// Convert the scalar half-precision (16-bit) floating-point element stored at memory
/// location `a` to single-precision (32-bit) floating-point and broadcast it to all eight
/// lanes of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bcstnesh_ps)
///
/// # Safety
///
/// `a` is passed unchecked to the underlying intrinsic, which loads through it; it must
/// point to a readable `f16` value.
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vbcstnesh2ps))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 {
    bcstnesh2ps_256(a)
}
57
58#[inline]
63#[target_feature(enable = "avxneconvert")]
64#[cfg_attr(test, assert_instr(vcvtneebf162ps))]
65#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
66pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 {
67 transmute(cvtneebf162ps_128(a))
68}
69
70#[inline]
75#[target_feature(enable = "avxneconvert")]
76#[cfg_attr(test, assert_instr(vcvtneebf162ps))]
77#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
78pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 {
79 transmute(cvtneebf162ps_256(a))
80}
81
82#[inline]
87#[target_feature(enable = "avxneconvert")]
88#[cfg_attr(test, assert_instr(vcvtneeph2ps))]
89#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
90pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
91 transmute(cvtneeph2ps_128(a))
92}
93
94#[inline]
99#[target_feature(enable = "avxneconvert")]
100#[cfg_attr(test, assert_instr(vcvtneeph2ps))]
101#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
102pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 {
103 transmute(cvtneeph2ps_256(a))
104}
105
106#[inline]
111#[target_feature(enable = "avxneconvert")]
112#[cfg_attr(test, assert_instr(vcvtneobf162ps))]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 {
115 transmute(cvtneobf162ps_128(a))
116}
117
118#[inline]
123#[target_feature(enable = "avxneconvert")]
124#[cfg_attr(test, assert_instr(vcvtneobf162ps))]
125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
126pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 {
127 transmute(cvtneobf162ps_256(a))
128}
129
130#[inline]
135#[target_feature(enable = "avxneconvert")]
136#[cfg_attr(test, assert_instr(vcvtneoph2ps))]
137#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
138pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
139 transmute(cvtneoph2ps_128(a))
140}
141
142#[inline]
147#[target_feature(enable = "avxneconvert")]
148#[cfg_attr(test, assert_instr(vcvtneoph2ps))]
149#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
150pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
151 transmute(cvtneoph2ps_256(a))
152}
153
/// Convert the four packed single-precision (32-bit) floating-point elements in `a` to
/// packed BF16 (16-bit) floating-point elements and return them.
///
/// The four converted values occupy the lowest four 16-bit lanes of the result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtneps_avx_pbh)
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vcvtneps2bf16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
    // SAFETY: the intrinsic takes and returns vectors by value (no pointers involved), and
    // the `avxneconvert` target feature it needs is enabled on this function.
    unsafe { vcvtneps2bf16_128(a) }
}
165
/// Convert the eight packed single-precision (32-bit) floating-point elements in `a` to
/// packed BF16 (16-bit) floating-point elements and return them as a 128-bit vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtneps_avx_pbh)
#[inline]
#[target_feature(enable = "avxneconvert")]
#[cfg_attr(test, assert_instr(vcvtneps2bf16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
    // SAFETY: the intrinsic takes and returns vectors by value (no pointers involved), and
    // the `avxneconvert` target feature it needs is enabled on this function.
    unsafe { vcvtneps2bf16_256(a) }
}
177
// Declarations of the LLVM intrinsics backing the public wrappers in this file. Each
// `link_name` selects the LLVM intrinsic; the `128`/`256` suffix is the vector width.
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector and
// `bf16`/`f16` types used in these signatures are flagged by the lint — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Scalar load + broadcast to packed `f32` (BF16 and half-precision sources).
    #[link_name = "llvm.x86.vbcstnebf162ps128"]
    fn bcstnebf162ps_128(a: *const bf16) -> __m128;
    #[link_name = "llvm.x86.vbcstnebf162ps256"]
    fn bcstnebf162ps_256(a: *const bf16) -> __m256;
    #[link_name = "llvm.x86.vbcstnesh2ps128"]
    fn bcstnesh2ps_128(a: *const f16) -> __m128;
    #[link_name = "llvm.x86.vbcstnesh2ps256"]
    fn bcstnesh2ps_256(a: *const f16) -> __m256;

    // Even-indexed 16-bit elements in memory -> packed `f32`.
    #[link_name = "llvm.x86.vcvtneebf162ps128"]
    fn cvtneebf162ps_128(a: *const __m128bh) -> __m128;
    #[link_name = "llvm.x86.vcvtneebf162ps256"]
    fn cvtneebf162ps_256(a: *const __m256bh) -> __m256;
    #[link_name = "llvm.x86.vcvtneeph2ps128"]
    fn cvtneeph2ps_128(a: *const __m128h) -> __m128;
    #[link_name = "llvm.x86.vcvtneeph2ps256"]
    fn cvtneeph2ps_256(a: *const __m256h) -> __m256;

    // Odd-indexed 16-bit elements in memory -> packed `f32`.
    #[link_name = "llvm.x86.vcvtneobf162ps128"]
    fn cvtneobf162ps_128(a: *const __m128bh) -> __m128;
    #[link_name = "llvm.x86.vcvtneobf162ps256"]
    fn cvtneobf162ps_256(a: *const __m256bh) -> __m256;
    #[link_name = "llvm.x86.vcvtneoph2ps128"]
    fn cvtneoph2ps_128(a: *const __m128h) -> __m128;
    #[link_name = "llvm.x86.vcvtneoph2ps256"]
    fn cvtneoph2ps_256(a: *const __m256h) -> __m256;

    // Packed `f32` in a register -> packed BF16; both widths return a 128-bit vector.
    #[link_name = "llvm.x86.vcvtneps2bf16128"]
    fn vcvtneps2bf16_128(a: __m128) -> __m128bh;
    #[link_name = "llvm.x86.vcvtneps2bf16256"]
    fn vcvtneps2bf16_256(a: __m256) -> __m128bh;
}
212
// Tests for the AVX-NE-CONVERT wrappers: each one feeds a small known input and compares
// the result lane by lane with the expected vector.
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // Raw BF16 bit patterns for 1.0 through 8.0, written as sign_exponent_mantissa
    // (1 sign bit, 8 exponent bits with bias 127, 7 mantissa bits).
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    // Broadcast tests: a single 1.0 in memory must appear in every output lane.
    #[simd_test(enable = "avxneconvert")]
    fn test_mm_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = unsafe { _mm_bcstnebf16_ps(addr_of!(a)) };
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_bcstnebf16_ps() {
        let a = bf16::from_bits(BF16_ONE);
        let r = unsafe { _mm256_bcstnebf16_ps(addr_of!(a)) };
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = unsafe { _mm_bcstnesh_ps(addr_of!(a)) };
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_bcstnesh_ps() {
        let a = 1.0_f16;
        let r = unsafe { _mm256_bcstnesh_ps(addr_of!(a)) };
        let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }

    // Even-element tests: indices 0, 2, 4, ... of the input are expected in the output.
    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneebf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm_cvtneebf16_ps(addr_of!(a)) };
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneebf16_ps() {
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm256_cvtneebf16_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneeph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = unsafe { _mm_cvtneeph_ps(addr_of!(a)) };
        let e = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneeph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = unsafe { _mm256_cvtneeph_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(r, e);
    }

    // Odd-element tests: indices 1, 3, 5, ... of the input are expected in the output.
    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneobf16_ps() {
        let a = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm_cvtneobf16_ps(addr_of!(a)) };
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneobf16_ps() {
        let a = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let r = unsafe { _mm256_cvtneobf16_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneoph_ps() {
        let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let r = unsafe { _mm_cvtneoph_ps(addr_of!(a)) };
        let e = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneoph_ps() {
        let a = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let r = unsafe { _mm256_cvtneoph_ps(addr_of!(a)) };
        let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(r, e);
    }

    // f32 -> BF16 tests: results are compared as raw 16-bit patterns.
    #[simd_test(enable = "avxneconvert")]
    fn test_mm_cvtneps_avx_pbh() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        // Only four values are produced from a 128-bit input, so just the first four
        // 16-bit lanes of the result are read back and compared.
        let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_avx_pbh(a)) };
        let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avxneconvert")]
    fn test_mm256_cvtneps_avx_pbh() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r: u16x8 = _mm256_cvtneps_avx_pbh(a).as_u16x8();
        let e = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        assert_eq!(r, e);
    }
}