/* { dg-do compile } */
/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */

/* Compile-only check that 8-element _Float16 multiply-add loops are each
   turned into one packed half-precision FMA instruction operating on an
   xmm register.  Every function below handles one sign combination of
   "a * b + c" and is followed by the assembler scan for the matching
   mnemonic.

   Each harness directive must stay on its own line: the directive
   parser works line by line, so merging several onto one line makes
   them unreadable to the harness.  */

/* 16-byte and 32-byte _Float16 vector types.  They are not referenced by
   the functions below.  */
typedef _Float16 v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 v16hf __attribute__ ((__vector_size__ (32)));

/* pd[i] = pa[i] * pb[i] + pc[i] for i in [0, 8).
   The __restrict qualifiers promise that the four arrays do not alias.  */
void
foo1 (_Float16* __restrict pa, _Float16* __restrict pb,
      _Float16* __restrict pc, _Float16* __restrict pd)
{
  for (int i = 0; i != 8; i++)
    pd[i] = pa[i] * pb[i] + pc[i];
}

/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */

/* pd[i] = -pa[i] * pb[i] + pc[i] for i in [0, 8): negated product, added.  */
void
foo2 (_Float16* __restrict pa, _Float16* __restrict pb,
      _Float16* __restrict pc, _Float16* __restrict pd)
{
  for (int i = 0; i != 8; i++)
    pd[i] = -pa[i] * pb[i] + pc[i];
}

/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */

/* pd[i] = pa[i] * pb[i] - pc[i] for i in [0, 8): product, subtracted.  */
void
foo3 (_Float16* __restrict pa, _Float16* __restrict pb,
      _Float16* __restrict pc, _Float16* __restrict pd)
{
  for (int i = 0; i != 8; i++)
    pd[i] = pa[i] * pb[i] - pc[i];
}

/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */

/* pd[i] = -pa[i] * pb[i] - pc[i] for i in [0, 8): negated product,
   subtracted.  */
void
foo4 (_Float16* __restrict pa, _Float16* __restrict pb,
      _Float16* __restrict pc, _Float16* __restrict pd)
{
  for (int i = 0; i != 8; i++)
    pd[i] = -pa[i] * pb[i] - pc[i];
}

/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */