/* i?86 does not have V2SF, x32 does though. */ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O3 -mavx -mfma" } */ struct Matrix { float m11; float m12; float m21; float m22; float dx; float dy; }; struct Matrix multiply(const struct Matrix *a, const struct Matrix *b) { struct Matrix out; out.m11 = a->m11*b->m11 + a->m12*b->m21; out.m12 = a->m11*b->m12 + a->m12*b->m22; out.m21 = a->m21*b->m11 + a->m22*b->m21; out.m22 = a->m21*b->m12 + a->m22*b->m22; out.dx = a->dx*b->m11 + a->dy*b->m21 + b->dx; out.dy = a->dx*b->m12 + a->dy*b->m22 + b->dy; return out; } /* The whole kernel should be vectorized with V4SF and V2SF operations. */ /* { dg-final { scan-assembler-times "vadd" 1 } } */ /* { dg-final { scan-assembler-times "vmul" 2 } } */ /* { dg-final { scan-assembler-times "vfma" 2 } } */