/* { dg-do run { target lp64 } } */
/* This is a test exercising peeling for alignment for a positive step
   vector loop.  We force Atom tuning here because, unlike most other
   archs, it has a higher unaligned than aligned access cost.  */
/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */

float a[1024], b[1024];

/* Each of foo1 .. foo4 iterates from a different compile-time-known
   start index, exercising different misalignments of the accesses.  */

void __attribute__((noipa)) foo1 ()
{
  for (int i = 2; i < 508; ++i)
    a[i] = b[i] * 2.;
}

void __attribute__((noipa)) foo2 ()
{
  for (int i = 3; i < 508; ++i)
    a[i] = b[i] * 2.;
}

void __attribute__((noipa)) foo3 ()
{
  for (int i = 4; i < 508; ++i)
    a[i] = b[i] * 2.;
}

void __attribute__((noipa)) foo4 ()
{
  for (int i = 5; i < 508; ++i)
    a[i] = b[i] * 2.;
}

/* Here the start index is not known at compile time.  */

void __attribute__((noipa)) foo5 (int start)
{
  for (int i = start; i < 508; ++i)
    a[i] = b[i] * 2.;
}

int main()
{
  /* For each function: initialize b (with an asm barrier preventing
     the initialization loop from being optimized away or vectorized),
     run the function, and verify the results.  */
  for (int i = 2; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo1 ();
  for (int i = 2; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 3; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo2 ();
  for (int i = 3; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 4; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo3 ();
  for (int i = 4; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 5; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo4 ();
  for (int i = 5; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 3; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo5 (3);
  for (int i = 3; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();
}

/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
/* Verify all vector accesses are emitted as aligned.  */
/* { dg-final { scan-assembler-not "movup" } } */