diff options
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 103 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-fcm.x | 56 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 3 |
11 files changed, 249 insertions, 19 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 478b4ed..d9bf3c5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,15 @@ 2013-01-08 James Greenhalgh <james.greenhalgh@arm.com> + * config/aarch64/aarch64-simd.md + (aarch64_simd_bsl<mode>_internal): Add floating-point modes. + (aarch64_simd_bsl): Likewise. + (aarch64_vcond_internal<mode>): Likewise. + (vcond<mode><mode>): Likewise. + (aarch64_cm<cmp><mode>): Fix constraints, add new modes. + * config/aarch64/iterators.md (V_cmp_result): Add V2DF. + +2013-01-08 James Greenhalgh <james.greenhalgh@arm.com> + * config/aarch64/aarch64-builtins.c (aarch64_builtin_vectorized_function): Handle sqrt, sqrtf. diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d4b52c3..e6655e8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1463,7 +1463,7 @@ (set_attr "simd_mode" "V2SI")] ) -;; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register +;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register ;; allocation. 
For an intrinsic of form: ;; vD = bsl_* (vS, vN, vM) ;; We can use any of: @@ -1472,11 +1472,12 @@ ;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN) (define_insn "aarch64_simd_bsl<mode>_internal" - [(set (match_operand:VDQ 0 "register_operand" "=w,w,w") - (unspec:VDQ [(match_operand:VDQ 1 "register_operand" " 0,w,w") - (match_operand:VDQ 2 "register_operand" " w,w,0") - (match_operand:VDQ 3 "register_operand" " w,0,w")] - UNSPEC_BSL))] + [(set (match_operand:VALL 0 "register_operand" "=w,w,w") + (unspec:VALL + [(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w") + (match_operand:VALL 2 "register_operand" " w,w,0") + (match_operand:VALL 3 "register_operand" " w,0,w")] + UNSPEC_BSL))] "TARGET_SIMD" "@ bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> @@ -1485,15 +1486,15 @@ ) (define_expand "aarch64_simd_bsl<mode>" - [(set (match_operand:VDQ 0 "register_operand") - (unspec:VDQ [(match_operand:<V_cmp_result> 1 "register_operand") - (match_operand:VDQ 2 "register_operand") - (match_operand:VDQ 3 "register_operand")] - UNSPEC_BSL))] + [(set (match_operand:VALL 0 "register_operand") + (unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand") + (match_operand:VALL 2 "register_operand") + (match_operand:VALL 3 "register_operand")] + UNSPEC_BSL))] "TARGET_SIMD" { /* We can't alias operands together if they have different modes. 
*/ - operands[1] = gen_lowpart (<MODE>mode, operands[1]); + operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); }) (define_expand "aarch64_vcond_internal<mode>" @@ -1574,14 +1575,64 @@ DONE; }) +(define_expand "aarch64_vcond_internal<mode>" + [(set (match_operand:VDQF 0 "register_operand") + (if_then_else:VDQF + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand:VDQF 1 "register_operand") + (match_operand:VDQF 2 "register_operand")))] + "TARGET_SIMD" +{ + int inverse = 0; + rtx mask = gen_reg_rtx (<V_cmp_result>mode); + + if (!REG_P (operands[5]) + && (operands[5] != CONST0_RTX (<MODE>mode))) + operands[5] = force_reg (<MODE>mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case LT: + inverse = 1; + /* Fall through. */ + case GE: + emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); + break; + case LE: + inverse = 1; + /* Fall through. */ + case GT: + emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); + break; + case NE: + inverse = 1; + /* Fall through. 
*/ + case EQ: + emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); + break; + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1], + operands[2])); + DONE; +}) + (define_expand "vcond<mode><mode>" - [(set (match_operand:VDQ 0 "register_operand") - (if_then_else:VDQ + [(set (match_operand:VALL 0 "register_operand") + (if_then_else:VALL (match_operator 3 "comparison_operator" - [(match_operand:VDQ 4 "register_operand") - (match_operand:VDQ 5 "nonmemory_operand")]) - (match_operand:VDQ 1 "register_operand") - (match_operand:VDQ 2 "register_operand")))] + [(match_operand:VALL 4 "register_operand") + (match_operand:VALL 5 "nonmemory_operand")]) + (match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "register_operand")))] "TARGET_SIMD" { emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1], @@ -2866,6 +2917,22 @@ (set_attr "simd_mode" "<MODE>")] ) +;; fcm(eq|ge|le|lt|gt) + +(define_insn "aarch64_cm<cmp><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") + (unspec:<V_cmp_result> + [(match_operand:VDQF 1 "register_operand" "w,w") + (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")] + VCMP_S))] + "TARGET_SIMD" + "@ + fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype> + fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" + [(set_attr "simd_type" "simd_fcmp") + (set_attr "simd_mode" "<MODE>")] +) + ;; addp (define_insn "aarch64_addp<mode>" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a2ef1e8..303a4bc 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,17 @@ 2013-01-08 James Greenhalgh <james.greenhalgh@arm.com> + * gcc.target/aarch64/vect-fcm-eq-d.c: New. + * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
+ * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. + * gcc.target/aarch64/vect-fcm.x: Likewise. + * lib/target-supports.exp + (check_effective_target_vect_condition): Enable for AArch64. + +2013-01-08 James Greenhalgh <james.greenhalgh@arm.com> + * gcc.target/aarch64/vsqrt.c (test_square_root_v2sf): Use endian-safe float pool loading. (test_square_root_v4sf): Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c new file mode 100644 index 0000000..a177d28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE double +#define OP == +#define INV_OP != + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */ +/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c new file mode 100644 index 0000000..01f3880 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE float +#define OP == +#define INV_OP != + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times
"fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */ +/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c new file mode 100644 index 0000000..6027593 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE double +#define OP >= +#define INV_OP < + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */ +/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c new file mode 100644 index 0000000..0337d70 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE float +#define OP >= +#define INV_OP < + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */ +/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c 
b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c new file mode 100644 index 0000000..b812a39 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE double +#define OP > +#define INV_OP <= + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */ +/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c new file mode 100644 index 0000000..5e012a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ + +#define FTYPE float +#define OP > +#define INV_OP <= + +#include "vect-fcm.x" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */ +/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x new file mode 100644 index 0000000..7e51bef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x @@ -0,0 +1,56 @@ +#include <stdlib.h> +#define N 16 + +FTYPE input1[N] = +{2.0, 4.0, 8.0, 16.0, + 2.125, 4.25, 8.5, 17.0, + -2.0, -4.0, -8.0, -16.0, + -2.125, -4.25, -8.5, -17.0}; + +FTYPE 
input2[N] = +{-2.0, 4.0, -8.0, 16.0, + 2.125, -4.25, 8.5, -17.0, + 2.0, -4.0, 8.0, -16.0, + -2.125, 4.25, -8.5, 17.0}; + +void +foo (FTYPE *in1, FTYPE *in2, FTYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] OP in2[i]) ? 2.0 : 4.0; +} + +void +bar (FTYPE *in1, FTYPE *in2, FTYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] INV_OP in2[i]) ? 4.0 : 2.0; +} + +void +foobar (FTYPE *in1, FTYPE *in2, FTYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] OP 0.0) ? 4.0 : 2.0; +} + +int +main (int argc, char **argv) +{ + FTYPE out1[N]; + FTYPE out2[N]; + int i = 0; + foo (input1, input2, out1); + bar (input1, input2, out2); + for (i = 0; i < N; i++) + if (out1[i] != out2[i]) + abort (); + return 0; +} + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index a6d766e..3ee5608 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3699,7 +3699,8 @@ proc check_effective_target_vect_condition { } { verbose "check_effective_target_vect_cond: using cached result" 2 } else { set et_vect_cond_saved 0 - if { [istarget powerpc*-*-*] + if { [istarget aarch64*-*-*] + || [istarget powerpc*-*-*] || [istarget ia64-*-*] || [istarget i?86-*-*] || [istarget spu-*-*] |