diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2014-04-23 15:26:28 +0000 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@gcc.gnu.org> | 2014-04-23 15:26:28 +0000 |
commit | f7d5cf8df3193f8f6e62501def08e4b0b1baadbc (patch) | |
tree | 2510b3a45cf0d83f6649c31498711958ba864a74 /gcc | |
parent | 9ac05ae59008ccdd479eca70e6c0c6742fad5b0e (diff) | |
download | gcc-f7d5cf8df3193f8f6e62501def08e4b0b1baadbc.zip gcc-f7d5cf8df3193f8f6e62501def08e4b0b1baadbc.tar.gz gcc-f7d5cf8df3193f8f6e62501def08e4b0b1baadbc.tar.bz2 |
[AArch64][2/3] Recognise rev16 operations on SImode and DImode data
* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
(rev16<mode>2_alt): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
(aarch_rev16_shleft_mask_imm_p): Likewise.
(aarch_rev16_p_1): Likewise.
(aarch_rev16_p): Likewise.
* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
(aarch_rev16_shright_mask_imm_p): Likewise.
(aarch_rev16_shleft_mask_imm_p): Likewise.
* gcc.target/aarch64/rev16_1.c: New test.
From-SVN: r209704
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 32 | ||||
-rw-r--r-- | gcc/config/arm/aarch-common-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/arm/aarch-common.c | 73 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/rev16_1.c | 59 |
7 files changed, 194 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6d3bab8..1b8dd62 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,18 @@ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + * config/aarch64/aarch64.md (rev16<mode>2): New pattern. + (rev16<mode>2_alt): Likewise. + * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. + * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. + (aarch_rev16_shleft_mask_imm_p): Likewise. + (aarch_rev16_p_1): Likewise. + (aarch_rev16_p): Likewise. + * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. + (aarch_rev16_shright_mask_imm_p): Likewise. + (aarch_rev16_shleft_mask_imm_p): Likewise. + +2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field. * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify rev cost. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index dacd7ee..68c29aa 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4695,6 +4695,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, return false; case IOR: + if (aarch_rev16_p (x)) + { + *cost = COSTS_N_INSNS (1); + + if (speed) + *cost += extra_cost->alu.rev; + + return true; + } + /* Fall through. */ case XOR: case AND: cost_logic: diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index ee32b6c..98c46d1 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -3253,6 +3253,38 @@ [(set_attr "type" "rev")] ) +;; There are no canonicalisation rules for the position of the lshiftrt, ashift +;; operations within an IOR/AND RTX, therefore we have two patterns matching +;; each valid permutation. 
+
+(define_insn "rev16<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))
+                 (and:GPI (lshiftrt:GPI (match_dup 1)
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
+(define_insn "rev16<mode>2_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI (match_dup 1)
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
 ;; zero_extend version of above
 (define_insn "*bswapsi2_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 5693c31..1b60d78 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -24,6 +24,9 @@
 #define GCC_AARCH_COMMON_PROTOS_H
 
 extern int aarch_crypto_can_dual_issue (rtx, rtx);
+extern bool aarch_rev16_p (rtx);
+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
 extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_early_store_addr_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index af8fc99..884d4b3 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -191,6 +191,79 @@ arm_get_set_operands (rtx producer, rtx consumer,
   return 0;
 }
 
+/* Return true if VAL is a CONST_INT equal to the mask 0x00ff00ff00ff00ff
+   reduced to MODE with trunc_int_for_mode (the right-shift half of rev16).  */
+bool
+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val)
+            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), mode);
+}
+
+/* Return true if VAL is a CONST_INT equal to the mask 0xff00ff00ff00ff00
+   reduced to MODE with trunc_int_for_mode (the left-shift half of rev16).  */
+bool
+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val)
+            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), mode);
+}
+
+/* Return true if LHS is (and (ashift R 8) M1), RHS is (and (lshiftrt R 8) M2)
+   for one register R, and M1/M2 are the rev16 shift-left/right masks.  */
+static bool
+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
+{
+  if (GET_CODE (lhs) == AND
+      && GET_CODE (XEXP (lhs, 0)) == ASHIFT
+      && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
+      && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
+      && REG_P (XEXP (XEXP (lhs, 0), 0))
+      && CONST_INT_P (XEXP (lhs, 1))
+      && GET_CODE (rhs) == AND
+      && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
+      && REG_P (XEXP (XEXP (rhs, 0), 0))
+      && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
+      && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
+      && CONST_INT_P (XEXP (rhs, 1))
+      && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
+    {
+      rtx lhs_mask = XEXP (lhs, 1);
+      rtx rhs_mask = XEXP (rhs, 1);
+
+      return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
+             && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
+    }
+
+  return false;
+}
+
+/* Recognise a sequence of bitwise operations corresponding to a rev16
+   operation.  These will be of the form:
+     ((x >> 8) & 0x00ff00ff)
+   | ((x << 8) & 0xff00ff00)
+   for SImode and with similar but wider bitmasks for DImode.
+   The two sub-expressions of the IOR can appear on either side so check both
+   permutations with the help of aarch_rev16_p_1 above.  */
+bool
+aarch_rev16_p (rtx x)
+{
+  rtx left_sub_rtx, right_sub_rtx;
+
+  if (GET_CODE (x) != IOR)
+    return false;
+
+  left_sub_rtx = XEXP (x, 0);
+  right_sub_rtx = XEXP (x, 1);
+
+  /* There are no canonicalisation rules for the position of the two shifts
+     involved in a rev, so try both permutations.  */
+  return aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x))
+         || aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
+}
+
 /* Return nonzero if the CONSUMER instruction (a load) does need
    PRODUCER's value to calculate the address.  */
 int
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e74096c..9f1e7ce 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/rev16_1.c: New test.
+
 2014-04-23  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/60903
diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_1.c b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
new file mode 100644
index 0000000..126d3c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
@@ -0,0 +1,59 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+typedef unsigned long long __u64;
+
+__u64
+__rev16_64_alt (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00ULL) >> 8)
+         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffULL) << 8);
+}
+
+__u64
+__rev16_64 (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffULL) << 8)
+         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00ULL) >> 8);
+}
+
+int
+main (void)
+{
+  volatile __u32 in32 = 0x12345678;
+  volatile __u32 expected32 = 0x34127856;
+  volatile __u64 in64 = 0x1234567890abcdefULL;
+  volatile __u64 expected64 = 0x34127856ab90efcdULL;
+
+  if (__rev16_32 (in32) != expected32)
+    abort ();
+
+  if (__rev16_32_alt (in32) != expected32)
+    abort ();
+
+  if (__rev16_64 (in64) != expected64)
+    abort ();
+
+  if (__rev16_64_alt (in64) != expected64)
+    abort ();
+
+  return 0;
+}
|