author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>    2014-04-23 15:26:28 +0000
committer  Kyrylo Tkachov <ktkachov@gcc.gnu.org>      2014-04-23 15:26:28 +0000
commit     f7d5cf8df3193f8f6e62501def08e4b0b1baadbc (patch)
tree       2510b3a45cf0d83f6649c31498711958ba864a74 /gcc
parent     9ac05ae59008ccdd479eca70e6c0c6742fad5b0e (diff)
[AArch64][2/3] Recognise rev16 operations on SImode and DImode data
	* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
	(rev16<mode>2_alt): Likewise.
	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
	(aarch_rev16_shleft_mask_imm_p): Likewise.
	(aarch_rev16_p_1): Likewise.
	(aarch_rev16_p): Likewise.
	* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
	(aarch_rev16_shright_mask_imm_p): Likewise.
	(aarch_rev16_shleft_mask_imm_p): Likewise.

	* gcc.target/aarch64/rev16_1.c: New test.

From-SVN: r209704
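For context, the source-level idiom these patterns target is a byte swap within each 16-bit halfword, i.e. ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff) for 32-bit data, with correspondingly wider masks for 64-bit data. Below is a minimal sketch of the kind of function that is expected to collapse to a single rev16 instruction at -O2 after this change; the function name is illustrative only, and the new test further down shows the forms actually exercised.

/* Swap the bytes of every 16-bit halfword of X.  With this patch,
   GCC for AArch64 is expected to select a single rev16 instruction
   for this at -O2.  */
unsigned int
swap_bytes_in_halfwords (unsigned int x)
{
  return ((x << 8) & 0xff00ff00U) | ((x >> 8) & 0x00ff00ffU);
}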
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                               | 13
-rw-r--r--  gcc/config/aarch64/aarch64.c                | 10
-rw-r--r--  gcc/config/aarch64/aarch64.md               | 32
-rw-r--r--  gcc/config/arm/aarch-common-protos.h        |  3
-rw-r--r--  gcc/config/arm/aarch-common.c               | 73
-rw-r--r--  gcc/testsuite/ChangeLog                     |  4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/rev16_1.c  | 59
7 files changed, 194 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6d3bab8..1b8dd62 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,18 @@
2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

+	* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
+	(rev16<mode>2_alt): Likewise.
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
+	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+	(aarch_rev16_p_1): Likewise.
+	(aarch_rev16_p): Likewise.
+	* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
+	(aarch_rev16_shright_mask_imm_p): Likewise.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+
+2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
	* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
	* config/arm/aarch-cost-tables.h (generic_extra_costs): Specify
	rev cost.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dacd7ee..68c29aa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4695,6 +4695,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
      return false;

    case IOR:
+      if (aarch_rev16_p (x))
+        {
+          *cost = COSTS_N_INSNS (1);
+
+          if (speed)
+            *cost += extra_cost->alu.rev;
+
+          return true;
+        }
+      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ee32b6c..98c46d1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3253,6 +3253,38 @@
[(set_attr "type" "rev")]
)
+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
+;; operations within an IOR/AND RTX, therefore we have two patterns matching
+;; each valid permutation.
+
+(define_insn "rev16<mode>2"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+ (const_int 8))
+ (match_operand:GPI 3 "const_int_operand" "n"))
+ (and:GPI (lshiftrt:GPI (match_dup 1)
+ (const_int 8))
+ (match_operand:GPI 2 "const_int_operand" "n"))))]
+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+ "rev16\\t%<w>0, %<w>1"
+ [(set_attr "type" "rev")]
+)
+
+(define_insn "rev16<mode>2_alt"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
+ (const_int 8))
+ (match_operand:GPI 2 "const_int_operand" "n"))
+ (and:GPI (ashift:GPI (match_dup 1)
+ (const_int 8))
+ (match_operand:GPI 3 "const_int_operand" "n"))))]
+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+ "rev16\\t%<w>0, %<w>1"
+ [(set_attr "type" "rev")]
+)
+
;; zero_extend version of above
(define_insn "*bswapsi2_uxtw"
[(set (match_operand:DI 0 "register_operand" "=r")
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 5693c31..1b60d78 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -24,6 +24,9 @@
#define GCC_AARCH_COMMON_PROTOS_H

extern int aarch_crypto_can_dual_issue (rtx, rtx);
+extern bool aarch_rev16_p (rtx);
+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index af8fc99..884d4b3 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -191,6 +191,79 @@ arm_get_set_operands (rtx producer, rtx consumer,
  return 0;
}

+bool
+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode);
+}
+
+bool
+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode);
+}
+
+
+static bool
+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
+{
+  if (GET_CODE (lhs) == AND
+      && GET_CODE (XEXP (lhs, 0)) == ASHIFT
+      && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
+      && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
+      && REG_P (XEXP (XEXP (lhs, 0), 0))
+      && CONST_INT_P (XEXP (lhs, 1))
+      && GET_CODE (rhs) == AND
+      && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
+      && REG_P (XEXP (XEXP (rhs, 0), 0))
+      && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
+      && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
+      && CONST_INT_P (XEXP (rhs, 1))
+      && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
+
+    {
+      rtx lhs_mask = XEXP (lhs, 1);
+      rtx rhs_mask = XEXP (rhs, 1);
+
+      return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
+             && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
+    }
+
+  return false;
+}
+
+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
+   These will be of the form:
+     ((x >> 8) & 0x00ff00ff)
+   | ((x << 8) & 0xff00ff00)
+   for SImode and with similar but wider bitmasks for DImode.
+   The two sub-expressions of the IOR can appear on either side so check both
+   permutations with the help of aarch_rev16_p_1 above.  */
+
+bool
+aarch_rev16_p (rtx x)
+{
+  rtx left_sub_rtx, right_sub_rtx;
+  bool is_rev = false;
+
+  if (GET_CODE (x) != IOR)
+    return false;
+
+  left_sub_rtx = XEXP (x, 0);
+  right_sub_rtx = XEXP (x, 1);
+
+  /* There are no canonicalisation rules for the position of the two shifts
+     involved in a rev, so try both permutations.  */
+  is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
+
+  if (!is_rev)
+    is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
+
+  return is_rev;
+}
+
/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */
int
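A note on the two mask predicates added above: trunc_int_for_mode sign-extends its constant from the width of the mode, and CONST_INTs in the RTL stream are stored in that sign-extended canonical form, which is why a single 64-bit literal in each predicate covers both SImode and DImode. The standalone sketch below is not part of the patch; it simply mimics that sign-extension for a 32-bit mode to show the values the predicates actually compare against.

#include <stdint.h>
#include <stdio.h>

/* Illustration only (not from this commit): keep the low 32 bits of C
   and sign-extend them, roughly what trunc_int_for_mode does for a
   32-bit mode.  */
static uint64_t
sign_extend_from_32 (uint64_t c)
{
  uint64_t low = c & 0xffffffffULL;
  return (low & 0x80000000ULL) ? (low | 0xffffffff00000000ULL) : low;
}

int
main (void)
{
  /* DImode: the literals from the predicates are used as-is.  */
  printf ("DImode right-shift mask: 0x%016llx\n",
          (unsigned long long) 0xff00ff00ff00ffULL);
  printf ("DImode left-shift  mask: 0x%016llx\n",
          (unsigned long long) 0xff00ff00ff00ff00ULL);

  /* SImode: 0x00ff00ff and sign-extended 0xff00ff00, i.e. the INTVALs
     of the SImode CONST_INT masks in the rev16 RTL.  */
  printf ("SImode right-shift mask: 0x%016llx\n",
          (unsigned long long) sign_extend_from_32 (0xff00ff00ff00ffULL));
  printf ("SImode left-shift  mask: 0x%016llx\n",
          (unsigned long long) sign_extend_from_32 (0xff00ff00ff00ff00ULL));
  return 0;
}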
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e74096c..9f1e7ce 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/rev16_1.c: New test.
+
2014-04-23  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/60903
diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_1.c b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
new file mode 100644
index 0000000..126d3c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
@@ -0,0 +1,59 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+typedef unsigned long long __u64;
+
+__u64
+__rev16_64_alt (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
+         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
+}
+
+__u64
+__rev16_64 (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
+         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
+}
+
+int
+main (void)
+{
+  volatile __u32 in32 = 0x12345678;
+  volatile __u32 expected32 = 0x34127856;
+  volatile __u64 in64 = 0x1234567890abcdefUL;
+  volatile __u64 expected64 = 0x34127856ab90efcdUL;
+
+  if (__rev16_32 (in32) != expected32)
+    abort ();
+
+  if (__rev16_32_alt (in32) != expected32)
+    abort ();
+
+  if (__rev16_64 (in64) != expected64)
+    abort ();
+
+  if (__rev16_64_alt (in64) != expected64)
+    abort ();
+
+  return 0;
+}
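The new test above is an execution test, so it checks that the transformation preserves behaviour but not that a rev16 instruction is actually selected. Purely as an illustration, and not part of this commit, a compile-time companion test could look like the sketch below; the scan pattern is an assumption about the emitted mnemonic and operand spelling.

/* { dg-do compile } */
/* { dg-options "-O2" } */

typedef unsigned int __u32;

__u32
__rev16_32 (__u32 x)
{
  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
}

/* Expect a single 32-bit rev16 for the function above.  */
/* { dg-final { scan-assembler-times "rev16\\tw\[0-9\]+" 1 } } */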