aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Roger Sayle <roger@nextmovesoftware.com> 2021-12-15 15:09:48 +0100
committer Tom de Vries <tdevries@suse.de> 2022-01-04 12:28:02 +0100
commit beed3f8f60492289ca6211d86c54a2254a642035 (patch)
tree decc94b8e351664b22f98f7ccbc14ce8bf694851
parent a54d11749f0ce98192cfe28e5ccc0633d4db3982 (diff)
download gcc-beed3f8f60492289ca6211d86c54a2254a642035.zip
gcc-beed3f8f60492289ca6211d86c54a2254a642035.tar.gz
gcc-beed3f8f60492289ca6211d86c54a2254a642035.tar.bz2
nvptx: Transition nvptx backend to STORE_FLAG_VALUE = 1
This patch to the nvptx backend changes the backend's STORE_FLAG_VALUE from -1 to 1, by using BImode predicates and selp instructions, instead of set instructions (almost always followed by integer negation). Historically, it was reasonable (though rare) for backends to use -1 for representing true during the RTL passes. However, with tree-ssa, GCC now emits lots of code that reads and writes _Bool values, requiring STORE_FLAG_VALUE=-1 targets to frequently convert 0/-1 pseudos to 0/1 pseudos using integer negation. Unfortunately, this process prevents or complicates many optimizations (negate isn't associative with logical AND, OR and XOR, and interferes with range/vrp/nonzerobits bounds etc.). The impact of this is that for a relatively simple logical expression like "return (x==21) && (y==69);", the nvptx backend currently generates: mov.u32 %r26, %ar0; mov.u32 %r27, %ar1; set.u32.eq.u32 %r30, %r26, 21; neg.s32 %r31, %r30; mov.u32 %r29, %r31; set.u32.eq.u32 %r33, %r27, 69; neg.s32 %r34, %r33; mov.u32 %r32, %r34; cvt.u16.u8 %r39, %r29; mov.u16 %r36, %r39; cvt.u16.u8 %r39, %r32; mov.u16 %r37, %r39; and.b16 %r35, %r36, %r37; cvt.u32.u16 %r38, %r35; cvt.u32.u8 %value, %r38; This patch tweaks nvptx to generate 0/1 values instead, requiring the same number of instructions, using (BImode) predicate registers and selp instructions so as to now generate the almost identical: mov.u32 %r26, %ar0; mov.u32 %r27, %ar1; setp.eq.u32 %r31, %r26, 21; selp.u32 %r30, 1, 0, %r31; mov.u32 %r29, %r30; setp.eq.u32 %r34, %r27, 69; selp.u32 %r33, 1, 0, %r34; mov.u32 %r32, %r33; cvt.u16.u8 %r39, %r29; mov.u16 %r36, %r39; cvt.u16.u8 %r39, %r32; mov.u16 %r37, %r39; and.b16 %r35, %r36, %r37; cvt.u32.u16 %r38, %r35; cvt.u32.u8 %value, %r38; The hidden benefit is that this sequence can (in theory) be optimized by the RTL passes to eventually generate a much shorter sequence using an and.pred instruction (just like Nvidia's nvcc compiler). 
This patch has been tested on nvptx-none with a "make" and "make -k check" (including newlib) hosted on x86_64-pc-linux-gnu with no new failures. gcc/ChangeLog: * config/nvptx/nvptx.h (STORE_FLAG_VALUE): Change to 1. * config/nvptx/nvptx.md (movbi): Use P1 constraint for true. (setcc_from_bi): Remove SImode specific pattern. (setcc<mode>_from_bi): Provide more general HSDIM pattern. (extendbi<mode>2, zero_extendbi<mode>2): Provide instructions for sign- and zero-extending BImode predicates to integers. (setcc_int<mode>): Remove previous (-1-based) instructions. (cstorebi4): Remove BImode to SImode specific expander. (cstore<mode>4): Fix indentation. Expand using setccsi_from_bi. (cstore<mode>4): For both integer and floating point modes.
-rw-r--r-- gcc/config/nvptx/nvptx.h 2
-rw-r--r-- gcc/config/nvptx/nvptx.md 76
2 files changed, 41 insertions, 37 deletions
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 728dff0..d61bd70 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -319,7 +319,7 @@ struct GTY(()) machine_function
#define NO_DOT_IN_LABEL
#define ASM_COMMENT_START "//"
-#define STORE_FLAG_VALUE -1
+#define STORE_FLAG_VALUE 1
#define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE))
#define CASE_VECTOR_MODE SImode
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 77ac36d..a4c14a3 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -215,7 +215,7 @@
;; get variables in this mode and pseudos are never spilled.
(define_insn "movbi"
[(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
- (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
+ (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
""
"@
%.\\tmov%t0\\t%0, %1;
@@ -859,12 +859,26 @@
;; Conditional stores
-(define_insn "setcc_from_bi"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
- (const_int 0)))]
+(define_insn "setcc<mode>_from_bi"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
+ (const_int 0)))]
+ ""
+ "%.\\tselp%t0\\t%0, 1, 0, %1;")
+
+(define_insn "extendbi<mode>2"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (sign_extend:HSDIM
+ (match_operand:BI 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tselp%t0\\t%0, -1, 0, %1;")
+
+(define_insn "zero_extendbi<mode>2"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (zero_extend:HSDIM
+ (match_operand:BI 1 "nvptx_register_operand" "R")))]
""
- "%.\\tselp%t0 %0,-1,0,%1;")
+ "%.\\tselp%t0\\t%0, 1, 0, %1;")
(define_insn "sel_true<mode>"
[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
@@ -902,22 +916,6 @@
""
"%.\\tselp%t0\\t%0, %3, %2, %1;")
-(define_insn "setcc_int<mode>"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (match_operator:SI 1 "nvptx_comparison_operator"
- [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
- (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
- ""
- "%.\\tset%t0%c1\\t%0, %2, %3;")
-
-(define_insn "setcc_int<mode>"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (match_operator:SI 1 "nvptx_float_comparison_operator"
- [(match_operand:SDFM 2 "nvptx_register_operand" "R")
- (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
- ""
- "%.\\tset%t0%c1\\t%0, %2, %3;")
-
(define_insn "setcc_float<mode>"
[(set (match_operand:SF 0 "nvptx_register_operand" "=R")
(match_operator:SF 1 "nvptx_comparison_operator"
@@ -934,29 +932,35 @@
""
"%.\\tset%t0%c1\\t%0, %2, %3;")
-(define_expand "cstorebi4"
- [(set (match_operand:SI 0 "nvptx_register_operand")
- (match_operator:SI 1 "ne_operator"
- [(match_operand:BI 2 "nvptx_register_operand")
- (match_operand:BI 3 "const0_operand")]))]
- ""
- "")
-
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "nvptx_register_operand")
(match_operator:SI 1 "nvptx_comparison_operator"
- [(match_operand:HSDIM 2 "nvptx_register_operand")
- (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
+ [(match_operand:HSDIM 2 "nvptx_register_operand")
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
""
- "")
+{
+ rtx reg = gen_reg_rtx (BImode);
+ rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
+ operands[2], operands[3]);
+ emit_move_insn (reg, cmp);
+ emit_insn (gen_setccsi_from_bi (operands[0], reg));
+ DONE;
+})
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "nvptx_register_operand")
(match_operator:SI 1 "nvptx_float_comparison_operator"
- [(match_operand:SDFM 2 "nvptx_register_operand")
- (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
+ [(match_operand:SDFM 2 "nvptx_register_operand")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
""
- "")
+{
+ rtx reg = gen_reg_rtx (BImode);
+ rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
+ operands[2], operands[3]);
+ emit_move_insn (reg, cmp);
+ emit_insn (gen_setccsi_from_bi (operands[0], reg));
+ DONE;
+})
;; Calls