diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2021-12-15 15:09:48 +0100 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2022-01-04 12:28:02 +0100 |
commit | beed3f8f60492289ca6211d86c54a2254a642035 (patch) | |
tree | decc94b8e351664b22f98f7ccbc14ce8bf694851 | |
parent | a54d11749f0ce98192cfe28e5ccc0633d4db3982 (diff) | |
download | gcc-beed3f8f60492289ca6211d86c54a2254a642035.zip gcc-beed3f8f60492289ca6211d86c54a2254a642035.tar.gz gcc-beed3f8f60492289ca6211d86c54a2254a642035.tar.bz2 |
nvptx: Transition nvptx backend to STORE_FLAG_VALUE = 1
This patch to the nvptx backend changes the backend's STORE_FLAG_VALUE
from -1 to 1, by using BImode predicates and selp instructions, instead
of set instructions (almost always followed by integer negation).
Historically, it was reasonable (though rare) for backends to use -1
for representing true during the RTL passes. However with tree-ssa,
GCC now emits lots of code that reads and writes _Bool values, requiring
STORE_FLAG_VALUE=-1 targets to frequently convert 0/-1 pseudos to 0/1
pseudos using integer negation. Unfortunately, this process prevents
or complicates many optimizations (negate isn't associative with logical
AND, OR and XOR, and interferes with range/vrp/nonzerobits bounds etc.).
The impact of this is that for a relatively simple logical expression
like "return (x==21) && (y==69);", the nvptx backend currently generates:
mov.u32 %r26, %ar0;
mov.u32 %r27, %ar1;
set.u32.eq.u32 %r30, %r26, 21;
neg.s32 %r31, %r30;
mov.u32 %r29, %r31;
set.u32.eq.u32 %r33, %r27, 69;
neg.s32 %r34, %r33;
mov.u32 %r32, %r34;
cvt.u16.u8 %r39, %r29;
mov.u16 %r36, %r39;
cvt.u16.u8 %r39, %r32;
mov.u16 %r37, %r39;
and.b16 %r35, %r36, %r37;
cvt.u32.u16 %r38, %r35;
cvt.u32.u8 %value, %r38;
This patch tweaks nvptx to generate 0/1 values instead, requiring the
same number of instructions, using (BImode) predicate registers and selp
instructions so as to now generate the almost identical:
mov.u32 %r26, %ar0;
mov.u32 %r27, %ar1;
setp.eq.u32 %r31, %r26, 21;
selp.u32 %r30, 1, 0, %r31;
mov.u32 %r29, %r30;
setp.eq.u32 %r34, %r27, 69;
selp.u32 %r33, 1, 0, %r34;
mov.u32 %r32, %r33;
cvt.u16.u8 %r39, %r29;
mov.u16 %r36, %r39;
cvt.u16.u8 %r39, %r32;
mov.u16 %r37, %r39;
and.b16 %r35, %r36, %r37;
cvt.u32.u16 %r38, %r35;
cvt.u32.u8 %value, %r38;
The hidden benefit is that this sequence can (in theory) be optimized
by the RTL passes to eventually generate a much shorter sequence using
an and.pred instruction (just like Nvidia's nvcc compiler).
This patch has been tested on nvptx-none with a "make" and "make -k check"
(including newlib) hosted on x86_64-pc-linux-gnu with no new failures.
gcc/ChangeLog:
* config/nvptx/nvptx.h (STORE_FLAG_VALUE): Change to 1.
* config/nvptx/nvptx.md (movbi): Use P1 constraint for true.
(setcc_from_bi): Remove SImode specific pattern.
(setcc<mode>_from_bi): Provide more general HSDIM pattern.
(extendbi<mode>2, zero_extendbi<mode>2): Provide instructions
for sign- and zero-extending BImode predicates to integers.
(setcc_int<mode>): Remove previous (-1-based) instructions.
(cstorebi4): Remove BImode to SImode specific expander.
(cstore<mode>4): Fix indentation. Expand using setccsi_from_bi.
(cstore<mode>4): For both integer and floating point modes.
-rw-r--r-- | gcc/config/nvptx/nvptx.h | 2 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 76 |
2 files changed, 41 insertions, 37 deletions
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index 728dff0..d61bd70 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -319,7 +319,7 @@ struct GTY(()) machine_function #define NO_DOT_IN_LABEL #define ASM_COMMENT_START "//" -#define STORE_FLAG_VALUE -1 +#define STORE_FLAG_VALUE 1 #define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE)) #define CASE_VECTOR_MODE SImode diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 77ac36d..a4c14a3 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -215,7 +215,7 @@ ;; get variables in this mode and pseudos are never spilled. (define_insn "movbi" [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R") - (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))] + (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))] "" "@ %.\\tmov%t0\\t%0, %1; @@ -859,12 +859,26 @@ ;; Conditional stores -(define_insn "setcc_from_bi" - [(set (match_operand:SI 0 "nvptx_register_operand" "=R") - (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R") - (const_int 0)))] +(define_insn "setcc<mode>_from_bi" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R") + (const_int 0)))] + "" + "%.\\tselp%t0\\t%0, 1, 0, %1;") + +(define_insn "extendbi<mode>2" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (sign_extend:HSDIM + (match_operand:BI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tselp%t0\\t%0, -1, 0, %1;") + +(define_insn "zero_extendbi<mode>2" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (zero_extend:HSDIM + (match_operand:BI 1 "nvptx_register_operand" "R")))] "" - "%.\\tselp%t0 %0,-1,0,%1;") + "%.\\tselp%t0\\t%0, 1, 0, %1;") (define_insn "sel_true<mode>" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") @@ -902,22 +916,6 @@ "" "%.\\tselp%t0\\t%0, %3, %2, %1;") -(define_insn "setcc_int<mode>" - [(set (match_operand:SI 0 
"nvptx_register_operand" "=R") - (match_operator:SI 1 "nvptx_comparison_operator" - [(match_operand:HSDIM 2 "nvptx_register_operand" "R") - (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] - "" - "%.\\tset%t0%c1\\t%0, %2, %3;") - -(define_insn "setcc_int<mode>" - [(set (match_operand:SI 0 "nvptx_register_operand" "=R") - (match_operator:SI 1 "nvptx_float_comparison_operator" - [(match_operand:SDFM 2 "nvptx_register_operand" "R") - (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] - "" - "%.\\tset%t0%c1\\t%0, %2, %3;") - (define_insn "setcc_float<mode>" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") (match_operator:SF 1 "nvptx_comparison_operator" @@ -934,29 +932,35 @@ "" "%.\\tset%t0%c1\\t%0, %2, %3;") -(define_expand "cstorebi4" - [(set (match_operand:SI 0 "nvptx_register_operand") - (match_operator:SI 1 "ne_operator" - [(match_operand:BI 2 "nvptx_register_operand") - (match_operand:BI 3 "const0_operand")]))] - "" - "") - (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "nvptx_register_operand") (match_operator:SI 1 "nvptx_comparison_operator" - [(match_operand:HSDIM 2 "nvptx_register_operand") - (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] + [(match_operand:HSDIM 2 "nvptx_register_operand") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))] "" - "") +{ + rtx reg = gen_reg_rtx (BImode); + rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, + operands[2], operands[3]); + emit_move_insn (reg, cmp); + emit_insn (gen_setccsi_from_bi (operands[0], reg)); + DONE; +}) (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "nvptx_register_operand") (match_operator:SI 1 "nvptx_float_comparison_operator" - [(match_operand:SDFM 2 "nvptx_register_operand") - (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] + [(match_operand:SDFM 2 "nvptx_register_operand") + (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))] "" - "") +{ + rtx reg = gen_reg_rtx (BImode); + rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, 
+ operands[2], operands[3]); + emit_move_insn (reg, cmp); + emit_insn (gen_setccsi_from_bi (operands[0], reg)); + DONE; +}) ;; Calls |