aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog.omp29
-rw-r--r--gcc/DATESTAMP.omp2
-rw-r--r--gcc/config/gcn/gcn-valu.md313
-rw-r--r--gcc/config/gcn/gcn.cc124
-rw-r--r--gcc/config/gcn/gcn.md28
-rw-r--r--gcc/doc/tm.texi9
-rw-r--r--gcc/doc/tm.texi.in2
-rw-r--r--gcc/hooks.cc7
-rw-r--r--gcc/hooks.h1
-rw-r--r--gcc/optc-save-gen.awk19
-rw-r--r--gcc/params.opt6
-rw-r--r--gcc/target.def14
-rw-r--r--gcc/testsuite/ChangeLog.omp16
-rw-r--r--gcc/tree-vect-stmts.cc41
-rw-r--r--libgomp/ChangeLog.omp32
15 files changed, 535 insertions, 108 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 69162ef..4b23eb1 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,32 @@
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-05-12 Thomas Schwinge <tschwinge@baylibre.com>
+
+ * config/nvptx/nvptx-sm.def: Add '61'.
+ * config/nvptx/nvptx-gen.h: Regenerate.
+ * config/nvptx/nvptx-gen.opt: Likewise.
+ * config/nvptx/nvptx.cc (first_ptx_version_supporting_sm): Adjust.
+ * config/nvptx/nvptx.opt (-march-map=sm_61, -march-map=sm_62):
+ Likewise.
+ * config.gcc: Likewise.
+ * doc/invoke.texi (Nvidia PTX Options): Document '-march=sm_61'.
+ * config/nvptx/gen-multilib-matches-tests: Extend.
+
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-05-12 Thomas Schwinge <tschwinge@baylibre.com>
+
+ * config/nvptx/nvptx-opts.h (enum ptx_version): Add
+ 'PTX_VERSION_5_0'.
+ * config/nvptx/nvptx.cc (ptx_version_to_string)
+ (ptx_version_to_number): Adjust.
+ * config/nvptx/nvptx.h (TARGET_PTX_5_0): New.
+ * config/nvptx/nvptx.opt (Enum(ptx_version)): Add 'EnumValue'
+ '5.0' for 'PTX_VERSION_5_0'.
+ * doc/invoke.texi (Nvidia PTX Options): Document '-mptx=5.0'.
+
2025-06-17 Tobias Burnus <tburnus@baylibre.com>
Backported from master:
diff --git a/gcc/DATESTAMP.omp b/gcc/DATESTAMP.omp
index 6952979..7578d89 100644
--- a/gcc/DATESTAMP.omp
+++ b/gcc/DATESTAMP.omp
@@ -1 +1 @@
-20250703
+20250722
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 4b21302..3899117 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1133,6 +1133,23 @@
DONE;
})
+(define_expand "gather_load<mode><vndi>"
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), NULL);
+
+ emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
[(set (match_operand:V_MOV 0 "register_operand")
@@ -1259,6 +1276,23 @@
DONE;
})
+(define_expand "scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), NULL);
+
+ emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -1455,28 +1489,26 @@
;; }}}
;; {{{ ALU special case: add/sub
-(define_insn "add<mode>3<exec_clobber>"
+(define_insn "add<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand")
(plus:V_INT_1REG
(match_operand:V_INT_1REG 1 "register_operand")
- (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand")))]
""
{@ [cons: =0, %1, 2; attrs: type, length]
- [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
+ [v,v,vSvA;vop2,4] {v_add_u32|v_add_nc_u32}\t%0, %2, %1
[v,v,vSvB;vop2,8] ^
})
-(define_insn "add<mode>3_dup<exec_clobber>"
+(define_insn "add<mode>3_dup<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand")
(plus:V_INT_1REG
(vec_duplicate:V_INT_1REG
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
- (match_operand:V_INT_1REG 1 "register_operand")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 1 "register_operand")))]
""
{@ [cons: =0, 1, 2; attrs: type, length]
- [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
+ [v,v,SvA;vop2,4] {v_add_u32|v_add_nc_u32}\t%0, %2, %1
[v,v,SvB;vop2,8] ^
})
@@ -1503,16 +1535,16 @@
(plus:V_SI
(vec_duplicate:V_SI
(match_operand:SI 1 "gcn_alu_operand"))
- (match_operand:V_SI 2 "register_operand")))
+ (match_operand:V_SI 2 "gcn_alu_operand")))
(set (match_operand:DI 3 "register_operand")
- (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
- (match_dup 1))
- (vec_duplicate:V_SI (match_dup 2))))]
+ (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 1))
+ (match_dup 2))
+ (match_dup 2)))]
""
{@ [cons: =0, 1, 2, =3; attrs: type, length]
- [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
- [v,SvB,v,cV;vop2 ,8] ^
- [v,SvA,v,Sg;vop3b,8] ^
+ [v,SvA,vA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
+ [v,SvB,vA,cV;vop2 ,8] ^
+ [v,SvA,vA,Sg;vop3b,8] ^
})
; v_addc does not accept an SGPR because the VCC read already counts as an
@@ -1551,16 +1583,15 @@
[(set_attr "type" "vop2,vop3b")
(set_attr "length" "4,8")])
-(define_insn "sub<mode>3<exec_clobber>"
+(define_insn "sub<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
(minus:V_INT_1REG
(match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
- (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))]
""
"@
- v_sub_co_u32\t%0, vcc, %1, %2
- v_subrev_co_u32\t%0, vcc, %2, %1"
+ {v_sub_u32|v_sub_nc_u32}\t%0, %1, %2
+ {v_subrev_u32|v_subrev_nc_u32}\t%0, %2, %1"
[(set_attr "type" "vop2")
(set_attr "length" "8,8")])
@@ -1648,6 +1679,39 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc));
+ emit_insn (gen_vec_duplicate<vnsi> (operands[3],
+ gcn_operand_part (DImode, operands[1], 1)));
+ emit_insn (gen_addc<vnsi>3
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[3],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "add<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "= v")
(vec_merge:V_DI
@@ -1685,6 +1749,49 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (vec_merge:V_DI
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])
+ && gcn_can_split_p (<MODE>mode, operands[4])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5],
+ gcn_operand_part (DImode, operands[1], 1),
+ gcn_gen_undef (<VnSI>mode),
+ operands[4]));
+ emit_insn (gen_addc<vnsi>3_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[5],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "sub<mode>3"
[(set (match_operand:V_DI 0 "register_operand" "= v, v")
(minus:V_DI
@@ -1827,7 +1934,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
(match_dup 2))
- (match_dup 1)))]
+ (match_dup 2)))]
""
{@ [cons: =0, 1, 2, =3]
[v,ASv,v,&Sg] #
@@ -1878,7 +1985,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
(match_dup 2))
- (match_dup 1))
+ (match_dup 2))
(match_dup 5)))]
""
{@ [cons: =0, 1, 2, =3, 4, 5]
@@ -1932,7 +2039,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (match_dup 1))
(vec_duplicate:V_DI (match_dup 2)))
- (match_dup 1)))]
+ (vec_duplicate:V_DI (match_dup 2))))]
""
{@ [cons: =0, 1, 2, =3]
[v,v,DbSv,&cV] #
@@ -1981,7 +2088,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (match_dup 1))
(vec_duplicate:V_DI (match_dup 2)))
- (match_dup 1))
+ (vec_duplicate:V_DI (match_dup 2)))
(match_dup 5)))]
""
{@ [cons: =0, 1, 2, =3, 4, 5]
@@ -2190,6 +2297,22 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")])
+(define_insn "<su>mul<mode>3_highpart_dup<exec>"
+ [(set (match_operand:V_SI 0 "register_operand" "= v")
+ (truncate:V_SI
+ (lshiftrt:<VnDI>
+ (mult:<VnDI>
+ (any_extend:<VnDI>
+ (vec_duplicate:V_SI
+ (match_operand:SI 1 "gcn_alu_operand" "SvA")))
+ (any_extend:<VnDI>
+ (match_operand:V_SI 2 "gcn_alu_operand" " vA")))
+ (const_int 32))))]
+ ""
+ "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
(define_insn "mul<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
@@ -2201,11 +2324,11 @@
(set_attr "length" "8")])
(define_insn "mul<mode>3_dup<exec>"
- [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
+ [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
- (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
(vec_duplicate:V_INT_1REG
- (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
+ (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA"))
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))]
""
"v_mul_lo_u32\t%0, %1, %2"
[(set_attr "type" "vop3a")
@@ -2241,6 +2364,37 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo));
+ emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(vec_merge:V_DI
@@ -2289,6 +2443,56 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (vec_merge:V_DI
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
+ old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (<VnSI>mode);
+
+ emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo,
+ exec));
+ emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_zext"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(mult:V_DI
@@ -3795,9 +3999,9 @@
/* Unsigned comparisons use the same patterns as signed comparisons,
except that they use unsigned operators (e.g. LTU vs LT).
The '%E1' directive then does the Right Thing. */
- emit_insn (gen_vec_cmpu<mode>di_exec (operands[0], operands[1],
- operands[2], operands[3],
- operands[4]));
+ emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
DONE;
})
@@ -4052,6 +4256,32 @@
DONE;
})
+(define_expand "mask_gather_load<mode><vndi>"
+ [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+ (unspec:V_MOV
+ [(match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "")
+ (match_operand:V_MOV 6 "maskload_else_operand")]
+ UNSPEC_GATHER))]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), exec);
+
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx,
+ gcn_gen_undef (<MODE>mode),
+ exec));
+ DONE;
+ })
+
(define_expand "mask_scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")
@@ -4080,6 +4310,27 @@
DONE;
})
+(define_expand "mask_scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), exec);
+
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx, exec));
+ DONE;
+ })
+
(define_code_iterator cond_op [plus minus mult])
(define_expand "cond_<expander><mode>"
@@ -4400,7 +4651,7 @@
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
- emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
+ emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1));
emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
DONE;
})
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9b882d9..a6f3731 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -54,6 +54,7 @@
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -183,6 +184,11 @@ gcn_option_override (void)
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
+
+ /* TODO: This seems to produce tighter loops, but the testsuites expects it
+ to be set to '2', so I'll leave it default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1); */
}
/* }}} */
@@ -1276,13 +1282,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS) \
}
#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS) \
{ \
@@ -1290,13 +1296,13 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
+ USE_QHF (case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS);) \
case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
+ USE_QHF (case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS);) \
case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
+ USE_QHF (case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS);) \
default: \
break; \
} \
@@ -1341,13 +1347,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
}
#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
@@ -1356,15 +1362,22 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
- case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
- case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
- case E_TImode: \
- USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_QImode: \
+ return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HImode: \
+ return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HFmode: \
+ return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_SImode: \
+ return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_SFmode: \
+ return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_DImode: \
+ return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_DFmode: \
+ return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec);) \
+ USE_TI (case E_TImode: \
+ return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
default: \
break; \
} \
@@ -1373,7 +1386,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
return NULL_RTX; \
}
-/* These have TImode support. */
+/* These support everything. */
+#define USE_QHF(ARGS) ARGS
#define USE_TI(ARGS) ARGS
GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
@@ -1383,6 +1397,7 @@ GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
#define USE_TI(ARGS)
GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
A(dest, src1, src2, vcc))
GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
@@ -1394,15 +1409,20 @@ GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
A(dest, src1, src2, vccout, vccin))
GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
-GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
A(dest, addr, src, exec))
GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
A(dest, addr, as, vol))
-GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
+/* These do not have QI, HI, or any FP support. */
+#undef USE_QHF
+#define USE_QHF(ARGS)
+GEN_VNM (ashl,3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
+GEN_VNM (mul,3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+
+#undef USE_QHF
#undef USE_TI
#undef GEN_VNM
#undef GEN_VN
@@ -1996,8 +2016,8 @@ gcn_expand_vector_init (rtx op0, rtx vec)
rtx addr = gen_reg_rtx (addrmode);
int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0)));
- emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)),
- GEN_INT (unit_size)));
+ emit_insn (gen_mulvNsi3_dup (ramp, GEN_INT (unit_size),
+ gen_rtx_REG (offsetmode, VGPR_REGNO (1))));
bool simple_repeat = true;
@@ -2294,36 +2314,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
Return values.
ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+ 64-bit offsets - return VnDImode vector of absolute addresses. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
int vf = GET_MODE_NUNITS (GET_MODE (offsets));
- rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
- rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+ rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+ rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+ bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale))),
- NULL, exec));
+ emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+ emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
+ /* No instructions support DImode offsets. */
+ if (use_di)
+ {
+ emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+ return abs_addr;
+ }
/* "Global" instructions do not support negative register offsets. */
- if (as == ADDR_SPACE_FLAT || !unsigned_p)
+ else if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+ emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
else
- emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
- return tmpdi;
+ emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
+ return abs_addr;
}
else if (as == ADDR_SPACE_GLOBAL)
- return tmpsi;
+ return scaled_offsets;
gcc_unreachable ();
}
@@ -5765,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class,
return bsd_libc_has_function (fn_class, type);
}
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
+
+static bool
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+ int ARG_UNUSED (scale),
+ unsigned int ARG_UNUSED (group_size))
+{
+ return true;
+}
+
/* }}} */
/* {{{ md_reorg pass. */
@@ -7964,6 +8004,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 1998931..abb1850 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1136,14 +1136,13 @@
[(set (match_operand:SI 0 "register_operand" "= Sg, Sg, Sg, v")
(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
(match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ, B,vBSv")))
- (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))
- (clobber (match_scratch:DI 4 "= X, X, X, cV"))]
+ (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))]
""
"@
s_add_i32\t%0, %1, %2
s_addk_i32\t%0, %2
s_add_i32\t%0, %1, %2
- v_add_co_u32\t%0, vcc, %2, %1"
+ {v_add_u32|v_add_nc_u32}\t%0, %2, %1"
[(set_attr "type" "sop2,sopk,sop2,vop2")
(set_attr "length" "4,4,8,8")])
@@ -1151,8 +1150,7 @@
[(parallel [(set (match_operand:SI 0 "register_operand")
(plus:SI (match_operand:SI 1 "gcn_alu_operand")
(match_operand:SI 2 "gcn_alu_operand")))
- (clobber (reg:BI SCC_REG))
- (clobber (scratch:DI))])]
+ (clobber (reg:BI SCC_REG))])]
""
{})
@@ -1332,14 +1330,13 @@
[(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
(match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
- (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
- (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
+ (clobber (match_scratch:BI 3 "=cs, cs, X, X"))]
""
"@
s_sub_i32\t%0, %1, %2
s_sub_i32\t%0, %1, %2
- v_subrev_co_u32\t%0, vcc, %2, %1
- v_sub_co_u32\t%0, vcc, %1, %2"
+ {v_subrev_u32|v_subrev_nc_u32}\t%0, %2, %1
+ {v_sub_u32|v_sub_nc_u32}\t%0, %1, %2"
[(set_attr "type" "sop2,sop2,vop2,vop2")
(set_attr "length" "4,8,8,8")])
@@ -1569,8 +1566,7 @@
(mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v")
(match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A")))
(clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v"))
- (clobber (match_scratch:BI 4 "=cs, cs, X, X"))
- (clobber (match_scratch:DI 5 "=X, X,cV,cV"))]
+ (clobber (match_scratch:BI 4 "=cs, cs, X, X"))]
""
"#"
"reload_completed"
@@ -1585,15 +1581,13 @@
emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
- rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
- rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
- add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+ add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, add, clob));
emit_insn (add);
emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
- clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
- clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
- add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+ clob = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+ add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, add, clob));
emit_insn (add);
DONE;
})
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a96700c..ff52c4f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6515,6 +6515,15 @@ The default is @code{NULL_TREE} which means to not vectorize scatter
stores.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER (machine_mode @var{mode}, int @var{scale}, unsigned int @var{group_size})
+This hook returns TRUE if gather loads or scatter stores are cheaper on
+this target than a sequence of elementwise loads or stores. The @var{mode}
+and @var{scale} correspond to the @code{gather_load} and
+@code{scatter_store} instruction patterns. The @var{group_size} is the
+number of scalar elements in each scalar loop iteration that are to be
+combined into the vector.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index eccc4d8..b03ad4c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4311,6 +4311,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_BUILTIN_SCATTER
+@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
@hook TARGET_SIMD_CLONE_ADJUST
diff --git a/gcc/hooks.cc b/gcc/hooks.cc
index 951825d..76cb5931 100644
--- a/gcc/hooks.cc
+++ b/gcc/hooks.cc
@@ -117,6 +117,13 @@ hook_bool_mode_const_rtx_true (machine_mode, const_rtx)
return true;
}
+/* Generic hook that takes (machine_mode, int, unsigned) and returns false. */
+bool
+hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned)
+{
+ return false;
+}
+
/* Generic hook that takes (machine_mode, rtx) and returns false. */
bool
hook_bool_mode_rtx_false (machine_mode, rtx)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index c0663bf..e95bd11 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -36,6 +36,7 @@ extern bool hook_bool_mode_true (machine_mode);
extern bool hook_bool_mode_mode_true (machine_mode, machine_mode);
extern bool hook_bool_mode_const_rtx_false (machine_mode, const_rtx);
extern bool hook_bool_mode_const_rtx_true (machine_mode, const_rtx);
+extern bool hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned);
extern bool hook_bool_mode_rtx_false (machine_mode, rtx);
extern bool hook_bool_mode_rtx_true (machine_mode, rtx);
extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *,
diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
index a3d7e5a..31756ec 100644
--- a/gcc/optc-save-gen.awk
+++ b/gcc/optc-save-gen.awk
@@ -1313,6 +1313,12 @@ for (i = 0; i < n_opts; i++) {
# offloading is enabled.
if (flag_set_p("Target", flags[i]))
var_target_opt[n_opt_val] = 1;
+
+ # These options should not be passed from host to target, but
+ # are not actually target specific.
+ if (flag_set_p("NoOffload", flags[i]))
+ var_target_opt[n_opt_val] = 2;
+
n_opt_val++;
}
}
@@ -1393,7 +1399,7 @@ for (i = 0; i < n_opt_val; i++) {
# Do not stream out target-specific opts if offloading is
# enabled.
if (var_target_opt[i])
- print " if (!lto_stream_offload_p)"
+ print " if (!lto_stream_offload_p) {"
# If applicable, encode the streamed value.
if (var_opt_optimize_init[i]) {
print " if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)";
@@ -1403,6 +1409,8 @@ for (i = 0; i < n_opt_val; i++) {
} else {
print " bp_pack_var_len_" sgn " (bp, ptr->" name");";
}
+ if (var_target_opt[i])
+ print "}"
}
}
print " for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)";
@@ -1418,10 +1426,14 @@ print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)"
print "{";
for (i = 0; i < n_opt_val; i++) {
name = var_opt_val[i]
- if (var_target_opt[i]) {
+ if (var_target_opt[i] == 1) {
print "#ifdef ACCEL_COMPILER"
print "#error accel compiler cannot define Optimization attribute for target-specific option " name;
print "#else"
+ } else if (var_target_opt[i] == 2) {
+ print "#ifdef ACCEL_COMPILER"
+ print " ptr->" name " = global_options." name ";"
+ print "#else"
}
otype = var_opt_val_type[i];
if (otype ~ "^const char \\**$") {
@@ -1489,6 +1501,9 @@ for (i = 0; i < n_opts; i++) {
if (flag_set_p("Warning", flags[i]))
continue;
+ if (flag_set_p("NoOffload", flags[i]))
+ continue;
+
if (name in checked_options)
continue;
checked_options[name]++
diff --git a/gcc/params.opt b/gcc/params.opt
index a2b606f..28b47ff 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1226,7 +1226,7 @@ Common Joined UInteger Var(param_use_canonical_types) Init(1) IntegerRange(0, 1)
Whether to use canonical types.
-param=vect-epilogues-nomask=
-Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization
+Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization NoOffload
Enable loop epilogue vectorization using smaller vector size.
-param=vect-max-layout-candidates=
@@ -1246,11 +1246,11 @@ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6)
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
-param=vect-partial-vector-usage=
-Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization
+Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization NoOffload
Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2.
-param=vect-inner-loop-cost-factor=
-Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
+Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization NoOffload
The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
-param=vect-induction-float=
diff --git a/gcc/target.def b/gcc/target.def
index 6c7cdc8..c631131 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2056,6 +2056,20 @@ all zeros. GCC can then try to branch around the instruction instead.",
(unsigned ifn),
default_empty_mask_is_expensive)
+/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if\n\
+we cannot use a contiguous access. */
+DEFHOOK
+(prefer_gather_scatter,
+ "This hook returns TRUE if gather loads or scatter stores are cheaper on\n\
+this target than a sequence of elementwise loads or stores. The @var{mode}\n\
+and @var{scale} correspond to the @code{gather_load} and\n\
+@code{scatter_store} instruction patterns. The @var{group_size} is the\n\
+number of scalar elements in each scalar loop iteration that are to be\n\
+combined into the vector.",
+ bool,
+ (machine_mode mode, int scale, unsigned int group_size),
+ hook_bool_mode_int_unsigned_false)
+
/* Target builtin that implements vector gather operation. */
DEFHOOK
(builtin_gather,
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index a7ea0db..a67b8b7 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,19 @@
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-05-12 Thomas Schwinge <tschwinge@baylibre.com>
+
+ * gcc.target/nvptx/march-map=sm_61.c: Adjust.
+ * gcc.target/nvptx/march-map=sm_62.c: Likewise.
+ * gcc.target/nvptx/march=sm_61.c: New.
+
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-05-12 Thomas Schwinge <tschwinge@baylibre.com>
+
+ * gcc.target/nvptx/mptx=5.0.c: New.
+
2025-07-03 Thomas Schwinge <tschwinge@baylibre.com>
Backported from master:
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 978a462..f09e06b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1822,21 +1822,35 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
+ tree vectype,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info,
- vec<int> *elsvals)
+ vec<int> *elsvals,
+ unsigned int group_size,
+ bool single_element_p)
{
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
- return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
- masked_p, gs_info, elsvals);
+ {
+ if (!vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
+ masked_p, gs_info, elsvals))
+ return false;
+ }
+ else
+ {
+ tree old_offset_type = TREE_TYPE (gs_info->offset);
+ tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
- tree old_offset_type = TREE_TYPE (gs_info->offset);
- tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+ gcc_assert (TYPE_PRECISION (new_offset_type)
+ >= TYPE_PRECISION (old_offset_type));
+ gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ }
- gcc_assert (TYPE_PRECISION (new_offset_type)
- >= TYPE_PRECISION (old_offset_type));
- gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ if (!single_element_p
+ && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+ gs_info->scale,
+ group_size))
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -2397,11 +2411,11 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
allows us to use contiguous accesses. */
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
- && single_element_p
&& (!slp_node || SLP_TREE_LANES (slp_node) == 1)
&& loop_vinfo
- && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
- masked_p, gs_info, elsvals))
+ && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+ masked_p, gs_info, elsvals,
+ group_size, single_element_p))
*memory_access_type = VMAT_GATHER_SCATTER;
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
@@ -2558,8 +2572,9 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
{
gcc_assert (!slp_node);
if (loop_vinfo
- && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
- masked_p, gs_info, elsvals))
+ && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+ masked_p, gs_info, elsvals,
+ 1, false))
*memory_access_type = VMAT_GATHER_SCATTER;
else
*memory_access_type = VMAT_ELEMENTWISE;
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 689cfbf..73baf37 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,35 @@
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-07-17 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR target/119692
+ * testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 { target offload_device } }'.
+ * testsuite/libgomp.c++/pr119692-1-5.C: Likewise.
+ * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise.
+ * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise.
+ * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise.
+ * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise.
+
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-05-12 Thomas Schwinge <tschwinge@baylibre.com>
+
+ * testsuite/libgomp.c/declare-variant-3-sm61.c: New.
+ * testsuite/libgomp.c/declare-variant-3.h: Adjust.
+
+2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ Backported from master:
+ 2025-07-21 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR target/119853
+ PR target/119854
+ * testsuite/libgomp.c++/target-cdtor-1.C: Adjust for
+ 'targetm.cxx.use_aeabi_atexit'.
+ * testsuite/libgomp.c++/target-cdtor-2.C: Likewise.
+
2025-07-03 Jakub Jelinek <jakub@redhat.com>
Backported from master: