diff options
Diffstat (limited to 'gcc/config/gcn/gcn-valu.md')
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 321 |
1 files changed, 293 insertions, 28 deletions
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index f49c1ed..a34d2e3 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -811,7 +811,7 @@ [(set_attr "type" "vop3a") (set_attr "length" "8") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "write")]) ; FIXME: 64bit operations really should be splitters, but I am not sure how ; to represent vertical subregs. @@ -828,7 +828,7 @@ [(set_attr "type" "vmult") (set_attr "length" "16") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "write")]) (define_expand "vec_set<mode>" [(set (match_operand:V_MOV 0 "register_operand") @@ -854,7 +854,7 @@ [(set_attr "type" "vop3a") (set_attr "length" "8") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "write")]) (define_insn "*vec_set<mode>_1" [(set (match_operand:V_2REG 0 "register_operand" "=v") @@ -871,7 +871,7 @@ [(set_attr "type" "vmult") (set_attr "length" "16") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "write")]) (define_insn "vec_duplicate<mode><exec>" [(set (match_operand:V_1REG 0 "register_operand" "=v") @@ -910,7 +910,7 @@ [(set_attr "type" "vop3a") (set_attr "length" "8") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "read")]) (define_insn "vec_extract<mode><scalar_mode>" [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") @@ -922,7 +922,7 @@ [(set_attr "type" "vmult") (set_attr "length" "16") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "read")]) (define_insn "vec_extract<mode><scalar_mode>" [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") @@ -934,7 +934,7 @@ [(set_attr "type" "vmult") (set_attr "length" "32") (set_attr "exec" "none") - (set_attr "laneselect" "yes")]) + (set_attr "laneselect" "read")]) (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop" [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v") @@ -1133,6 +1133,23 @@ DONE; }) +(define_expand "gather_load<mode><vndi>" + [(match_operand:V_MOV 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), NULL); + + emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx, + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "gather<mode>_expr<exec>" [(set (match_operand:V_MOV 0 "register_operand") @@ -1175,6 +1192,7 @@ return buf; } [(set_attr "type" "flat") + (set_attr "flatmemaccess" "load") (set_attr "length" "12") (set_attr "cdna" "*,cdna2,*,cdna2") (set_attr "xnack" "off,off,on,on")]) @@ -1233,6 +1251,7 @@ return buf; } [(set_attr "type" "flat") + (set_attr "flatmemaccess" "load") (set_attr "length" "12") (set_attr "cdna" "*,cdna2,*,cdna2") (set_attr "xnack" "off,off,on,on")]) @@ -1259,6 +1278,23 @@ DONE; }) +(define_expand "scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), NULL); + + emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4], + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "scatter<mode>_expr<exec_scatter>" [(set (mem:BLK (scratch)) @@ -1301,6 +1337,7 @@ return buf; } [(set_attr "type" "flat") + (set_attr "flatmemaccess" "store") (set_attr "length" "12") (set_attr "cdna" "*,cdna2")]) @@ -1356,6 +1393,7 @@ return buf; } [(set_attr "type" "flat") + (set_attr "flatmemaccess" "store") (set_attr "length" "12") (set_attr "cdna" "*,cdna2")]) @@ -1501,16 +1539,16 @@ (plus:V_SI (vec_duplicate:V_SI (match_operand:SI 1 "gcn_alu_operand")) - (match_operand:V_SI 2 "register_operand"))) + (match_operand:V_SI 2 "gcn_alu_operand"))) (set (match_operand:DI 3 "register_operand") - (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2)) - (match_dup 1)) - (vec_duplicate:V_SI (match_dup 2))))] + (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 1)) + (match_dup 2)) + (match_dup 2)))] "" {@ [cons: =0, 1, 2, =3; attrs: type, length] - [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2 - [v,SvB,v,cV;vop2 ,8] ^ - [v,SvA,v,Sg;vop3b,8] ^ + [v,SvA,vA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2 + [v,SvB,vA,cV;vop2 ,8] ^ + [v,SvA,vA,Sg;vop3b,8] ^ }) ; v_addc does not accept an SGPR because the VCC read already counts as an @@ -1645,6 +1683,39 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc)); + emit_insn (gen_vec_duplicate<vnsi> (operands[3], + gcn_operand_part (DImode, operands[1], 1))); + emit_insn (gen_addc<vnsi>3 + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[3], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc)); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "add<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "= v") (vec_merge:V_DI @@ -1682,6 +1753,49 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (vec_merge:V_DI + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2]) + && gcn_can_split_p (<MODE>mode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup_exec + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc, + gcn_operand_part (<MODE>mode, operands[3], 0), + operands[4])); + emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5], + gcn_operand_part (DImode, operands[1], 1), + gcn_gen_undef (<VnSI>mode), + operands[4])); + emit_insn (gen_addc<vnsi>3_exec + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[5], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc, + gcn_operand_part (<MODE>mode, operands[3], 1), + operands[4])); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "sub<mode>3" [(set (match_operand:V_DI 0 "register_operand" "= v, v") (minus:V_DI @@ -1824,7 +1938,7 @@ (ltu:DI (plus:V_DI (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) (match_dup 2)) - (match_dup 1)))] + (match_dup 2)))] "" {@ [cons: =0, 1, 2, =3] [v,ASv,v,&Sg] # @@ -1875,7 +1989,7 @@ (ltu:DI (plus:V_DI (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) (match_dup 2)) - (match_dup 1)) + (match_dup 2)) (match_dup 5)))] "" {@ [cons: =0, 1, 2, =3, 4, 5] @@ -1929,7 +2043,7 @@ (ltu:DI (plus:V_DI (zero_extend:V_DI (match_dup 1)) (vec_duplicate:V_DI (match_dup 2))) - (match_dup 1)))] + (vec_duplicate:V_DI (match_dup 2))))] "" {@ [cons: =0, 1, 2, =3] [v,v,DbSv,&cV] # @@ -1978,7 +2092,7 @@ (ltu:DI (plus:V_DI (zero_extend:V_DI (match_dup 1)) (vec_duplicate:V_DI (match_dup 2))) - (match_dup 1)) + (vec_duplicate:V_DI (match_dup 2))) (match_dup 5)))] "" {@ [cons: =0, 1, 2, =3, 4, 5] @@ -2187,6 +2301,22 @@ [(set_attr "type" "vop3a") (set_attr "length" "8")]) +(define_insn "<su>mul<mode>3_highpart_dup<exec>" + [(set (match_operand:V_SI 0 "register_operand" "= v") + (truncate:V_SI + (lshiftrt:<VnDI> + (mult:<VnDI> + (any_extend:<VnDI> + (vec_duplicate:V_SI + (match_operand:SI 1 "gcn_alu_operand" "SvA"))) + (any_extend:<VnDI> + (match_operand:V_SI 2 "gcn_alu_operand" " vA"))) + (const_int 32))))] + "" + "v_mul_hi<sgnsuffix>0\t%0, %2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + (define_insn "mul<mode>3<exec>" [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG @@ -2198,11 +2328,11 @@ (set_attr "length" "8")]) (define_insn "mul<mode>3_dup<exec>" - [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") + [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG - (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") (vec_duplicate:V_INT_1REG - (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] + (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA")) + (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))] "" "v_mul_lo_u32\t%0, %1, %2" [(set_attr "type" "vop3a") @@ -2238,6 +2368,37 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx tmp = operands[3]; + + emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo)); + emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + DONE; + }) + (define_insn_and_split "mul<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "=&v") (vec_merge:V_DI @@ -2286,6 +2447,56 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (vec_merge:V_DI + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx exec = operands[4]; + rtx tmp = operands[5]; + + rtx old_lo, old_hi; + if (GET_CODE (operands[3]) == UNSPEC) + { + old_lo = old_hi = gcn_gen_undef (<VnSI>mode); + } + else + { + old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); + old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); + } + + rtx undef = gcn_gen_undef (<VnSI>mode); + + emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo, + exec)); + emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo, + old_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + DONE; + }) + (define_insn_and_split "mul<mode>3_zext" [(set (match_operand:V_DI 0 "register_operand" "=&v") (mult:V_DI @@ -3053,7 +3264,8 @@ "flag_unsafe_math_optimizations" "v_sqrt%i0\t%0, %1" [(set_attr "type" "vop1") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "transop" "yes")]) (define_insn "sqrt<mode>2" [(set (match_operand:FP 0 "register_operand" "= v") @@ -3062,7 +3274,8 @@ "flag_unsafe_math_optimizations" "v_sqrt%i0\t%0, %1" [(set_attr "type" "vop1") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "transop" "yes")]) ; These FP unops have f64, f32 and f16 versions. (define_int_iterator MATH_UNOP_1OR2REG @@ -3352,7 +3565,8 @@ "" "v_rcp%i0\t%0, %1" [(set_attr "type" "vop1") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "transop" "yes")]) ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the ;; one that matches op3 adjusted for best results in reciprocal division. @@ -3724,6 +3938,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -3778,6 +3993,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -3792,9 +4008,9 @@ /* Unsigned comparisons use the same patterns as signed comparisons, except that they use unsigned operators (e.g. LTU vs LT). The '%E1' directive then does the Right Thing. */ - emit_insn (gen_vec_cmpu<mode>di_exec (operands[0], operands[1], - operands[2], operands[3], - operands[4])); + emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1], + operands[2], operands[3], + operands[4])); DONE; }) @@ -3836,6 +4052,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) @@ -3859,6 +4076,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) @@ -4049,6 +4267,32 @@ DONE; }) +(define_expand "mask_gather_load<mode><vndi>" + [(set:V_MOV (match_operand:V_MOV 0 "register_operand") + (unspec:V_MOV + [(match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand") + (match_operand:DI 5 "") + (match_operand:V_MOV 6 "maskload_else_operand")] + UNSPEC_GATHER))] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), exec); + + emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, + const0_rtx, const0_rtx, + const0_rtx, + gcn_gen_undef (<MODE>mode), + exec)); + DONE; + }) + (define_expand "mask_scatter_store<mode><vnsi>" [(match_operand:DI 0 "register_operand") (match_operand:<VnSI> 1 "register_operand") @@ -4077,6 +4321,27 @@ DONE; }) +(define_expand "mask_scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand") + (match_operand:DI 5 "")] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), exec); + + emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, + operands[4], const0_rtx, + const0_rtx, exec)); + DONE; + }) + (define_code_iterator cond_op [plus minus mult]) (define_expand "cond_<expander><mode>" @@ -4397,7 +4662,7 @@ rtx tmp = gen_reg_rtx (<MODE>mode); rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); - emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); + emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1)); emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); DONE; }) |