aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/gcn/gcn.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/gcn/gcn.md')
-rw-r--r--gcc/config/gcn/gcn.md289
1 files changed, 163 insertions, 126 deletions
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 9193461..4130cf6 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -312,18 +312,33 @@
; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".
-(define_attr "laneselect" "yes,no" (const_string "no"))
+(define_attr "laneselect" "write,read,no" (const_string "no"))
-; Identify instructions that require a "Manually Inserted Wait State" if
-; their inputs are overwritten by subsequent instructions.
+; Classify global or flat memory accesses: a store, a load followed by a
+; waitcnt, or a flat/global atomic access, possibly followed by a waitcnt.
+; 'storex34' denotes FLAT_STORE_X{3,4}.
+; 'cmpswapx2' denotes FLAT_ATOMIC_{F}CMPSWAP_X2.
+; Used to handle "Manually Inserted Wait States".
-(define_attr "delayeduse" "yes,no" (const_string "no"))
+(define_attr "flatmemaccess"
+ "store,storex34,load,atomic,atomicwait,cmpswapx2,no"
+ (const_string "no"))
+
+; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State"
+; handling.
+
+(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no"))
; Identify instructions that require "Manually Inserted Wait State" if
; a previous instruction writes to VCC. The number gives the number of NOPs.
(define_attr "vccwait" "" (const_int 0))
+; Mark trans ops such as v_{exp,rsq,sqrt,sin,cos,log,...}_F{16,32,64}
+; for later conditional s_nop insertion.
+
+(define_attr "transop" "yes,no" (const_string "no"))
+
;; }}}
;; {{{ Iterators useful across the whole machine description
@@ -414,6 +429,15 @@
"s_nop\t0x0"
[(set_attr "type" "sopp")])
+; Variant of 'nop' whose s_nop immediate is supplied by operand 0.
+; s_nop accepts 0x0 to 0xf for 1 to 16 nops; however, as %0 is printed in
+; decimal after the fixed 0x prefix, only 0 to 9 (= 1 to 10 nops) can be used.
+(define_insn "nops"
+ [(match_operand 0 "const_int_operand")]
+ ""
+ "s_nop\t0x%0"
+ [(set_attr "type" "sopp")])
+
; FIXME: What should the value of the immediate be? Zero is disallowed, so
; pick 1 for now.
(define_insn "trap"
@@ -555,9 +579,12 @@
}
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
flat,flat,flat,flat")
+ (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store")
+ (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*")
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
- (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
+ (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")
+ (set_attr "laneselect" "*,*,read,*,*,*,*,*,*,*,*,*,*,*,*")])
; 32bit move pattern
@@ -565,38 +592,38 @@
[(set (match_operand:SISF 0 "nonimmediate_operand")
(match_operand:SISF 1 "gcn_load_operand"))]
""
- {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
- [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
- [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,RB ;smem ,* ,12,* ,off] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
- [&SD ,RB ;smem ,* ,12,* ,on ] ^
- [RB ,Sm ;smem ,* ,12,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0
- [Sm ,RS ;smem ,* ,12,* ,off] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm ,RS ;smem ,* ,12,* ,on ] ^
- [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dword\t%1, %A0
- [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
- [Sg ,v ;vop3a,none,8 ,* ,* ] v_readlane_b32\t%0, %1, 0
- [v ,Sv ;vop3a,none,8 ,* ,* ] v_writelane_b32\t%0, %1, 0
- [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
- [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
- [a ,a ;vop1 ,* ,4,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
- [v ,RF ;flat ,* ,12,* ,off] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store_dword\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
- [RLRG,v ;ds ,* ,12,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,* ,12,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [SD ,Y ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [v ,RM ;flat ,* ,12,* ,off] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
- [RM ,v ;flat ,* ,12,* ,* ] global_store_dword\t%A0, %1%O0%g0
- [RM ,a ;flat ,* ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, flatmemaccess]
+ [SD ,SSA ;sop1 ,* ,4 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,J ;sopk ,* ,4 ,* ,* ,* ,* ] s_movk_i32\t%0, %1
+ [SD ,B ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,RB ;smem ,* ,12,* ,off,* ,* ] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+ [&SD ,RB ;smem ,* ,12,* ,on ,* ,* ] ^
+ [RB ,Sm ;smem ,* ,12,* ,* ,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0
+ [Sm ,RS ;smem ,* ,12,* ,off,* ,* ] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm ,RS ;smem ,* ,12,* ,on ,* ,* ] ^
+ [RS ,Sm ;smem ,* ,12,* ,* ,* ,* ] s_store_dword\t%1, %A0
+ [v ,v ;vop1 ,* ,4 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [Sg ,v ;vop3a,none,8 ,* ,* ,read ,* ] v_readlane_b32\t%0, %1, 0
+ [v ,Sv ;vop3a,none,8 ,* ,* ,write,* ] v_writelane_b32\t%0, %1, 0
+ [v ,^a ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_read_b32\t%0, %1
+ [a ,v ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_write_b32\t%0, %1
+ [a ,a ;vop1 ,* ,4,cdna2,* ,* ,* ] v_accvgpr_mov_b32\t%0, %1
+ [v ,RF ;flat ,* ,12,* ,off,* ,load ] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RF ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RF ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RF ,v ;flat ,* ,12,* ,* ,* ,store] flat_store_dword\t%A0, %1%O0%g0
+ [RF ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
+ [v ,B ;vop1 ,* ,8 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [RLRG,v ;ds ,* ,12,* ,* ,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,* ,12,* ,* ,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [SD ,Y ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [v ,RM ;flat ,* ,12,* ,off,* ,load ] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RM ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RM ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RM ,v ;flat ,* ,12,* ,* ,* ,store] global_store_dword\t%A0, %1%O0%g0
+ [RM ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
})
; 8/16bit move pattern
@@ -606,31 +633,31 @@
[(set (match_operand:QIHI 0 "nonimmediate_operand")
(match_operand:QIHI 1 "gcn_load_operand"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
- {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
- [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
- [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
- [Sg ,v ;vop3a,none,4 ,* ,* ] v_readlane_b32\t%0, %1, 0
- [v ,Sv ;vop3a,none,4 ,* ,* ] v_writelane_b32\t%0, %1, 0
- [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
- [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
- [a ,a ;vop1 ,* ,8,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
- [v ,RF ;flat ,* ,12,* ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store%s0\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
- [RLRG,v ;ds ,* ,12,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,* ,12,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,RM ;flat ,* ,12,* ,off] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
- [RM ,v ;flat ,* ,12,* ,* ] global_store%s0\t%A0, %1%O0%g0
- [RM ,a ;flat ,* ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, flatmemaccess]
+ [SD ,SSA ;sop1 ,* ,4 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,J ;sopk ,* ,4 ,* ,* ,* ,* ] s_movk_i32\t%0, %1
+ [SD ,B ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [v ,v ;vop1 ,* ,4 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [Sg ,v ;vop3a,none,4 ,* ,* ,read ,* ] v_readlane_b32\t%0, %1, 0
+ [v ,Sv ;vop3a,none,4 ,* ,* ,write,* ] v_writelane_b32\t%0, %1, 0
+ [v ,^a ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_read_b32\t%0, %1
+ [a ,v ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_write_b32\t%0, %1
+ [a ,a ;vop1 ,* ,8,cdna2,* ,* ,* ] v_accvgpr_mov_b32\t%0, %1
+ [v ,RF ;flat ,* ,12,* ,off,* ,load ] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RF ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RF ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RF ,v ;flat ,* ,12,* ,* ,* ,store] flat_store%s0\t%A0, %1%O0%g0
+ [RF ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
+ [v ,B ;vop1 ,* ,8 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [RLRG,v ;ds ,* ,12,* ,* ,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,* ,12,* ,* ,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,RM ;flat ,* ,12,* ,off,* ,load ] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RM ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RM ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RM ,v ;flat ,* ,12,* ,* ,* ,store] global_store%s0\t%A0, %1%O0%g0
+ [RM ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
})
; 64bit move pattern
@@ -639,34 +666,34 @@
[(set (match_operand:DIDF 0 "nonimmediate_operand")
(match_operand:DIDF 1 "general_operand"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
- {@ [cons: =0, 1; attrs: type, length, cdna, xnack]
- [SD ,SSA ;sop1 ,4 ,* ,* ] s_mov_b64\t%0, %1
- [SD ,C ;sop1 ,8 ,* ,* ] ^
- [SD ,DB ;mult ,* ,* ,* ] #
- [RS ,Sm ;smem ,12,* ,* ] s_store_dwordx2\t%1, %A0
- [Sm ,RS ;smem ,12,* ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm ,RS ;smem ,12,* ,on ] ^
- [v ,v ;vmult,* ,* ,* ] #
- [v ,DB ;vmult,* ,* ,* ] #
- [Sg ,v ;vmult,* ,* ,* ] #
- [v ,Sv ;vmult,* ,* ,* ] #
- [v ,^a ;vmult,* ,* ,* ] #
- [a ,v ;vmult,* ,* ,* ] #
- [a ,a ;vmult,* ,cdna2,* ] #
- [v ,RF ;flat ,12,* ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,12,* ,on ] ^
- [^a ,RF ;flat ,12,cdna2,off] ^
- [&^a ,RF ;flat ,12,cdna2,on ] ^
- [RF ,v ;flat ,12,* ,* ] flat_store_dwordx2\t%A0, %1%O0%g0
- [RF ,a ;flat ,12,cdna2,* ] ^
- [RLRG,v ;ds ,12,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,12,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,RM ;flat ,12,* ,off] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,12,* ,on ] ^
- [^a ,RM ;flat ,12,cdna2,off] ^
- [&^a ,RM ;flat ,12,cdna2,on ] ^
- [RM ,v ;flat ,12,* ,* ] global_store_dwordx2\t%A0, %1%O0%g0
- [RM ,a ;flat ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+ [SD ,SSA ;sop1 ,4 ,* ,* ,* ] s_mov_b64\t%0, %1
+ [SD ,C ;sop1 ,8 ,* ,* ,* ] ^
+ [SD ,DB ;mult ,* ,* ,* ,* ] #
+ [RS ,Sm ;smem ,12,* ,* ,* ] s_store_dwordx2\t%1, %A0
+ [Sm ,RS ;smem ,12,* ,off,* ] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm ,RS ;smem ,12,* ,on ,* ] ^
+ [v ,v ;vmult,* ,* ,* ,* ] #
+ [v ,DB ;vmult,* ,* ,* ,* ] #
+ [Sg ,v ;vmult,* ,* ,* ,* ] #
+ [v ,Sv ;vmult,* ,* ,* ,* ] #
+ [v ,^a ;vmult,* ,* ,* ,* ] #
+ [a ,v ;vmult,* ,* ,* ,* ] #
+ [a ,a ;vmult,* ,cdna2,* ,* ] #
+ [v ,RF ;flat ,12,* ,off,load ] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,12,* ,on ,load ] ^
+ [^a ,RF ;flat ,12,cdna2,off,load ] ^
+ [&^a ,RF ;flat ,12,cdna2,on ,load ] ^
+ [RF ,v ;flat ,12,* ,* ,store] flat_store_dwordx2\t%A0, %1%O0%g0
+ [RF ,a ;flat ,12,cdna2,* ,store] ^
+ [RLRG,v ;ds ,12,* ,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,12,* ,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,RM ;flat ,12,* ,off,load ] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,12,* ,on ,load ] ^
+ [^a ,RM ;flat ,12,cdna2,off,load ] ^
+ [&^a ,RM ;flat ,12,cdna2,on ,load ] ^
+ [RM ,v ;flat ,12,* ,* ,store] global_store_dwordx2\t%A0, %1%O0%g0
+ [RM ,a ;flat ,12,cdna2,* ,store] ^
}
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@@ -704,31 +731,31 @@
[(set (match_operand:TI 0 "nonimmediate_operand")
(match_operand:TI 1 "general_operand" ))]
""
- {@ [cons: =0, 1; attrs: type, delayeduse, length, cdna, xnack]
- [SD ,SSB;mult ,* ,* ,* ,* ] #
- [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dwordx4\t%1, %A0
- [Sm ,RS ;smem ,yes,12,* ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm,RS ;smem ,yes,12,* ,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store_dwordx4\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,RF ;flat ,* ,12,* ,off] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a,RF ;flat ,* ,12,cdna2,on ] ^
- [v ,v ;vmult,* ,* ,* ,* ] #
- [v ,Sv ;vmult,* ,* ,* ,* ] #
- [SD ,v ;vmult,* ,* ,* ,* ] #
- [RM ,v ;flat ,yes,12,* ,* ] global_store_dwordx4\t%A0, %1%O0%g0
- [RM ,a ;flat ,yes,12,cdna2,* ] ^
- [v ,RM ;flat ,* ,12,* ,off] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a,RM ;flat ,* ,12,cdna2,on ] ^
- [RL ,v ;ds ,* ,12,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RL ;ds ,* ,12,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,^a ;vmult,* ,* ,* ,* ] #
- [a ,v ;vmult,* ,* ,* ,* ] #
- [a ,a ;vmult,* ,* ,cdna2,* ] #
+ {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+ [SD ,SSB;mult ,* ,* ,* ,* ] #
+ [RS ,Sm ;smem ,12,* ,* ,* ] s_store_dwordx4\t%1, %A0
+ [Sm ,RS ;smem ,12,* ,off,* ] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm,RS ;smem ,12,* ,on ,* ] ^
+ [RF ,v ;flat ,12,* ,* ,storex34] flat_store_dwordx4\t%A0, %1%O0%g0
+ [RF ,a ;flat ,12,cdna2,* ,storex34] ^
+ [v ,RF ;flat ,12,* ,off,load ] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,12,* ,on ,load ] ^
+ [^a ,RF ;flat ,12,cdna2,off,load ] ^
+ [&^a,RF ;flat ,12,cdna2,on ,load ] ^
+ [v ,v ;vmult,* ,* ,* ,* ] #
+ [v ,Sv ;vmult,* ,* ,* ,* ] #
+ [SD ,v ;vmult,* ,* ,* ,* ] #
+ [RM ,v ;flat ,12,* ,* ,storex34] global_store_dwordx4\t%A0, %1%O0%g0
+ [RM ,a ;flat ,12,cdna2,* ,storex34] ^
+ [v ,RM ;flat ,12,* ,off,load ] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,12,* ,on ,load ] ^
+ [^a ,RM ;flat ,12,cdna2,off,load ] ^
+ [&^a,RM ;flat ,12,cdna2,on ,load ] ^
+ [RL ,v ;ds ,12,* ,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RL ;ds ,12,* ,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,^a ;vmult,* ,* ,* ,* ] #
+ [a ,v ;vmult,* ,* ,* ,* ] #
+ [a ,a ;vmult,* ,cdna2,* ,* ] #
}
"reload_completed
&& REG_P (operands[0])
@@ -1077,6 +1104,7 @@
s_cmp%D1\t%2, %3
v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "sopc,vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_insn "cstoredi4_vector"
@@ -1087,6 +1115,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranchdi4"
@@ -1113,6 +1142,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranch<mode>4"
@@ -1985,6 +2015,7 @@
flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0
global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "12")])
; FIXME: These patterns are disabled because the instructions don't
@@ -2006,6 +2037,7 @@
flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "12")])
(define_mode_attr x2 [(SI "DI") (DI "TI")])
@@ -2053,7 +2085,7 @@
global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "12")
- (set_attr "delayeduse" "*,yes,yes")])
+ (set_attr "flatmemaccess" "*,cmpswapx2,cmpswapx2")])
(define_insn "sync_compare_and_swap<mode>_lds_insn"
[(set (match_operand:SIDI 0 "register_operand" "= v")
@@ -2151,7 +2183,7 @@
? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_load%o0\t%0, %A1%O1 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
@@ -2163,7 +2195,7 @@
? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_load%o0\t%0, %A1%O1 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
@@ -2173,6 +2205,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,load,load")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])
@@ -2209,7 +2242,7 @@
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_store%o1\t%A0, %1%O0 %G1"
: "error: cache architectire unspecified");
case 2:
return (TARGET_GLn_CACHE
@@ -2217,7 +2250,7 @@
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_store%o1\t%A0, %1%O0 %G1"
: "error: cache architecture unspecified");
}
break;
@@ -2237,7 +2270,8 @@
? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;"
+ "flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
@@ -2248,7 +2282,8 @@
? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
+ "global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
@@ -2257,6 +2292,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,store,store")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])
@@ -2331,7 +2367,7 @@
? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: "error: cache architecture unspecified");
case 2:
@@ -2344,7 +2380,7 @@
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: "error: cache architecture unspecified");
@@ -2366,7 +2402,7 @@
? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
@@ -2379,7 +2415,7 @@
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
: TARGET_TARGET_SC_CACHE
- ? "buffer_inv sc1\;"
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
@@ -2389,6 +2425,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])