aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/loongarch/loongarch.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/loongarch/loongarch.md')
-rw-r--r--gcc/config/loongarch/loongarch.md132
1 files changed, 118 insertions, 14 deletions
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 625f30c..763d514 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -518,6 +518,7 @@
;; These code iterators allow the signed and unsigned scc operations to use
;; the same template.
+(define_code_iterator any_ge [ge geu])
(define_code_iterator any_gt [gt gtu])
(define_code_iterator any_lt [lt ltu])
(define_code_iterator any_le [le leu])
@@ -1636,6 +1637,80 @@
operands[3] = tmp;
})
+;; Optimize (a << imm1) | (b & imm2) to use the bstrins.w instruction; both a
+;; and b should be 32 bits wide, and imm2 should equal (1LL << imm1) - 1.
+;; For example: (a << 1) | (b & 1)
+;; slli.w $r12,$r12,1
+;; andi $r13,$r13,1
+;; or $r12,$r12,$r13
+;; Optimized to use bstrins.w instruction as below:
+;; bstrins.w $r13,$r12,31,1
+(define_insn_and_split "*bstrins_w_for_ior_ashift_and_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (any_or_plus:DI
+ (and:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (ashift:DI
+ (sign_extract:DI
+ (match_operand:DI 3 "register_operand" "r")
+ (match_operand:SI 4 "const_uimm5_operand")
+ (const_int 0))
+ (match_operand:SI 5 "const_uimm5_operand"))))]
+ "TARGET_64BIT && loongarch_pre_reload_split ()
+ && !reg_overlap_mentioned_p (operands[0], operands[3])
+ && INTVAL (operands[2]) != 0 && INTVAL (operands[5]) != 0
+ && INTVAL (operands[2]) == (1LL << INTVAL (operands[5])) - 1
+ && INTVAL (operands[4]) + INTVAL (operands[5]) == 0x20"
+ "#"
+ "&& true"
+ [(const_int 0)]
+ {
+ emit_move_insn (operands[0], operands[1]);
+ rtx len = GEN_INT (32 - INTVAL (operands[5]));
+ rtx dest = gen_lowpart (SImode, operands[0]);
+ rtx op = gen_lowpart (SImode, operands[3]);
+ emit_insn (gen_insvsi (dest, len, operands[5], op));
+ })
+
+;; Optimize (a << imm1) | (b & imm2) to use the bstrins.d instruction; the
+;; sizes of a and b are 8 bits, 16 bits or 64 bits, and imm2 should equal
+;; (1LL << imm1) - 1.
+(define_insn_and_split "*bstrins_d_for_ior_ashift_and"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (any_or_plus:DI
+ (and:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "i"))
+ (ashift:DI
+ (match_operand:DI 3 "register_operand" "r")
+ (match_operand:DI 4 "const_uimm63_operand"))))]
+ "TARGET_64BIT && loongarch_pre_reload_split ()
+ && !reg_overlap_mentioned_p (operands[0], operands[3])
+ && INTVAL (operands[2]) != 0 && INTVAL (operands[4]) != 0
+ && INTVAL (operands[2]) == (1LL << INTVAL (operands[4])) - 1"
+ "#"
+ "&& true"
+ [(set (match_dup 0) (match_dup 1))
+ (set (zero_extract:DI (match_dup 0) (match_dup 2) (match_dup 4))
+ (match_dup 3))]
+ {
+ operands[2] = GEN_INT (64 - INTVAL (operands[4]));
+ })
+
+(define_insn "and_load_zero_extend<mode>"
+ [(set (match_operand:X 0 "register_operand" "=r,r,r,r,r,r")
+ (and:X (match_operand:X 1 "memory_operand" "%m,m,m,k,k,k")
+ (match_operand:X 2 "mask_operand" "Yb,Yh,Yw,Yb,Yh,Yw")))]
+ ""
+ "@
+ ld.bu\t%0,%1
+ ld.hu\t%0,%1
+ ld.wu\t%0,%1
+ ldx.bu\t%0,%1
+ ldx.hu\t%0,%1
+ ldx.wu\t%0,%1"
+ [(set_attr "move_type" "load,load,load,load,load,load")
+ (set_attr "mode" "<MODE>")])
+
;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
;; if possible, but the result may be sub-optimal when one of the masks
;; is (1 << N) - 1 and one of the src register is the dest register.
@@ -1670,6 +1745,24 @@
DONE;
})
+(define_insn_and_split "bstrins_bstrpick_for_and_imm<mode>"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (and:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:X 2 "const_int_operand" "i")))]
+ "loongarch_use_bstrins_bstrpick_for_and (operands[2], <MODE>mode)"
+ "#"
+ "&& true"
+ [(const_int 0)]
+{
+ unsigned HOST_WIDE_INT op2 = INTVAL (operands[2]);
+ int leading_zero_bit = __builtin_clzll (op2);
+ unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+ emit_insn (gen_extzv<mode> (operands[0], operands[1],
+ GEN_INT (64 - leading_zero_bit), const0_rtx));
+ emit_insn (gen_and<mode>3 (operands[0], operands[0], GEN_INT (op2 | mask)));
+}
+ [(set_attr "length" "8")])
+
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
@@ -1740,21 +1833,23 @@
;; This attribute is used to map a scalar mode to the corresponding
;; vector mode.
-(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")])
-(define_expand "popcount<mode>2"
- [(set (match_operand:GPR 0 "register_operand")
- (popcount:GPR (match_operand:GPR 1 "register_operand")))]
+(define_insn_and_split "popcount<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=f")
+ (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))]
"ISA_HAS_LSX"
+ "#"
+ ;; Do the split very late to work around unneeded zero-initialization
+ ;; of registers inserted by the init-regs pass.  See PR61810 and all the
+ ;; referenced issues.
+ "&& reload_completed"
+ [(set (match_operand:<cntmap> 0 "register_operand" "=f")
+ (popcount:<cntmap>
+ (match_operand:<cntmap> 1 "register_operand" "f")))]
{
- rtx in = operands[1];
- rtx out = operands[0];
- rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
- gen_reg_rtx (V2DImode);
- emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
- emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
- emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
- DONE;
+ operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0]));
+ operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1]));
})
;;
@@ -2306,8 +2401,8 @@
})
(define_insn_and_split "*movsi_internal"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,f,*r,*m")
- (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,f,f,r,*m")
+ (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,rJ,m,f,*f"))]
"(register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))"
{ return loongarch_output_move (operands); }
@@ -3495,6 +3590,15 @@
[(set_attr "type" "slt")
(set_attr "mode" "<X:MODE>")])
+(define_insn "*sge<u>_<X:mode><GPR:mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (any_ge:GPR (match_operand:X 1 "register_operand" " r")
+ (const_int 1)))]
+ ""
+ "slti<u>\t%0,zero,%1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<X:MODE>")])
+
(define_insn "*sgt<u>_<X:mode><GPR:mode>"
[(set (match_operand:GPR 0 "register_operand" "=r")
(any_gt:GPR (match_operand:X 1 "register_operand" "r")