diff options
12 files changed, 815 insertions, 41 deletions
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 778ff7e..8568a7a 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -443,14 +443,20 @@ def select_constant_cmp: GICombineRule< // TODO: handle compares (currently not marked as isCommutable) def commute_int_constant_to_rhs : GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR):$root, + (match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR, + G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_UADDO, G_SADDO, + G_UMULO, G_SMULO, G_UMULH, G_SMULH, + G_UADDSAT, G_SADDSAT, G_SMULFIX, G_UMULFIX, + G_SMULFIXSAT, G_UMULFIXSAT):$root, [{ return Helper.matchCommuteConstantToRHS(*${root}); }]), (apply [{ Helper.applyCommuteBinOpOperands(*${root}); }]) >; def commute_fp_constant_to_rhs : GICombineRule< (defs root:$root), - (match (wip_match_opcode G_FADD, G_FMUL):$root, + (match (wip_match_opcode G_FADD, G_FMUL, G_FMINNUM, G_FMAXNUM, + G_FMINNUM_IEEE, G_FMAXNUM_IEEE, + G_FMINIMUM, G_FMAXIMUM):$root, [{ return Helper.matchCommuteFPConstantToRHS(*${root}); }]), (apply [{ Helper.applyCommuteBinOpOperands(*${root}); }]) >; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 40c5119..3829c33 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6273,8 +6273,21 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { } bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); + unsigned LHSOpndIdx = 1; + unsigned RHSOpndIdx = 2; + switch (MI.getOpcode()) { + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SMULO: + LHSOpndIdx = 2; + RHSOpndIdx = 3; + break; + default: + break; + } + Register 
LHS = MI.getOperand(LHSOpndIdx).getReg(); + Register RHS = MI.getOperand(RHSOpndIdx).getReg(); if (!getIConstantVRegVal(LHS, MRI)) { // Skip commuting if LHS is not a constant. But, LHS may be a // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already @@ -6300,10 +6313,23 @@ bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) { void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { Observer.changingInstr(MI); - Register LHSReg = MI.getOperand(1).getReg(); - Register RHSReg = MI.getOperand(2).getReg(); - MI.getOperand(1).setReg(RHSReg); - MI.getOperand(2).setReg(LHSReg); + unsigned LHSOpndIdx = 1; + unsigned RHSOpndIdx = 2; + switch (MI.getOpcode()) { + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UMULO: + case TargetOpcode::G_SMULO: + LHSOpndIdx = 2; + RHSOpndIdx = 3; + break; + default: + break; + } + Register LHSReg = MI.getOperand(LHSOpndIdx).getReg(); + Register RHSReg = MI.getOperand(RHSOpndIdx).getReg(); + MI.getOperand(LHSOpndIdx).setReg(RHSReg); + MI.getOperand(RHSOpndIdx).setReg(LHSReg); Observer.changedInstr(MI); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir index 76d8288..d791660 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir @@ -116,3 +116,129 @@ body: | $q0 = COPY %mul RET_ReallyLR ... 
+--- +name: fminnum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminnum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINNUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINNUM %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: fmaxnum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaxnum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXNUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXNUM %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... +--- +name: fminnum_ieee +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminnum_ieee + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINNUM_IEEE [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINNUM_IEEE %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... 
+--- +name: fmaxnum_ieee +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaxnum_ieee + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXNUM_IEEE [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXNUM_IEEE %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... +--- +name: fminimum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminimum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINIMUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINIMUM %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: fmaximum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaximum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXIMUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXIMUM %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir new file mode 100644 index 0000000..1636549 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir @@ -0,0 +1,456 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: add +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: add + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_ADD %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... +--- +name: mul +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: mul + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_MUL [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_MUL %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: and +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: and + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %and:_(s32) = G_AND [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %and(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %and:_(s32) = G_AND %cst, %0 + $s0 = COPY %and + RET_ReallyLR +... 
+--- +name: or +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: or + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %or:_(s32) = G_OR [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %or(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %or:_(s32) = G_OR %cst, %0 + $s0 = COPY %or + RET_ReallyLR +... +--- +name: xor +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: xor + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %xor:_(s32) = G_XOR [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %xor(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %xor:_(s32) = G_XOR %cst, %0 + $s0 = COPY %xor + RET_ReallyLR +... +--- +name: smin +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smin + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %min:_(s32) = G_SMIN [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %min:_(s32) = G_SMIN %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: smax +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smax + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %max:_(s32) = G_SMAX [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %max:_(s32) = G_SMAX %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... 
+--- +name: umin +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umin + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %min:_(s32) = G_UMIN [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %min:_(s32) = G_UMIN %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: umax +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umax + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %max:_(s32) = G_UMAX [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %max:_(s32) = G_UMAX %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... +--- +name: uaddo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: uaddo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32), %overflow:_(s1) = G_UADDO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32), %overflow:_(s1) = G_UADDO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR + +... 
+--- +name: saddo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: saddo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32), %overflow:_(s1) = G_SADDO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32), %overflow:_(s1) = G_SADDO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR + +... +--- +name: umulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_UMULO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32), %overflow:_(s1) = G_UMULO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR +... +--- +name: smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR +... 
+--- +name: umulh +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulh + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULH [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULH %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: smulh +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulh + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULH [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULH %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: uaddsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: uaddsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_UADDSAT [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_UADDSAT %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... 
+--- +name: saddsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: saddsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_SADDSAT [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_SADDSAT %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... +--- +name: smulfix +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulfix + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULFIX [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULFIX %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: umulfix +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulfix + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIX [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULFIX %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... 
+--- +name: smulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulfixsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULFIXSAT [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULFIXSAT %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulfixsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIXSAT [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULFIXSAT %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir index 01e0dce..c967e4f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir @@ -78,3 +78,163 @@ body: | RET_ReallyLR ... 
+--- +name: cfb_lhs_smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO [[COPY]], %cfb + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %0 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... +--- +name: cfb_lhs_cfb_already_rhs_smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cfb_already_rhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cfb2 + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 6 + %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cfb2 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... 
+--- +name: cfb_lhs_cst_on_rhs_smulo +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cst_on_rhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cst2 + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 6 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cst2 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... +--- +name: cfb_lhs_umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIXSAT [[COPY]], %cfb, 7 + ; CHECK-NEXT: $w0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %mul:_(s32) = G_UMULFIXSAT %cfb, %0, 7 + $w0 = COPY %mul + RET_ReallyLR + +... 
+--- +name: cfb_lhs_cfb_already_rhs_umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cfb_already_rhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + ; CHECK-NEXT: %add:_(s32) = G_UMULFIXSAT %cfb, %cfb2, 7 + ; CHECK-NEXT: $w0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 2 + %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + %add:_(s32) = G_UMULFIXSAT %cfb, %cfb2, 7 + $w0 = COPY %add + RET_ReallyLR + +... +--- +name: cfb_lhs_cst_on_rhs_umulfixsat +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cst_on_rhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %add:_(s32) = G_UMULFIXSAT %cfb, %cst2, 7 + ; CHECK-NEXT: $w0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 2 + %add:_(s32) = G_UMULFIXSAT %cfb, %cst2, 7 + $w0 = COPY %add + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir index 8c4300d..03e507f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir @@ -11,7 +11,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16) ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:_(s16) = COPY $h0 @@ -33,7 +33,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %0:_(s32) = COPY $s0 @@ -55,7 +55,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s64) = COPY $d0 @@ -77,7 +77,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[COPY]], [[C]] ; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64) ; CHECK-NEXT: RET_ReallyLR 
implicit $d0 %0:_(s64) = COPY $d0 @@ -100,7 +100,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 @@ -125,7 +125,7 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BITCAST]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %1:_(<2 x s64>) = COPY $q0 @@ -150,7 +150,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 @@ -174,7 +174,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR 
[[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll index 7badf47..ae0a9b1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll @@ -4,7 +4,7 @@ define half @test_s16(half %a) #0 { ; CHECK-LABEL: test_s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax h0, h1, h0 +; CHECK-NEXT: fmax h0, h0, h1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt half %a, 0.0 @@ -16,7 +16,7 @@ define float @test_s32(float %a) #0 { ; CHECK-LABEL: test_s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax s0, s1, s0 +; CHECK-NEXT: fmax s0, s0, s1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt float %a, 0.0 @@ -28,7 +28,7 @@ define double @test_s64(double %a) #0 { ; CHECK-LABEL: test_s64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax d0, d1, d0 +; CHECK-NEXT: fmax d0, d0, d1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt double %a, 0.0 @@ -40,7 +40,7 @@ define <4 x half> @test_v4s16(<4 x half> %a) #0 { ; CHECK-LABEL: test_v4s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.4h, v1.4h, v0.4h +; CHECK-NEXT: fmax v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <4 x half> %a, zeroinitializer @@ -52,7 +52,7 @@ define <8 x half> @test_v8s16(<8 x half> %a) #0 { ; CHECK-LABEL: test_v8s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.8h, v1.8h, v0.8h +; CHECK-NEXT: fmax v0.8h, v0.8h, v1.8h ; 
CHECK-NEXT: ret entry: %fcmp = fcmp olt <8 x half> %a, zeroinitializer @@ -64,7 +64,7 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 { ; CHECK-LABEL: test_v2s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.2s, v1.2s, v0.2s +; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <2 x float> %a, zeroinitializer @@ -76,7 +76,7 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 { ; CHECK-LABEL: test_v4s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <4 x float> %a, zeroinitializer @@ -88,7 +88,7 @@ define <2 x double> @test_v2s64(<2 x double> %a) #0 { ; CHECK-LABEL: test_v2s64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.2d, v1.2d, v0.2d +; CHECK-NEXT: fmax v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <2 x double> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir index ee0e83c..0207613 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir @@ -254,8 +254,8 @@ body: | ; CHECK-NEXT: %one_s32:_(s32) = G_ANYEXT %one(s16) ; CHECK-NEXT: %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef, [[FMUL]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FMUL]], %zero_undef + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %one_undef ; CHECK-NEXT: $vgpr0 
= COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %two:_(s16) = G_FCONSTANT half 0xH4000 @@ -306,7 +306,7 @@ body: | ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat ; CHECK-NEXT: %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FMUL]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %qnan_undef ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %two:_(s16) = G_FCONSTANT half 0xH4000 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir index d6321da..67e6de1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -318,7 +318,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %val:_(s32) = COPY $vgpr4 ; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %k255, %val + ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255 ; CHECK-NEXT: $vgpr0 = COPY %umin0(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll index dc13dee..1d94d76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll @@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 17 ; GFX8-NEXT: v_min_i16_e32 v1, 17, v0 -; GFX8-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_mov_b32_e32 v2, -12 ; GFX8-NEXT: v_max_i16_e32 v1, -12, v1 -; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll index 7e38762..a8233054 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll @@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 17 ; GFX8-NEXT: v_min_u16_e32 v1, 17, v0 -; GFX8-NEXT: v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_mov_b32_e32 v2, 12 ; GFX8-NEXT: v_max_u16_e32 v1, 12, v1 -; GFX8-NEXT: v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 07480a0..cc0f7e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -983,7 +983,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v6, 
vcc, v6, v8 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v7 @@ -1010,7 +1010,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v7, v3, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 @@ -1058,7 +1058,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 @@ -1265,10 +1265,10 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4 -; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4 @@ -1339,7 +1339,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 ; CGP-NEXT: v_mul_lo_u32 v8, v6, v7 ; CGP-NEXT: v_mul_lo_u32 v9, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CGP-NEXT: v_mul_lo_u32 v10, v6, v9 @@ -1366,7 +1366,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc ; CGP-NEXT: 
v_mul_lo_u32 v8, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v9, v5, v7 ; CGP-NEXT: v_mul_lo_u32 v7, v6, v7 ; CGP-NEXT: v_mul_lo_u32 v10, v6, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 @@ -1433,10 +1433,10 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 -; CGP-NEXT: v_mul_hi_u32 v7, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v7, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; CGP-NEXT: v_mul_lo_u32 v11, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v4 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_mul_lo_u32 v8, v8, v4 |