diff options
4 files changed, 339 insertions, 1 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 3af3204..4b8aec8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -237,6 +237,16 @@ public:
   /// or an implicit_def if \p Ops is empty.
   void applyCombineConcatVectors(MachineInstr &MI, SmallVector<Register> &Ops);
 
+  /// Match a G_SHUFFLE_VECTOR of two G_CONCAT_VECTORS whose mask selects whole
+  /// concat sources (or whole undef chunks). On success \p Ops holds one
+  /// register per mask chunk in result order, with a null register standing
+  /// for each undef chunk.
+  bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops);
+  /// Replace \p MI with a G_CONCAT_VECTORS of \p Ops (or a COPY when \p Ops
+  /// has a single entry), using one shared G_IMPLICIT_DEF for every null
+  /// register in \p Ops.
+  void applyCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops);
+
   /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
   /// Returns true if MI changed.
   ///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 8568a7a..31b903e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1513,6 +1513,18 @@ def combine_concat_vector : GICombineRule<
         [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>;
 
+// Combines Shuffles of Concats
+// a = G_CONCAT_VECTORS x, y, undef, undef
+// b = G_CONCAT_VECTORS z, undef, undef, undef
+// c = G_SHUFFLE_VECTOR a, b, <0, 1, 4, undef>
+// ===>
+// c = G_CONCAT_VECTORS x, y, z, undef
+def combine_shuffle_concat : GICombineRule<
+  (defs root:$root, concat_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+        [{ return Helper.matchCombineShuffleConcat(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyCombineShuffleConcat(*${root}, ${matchinfo}); }])>;
+
 // match_extract_of_element must be the first!
 def vector_ops_combines: GICombineGroup<[
   match_extract_of_element_undef_vector,
@@ -1620,7 +1632,8 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3829c33..c5ee354 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -303,6 +303,119 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
   replaceRegWith(MRI, DstReg, NewDstReg);
 }
 
+/// Match a G_SHUFFLE_VECTOR of two G_CONCAT_VECTORS whose mask selects whole
+/// concat sources (or whole undef chunks), so that it can be rewritten as a
+/// single G_CONCAT_VECTORS of those sources.
+///
+/// On success \p Ops holds one register per mask chunk in result order; a
+/// null register stands for an undef chunk and is materialized later by
+/// applyCombineShuffleConcat.
+bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
+  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+  auto ConcatMI1 =
+      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
+  auto ConcatMI2 =
+      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
+  if (!ConcatMI1 || !ConcatMI2)
+    return false;
+
+  // Check that the sources of the Concat instructions have the same type
+  if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
+      MRI.getType(ConcatMI2->getSourceReg(0)))
+    return false;
+
+  LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
+  LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
+  unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
+  // Set once a chunk maps onto a real concat source; the apply step needs at
+  // least one such register to learn the type of the undef chunks.
+  bool HasRealSource = false;
+  for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
+    // Check if the index takes a whole source register from G_CONCAT_VECTORS
+    // Assumes that all Sources of G_CONCAT_VECTORS are the same type
+    if (Mask[i] == -1) {
+      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+        if (i + j >= Mask.size())
+          return false;
+        if (Mask[i + j] != -1)
+          return false;
+      }
+      if (!isLegalOrBeforeLegalizer(
+              {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
+        return false;
+      // A null register marks an undef chunk.
+      Ops.push_back(0);
+    } else if (Mask[i] % ConcatSrcNumElt == 0) {
+      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+        if (i + j >= Mask.size())
+          return false;
+        if (Mask[i + j] != Mask[i] + static_cast<int>(j))
+          return false;
+      }
+      // Retrieve the source register from its respective G_CONCAT_VECTORS
+      // instruction
+      if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
+        Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
+      } else {
+        Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
+                                              ConcatMI1->getNumSources()));
+      }
+      HasRealSource = true;
+    } else {
+      return false;
+    }
+  }
+
+  // A fully undef mask leaves no register to take the source type from, so
+  // do not handle it here.
+  if (!HasRealSource)
+    return false;
+
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_CONCAT_VECTORS,
+           {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
+    return false;
+
+  return true;
+}
+
+/// Replace the shuffle \p MI with the concatenation of \p Ops computed by
+/// matchCombineShuffleConcat and erase \p MI. Null registers in \p Ops are
+/// replaced in place by one shared G_IMPLICIT_DEF of the source type.
+void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
+                                               SmallVector<Register> &Ops) {
+  // Ops[0] may be the null placeholder of an undef chunk, so take the source
+  // type from the first real register rather than from Ops[0].
+  LLT SrcTy;
+  for (Register Reg : Ops) {
+    if (Reg.isValid()) {
+      SrcTy = MRI.getType(Reg);
+      break;
+    }
+  }
+  assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
+
+  // Materialize the undef chunks, sharing a single G_IMPLICIT_DEF.
+  Register UndefReg = 0;
+  for (Register &Reg : Ops) {
+    if (Reg.isValid())
+      continue;
+    if (!UndefReg.isValid())
+      UndefReg = Builder.buildUndef(SrcTy).getReg(0);
+    Reg = UndefReg;
+  }
+
+  // MachineIRBuilder requires at least two sources for G_CONCAT_VECTORS, so a
+  // single selected source is forwarded with a COPY instead.
+  Register DstReg = MI.getOperand(0).getReg();
+  if (Ops.size() > 1)
+    Builder.buildConcatVectors(DstReg, Ops);
+  else
+    Builder.buildCopy(DstReg, Ops[0]);
+  MI.eraseFromParent();
+}
+
 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   SmallVector<Register, 4> Ops;
   if (matchCombineShuffleVector(MI, Ops)) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir
new file mode 100644
index 0000000..0de989f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir
@@ -0,0 +1,202 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: 
llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s + +--- +name: shuffle_concat_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: shuffle_concat_1 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %p2:_(p0) = COPY $x1 + ; CHECK-NEXT: %p3:_(p0) = COPY $x2 + ; CHECK-NEXT: %p4:_(p0) = COPY $x3 + ; CHECK-NEXT: %a:_(<4 x s8>) = G_LOAD %p4(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %b:_(<4 x s8>) = G_LOAD %p3(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %c:_(<4 x s8>) = G_LOAD %p2(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), %c(<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %p1:_(p0) = COPY $x0 + %p2:_(p0) = COPY $x1 + %p3:_(p0) = COPY $x2 + %p4:_(p0) = COPY $x3 + + %ImpDef:_(<4 x s8>) = G_IMPLICIT_DEF + %a:_(<4 x s8>) = G_LOAD %p4:_(p0) :: (load (<4 x s8>)) + %b:_(<4 x s8>) = G_LOAD %p3:_(p0) :: (load (<4 x s8>)) + %c:_(<4 x s8>) = G_LOAD %p2:_(p0) :: (load (<4 x s8>)) + %d:_(<4 x s8>) = G_LOAD %p1:_(p0) :: (load (<4 x s8>)) + + %x:_(<16 x s8>) = G_SHUFFLE_VECTOR %a:_(<4 x s8>), %b:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, undef, undef, undef, undef, undef, undef, undef, undef) + %y:_(<16 x s8>) = G_SHUFFLE_VECTOR %c:_(<4 x s8>), %d:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %z:_(<16 x s8>) = G_SHUFFLE_VECTOR %x:_(<16 x s8>), %y:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, -1, -1, -1, -1) + + $q0 = COPY %z(<16 x s8>) + RET_ReallyLR implicit $q0 +... 
+ +--- +name: shuffle_concat_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: shuffle_concat_2 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %p1:_(p0) = COPY $x0 + ; CHECK-NEXT: %p2:_(p0) = COPY $x1 + ; CHECK-NEXT: %p3:_(p0) = COPY $x2 + ; CHECK-NEXT: %p4:_(p0) = COPY $x3 + ; CHECK-NEXT: %a:_(<4 x s8>) = G_LOAD %p4(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %b:_(<4 x s8>) = G_LOAD %p3(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %c:_(<4 x s8>) = G_LOAD %p2(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %d:_(<4 x s8>) = G_LOAD %p1(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), %c(<4 x s8>), %d(<4 x s8>) + ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %p1:_(p0) = COPY $x0 + %p2:_(p0) = COPY $x1 + %p3:_(p0) = COPY $x2 + %p4:_(p0) = COPY $x3 + + %ImpDef:_(<4 x s8>) = G_IMPLICIT_DEF + %a:_(<4 x s8>) = G_LOAD %p4:_(p0) :: (load (<4 x s8>)) + %b:_(<4 x s8>) = G_LOAD %p3:_(p0) :: (load (<4 x s8>)) + %c:_(<4 x s8>) = G_LOAD %p2:_(p0) :: (load (<4 x s8>)) + %d:_(<4 x s8>) = G_LOAD %p1:_(p0) :: (load (<4 x s8>)) + + %v:_(<16 x s8>) = G_SHUFFLE_VECTOR %a:_(<4 x s8>), %b:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, undef, undef, undef, undef, undef, undef, undef, undef) + %w:_(<16 x s8>) = G_SHUFFLE_VECTOR %c:_(<4 x s8>), %ImpDef:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %x:_(<16 x s8>) = G_SHUFFLE_VECTOR %v:_(<16 x s8>), %w:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef) + %y:_(<16 x s8>) = G_SHUFFLE_VECTOR %d:_(<4 x s8>), %ImpDef:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %z:_(<16 x s8>) = G_SHUFFLE_VECTOR %x:_(<16 x s8>), %y:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19) + + $q0 = COPY %z(<16 x s8>) + RET_ReallyLR implicit 
$q0 +... + +--- +name: shuffle_concat_3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: shuffle_concat_3 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %p2:_(p0) = COPY $x1 + ; CHECK-NEXT: %p3:_(p0) = COPY $x2 + ; CHECK-NEXT: %p4:_(p0) = COPY $x3 + ; CHECK-NEXT: %a:_(<4 x s8>) = G_LOAD %p4(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %b:_(<4 x s8>) = G_LOAD %p3(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %c:_(<4 x s8>) = G_LOAD %p2(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(0, undef, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef) + ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %p1:_(p0) = COPY $x0 + %p2:_(p0) = COPY $x1 + %p3:_(p0) = COPY $x2 + %p4:_(p0) = COPY $x3 + + %ImpDef:_(<4 x s8>) = G_IMPLICIT_DEF + %a:_(<4 x s8>) = G_LOAD %p4:_(p0) :: (load (<4 x s8>)) + %b:_(<4 x s8>) = G_LOAD %p3:_(p0) :: (load (<4 x s8>)) + %c:_(<4 x s8>) = G_LOAD %p2:_(p0) :: (load (<4 x s8>)) + %d:_(<4 x s8>) = G_LOAD %p1:_(p0) :: (load (<4 x s8>)) + + %x:_(<16 x s8>) = G_SHUFFLE_VECTOR %a:_(<4 x s8>), %b:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, undef, undef, undef, undef, undef, undef, undef, undef) + %y:_(<16 x s8>) = G_SHUFFLE_VECTOR %c:_(<4 x s8>), %d:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %z:_(<16 x s8>) = G_SHUFFLE_VECTOR %x:_(<16 x s8>), %y:_, shufflemask(0, -1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, -1, -1, -1, -1) + + $q0 = COPY %z(<16 x s8>) + 
RET_ReallyLR implicit $q0 +... + +--- +name: shuffle_concat_4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: shuffle_concat_4 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %p2:_(p0) = COPY $x1 + ; CHECK-NEXT: %p4:_(p0) = COPY $x3 + ; CHECK-NEXT: %a:_(<4 x s8>) = G_LOAD %p4(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %c:_(<4 x s8>) = G_LOAD %p2(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), [[DEF]](<4 x s8>), %c(<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %p1:_(p0) = COPY $x0 + %p2:_(p0) = COPY $x1 + %p3:_(p0) = COPY $x2 + %p4:_(p0) = COPY $x3 + + %ImpDef:_(<4 x s8>) = G_IMPLICIT_DEF + %a:_(<4 x s8>) = G_LOAD %p4:_(p0) :: (load (<4 x s8>)) + %b:_(<4 x s8>) = G_LOAD %p3:_(p0) :: (load (<4 x s8>)) + %c:_(<4 x s8>) = G_LOAD %p2:_(p0) :: (load (<4 x s8>)) + %d:_(<4 x s8>) = G_LOAD %p1:_(p0) :: (load (<4 x s8>)) + + %x:_(<16 x s8>) = G_SHUFFLE_VECTOR %a:_(<4 x s8>), %b:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, undef, undef, undef, undef, undef, undef, undef, undef) + %y:_(<16 x s8>) = G_SHUFFLE_VECTOR %c:_(<4 x s8>), %d:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %z:_(<16 x s8>) = G_SHUFFLE_VECTOR %x:_(<16 x s8>), %y:_, shufflemask(0, 1, 2, 3, -1, -1, -1, -1, 16, 17, 18, 19, -1, -1, -1, -1) + + $q0 = COPY %z(<16 x s8>) + RET_ReallyLR implicit $q0 +... 
+ +--- +name: shuffle_concat_5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: shuffle_concat_5 + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %p2:_(p0) = COPY $x1 + ; CHECK-NEXT: %p3:_(p0) = COPY $x2 + ; CHECK-NEXT: %p4:_(p0) = COPY $x3 + ; CHECK-NEXT: %a:_(<4 x s8>) = G_LOAD %p4(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %b:_(<4 x s8>) = G_LOAD %p3(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: %c:_(<4 x s8>) = G_LOAD %p2(p0) :: (load (<4 x s8>)) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(undef, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef) + ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %p1:_(p0) = COPY $x0 + %p2:_(p0) = COPY $x1 + %p3:_(p0) = COPY $x2 + %p4:_(p0) = COPY $x3 + + %ImpDef:_(<4 x s8>) = G_IMPLICIT_DEF + %a:_(<4 x s8>) = G_LOAD %p4:_(p0) :: (load (<4 x s8>)) + %b:_(<4 x s8>) = G_LOAD %p3:_(p0) :: (load (<4 x s8>)) + %c:_(<4 x s8>) = G_LOAD %p2:_(p0) :: (load (<4 x s8>)) + %d:_(<4 x s8>) = G_LOAD %p1:_(p0) :: (load (<4 x s8>)) + + %x:_(<16 x s8>) = G_SHUFFLE_VECTOR %a:_(<4 x s8>), %b:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, undef, undef, undef, undef, undef, undef, undef, undef) + %y:_(<16 x s8>) = G_SHUFFLE_VECTOR %c:_(<4 x s8>), %d:_, shufflemask(0, 1, 2, 3, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) + %z:_(<16 x s8>) = G_SHUFFLE_VECTOR %x:_(<16 x s8>), %y:_, shufflemask(-1, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, -1, -1, -1, -1) + + $q0 = COPY %z(<16 x s8>) + RET_ReallyLR 
implicit $q0 +... |
