diff options
6 files changed, 80 insertions, 42 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index da330b5..ca62f38 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -432,9 +432,13 @@ public: DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1; LLT UnmergeTy = CastSrcTy.changeElementCount( ElementCount::getFixed(UnmergeNumElts)); + LLT SrcWideTy = + SrcTy.changeElementCount(ElementCount::getFixed(UnmergeNumElts)); if (isInstUnsupported( - {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}})) + {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) || + LI.getAction({TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}}) + .Action == LegalizeActions::MoreElements) return false; Builder.setInstr(MI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 2ae2923..996abe8 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -628,7 +628,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return DstTy.isVector() && SrcTy.getSizeInBits() > 128 && DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits(); }) - + .clampMinNumElements(0, s8, 8) + .clampMinNumElements(0, s16, 4) + .clampMinNumElements(0, s32, 2) .alwaysLegal(); getActionDefinitionsBuilder(G_SEXT_INREG) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index aa152ae..b8328ed 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -607,9 +607,11 @@ body: | ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $s0 = COPY [[TRUNC]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>) + ; CHECK-NEXT: $s0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 %1(<2 x s16>) = G_LOAD %0(p0) :: (load (<2 x s16>)) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir index 6612651..ed40a2f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir @@ -540,9 +540,17 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s8>), [[TRUNC]](<2 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS]](<4 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s8>), [[UV3:%[0-9]+]]:_(<2 x s8>), [[UV4:%[0-9]+]]:_(<2 x s8>), [[UV5:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[TRUNC2]](<8 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[UV2]](<2 x s8>), [[UV2]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS1]](<4 x s8>) ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<2 x s32>) = COPY $d0 diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index 9ebd570..b7c02d6 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -4,12 +4,10 @@ ; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined. -; CHECK-GI: warning: Instruction selection used fallback path for bitcast_v4i8_i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for bitcast_i32_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16 define <4 x i16> @foo1(<2 x i32> %a) { ; CHECK-SD-LABEL: foo1: @@ -54,15 +52,28 @@ define <4 x i16> @foo2(<2 x i32> %a) { ; ===== To and From Scalar Types ===== define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){ -; CHECK-LABEL: bitcast_v4i8_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_v4i8_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_v4i8_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, %b %d = bitcast <4 x i8> %c to i32 ret i32 %d @@ -81,18 +92,27 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){ } define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){ -; CHECK-LABEL: bitcast_v2i16_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: ldr w0, [sp, #12] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_v2i16_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [sp, #12] +; CHECK-SD-NEXT: strh w8, [sp, #14] +; CHECK-SD-NEXT: ldr w0, [sp, #12] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_v2i16_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b %d = bitcast <2 x i16> %c to i32 ret i32 %d diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 2164c2a..ceac37f 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -5521,7 +5521,8 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-FP16-NEXT: xtn v0.4h, v0.4s ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] @@ -5580,12 +5581,13 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff -; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov w8, v0.s[1] +; CHECK-GI-FP16-NEXT: fmov w9, s0 +; CHECK-GI-FP16-NEXT: and w9, w9, #0xff +; CHECK-GI-FP16-NEXT: and w8, w8, #0xff +; CHECK-GI-FP16-NEXT: ucvtf h0, w9 +; CHECK-GI-FP16-NEXT: ucvtf h1, w8 ; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret |