diff options
Diffstat (limited to 'llvm/test/CodeGen')
31 files changed, 1173 insertions, 771 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll index 7872c02..461a7ef 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -177,7 +177,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; GISEL-NEXT: neg v2.16b, v3.16b ; GISEL-NEXT: shl v3.16b, v4.16b, #7 ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b -; GISEL-NEXT: sshr v2.16b, v3.16b, #7 +; GISEL-NEXT: cmlt v2.16b, v3.16b, #0 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -229,7 +229,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h ; GISEL-NEXT: neg v2.8h, v4.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sshr v2.8h, v3.8h, #15 +; GISEL-NEXT: cmlt v2.8h, v3.8h, #0 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31> diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir index 0b950b7..76d4d29 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir @@ -14,8 +14,7 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL %v1, [[DUP]](<4 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], [[C1]](s32) + ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], 16 ; CHECK-NEXT: $q0 = COPY %sext(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %v1:_(<4 x s32>) = COPY $q0 diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir index b3fb5a4..dfaddba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir @@ -15,8 +15,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VASHR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 @@ -39,8 +38,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VLSHR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 @@ -63,8 +61,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VLSHR]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir index c38e4a8..cf227cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir 
@@ -29,7 +29,6 @@ body: | ; CHECK-NEXT: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12 ; CHECK-NEXT: $d1 = COPY [[UCVTFd]] %0(s64) = COPY $d0 - %1(s32) = G_CONSTANT i32 12 - %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1 + %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, 12 $d1 = COPY %2(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir index 0706115..9fa6326 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir @@ -499,8 +499,7 @@ body: | ; CHECK-NEXT: $d0 = COPY [[SSHRv4i16_shift]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<4 x s16>) = COPY $d0 - %1:gpr(s32) = G_CONSTANT i32 5 - %2:fpr(<4 x s16>) = G_VASHR %0, %1 + %2:fpr(<4 x s16>) = G_VASHR %0, 5 $d0 = COPY %2(<4 x s16>) RET_ReallyLR implicit $d0 ... @@ -520,8 +519,7 @@ body: | ; CHECK-NEXT: $d0 = COPY [[USHRv4i16_shift]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<4 x s16>) = COPY $d0 - %1:gpr(s32) = G_CONSTANT i32 5 - %2:fpr(<4 x s16>) = G_VLSHR %0, %1 + %2:fpr(<4 x s16>) = G_VLSHR %0, 5 $d0 = COPY %2(<4 x s16>) RET_ReallyLR implicit $d0 ... 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index cdde110..63c08dd 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -902,7 +902,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: subs x2, x2, #8 ; CHECK-GI-NEXT: add x8, x8, #8 ; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-GI-NEXT: xtn v1.8b, v1.8h ; CHECK-GI-NEXT: str d1, [x0], #32 ; CHECK-GI-NEXT: b.ne .LBB8_1 @@ -967,8 +967,8 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: mov d2, v1.d[1] ; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b ; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 -; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 +; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: str q1, [x0], #32 ; CHECK-GI-NEXT: b.ne .LBB9_1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 9bafc5b..2a8b3ce2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -999,16 +999,10 @@ entry: } define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vaddhn_s16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 
8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1017,16 +1011,10 @@ entry: } define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vaddhn_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1035,16 +1023,10 @@ entry: } define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vaddhn_s64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> @@ -1053,16 +1035,10 @@ entry: } define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vaddhn_u16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_u16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1071,16 
+1047,10 @@ entry: } define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vaddhn_u32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_u32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1089,16 +1059,10 @@ entry: } define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vaddhn_u64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_u64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> @@ -1115,9 +1079,8 @@ define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1141,9 +1104,8 @@ define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 
killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1167,9 +1129,8 @@ define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1193,9 +1154,8 @@ define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1219,9 +1179,8 @@ define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1245,9 +1204,8 @@ define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1461,16 +1419,10 @@ entry: } define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; 
CHECK-SD-LABEL: test_vsubhn_s16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1479,16 +1431,10 @@ entry: } define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vsubhn_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1497,16 +1443,10 @@ entry: } define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vsubhn_s64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> @@ -1515,16 +1455,10 @@ entry: } define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vsubhn_u16: -; CHECK-SD: // %bb.0: // 
%entry -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1533,16 +1467,10 @@ entry: } define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vsubhn_u32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1551,16 +1479,10 @@ entry: } define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vsubhn_u64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> @@ -1577,9 +1499,8 @@ define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h 
+; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1603,9 +1524,8 @@ define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1629,9 +1549,8 @@ define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1655,9 +1574,8 @@ define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1681,9 +1599,8 @@ define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1707,9 +1624,8 @@ 
define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index 84879d1..03e6ca1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -524,8 +524,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) { ; CHECK-GI-NEXT: mov.b v1[15], w9 ; CHECK-GI-NEXT: shl.16b v0, v0, #7 ; CHECK-GI-NEXT: shl.16b v1, v1, #7 -; CHECK-GI-NEXT: sshr.16b v0, v0, #7 -; CHECK-GI-NEXT: sshr.16b v1, v1, #7 +; CHECK-GI-NEXT: cmlt.16b v0, v0, #0 +; CHECK-GI-NEXT: cmlt.16b v1, v1, #0 ; CHECK-GI-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -934,10 +934,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-GI-NEXT: shl.16b v1, v1, #7 ; CHECK-GI-NEXT: shl.16b v2, v2, #7 ; CHECK-GI-NEXT: shl.16b v3, v3, #7 -; CHECK-GI-NEXT: sshr.16b v0, v0, #7 -; CHECK-GI-NEXT: sshr.16b v1, v1, #7 -; CHECK-GI-NEXT: sshr.16b v2, v2, #7 -; CHECK-GI-NEXT: sshr.16b v3, v3, #7 +; CHECK-GI-NEXT: cmlt.16b v0, v0, #0 +; CHECK-GI-NEXT: cmlt.16b v1, v1, #0 +; CHECK-GI-NEXT: cmlt.16b v2, v2, #0 +; CHECK-GI-NEXT: cmlt.16b v3, v3, #0 ; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-GI-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index c408d7f..a3f4722 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1914,21 +1914,13 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { } define 
<8 x i16> @pr88784(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) { -; CHECK-SD-LABEL: pr88784: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: usubl.8h v0, v0, v1 -; CHECK-SD-NEXT: cmlt.8h v1, v2, #0 -; CHECK-SD-NEXT: ssra.8h v0, v2, #15 -; CHECK-SD-NEXT: eor.16b v0, v1, v0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: pr88784: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: usubl.8h v0, v0, v1 -; CHECK-GI-NEXT: sshr.8h v1, v2, #15 -; CHECK-GI-NEXT: ssra.8h v0, v2, #15 -; CHECK-GI-NEXT: eor.16b v0, v1, v0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: pr88784: +; CHECK: // %bb.0: +; CHECK-NEXT: usubl.8h v0, v0, v1 +; CHECK-NEXT: cmlt.8h v1, v2, #0 +; CHECK-NEXT: ssra.8h v0, v2, #15 +; CHECK-NEXT: eor.16b v0, v1, v0 +; CHECK-NEXT: ret %l4 = zext <8 x i8> %l0 to <8 x i16> %l5 = ashr <8 x i16> %l2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> %l6 = zext <8 x i8> %l1 to <8 x i16> @@ -1947,7 +1939,7 @@ define <8 x i16> @pr88784_fixed(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) { ; CHECK-GI-LABEL: pr88784_fixed: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: usubl.8h v0, v0, v1 -; CHECK-GI-NEXT: sshr.8h v1, v0, #15 +; CHECK-GI-NEXT: cmlt.8h v1, v0, #0 ; CHECK-GI-NEXT: ssra.8h v0, v0, #15 ; CHECK-GI-NEXT: eor.16b v0, v1, v0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll index 11fb732..938712a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll @@ -1103,20 +1103,12 @@ define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) { } define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn8b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn8b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; 
CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn8b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %sum = add <8 x i16> %tmp1, %tmp2 @@ -1126,20 +1118,12 @@ define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind { } define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn4h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn4h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn4h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %sum = add <4 x i32> %tmp1, %tmp2 @@ -1149,20 +1133,12 @@ define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind { } define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %sum = add <2 x i64> %tmp1, %tmp2 @@ -1172,22 +1148,13 @@ define <2 x i32> 
@addhn2s_natural(ptr %A, ptr %B) nounwind { } define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_16b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_16b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_16b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %sum = add <8 x i16> %tmp1, %tmp2 @@ -1198,22 +1165,13 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { } define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_8h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_8h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_8h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %sum = add <4 x i32> 
%tmp1, %tmp2 @@ -1224,22 +1182,13 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_4s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_4s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_4s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %sum = add <2 x i64> %tmp1, %tmp2 @@ -1250,22 +1199,13 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind { -; CHECK-SD-LABEL: addhn_addhn2_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: addhn v0.2s, v1.2d, v2.2d -; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn_addhn2_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v1.2d, #32 -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn_addhn2_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: addhn v0.2s, v1.2d, v2.2d +; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, 
ptr %B %sum1 = add <2 x i64> %tmp1, %tmp2 @@ -1281,20 +1221,12 @@ define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind { } define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn8b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn8b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn8b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %diff = sub <8 x i16> %tmp1, %tmp2 @@ -1304,20 +1236,12 @@ define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind { } define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn4h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn4h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn4h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %diff = sub <4 x i32> %tmp1, %tmp2 @@ -1327,20 +1251,12 @@ define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind { } define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; 
CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %diff = sub <2 x i64> %tmp1, %tmp2 @@ -1350,22 +1266,13 @@ define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind { } define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_16b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_16b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_16b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %diff = sub <8 x i16> %tmp1, %tmp2 @@ -1376,22 +1283,13 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { } define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_8h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s -; 
CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_8h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_8h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %diff = sub <4 x i32> %tmp1, %tmp2 @@ -1402,22 +1300,13 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_4s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_4s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_4s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %diff = sub <2 x i64> %tmp1, %tmp2 @@ -1428,20 +1317,12 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { } define <16 x i8> @neg_narrow_i8(<16 x i16> %a) { -; CHECK-SD-LABEL: neg_narrow_i8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h -; CHECK-SD-NEXT: subhn2 v0.16b, 
v2.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, v1.16b -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.8b, v2.8h, v0.8h +; CHECK-NEXT: subhn2 v0.16b, v2.8h, v1.8h +; CHECK-NEXT: ret %not.i = xor <16 x i16> %a, splat (i16 -1) %s = lshr <16 x i16> %not.i, splat (i16 8) %vshrn_n = trunc nuw <16 x i16> %s to <16 x i8> @@ -1449,20 +1330,12 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) { } define <8 x i16> @neg_narrow_i16(<8 x i32> %a) { -; CHECK-SD-LABEL: neg_narrow_i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s -; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, v1.16b -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.4h, v2.4s, v0.4s +; CHECK-NEXT: subhn2 v0.8h, v2.4s, v1.4s +; CHECK-NEXT: ret %not.i = xor <8 x i32> %a, splat (i32 -1) %s = lshr <8 x i32> %not.i, splat (i32 16) %vshrn_n = trunc nuw <8 x i32> %s to <8 x i16> @@ -1470,20 +1343,12 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) { } define <4 x i32> @neg_narrow_i32(<4 x i64> %a) { -; CHECK-SD-LABEL: neg_narrow_i32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d -; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, 
v1.16b -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.2s, v2.2d, v0.2d +; CHECK-NEXT: subhn2 v0.4s, v2.2d, v1.2d +; CHECK-NEXT: ret %not.i = xor <4 x i64> %a, splat (i64 -1) %s = lshr <4 x i64> %not.i, splat (i64 32) %vshrn_n = trunc nuw <4 x i64> %s to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index 9d0ade2..dc88f94 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -66,9 +66,9 @@ define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) { ; ; CHECK-GI-LABEL: combine_vec_sdiv_by_minsigned: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v0.4s, #0 ; CHECK-GI-NEXT: usra v0.4s, v1.4s, #1 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: neg v0.4s, v0.4s ; CHECK-GI-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> @@ -176,7 +176,7 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[2], w9 ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255> @@ -185,39 +185,24 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) { } define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) { -; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0 -; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a: -; CHECK-GI: // %bb.0: -; 
CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 -; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: combine_vec_sdiv_by_pow2a: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: usra v0.4s, v1.4s, #30 +; CHECK-NEXT: sshr v0.4s, v0.4s, #2 +; CHECK-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4> ret <4 x i32> %1 } define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) { -; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a_neg: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0 -; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-SD-NEXT: neg v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a_neg: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 -; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-GI-NEXT: neg v0.4s, v0.4s -; CHECK-GI-NEXT: ret +; CHECK-LABEL: combine_vec_sdiv_by_pow2a_neg: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: usra v0.4s, v1.4s, #30 +; CHECK-NEXT: sshr v0.4s, v0.4s, #2 +; CHECK-NEXT: neg v0.4s, v0.4s +; CHECK-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4> ret <4 x i32> %1 } @@ -240,7 +225,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI14_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-GI-NEXT: adrp x8, .LCPI14_2 @@ -252,7 +237,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) { ; CHECK-GI-NEXT: neg v2.16b, v2.16b ; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0 ; CHECK-GI-NEXT: bif 
v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2> @@ -278,7 +263,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: sshr v2.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v2.8h, v0.8h, #0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI15_0] @@ -291,7 +276,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; CHECK-GI-NEXT: add v1.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: shl v2.8h, v2.8h, #15 ; CHECK-GI-NEXT: sshl v1.8h, v1.8h, v3.8h -; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2> @@ -322,8 +307,8 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: sshr v3.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v4.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v3.8h, v0.8h, #0 +; CHECK-GI-NEXT: cmlt v4.8h, v1.8h, #0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: ldr d5, [x8, :lo12:.LCPI16_0] @@ -339,7 +324,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; CHECK-GI-NEXT: add v2.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: sshl v3.8h, v3.8h, v4.8h ; CHECK-GI-NEXT: sshl v2.8h, v2.8h, v4.8h -; CHECK-GI-NEXT: sshr v4.8h, v5.8h, #15 +; CHECK-GI-NEXT: cmlt v4.8h, v5.8h, #0 ; CHECK-GI-NEXT: bif v0.16b, v3.16b, v4.16b ; CHECK-GI-NEXT: bif v1.16b, v2.16b, v4.16b ; CHECK-GI-NEXT: ret @@ -381,12 +366,12 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) 
{ ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 -; CHECK-GI-NEXT: sshr v5.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v6.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v5.8h, v0.8h, #0 +; CHECK-GI-NEXT: cmlt v6.8h, v1.8h, #0 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI17_1] ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 -; CHECK-GI-NEXT: sshr v7.8h, v2.8h, #15 -; CHECK-GI-NEXT: sshr v16.8h, v3.8h, #15 +; CHECK-GI-NEXT: cmlt v7.8h, v2.8h, #0 +; CHECK-GI-NEXT: cmlt v16.8h, v3.8h, #0 ; CHECK-GI-NEXT: ldr d17, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: adrp x8, .LCPI17_2 ; CHECK-GI-NEXT: neg v4.8h, v4.8h @@ -402,7 +387,7 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) { ; CHECK-GI-NEXT: add v6.8h, v1.8h, v6.8h ; CHECK-GI-NEXT: add v7.8h, v2.8h, v7.8h ; CHECK-GI-NEXT: add v4.8h, v3.8h, v4.8h -; CHECK-GI-NEXT: sshr v17.8h, v17.8h, #15 +; CHECK-GI-NEXT: cmlt v17.8h, v17.8h, #0 ; CHECK-GI-NEXT: sshl v5.8h, v5.8h, v16.8h ; CHECK-GI-NEXT: sshl v6.8h, v6.8h, v16.8h ; CHECK-GI-NEXT: sshl v7.8h, v7.8h, v16.8h @@ -436,7 +421,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI18_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] @@ -451,7 +436,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16> @@ -483,10 +468,10 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; 
CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v4.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v4.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-GI-NEXT: sshr v5.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v5.4s, v1.4s, #0 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI19_0] ; CHECK-GI-NEXT: adrp x8, .LCPI19_1 ; CHECK-GI-NEXT: mov v2.h[1], w9 @@ -503,7 +488,7 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) { ; CHECK-GI-NEXT: sshl v3.4s, v3.4s, v5.4s ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v4.16b, v2.16b ; CHECK-GI-NEXT: bif v1.16b, v3.16b, v2.16b ; CHECK-GI-NEXT: ret @@ -546,13 +531,13 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v6.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v6.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 -; CHECK-GI-NEXT: sshr v7.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v7.4s, v1.4s, #0 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI20_0] -; CHECK-GI-NEXT: sshr v16.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v17.4s, v3.4s, #31 +; CHECK-GI-NEXT: cmlt v16.4s, v2.4s, #0 +; CHECK-GI-NEXT: cmlt v17.4s, v3.4s, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI20_1 ; CHECK-GI-NEXT: mov v4.h[1], w9 ; CHECK-GI-NEXT: neg v5.4s, v5.4s @@ -574,7 +559,7 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) { ; CHECK-GI-NEXT: sshl v5.4s, v5.4s, v17.4s ; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 ; CHECK-GI-NEXT: shl v4.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v4.4s, v4.4s, #31 +; CHECK-GI-NEXT: cmlt v4.4s, v4.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v6.16b, v4.16b ; CHECK-GI-NEXT: bif v1.16b, v7.16b, v4.16b ; CHECK-GI-NEXT: bif v2.16b, v16.16b, v4.16b @@ -603,7 +588,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 
x i64> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v2i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI21_1 -; CHECK-GI-NEXT: sshr v2.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v2.2d, v0.2d, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI21_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_1] ; CHECK-GI-NEXT: adrp x8, .LCPI21_2 @@ -615,7 +600,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; CHECK-GI-NEXT: neg v2.2d, v2.2d ; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: sshl v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #63 +; CHECK-GI-NEXT: cmlt v2.2d, v3.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <2 x i64> %x, <i64 1, i64 4> @@ -649,7 +634,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI22_2 -; CHECK-GI-NEXT: sshr v3.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v3.2d, v0.2d, #0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_2] ; CHECK-GI-NEXT: adrp x8, .LCPI22_1 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_1] @@ -662,13 +647,13 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; CHECK-GI-NEXT: adrp x8, .LCPI22_3 ; CHECK-GI-NEXT: neg v5.2d, v5.2d ; CHECK-GI-NEXT: ushl v2.2d, v3.2d, v2.2d -; CHECK-GI-NEXT: sshr v3.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v3.2d, v1.2d, #0 ; CHECK-GI-NEXT: shl v6.2d, v6.2d, #63 ; CHECK-GI-NEXT: add v2.2d, v0.2d, v2.2d ; CHECK-GI-NEXT: ushl v3.2d, v3.2d, v4.2d ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_3] ; CHECK-GI-NEXT: sshl v2.2d, v2.2d, v5.2d -; CHECK-GI-NEXT: sshr v5.2d, v6.2d, #63 +; CHECK-GI-NEXT: cmlt v5.2d, v6.2d, #0 ; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d ; CHECK-GI-NEXT: neg v3.2d, v4.2d ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v5.16b @@ -715,13 +700,13 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; 
CHECK-GI-NEXT: sshr v7.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v7.2d, v0.2d, #0 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI23_1 -; CHECK-GI-NEXT: sshr v16.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v16.2d, v1.2d, #0 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI23_1] -; CHECK-GI-NEXT: sshr v17.2d, v2.2d, #63 -; CHECK-GI-NEXT: sshr v18.2d, v3.2d, #63 +; CHECK-GI-NEXT: cmlt v17.2d, v2.2d, #0 +; CHECK-GI-NEXT: cmlt v18.2d, v3.2d, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI23_3 ; CHECK-GI-NEXT: mov v4.h[1], w9 ; CHECK-GI-NEXT: neg v5.2d, v5.2d @@ -754,9 +739,9 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63 ; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d ; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d -; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63 -; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63 -; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GI-NEXT: cmlt v17.2d, v17.2d, #0 +; CHECK-GI-NEXT: cmlt v18.2d, v18.2d, #0 +; CHECK-GI-NEXT: cmlt v4.2d, v4.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b ; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b ; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b @@ -792,7 +777,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; CHECK-GI-NEXT: adrp x10, .LCPI24_0 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: ldr q2, [x10, :lo12:.LCPI24_0] -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s4, w9 ; CHECK-GI-NEXT: adrp x10, .LCPI24_1 ; CHECK-GI-NEXT: neg v2.4s, v2.4s @@ -807,10 +792,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: neg 
v2.4s, v0.4s ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -871,7 +856,7 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) { ; CHECK-GI-NEXT: neg v2.16b, v0.16b ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0 ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -901,7 +886,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; CHECK-GI-LABEL: non_splat_minus_one_divisor_1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI26_2 -; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI26_1 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI26_2] ; CHECK-GI-NEXT: adrp x8, .LCPI26_3 @@ -914,11 +899,11 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; CHECK-GI-NEXT: neg v2.16b, v2.16b ; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: shl v1.16b, v3.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0 ; CHECK-GI-NEXT: neg v2.16b, v0.16b ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -954,7 +939,7 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) { ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: ldr q2, [x9, :lo12:.LCPI27_0] ; CHECK-GI-NEXT: fmov s4, w8 -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI27_1 ; CHECK-GI-NEXT: neg v2.4s, v2.4s ; CHECK-GI-NEXT: mov v1.s[1], w8 @@ -969,10 +954,10 @@ define <4 x i32> 
@non_splat_minus_one_divisor_2(<4 x i32> %A) { ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: mov v4.s[3], w8 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: neg v2.4s, v0.4s ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -1207,7 +1192,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) { ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: shl v1.8h, v1.8h, #15 -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 1, i16 1, i16 1, i16 1> diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 121cc30..babb4ed 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -605,7 +605,7 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0x3 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: str q0, [sp] ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] @@ -634,7 +634,7 @@ define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x ; CHECK-GI-NEXT: adrp x8, .LCPI23_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: mov s0, v0.s[2] ; CHECK-GI-NEXT: fmov 
w0, s0 diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index 6d673f1..30fb82e 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -661,7 +661,7 @@ define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double> ; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: bsl v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 ; CHECK-GI-NEXT: ret @@ -1540,7 +1540,7 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-FP16-NEXT: mov v1.s[2], w8 ; CHECK-GI-FP16-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: fmov s4, w8 ; CHECK-GI-FP16-NEXT: mov v4.s[1], w8 ; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s @@ -1602,7 +1602,7 @@ define <4 x i32> @v4f16_i32(<4 x half> %a, <4 x half> %b, <4 x i32> %d, <4 x i32 ; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b ; CHECK-GI-FP16-NEXT: ret entry: @@ -1657,8 +1657,8 @@ define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32 ; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v1.4s, v1.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v6.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v6.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: mov v0.16b, v1.16b ; CHECK-GI-FP16-NEXT: mov v1.16b, v6.16b ; CHECK-GI-FP16-NEXT: 
bsl v0.16b, v2.16b, v4.16b @@ -1748,10 +1748,10 @@ define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-FP16-NEXT: shl v3.4s, v3.4s, #31 ; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v2.4s, v2.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v16.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v3.4s, v3.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v17.4s, v1.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v2.4s, v2.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v16.4s, v0.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v3.4s, v3.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v17.4s, v1.4s, #0 ; CHECK-GI-FP16-NEXT: ldp q0, q1, [sp] ; CHECK-GI-FP16-NEXT: bit v0.16b, v4.16b, v2.16b ; CHECK-GI-FP16-NEXT: mov v2.16b, v3.16b diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 00de153..24be923 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -111,14 +111,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; CHECK-CVT-LABEL: utest_f16i32: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -298,8 +298,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: 
fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -493,8 +493,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -636,14 +636,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -808,8 +808,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -986,8 +986,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -1026,6 +1026,29 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-CVT-LABEL: ustest_f16i32_nsat: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: and w8, w8, w8, asr #31 +; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: and w8, w8, w8, asr #31 +; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31 +; CHECK-FP16-NEXT: ret + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index b09a867..637c028 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -321,20 +321,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32: +; CHECK-FP16-SD-LABEL: utest_f16i32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32: +; CHECK-CVT-GI-LABEL: utest_f16i32: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -349,7 
+349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32: +; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -614,8 +614,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16: +; CHECK-FP16-SD-LABEL: utest_f16i16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16: +; CHECK-FP16-GI-LABEL: utest_f16i16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -1746,8 +1746,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1777,7 +1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64: +; CHECK-FP16-SD-LABEL: utest_f16i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, 
#48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64: +; CHECK-CVT-GI-LABEL: utest_f16i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64: +; CHECK-FP16-GI-LABEL: utest_f16i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -2307,20 +2307,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32_mm: +; CHECK-FP16-SD-LABEL: utest_f16i32_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32_mm: +; CHECK-CVT-GI-LABEL: utest_f16i32_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32_mm: +; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -2585,8 +2585,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: 
; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16_mm: +; CHECK-FP16-SD-LABEL: utest_f16i16_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16_mm: +; CHECK-FP16-GI-LABEL: utest_f16i16_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -3694,8 +3694,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64_mm: +; CHECK-FP16-SD-LABEL: utest_f16i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64_mm: +; CHECK-CVT-GI-LABEL: utest_f16i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64_mm: +; CHECK-FP16-GI-LABEL: utest_f16i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; 
CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -3941,6 +3941,51 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-FP16-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 
x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 0c84468f..2026959 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1110,7 +1110,7 @@ define <8 x i8> @vselect_constant_cond_zero_v8i8(<8 x i8> %a) { ; CHECK-GI-NEXT: adrp x8, .LCPI83_0 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI83_0] ; CHECK-GI-NEXT: shl v1.8b, v1.8b, #7 -; CHECK-GI-NEXT: sshr v1.8b, v1.8b, #7 +; CHECK-GI-NEXT: cmlt v1.8b, v1.8b, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer @@ -1133,7 +1133,7 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) { ; CHECK-GI-NEXT: mov v1.h[2], w9 ; CHECK-GI-NEXT: mov v1.h[3], w8 ; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15 -; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15 +; CHECK-GI-NEXT: cmlt v1.4h, v1.4h, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer @@ -1157,7 +1157,7 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[2], w9 ; CHECK-GI-NEXT: mov v1.s[3], w8 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer @@ -1176,7 +1176,7 @@ define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI86_0] ; CHECK-GI-NEXT: shl v2.8b, v2.8b, #7 -; CHECK-GI-NEXT: sshr v2.8b, v2.8b, #7 +; CHECK-GI-NEXT: cmlt v2.8b, 
v2.8b, #0 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: ret %c = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> %b @@ -1199,7 +1199,7 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-GI-NEXT: mov v2.h[2], w9 ; CHECK-GI-NEXT: mov v2.h[3], w8 ; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15 -; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15 +; CHECK-GI-NEXT: cmlt v2.4h, v2.4h, #0 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: ret %c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> %b @@ -1223,7 +1223,7 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-NEXT: mov v2.s[2], w9 ; CHECK-GI-NEXT: mov v2.s[3], w8 ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index fb8b721..11b3b62 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -966,7 +966,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) { ; ; CHECK-GI-LABEL: cmgez8xi8_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-GI-NEXT: cmlt v0.8b, v0.8b, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -982,7 +982,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) { ; ; CHECK-GI-LABEL: cmgez16xi8_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, 
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -998,7 +998,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) { ; ; CHECK-GI-LABEL: cmgez4xi16_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-GI-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15> @@ -1014,7 +1014,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) { ; ; CHECK-GI-LABEL: cmgez8xi16_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> @@ -1030,7 +1030,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) { ; ; CHECK-GI-LABEL: cmgez2xi32_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <2 x i32> %A, <i32 31, i32 31> @@ -1046,7 +1046,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) { ; ; CHECK-GI-LABEL: cmgez4xi32_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31> @@ -1062,7 +1062,7 @@ define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) { ; ; CHECK-GI-LABEL: cmgez2xi64_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <2 x i64> %A, <i64 63, i64 63> @@ -1503,99 +1503,64 @@ entry: } define <8 x i8> @cmltz8xi8_alt(<8 x i8> %A) { -; CHECK-SD-LABEL: cmltz8xi8_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz8xi8_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7 
-; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz8xi8_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: ret %A.lobit = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> ret <8 x i8> %A.lobit } define <16 x i8> @cmltz16xi8_alt(<16 x i8> %A) { -; CHECK-SD-LABEL: cmltz16xi8_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz16xi8_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz16xi8_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 +; CHECK-NEXT: ret %A.lobit = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> ret <16 x i8> %A.lobit } define <4 x i16> @cmltz4xi16_alt(<4 x i16> %A) { -; CHECK-SD-LABEL: cmltz4xi16_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz4xi16_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz4xi16_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 +; CHECK-NEXT: ret %A.lobit = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15> ret <4 x i16> %A.lobit } define <8 x i16> @cmltz8xi16_alt(<8 x i16> %A) { -; CHECK-SD-LABEL: cmltz8xi16_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz8xi16_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz8xi16_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 +; CHECK-NEXT: ret %A.lobit = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ret <8 x i16> %A.lobit } define <2 x i32> @cmltz2xi32_alt(<2 x i32> %A) { -; CHECK-SD-LABEL: cmltz2xi32_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: 
cmltz2xi32_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz2xi32_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: ret %A.lobit = ashr <2 x i32> %A, <i32 31, i32 31> ret <2 x i32> %A.lobit } define <4 x i32> @cmltz4xi32_alt(<4 x i32> %A) { -; CHECK-SD-LABEL: cmltz4xi32_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz4xi32_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz4xi32_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: ret %A.lobit = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31> ret <4 x i32> %A.lobit } define <2 x i64> @cmltz2xi64_alt(<2 x i64> %A) { -; CHECK-SD-LABEL: cmltz2xi64_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz2xi64_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz2xi64_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 +; CHECK-NEXT: ret %A.lobit = ashr <2 x i64> %A, <i64 63, i64 63> ret <2 x i64> %A.lobit } @@ -2523,7 +2488,7 @@ define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2s, #1 ; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2542,7 +2507,7 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-NEXT: dup v0.2s, w8 ; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2559,7 +2524,7 
@@ define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) { ; CHECK-GI-NEXT: adrp x8, .LCPI221_0 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0] ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2589,7 +2554,7 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp false <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll index 282f437..a8c55b4 100644 --- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -465,7 +465,7 @@ define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 { ; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %cmp.i = icmp eq <8 x i8> %a, %b diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll index 483f6c2..b562340 100644 --- a/llvm/test/CodeGen/AArch64/select_cc.ll +++ b/llvm/test/CodeGen/AArch64/select_cc.ll @@ -98,7 +98,7 @@ define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) { ; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0 ; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63 -; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v1.2d, v1.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret entry: @@ -136,7 +136,7 @@ 
define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) { ; CHECK-GI-NEXT: mov v2.s[2], w8 ; CHECK-GI-NEXT: mov v2.s[3], w9 ; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll index 293b74ec..96a7a9d0 100644 --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -255,7 +255,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-GI-NEXT: movi v1.16b, #128 -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer @@ -277,7 +277,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: movi v1.8h, #128 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer @@ -299,7 +299,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: movi v1.4s, #64 ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer @@ 
-323,7 +323,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir index 93cc12f..9484417 100644 --- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir +++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir @@ -57,6 +57,7 @@ body: | %4:vgpr_16 = COPY %3:sgpr_lo16 %5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec S_ENDPGM 0, implicit %5 +... --- name: fold_16bit_madmix_clamp @@ -207,3 +208,27 @@ body: | $vgpr0 = COPY %4 S_ENDPGM 0, implicit $vgpr0 ... + +--- +name: fold_imm16_across_reg_sequence +tracksRegLiveness: true +registers: +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: fold_imm16_across_reg_sequence + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + ; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16 + ; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]] + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 + %0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + %1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + %2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16 + %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, 
%2, 0, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0, implicit $vgpr0 +... diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b2..a6f0a03 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -383,8 +383,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; SOFT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; SOFT-LABEL: utest_f16i32: ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} @@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) { ; SOFT-NEXT: .LBB7_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP2-LABEL: utesth_f16i32: +; VFP2-LABEL: utest_f16i32: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} @@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; -; FULL-LABEL: utesth_f16i32: +; FULL-LABEL: utest_f16i32: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.u32.f16 s0, s0 ; FULL-NEXT: vmov r0, s0 @@ -3985,6 +3985,46 @@ entry: ret i32 %spec.store.select7 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; SOFT-LABEL: ustest_f16i32_nsat: +; SOFT: @ %bb.0: +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: uxth r0, r0 +; SOFT-NEXT: bl __aeabi_h2f +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: pop {r7, pc} +; +; VFP2-LABEL: ustest_f16i32_nsat: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: bl __aeabi_h2f +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: usat r0, #0, r0 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: ustest_f16i32_nsat: +; FULL: @ %bb.0: +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: usat r0, #0, r0 +; 
FULL-NEXT: bx lr + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 96f009a..ba31b35 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -748,8 +748,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d12, d13} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -1366,8 +1366,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -2109,8 +2109,8 @@ entry: ret <2 x i64> 
%conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64: +; CHECK-FP16-LABEL: utest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -2835,8 +2835,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -3344,8 +3344,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: @ %bb.0: 
@ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -4044,8 +4044,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64_mm: +; CHECK-FP16-LABEL: utest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -4215,6 +4215,77 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NEON-LABEL: ustest_f16i32_nsat: +; CHECK-NEON: @ %bb.0: @ %entry +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s3 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s20, s1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s16, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vmov r1, s20 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov s18, r4 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d11[0], r0 +; CHECK-NEON-NEXT: mov r0, r1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18 +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEON-NEXT: 
vmov r0, s2 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: vmov r0, s4 +; CHECK-NEON-NEXT: vmov.32 d11[1], r0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d10[1], r0 +; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8 +; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8 +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEON-NEXT: pop {r4, pc} +; +; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vmovx.f16 s2, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1 +; CHECK-FP16-NEXT: vmovx.f16 s4, s1 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-FP16-NEXT: vmov.i32 q9, #0x0 +; CHECK-FP16-NEXT: vmov.32 d17[0], r0 +; CHECK-FP16-NEXT: vmov r0, s6 +; CHECK-FP16-NEXT: vmov.32 d16[0], r0 +; CHECK-FP16-NEXT: vmov r0, s4 +; CHECK-FP16-NEXT: vmov.32 d17[1], r0 +; CHECK-FP16-NEXT: vmov r0, s2 +; CHECK-FP16-NEXT: vmov.32 d16[1], r0 +; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9 +; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9 +; CHECK-FP16-NEXT: bx lr +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll new file mode 100644 index 0000000..8b121c5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll @@ -0,0 +1,93 @@ +;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s + +define dso_local void @store_isnan_f32(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr { +entry: + 
%arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0 + %0 = load <32 x float>, ptr %arrayidx_a, align 4 + %1 = load <32 x float>, ptr %arrayidx_b, align 4 + %.vectorized = fcmp uno <32 x float> %0, %1 + %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32> + %arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_cmp, i32 0 + store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4 + ret void +} + +; CHECK: store_isnan_f32 +; CHECK: [[RONE32:r[0-9]+]] = #1 +; CHECK: [[VOP2_F32:v[0-9]+]] = vxor([[VOP2_F32]],[[VOP2_F32]]) +; CHECK: [[VOP1_F32:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32]]) +; CHECK: [[Q1_F32:q[0-9]+]] = vcmp.eq([[VOP1_F32]].w,[[VOP1_F32]].w) +; CHECK: [[VOP3_F32:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_F32]] &= vcmp.eq([[VOP3_F32]].w,[[VOP3_F32]].w) +; CHECK: [[VOUT_F32:v[0-9]+]] = vmux([[Q1_F32]],[[VOP2_F32]],[[VONES32]]) +; CHECK: vmemu(r2+#0) = [[VOUT_F32]] + +define dso_local void @store_isnan_f16(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0 + %0 = load <64 x half>, ptr %arrayidx_a, align 2 + %1 = load <64 x half>, ptr %arrayidx_b, align 2 + %.vectorized = fcmp uno <64 x half> %0, %1 + %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16> + %arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_cmp, i32 0 + store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2 + ret void +} +; CHECK-LABEL: store_isnan_f16 +; CHECK: [[RONE16:r[0-9]+]] = #1 +; CHECK: [[VOP2_F16:v[0-9]+]] = vxor([[VOP2_F16]],[[VOP2_F16]]) +; CHECK: [[VOP1_F16:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16]]) +; CHECK: [[Q1_F16:q[0-9]+]] = vcmp.eq([[VOP1_F16]].h,[[VOP1_F16]].h) +; CHECK: [[VOP3_F16:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_F16]] &= vcmp.eq([[VOP3_F16]].h,[[VOP3_F16]].h) +; CHECK: 
[[VOUT_F16:v[0-9]+]] = vmux([[Q1_F16]],[[VOP2_F16]],[[VONES16]]) +; CHECK: vmemu(r2+#0) = [[VOUT_F16]] + +define dso_local void @store_isordered_f32(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0 + %0 = load <32 x float>, ptr %arrayidx_a, align 4 + %1 = load <32 x float>, ptr %arrayidx_b, align 4 + %.vectorized = fcmp ord <32 x float> %0, %1 + %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32> + %arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_cmp, i32 0 + store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4 + ret void +} +; CHECK-LABEL: store_isordered_f32 +; CHECK: [[VOP2_ORD_F32:v[0-9]+]] = vxor([[VOP2_ORD_F32]],[[VOP2_ORD_F32]]) +; CHECK: [[VOP1_ORD_F32:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES_ORD_F32:v[0-9]+]] = vsplat([[RONE32]]) +; CHECK: [[Q1_ORD_F32:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F32]].w,[[VOP1_ORD_F32]].w) +; CHECK: [[VOP3_ORD_F32:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_ORD_F32]] &= vcmp.eq([[VOP3_ORD_F32]].w,[[VOP3_ORD_F32]].w) +; CHECK: [[VOUT_ORD_F32:v[0-9]+]] = vmux([[Q1_ORD_F32]],[[VONES_ORD_F32]],[[VOP2_ORD_F32]]) +; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F32]] + + +define dso_local void @store_isordered_f16(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0 + %0 = load <64 x half>, ptr %arrayidx_a, align 2 + %1 = load <64 x half>, ptr %arrayidx_b, align 2 + %.vectorized = fcmp ord <64 x half> %0, %1 + %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16> + %arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_cmp, i32 0 + store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2 + ret void +} +; CHECK-LABEL: store_isordered_f16 +; CHECK: [[VOP2_ORD_F16:v[0-9]+]] = vxor([[VOP2_ORD_F16]],[[VOP2_ORD_F16]]) +; CHECK: 
[[VOP1_ORD_F16:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES_ORD_F16:v[0-9]+]].h = vsplat([[RONE16]]) +; CHECK: [[Q1_ORD_F16:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F16]].h,[[VOP1_ORD_F16]].h) +; CHECK: [[VOP3_ORD_F16:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_ORD_F16]] &= vcmp.eq([[VOP3_ORD_F16]].h,[[VOP3_ORD_F16]].h) +; CHECK: [[VOUT_ORD_F16:v[0-9]+]] = vmux([[Q1_ORD_F16]],[[VONES_ORD_F16]],[[VOP2_ORD_F16]]) +; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F16]] diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 18d071c..a0d1ecc 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -436,8 +436,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; RV32-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; RV32-LABEL: utest_f16i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i32: +; RV64-LABEL: utest_f16i32: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -974,8 +974,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; RV32-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; RV32-LABEL: utest_f16i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i16: +; RV64-LABEL: utest_f16i16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -3829,6 +3829,52 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; RV32-LABEL: ustest_f16i32_nsat: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call __extendhfsf2 +; RV32-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-NEXT: srai a1, a0, 31 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ustest_f16i32_nsat: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call __extendhfsf2 +; RV64-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-NEXT: srai a1, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index aba9d37..f5977625 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -519,8 +519,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4 ; 
CHECK-NOV-NEXT: j .LBB7_5 ; -; CHECK-V-LABEL: utesth_f16i32: +; CHECK-V-LABEL: utest_f16i32: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -1594,8 +1594,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8 ; CHECK-NOV-NEXT: j .LBB16_9 ; -; CHECK-V-LABEL: utesth_f16i16: +; CHECK-V-LABEL: utest_f16i16: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -3332,8 +3332,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64: +; CHECK-V-LABEL: utest_f16i64: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -4074,8 +4074,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4 ; CHECK-NOV-NEXT: j .LBB34_5 ; -; CHECK-V-LABEL: utesth_f16i32_mm: +; CHECK-V-LABEL: 
utest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -5134,8 +5134,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8 ; CHECK-NOV-NEXT: j .LBB43_9 ; -; CHECK-V-LABEL: utesth_f16i16_mm: +; CHECK-V-LABEL: utest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -6837,8 +6837,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64_mm: +; CHECK-V-LABEL: utest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -7048,6 +7048,172 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NOV-LABEL: ustest_f16i32_nsat: +; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: addi sp, sp, -64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NOV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: 
fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: .cfi_offset ra, -8 +; CHECK-NOV-NEXT: .cfi_offset s0, -16 +; CHECK-NOV-NEXT: .cfi_offset s1, -24 +; CHECK-NOV-NEXT: .cfi_offset s2, -32 +; CHECK-NOV-NEXT: .cfi_offset s3, -40 +; CHECK-NOV-NEXT: .cfi_offset fs0, -48 +; CHECK-NOV-NEXT: .cfi_offset fs1, -56 +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) +; CHECK-NOV-NEXT: mv s0, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs1, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s s1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s s2, fs1, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fcvt.l.s s3, fs0, rtz +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-NOV-NEXT: srai a1, s3, 63 +; CHECK-NOV-NEXT: and a1, a1, s3 +; CHECK-NOV-NEXT: srai a2, s2, 63 +; CHECK-NOV-NEXT: and a2, a2, s2 +; CHECK-NOV-NEXT: srai a3, s1, 63 +; CHECK-NOV-NEXT: and a3, a3, s1 +; CHECK-NOV-NEXT: srai a4, a0, 63 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sgtz a4, a3 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a3, a4, a3 +; CHECK-NOV-NEXT: sgtz a4, a2 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sgtz a4, a0 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a2, 4(s0) +; CHECK-NOV-NEXT: sw a1, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte 
Folded Reload +; CHECK-NOV-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: .cfi_restore ra +; CHECK-NOV-NEXT: .cfi_restore s0 +; CHECK-NOV-NEXT: .cfi_restore s1 +; CHECK-NOV-NEXT: .cfi_restore s2 +; CHECK-NOV-NEXT: .cfi_restore s3 +; CHECK-NOV-NEXT: .cfi_restore fs0 +; CHECK-NOV-NEXT: .cfi_restore fs1 +; CHECK-NOV-NEXT: addi sp, sp, 64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NOV-NEXT: ret +; +; CHECK-V-LABEL: ustest_f16i32_nsat: +; CHECK-V: # %bb.0: # %entry +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: .cfi_offset ra, -8 +; CHECK-V-NEXT: .cfi_offset s0, -16 +; CHECK-V-NEXT: .cfi_offset s1, -24 +; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) +; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) 
# vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vmin.vx v8, v8, zero +; CHECK-V-NEXT: vmax.vx v8, v8, zero +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: .cfi_def_cfa sp, 48 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: .cfi_restore ra +; CHECK-V-NEXT: .cfi_restore s0 +; CHECK-V-NEXT: .cfi_restore s1 +; CHECK-V-NEXT: .cfi_restore s2 +; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 0 +; CHECK-V-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> 
zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll new file mode 100644 index 0000000..8491328 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll @@ -0,0 +1,28 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#v2_uint:]] = OpTypeVector %[[#uint]] 2 +; CHECK-DAG: %[[#double:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#v2_double:]] = OpTypeVector %[[#double]] 2 +; CHECK-DAG: %[[#v4_uint:]] = OpTypeVector %[[#uint]] 4 +@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2i32_12_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2) + %2 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 0) + %3 = load <2 x i32>, ptr addrspace(11) %2, align 8 + %4 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 1) + %5 
= load <2 x i32>, ptr addrspace(11) %4, align 8 +; CHECK: %[[#tmp:]] = OpVectorShuffle %[[#v4_uint]] {{%[0-9]+}} {{%[0-9]+}} 0 2 1 3 + %6 = shufflevector <2 x i32> %3, <2 x i32> %5, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; CHECK: %[[#access:]] = OpAccessChain {{.*}} + %7 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %1, i32 0) +; CHECK: %[[#bitcast:]] = OpBitcast %[[#v2_double]] %[[#tmp]] +; CHECK: OpStore %[[#access]] %[[#bitcast]] Aligned 16 + store <4 x i32> %6, ptr addrspace(11) %7, align 16 + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 137994ce..59f3edc 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -136,9 +136,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32) -> (i32) +define i32 @utest_f16i32(half %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -153,9 +153,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_cse(half %x) { -; CHECK-LABEL: utesth_f16i32_cse: -; CHECK: .functype utesth_f16i32_cse (f32) -> (i32) +define i32 @utest_f16i32_cse(half %x) { +; CHECK-LABEL: utest_f16i32_cse: +; CHECK: .functype utest_f16i32_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -403,9 +403,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32) -> (i32) +define i16 @utest_f16i16(half %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; 
CHECK-NEXT: local.get 0 @@ -427,9 +427,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_cse(half %x) { -; CHECK-LABEL: utesth_f16i16_cse: -; CHECK: .functype utesth_f16i16_cse (f32) -> (i32) +define i16 @utest_f16i16_cse(half %x) { +; CHECK-LABEL: utest_f16i16_cse: +; CHECK: .functype utest_f16i16_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -880,9 +880,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32) -> (i64) +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -919,9 +919,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_cse: -; CHECK: .functype utesth_f16i64_cse (f32) -> (i64) +define i64 @utest_f16i64_cse(half %x) { +; CHECK-LABEL: utest_f16i64_cse: +; CHECK: .functype utest_f16i64_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1118,9 +1118,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32) -> (i32) +define i32 @utest_f16i32_mm(half %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -1353,9 +1353,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32) -> (i32) +define i16 @utest_f16i16_mm(half %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 @@ -1637,9 +1637,9 @@ entry: 
ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32) -> (i64) +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1724,9 +1724,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_mm_cse: -; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64) +define i64 @utest_f16i64_mm_cse(half %x) { +; CHECK-LABEL: utest_f16i64_mm_cse: +; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1754,6 +1754,35 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32) +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.shr_s +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.gt_s +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 7190e16..52f57dc 100644 --- 
a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -209,9 +209,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -513,9 +513,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -1295,9 +1295,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1649,9 +1649,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -1938,9 +1938,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; 
CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -2673,9 +2673,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2810,6 +2810,48 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128) +; CHECK-NEXT: .local v128 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.splat +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 3 +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: i32x4.min_s +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: # 
fallthrough-return +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 3f5ec7b..67483be 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -161,8 +161,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) nounwind { +; CHECK-LABEL: utest_f16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -360,8 +360,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) nounwind { +; CHECK-LABEL: utest_f16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -566,8 +566,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) nounwind { +; CHECK-LABEL: utest_f16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -762,8 +762,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i32_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -946,8 +946,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16_mm: 
+define i16 @utest_f16i16_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i16_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -1131,8 +1131,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -1170,6 +1170,27 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) nounwind { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 1a2cfd6..991ce33 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -747,8 +747,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32: +; AVX2-LABEL: utest_f16i32: ; AVX2: 
# %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32: +; AVX512-LABEL: utest_f16i32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -1338,8 +1338,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16: +; AVX2-LABEL: utest_f16i16: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16: +; AVX512-LABEL: utest_f16i16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -2456,8 +2456,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64: +; AVX2-LABEL: utest_f16i64: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -2508,7 +2508,7 @@ define <2 x i64> 
@utesth_f16i64(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64: +; AVX512-LABEL: utest_f16i64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -3359,8 +3359,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32_mm: +; AVX2-LABEL: utest_f16i32_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32_mm: +; AVX512-LABEL: utest_f16i32_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -3935,8 +3935,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16_mm: +; AVX2-LABEL: utest_f16i16_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; AVX2-NEXT: 
vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16_mm: +; AVX512-LABEL: utest_f16i16_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -4820,8 +4820,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64_mm: +; AVX2-LABEL: utest_f16i64_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64_mm: +; AVX512-LABEL: utest_f16i64_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -4974,6 +4974,63 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind { +; SSE-LABEL: ustest_f16i32_nsat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: subq $72, %rsp +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE-NEXT: psrlq $48, %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE-NEXT: cvttps2dq 
%xmm0, %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: cvttps2dq %xmm1, %xmm0 +; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pand %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX-LABEL: ustest_f16i32_nsat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) |