diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64')
90 files changed, 7628 insertions, 5739 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll index f0d9aa4..639b6fd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll @@ -20,8 +20,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) { ; O0-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; O0-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) ; O0-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; O0-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) + ; O0-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) ; O0-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; O0-NEXT: $w0 = COPY [[ADD]](s32) ; O0-NEXT: RET_ReallyLR implicit $w0 @@ -39,8 +39,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) { ; O3-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; O3-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) ; O3-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; O3-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %9(p0) :: (load (s32) from %ir.gep2) + ; O3-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[PTR_ADD]], [[C1]](s64) + ; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2) ; O3-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; O3-NEXT: $w0 = COPY [[ADD]](s32) ; O3-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll index 3b12885..79b2e2e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -795,8 +795,8 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) { ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64) ; CHECK-NEXT: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: %120:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C112]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %120(p0) :: (load (p0) from %ir.tmp59) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[PTR_ADD]], [[C112]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) ; CHECK-NEXT: $x1 = COPY [[LOAD]](p0) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index d4574187..675c953 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -599,10 +599,10 @@ define ptr @test_constant_null() { ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST1]](s64) ; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load (s32) from %ir.addr + 4) ; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store (s8) into %ir.addr, align 4) -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST1]](s64) ; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store (s32) into %ir.addr + 4) define void @test_struct_memops(ptr %addr) { %val = load { i8, i32 }, ptr %addr @@ -706,7 +706,7 @@ define float @test_frem(float %arg1, float %arg2) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) define void @test_sadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { @@ -722,7 +722,7 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UADDO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define void @test_uadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { @@ -738,7 +738,7 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.subr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) define void @test_ssub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { @@ -754,7 +754,7 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_USUBO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.subr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define void @test_usub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { @@ -770,7 +770,7 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) define void @test_smul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { @@ -786,7 +786,7 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]] ; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @test_umul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { @@ -799,13 +799,13 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: $w0 = COPY [[LD3]](s32) %struct.nested = type {i8, { i8, i32 }, i32} @@ -820,16 +820,16 @@ define i32 @test_extractvalue(ptr %addr) { ; CHECK: %1:_(p0) = COPY $x1 ; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store (s8) into %ir.addr2, align 4) -; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64) +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %1, [[CST1]](s64) ; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store (s32) into %ir.addr2 + 4) define void @test_extractvalue_agg(ptr %addr, ptr %addr2) { %struct = load %struct.nested, ptr %addr @@ -854,20 +854,20 @@ define void @test_trivial_extract_ptr([1 x ptr] %s, i8 %val) { ; CHECK: %1:_(s32) = COPY $w1 ; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store (s8) into %ir.addr, align 4) -; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store (s8) into %ir.addr + 4, align 4) -; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST2]](s64) ; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store (s32) into %ir.addr + 8) -; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 12) define void @test_insertvalue(ptr %addr, i32 %val) { %struct = load %struct.nested, ptr %addr @@ -899,23 +899,23 @@ define [1 x ptr] @test_trivial_insert_ptr([1 x ptr] %s, ptr %val) { ; CHECK: %1:_(p0) = COPY $x1 ; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load (s8) from %ir.addr2, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %1, [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load (s32) from %ir.addr2 + 4) ; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: [[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64) +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST4]](s64) ; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store (s8) into %ir.addr, align 4) -; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store (s8) into %ir.addr + 4, align 4) -; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 8) -; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64) +; CHECK: [[GEP7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST4]](s64) ; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store (s32) into %ir.addr + 12) define void @test_insertvalue_agg(ptr %addr, ptr %addr2) { %smallstruct = load {i8, i32}, ptr %addr2 @@ -1905,19 +1905,19 @@ define void @test_phi_diamond(ptr %a.ptr, ptr %b.ptr, i1 %selector, ptr %dst) { ; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load (s8) from %ir.a.ptr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG1]], [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load (s16) from %ir.a.ptr + 2) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG1]], [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.a.ptr + 4) ; CHECK: G_BR %bb.4 ; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load (s8) from %ir.b.ptr, align 4) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG2]], [[CST3]](s64) ; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load (s16) from %ir.b.ptr + 2) ; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST4]](s64) +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG2]], [[CST4]](s64) ; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load (s32) from %ir.b.ptr + 4) ; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3 @@ -1925,10 +1925,10 @@ define void @test_phi_diamond(ptr %a.ptr, ptr %b.ptr, i1 %selector, ptr %dst) { ; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3 ; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store (s8) into %ir.dst, align 4) ; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 -; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST5]](s64) +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG4]], [[CST5]](s64) ; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store (s16) into %ir.dst + 2) ; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST6]](s64) +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ARG4]], [[CST6]](s64) ; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store (s32) into %ir.dst + 4) ; CHECK: RET_ReallyLR @@ -1964,22 +1964,22 @@ define void @test_nested_aggregate_const(ptr %ptr) { ; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 ; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store (s32) into %ir.ptr, align 8) ; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST7]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST7]](s64) ; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store (s32) into %ir.ptr + 4) ; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST8]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST8]](s64) ; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store (s16) into %ir.ptr + 8, align 8) ; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST9]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST9]](s64) ; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store (s8) into %ir.ptr + 10, align 2) ; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 -; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST10]](s64) +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST10]](s64) ; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store (s64) into %ir.ptr + 16) ; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 -; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST11]](s64) +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST11]](s64) ; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store (s64) into %ir.ptr + 24) ; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 -; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST12]](s64) +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[BASE]], [[CST12]](s64) ; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store (s32) into %ir.ptr + 32, align 8) store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, ptr %ptr ret void @@ -2519,7 +2519,7 @@ define {i8, i32} @test_freeze_struct(ptr %addr) { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]] ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[LOAD]] ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[LOAD1]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 2779e89..4a85d84 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew) + ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0 ; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 @@ -726,7 +726,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -750,7 +750,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -773,7 +773,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -797,7 +797,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -821,7 +821,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -845,7 +845,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -869,7 +869,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0 @@ -895,7 +895,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0 @@ -923,10 +923,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -951,10 +951,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -977,7 +977,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1001,7 +1001,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1024,7 +1024,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1048,7 +1048,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1072,7 +1072,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1096,7 +1096,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 @@ -1120,7 +1120,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0 @@ -1146,7 +1146,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0 @@ -1174,10 +1174,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1202,10 +1202,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1230,7 +1230,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 @@ -1272,7 +1272,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll index a8520af..08021cc 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll @@ -11,28 +11,28 @@ define [9 x i64] @callee_sret_demotion() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64)) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD1]](p0) :: (store (s64)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD2]](p0) :: (store (s64)) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD3]](p0) :: (store (s64)) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD4]](p0) :: (store (s64)) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD5]](p0) :: (store (s64)) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD6]](p0) :: (store (s64)) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C8]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD7]](p0) :: (store (s64)) ; CHECK-NEXT: RET_ReallyLR ret [9 x i64] zeroinitializer @@ -48,28 +48,28 @@ define i64 @caller() { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64) ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64) ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD5]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64) ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64) ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD7]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: $x0 = COPY [[LOAD4]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 @@ -88,28 +88,28 @@ define i64 @caller_tail() { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64) ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64) ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD5]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64) ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64) ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD7]](p0) :: (load (s64) from %stack.0) ; CHECK-NEXT: $x0 = COPY [[LOAD4]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll index 4aac649..39860a7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll @@ -4,7 +4,7 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index b10c887e..b3e436b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -61,7 +61,7 @@ define void @take_128bit_struct(ptr %ptr, [2 x i64] %in) { ; CHECK-LABEL: name: test_split_struct ; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll index ca8f5de..36529be 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -67,10 +67,10 @@ define void @test_multiple_args(i64 %in) { ; CHECK: G_STORE [[DBL]](s64), [[ADDR]](p0) :: (store (s64) into %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST1]](s64) ; CHECK: G_STORE [[I64]](s64), [[GEP1]](p0) :: (store (s64) into %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST2]](s64) ; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store (s8) into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR define void @test_struct_formal({double, i64, i8} %in, ptr %addr) { @@ -84,10 +84,10 @@ define void @test_struct_formal({double, i64, i8} %in, ptr %addr) { ; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load (s64) from %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load (s64) from %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 16, align 8) ; CHECK: $d0 = COPY [[LD1]](s64) @@ -103,13 +103,13 @@ define {double, i64, i32} @test_struct_return(ptr %addr) { ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST1]](s64) ; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load (s64) from %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 -; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST2]](s64) ; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load (s64) from %ir.addr + 16) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 -; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %0, [[CST3]](s64) ; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load (s64) from %ir.addr + 24) ; CHECK: $x0 = COPY [[LD1]](s64) @@ -286,7 +286,7 @@ define void @take_128bit_struct(ptr %ptr, [2 x i64] %in) { ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) +; CHECK: [[GEP:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir index f50540b..1c0fc3f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir @@ -38,44 +38,44 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 32, align 4) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 48, align 4) - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load (s128) from %ir.1 + 64, align 4) - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 64, align 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64) ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load (s128) from %ir.1 + 80, align 4) - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C5]](s64) ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load (s128) from %ir.1 + 96, align 4) - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CHECK-NEXT: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store (s128) into %ir.0 + 96, align 4) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C6]](s64) ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load (s128) from %ir.1 + 112, align 4) - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store (s128) into %ir.0 + 112, align 4) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C7]](s64) ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load (s128) from %ir.1 + 127, align 1, basealign 4) - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64) ; CHECK-NEXT: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store (s128) into %ir.0 + 127, align 1, basealign 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir index b21046d..97a0417 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir @@ -111,24 +111,24 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 32, align 4) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 48, align 4) - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %ir.1 + 64, align 4) - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store (s64) into %ir.0 + 64, align 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -159,24 +159,24 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 32, align 4) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 48, align 4) - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %ir.1 + 64, align 4) - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store (s64) into %ir.0 + 64, align 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -235,44 +235,44 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 32, align 4) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 48, align 4) - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load (s128) from %ir.1 + 64, align 4) - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 64, align 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64) ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load (s128) from %ir.1 + 80, align 4) - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C5]](s64) ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load (s128) from %ir.1 + 96, align 4) - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CHECK-NEXT: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store (s128) into %ir.0 + 96, align 4) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C6]](s64) ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load (s128) from %ir.1 + 112, align 4) - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store (s128) into %ir.0 + 112, align 4) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C7]](s64) ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load (s128) from %ir.1 + 127, align 1, basealign 4) - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64) ; CHECK-NEXT: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store (s128) into %ir.0 + 127, align 1, basealign 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -303,24 +303,24 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p2) :: (load (s128) from %ir.1 + 48, align 4, addrspace 2) - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p1) :: (store (s128) into %ir.0 + 48, align 4, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p2) :: (load (s64) from %ir.1 + 64, align 4, addrspace 2) - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p1) :: (store (s64) into %ir.0 + 64, align 4, addrspace 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p1) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir index 57d031d..fc4fbac 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir @@ -89,17 +89,17 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -124,35 +124,35 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 48, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD3]](p0) :: (load (s128) from %ir.1 + 64, align 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64) ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 80, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD6]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C8]](s64) ; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD8]](p0) :: (store (s128) into %ir.0 + 64, align 4) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) + ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C9]](s64) ; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -177,23 +177,23 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -218,17 +218,17 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p2) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p1) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir index f8d2bf3..b06cadf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir @@ -100,7 +100,7 @@ body: | ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -127,13 +127,13 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 48, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -160,7 +160,7 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072 ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -190,13 +190,13 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 44, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -222,11 +222,11 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072 ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 16448 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -254,7 +254,7 @@ body: | ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir index 8d8f717..7393091 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir @@ -46,9 +46,9 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll index 34ac4f6..8a6f266 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll @@ -17,8 +17,8 @@ define i32 @gep_nusw_nuw(ptr %ptr, i32 %idx) { ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -77,8 +77,8 @@ define i32 @gep_nusw(ptr %ptr, i32 %idx) { ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: %11:_(p0) = nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir index fa1700a..1a21064 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir @@ -32,11 +32,11 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 16) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from unknown-address + 8, align 8) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 10, align 2) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[DEF]](s32) @@ -48,7 +48,7 @@ body: | ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 16) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY1]](p0) :: (load (s16) from unknown-address + 8, align 8) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 10, align 2) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[DEF]](s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[C3]](s64) @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) ; CHECK-NEXT: G_STORE [[COPY2]](s64), %ptr(p0) :: (store (s64), align 16) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C3]](s64) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 8, align 8) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD3]](p0) :: (store (s8) into unknown-address + 10, align 2) %ptr:_(p0) = COPY $x0 @@ -96,16 +96,16 @@ body: | ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[AND4]], [[C1]] ; CHECK-NEXT: G_STORE [[AND5]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND6]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND7]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND8]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND9]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s318) = G_IMPLICIT_DEF @@ -140,16 +140,16 @@ body: | ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[AND4]], [[C1]] ; CHECK-NEXT: G_STORE [[AND5]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND6]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND7]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND8]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND9]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s318) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir index b0736fb..2378401 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir @@ -195,13 +195,13 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C1]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[UV]](s64), [[COPY]](p0) :: (store (s32), align 16) ; CHECK-NEXT: G_STORE [[LSHR1]](s64), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4, align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C1]](s64) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[UV1]](s64), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 6, align 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s64), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 10) ; CHECK-NEXT: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index 96be30b..c301e76 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -97,16 +97,16 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[C1]], [[C3]] ; CHECK-NEXT: G_STORE [[AND]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C6]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C6]](s64) ; CHECK-NEXT: G_STORE [[AND3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C7]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C7]](s64) ; CHECK-NEXT: G_STORE [[AND4]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %cst:_(s318) = G_CONSTANT i318 1234 @@ -136,10 +136,10 @@ body: | ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[AND2]](s64), 0 ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64), align 32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %cst:_(s158) = G_CONSTANT i158 1234 @@ -170,10 +170,10 @@ body: | ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[AND2]](s64), 0 ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64), align 32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[EXTRACT]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %cst:_(s142) = G_CONSTANT i142 1234 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir index b0b0e6b..dafc304 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir @@ -328,7 +328,7 @@ body: | ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK-NEXT: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (<2 x s64>) into %stack.0, align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %stack.0 + 16, basealign 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]] @@ -426,7 +426,7 @@ body: | ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK-NEXT: G_STORE [[COPY]](<4 x s32>), [[FRAME_INDEX]](p0) :: (store (<4 x s32>) into %stack.0, align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into %stack.0 + 16, basealign 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idxprom, [[C1]] @@ -460,7 +460,7 @@ body: | ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK-NEXT: G_STORE [[COPY]](<8 x s16>), [[FRAME_INDEX]](p0) :: (store (<8 x s16>) into %stack.0, align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into %stack.0 + 16, basealign 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idxprom, [[C1]] @@ -495,7 +495,7 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[DEF]](<2 x p0>) ; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (<2 x s64>) into %stack.0, align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[DEF]](<2 x p0>) ; CHECK-NEXT: G_STORE [[BITCAST1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %stack.0 + 16, basealign 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir index 588dfd9..1c10e08 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir @@ -22,7 +22,7 @@ body: | ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) ; CHECK-NEXT: G_STORE [[FPEXT]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: G_STORE [[FPEXT1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir index e1b6437..a19ab0b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir @@ -135,7 +135,7 @@ body: | ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC2]](<2 x s32>), [[FPTRUNC3]](<2 x s32>) ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY5]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY5]], [[C]](s64) ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %2:_(<2 x s64>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir index 11c6c7f..858a5a2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir @@ -258,10 +258,10 @@ body: | ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>) ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY]](p0) :: (store (s32), align 16) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8) ; CHECK-NEXT: G_BR %bb.1 bb.1: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir index 3a2c57a..29a3e38 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir @@ -46,7 +46,7 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>)) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x8 @@ -72,7 +72,7 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>)) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x8 @@ -95,7 +95,7 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x8 @@ -140,7 +140,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: $q0 = COPY [[LOAD]](<2 x s64>) ; CHECK-NEXT: $q1 = COPY [[LOAD1]](<2 x s64>) @@ -166,7 +166,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: $q0 = COPY [[LOAD]](<2 x s64>) ; CHECK-NEXT: $q1 = COPY [[LOAD1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index 94bdcf7..2c326902 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -332,7 +332,7 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[DEF]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<16 x s8>), [[PTR_ADD]](p0) :: (store (<16 x s8>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %val:_(<32 x s8>) = G_IMPLICIT_DEF @@ -355,7 +355,7 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[DEF]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %val:_(<16 x s16>) = G_IMPLICIT_DEF @@ -378,7 +378,7 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %val:_(<8 x s32>) = G_IMPLICIT_DEF @@ -401,7 +401,7 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %val:_(<4 x s64>) = G_IMPLICIT_DEF @@ -423,10 +423,10 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD %ptr(p0) :: (load (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load (<16 x s8>) from unknown-address + 16) ; CHECK-NEXT: G_STORE [[LOAD]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[PTR_ADD1]](p0) :: (store (<16 x s8>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -448,10 +448,10 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD %ptr(p0) :: (load (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<8 x s16>) from unknown-address + 16) ; CHECK-NEXT: G_STORE [[LOAD]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[PTR_ADD1]](p0) :: (store (<8 x s16>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -473,10 +473,10 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16) ; CHECK-NEXT: G_STORE [[LOAD]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[PTR_ADD1]](p0) :: (store (<4 x s32>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -498,10 +498,10 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load (<2 x s64>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: G_STORE [[LOAD]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -549,10 +549,10 @@ body: | ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>)) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32) ; CHECK-NEXT: RET_ReallyLR %val:_(<6 x s64>) = G_IMPLICIT_DEF @@ -575,7 +575,7 @@ body: | ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) ; CHECK-NEXT: G_STORE [[UV]](s16), [[COPY]](p0) :: (store (s16), align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UV1]](s16), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 @@ -597,7 +597,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s16) @@ -626,10 +626,10 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 16) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 8, align 8) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 10, align 2) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[DEF]](s32) @@ -641,9 +641,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[OR1]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[OR2]](s64) ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64), align 16) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C3]](s64) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD2]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 8, align 8) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD3]](p0) :: (store (s8) into unknown-address + 10, align 2) ; CHECK-NEXT: RET_ReallyLR @@ -710,19 +710,19 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load (<2 x s64>), align 64) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 32, align 32) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD2]](<2 x s64>) ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST]](<2 x p0>) ; CHECK-NEXT: G_STORE [[BITCAST3]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>), align 64) - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64) ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST1]](<2 x p0>) ; CHECK-NEXT: G_STORE [[BITCAST4]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into unknown-address + 16) - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BITCAST2]](<2 x p0>) ; CHECK-NEXT: G_STORE [[BITCAST5]](<2 x s64>), [[PTR_ADD3]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir index fae979d..30afd7e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMIN]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMIN1]](<16 x s8>), [[PTR_ADD]](p0) :: (store (<16 x s8>) into unknown-address + 16) %vec:_(<32 x s8>) = G_IMPLICIT_DEF %vec1:_(<32 x s8>) = G_IMPLICIT_DEF @@ -130,7 +130,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMIN]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMIN1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into unknown-address + 16) %vec:_(<16 x s16>) = G_IMPLICIT_DEF %vec1:_(<16 x s16>) = G_IMPLICIT_DEF @@ -199,7 +199,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMIN]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMIN1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) %vec:_(<8 x s32>) = G_IMPLICIT_DEF %vec1:_(<8 x s32>) = G_IMPLICIT_DEF @@ -262,7 +262,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[OR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[OR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) %vec:_(<4 x s64>) = G_IMPLICIT_DEF %vec1:_(<4 x s64>) = G_IMPLICIT_DEF @@ -331,7 +331,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMIN]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMIN1]](<16 x s8>), [[PTR_ADD]](p0) :: (store (<16 x s8>) into unknown-address + 16) %vec:_(<32 x s8>) = G_IMPLICIT_DEF %vec1:_(<32 x s8>) = G_IMPLICIT_DEF @@ -400,7 +400,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMIN]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMIN1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into unknown-address + 16) %vec:_(<16 x s16>) = G_IMPLICIT_DEF %vec1:_(<16 x s16>) = G_IMPLICIT_DEF @@ -469,7 +469,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMIN]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMIN1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) %vec:_(<8 x s32>) = G_IMPLICIT_DEF %vec1:_(<8 x s32>) = G_IMPLICIT_DEF @@ -532,7 +532,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[OR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[OR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) %vec:_(<4 x s64>) = G_IMPLICIT_DEF %vec1:_(<4 x s64>) = G_IMPLICIT_DEF @@ -623,7 +623,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMAX]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMAX1]](<16 x s8>), [[PTR_ADD]](p0) :: (store (<16 x s8>) into unknown-address + 16) %vec:_(<32 x s8>) = G_IMPLICIT_DEF %vec1:_(<32 x s8>) = G_IMPLICIT_DEF @@ -670,7 +670,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMAX]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMAX1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into unknown-address + 16) %vec:_(<16 x s16>) = G_IMPLICIT_DEF %vec1:_(<16 x s16>) = G_IMPLICIT_DEF @@ -739,7 +739,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[SMAX]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SMAX1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) %vec:_(<8 x s32>) = G_IMPLICIT_DEF %vec1:_(<8 x s32>) = G_IMPLICIT_DEF @@ -802,7 +802,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[OR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[OR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) %vec:_(<4 x s64>) = G_IMPLICIT_DEF %vec1:_(<4 x s64>) = G_IMPLICIT_DEF @@ -871,7 +871,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMAX]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMAX1]](<16 x s8>), [[PTR_ADD]](p0) :: (store (<16 x s8>) into unknown-address + 16) %vec:_(<32 x s8>) = G_IMPLICIT_DEF %vec1:_(<32 x s8>) = G_IMPLICIT_DEF @@ -940,7 +940,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMAX]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMAX1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into unknown-address + 16) %vec:_(<16 x s16>) = G_IMPLICIT_DEF %vec1:_(<16 x s16>) = G_IMPLICIT_DEF @@ -1009,7 +1009,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[UMAX]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UMAX1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) %vec:_(<8 x s32>) = G_IMPLICIT_DEF %vec1:_(<8 x s32>) = G_IMPLICIT_DEF @@ -1072,7 +1072,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[OR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[OR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) %vec:_(<4 x s64>) = G_IMPLICIT_DEF %vec1:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir index 332f933..b6488e9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir @@ -16,13 +16,13 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2, align 2) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s64) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[OR]](s32), [[COPY1]](p0) :: (store (s16), align 4) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 2, align 2) ; CHECK-NEXT: $w0 = COPY [[C]](s32) @@ -54,13 +54,13 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[C1]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s32), align 8) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C3]](s64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4, align 4) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 6, align 2) ; CHECK-NEXT: RET_ReallyLR @@ -91,16 +91,16 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[C1]] ; CHECK-NEXT: G_STORE [[AND]](s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND4]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -130,10 +130,10 @@ body: | ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[AND2]](s64), 0 ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 16, align 8) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 @@ -163,10 +163,10 @@ body: | ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[AND2]](s64), 0 ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[EXTRACT]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 16, align 8) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir index 7b3be34..9edc1cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir @@ -84,16 +84,16 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[OR4]], [[C1]] ; CHECK-NEXT: G_STORE [[AND]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND4]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s318) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index 7dbe3fe..47aa570 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -715,7 +715,7 @@ body: | ; CHECK-NEXT: %ptr2:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr1(p0) :: (load (<2 x s64>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -728,7 +728,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr2(p0) :: (load (<2 x s64>), align 32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr2, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr2, [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -903,7 +903,7 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr1(p0) :: (load (<2 x s64>), align 32) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -918,7 +918,7 @@ body: | ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr2(p0) :: (load (<2 x s64>), align 32) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD2]](<2 x s64>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr2, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr2, [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD3]](<2 x s64>) ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index af03a21..2e70252 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -165,7 +165,7 @@ body: | ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (<2 x s64>), align 32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY4]], [[C]](s64) ; CHECK-NEXT: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %3:_(<2 x s64>) = COPY $q0 @@ -208,7 +208,7 @@ body: | ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY4]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) ; CHECK-NEXT: RET_ReallyLR %3:_(<4 x s32>) = COPY $q0 @@ -271,10 +271,10 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY8]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY8]], [[C3]](s64) ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR %3:_(s64) = COPY $d0 @@ -458,7 +458,7 @@ body: | ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s32>), [[BUILD_VECTOR3]](<2 x s32>) ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY8]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR %3:_(s32) = COPY $s0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vacopy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vacopy.mir index e665637..4f93f69 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vacopy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vacopy.mir @@ -24,20 +24,20 @@ body: | ; CHECK-LINUX-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK-LINUX-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64)) ; CHECK-LINUX-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-LINUX-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-LINUX-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8) ; CHECK-LINUX-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-LINUX-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-LINUX-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from unknown-address + 16) ; CHECK-LINUX-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-LINUX-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) ; CHECK-LINUX-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from unknown-address + 24) ; CHECK-LINUX-NEXT: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store (s64)) - ; CHECK-LINUX-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-LINUX-NEXT: G_STORE [[LOAD1]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 8) - ; CHECK-LINUX-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK-LINUX-NEXT: G_STORE [[LOAD2]](s64), [[PTR_ADD4]](p0) :: (store (s64) into unknown-address + 16) - ; CHECK-LINUX-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-LINUX-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-LINUX-NEXT: G_STORE [[LOAD3]](s64), [[PTR_ADD5]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-LINUX-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir index 9c528623..1e1ae01 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir @@ -46,16 +46,16 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[XOR4]], [[C1]] ; CHECK-NEXT: G_STORE [[AND]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND4]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s318) = G_IMPLICIT_DEF @@ -90,16 +90,16 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[XOR4]], [[C1]] ; CHECK-NEXT: G_STORE [[AND]](s64), %ptr(p0) :: (store (s64), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[AND1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[AND2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into unknown-address + 16, align 16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C4]](s64) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](s64) ; CHECK-NEXT: G_STORE [[AND3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into unknown-address + 24) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C5]](s64) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](s64) ; CHECK-NEXT: G_STORE [[AND4]](s64), [[PTR_ADD3]](p0) :: (store (s64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s319) = G_IMPLICIT_DEF @@ -133,10 +133,10 @@ body: | ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[AND2]](s64), 0 ; CHECK-NEXT: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64), align 32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](s64) ; CHECK-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s158) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir index cf4f321..491d693 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -1,8 +1,8 @@ -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s # name: f1_2s registers: @@ -16,18 +16,18 @@ body: | %2:fpr64 = COPY $d2 %1:fpr64 = COPY $d1 %0:fpr64 = COPY $d0 - %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr - %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr + %3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr + %4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr $d0 = COPY %4 RET_ReallyLR implicit $d0 ... # UNPROFITABLE-LABEL: name: f1_2s -# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 +# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2 # UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2s -# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2 # PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_4s @@ -42,18 +42,18 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr - %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr + %3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr + %4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_4s -# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr # UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_4s -# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2 # PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_2d @@ -68,18 +68,18 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr - %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr + %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr + %4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_2d -# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr # UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2d -# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2 # PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_both_fmul_2s @@ -97,15 +97,15 @@ body: | %2:fpr64 = COPY $q2 %1:fpr64 = COPY $q1 %0:fpr64 = COPY $q0 - %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr - %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr - %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr + %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr + %5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr + %6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2s -# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr +# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_4s @@ -123,15 +123,15 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr - %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr - %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr + %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr + %5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr + %6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_4s -# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_2d @@ -149,14 +149,14 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr - %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr - %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr + %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr + %5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr + %6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2d -# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir deleted file mode 100644 index 09eb18b..0000000 --- a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir +++ /dev/null @@ -1,364 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -run-pass=machine-combiner -mcpu=neoverse-n2 -mtriple=aarch64-none-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s - ---- -name: split_loads_to_fpr128 -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4 - - ; CHECK-LABEL: name: split_loads_to_fpr128 - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:fpr32 = LDRSroX %0, killed %1, 0, 1 - %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub - %7:fpr128 = LD1i32 %6, 1, killed %2 - %8:fpr128 = LD1i32 %7, 2, killed %3 - %9:fpr128 = LD1i32 %8, 3, killed %4 - $q0 = COPY %9 - RET_ReallyLR implicit $q0 - ---- -name: split_loads_to_fpr128_ui -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4 - - ; CHECK-LABEL: name: split_loads_to_fpr128_ui - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY1]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY3]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:fpr32 = LDRSui %0, 0 - %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub - %7:fpr128 = LD1i32 %6, 1, killed %1 - %8:fpr128 = LD1i32 %7, 2, killed %2 - %9:fpr128 = LD1i32 %8, 3, killed %3 - $q0 = COPY %9 - RET_ReallyLR implicit $q0 - ---- -name: split_loads_to_fpr128_i16 -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8 - - ; CHECK-LABEL: name: split_loads_to_fpr128_i16 - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8 - ; CHECK-NEXT: [[LD_i16:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], killed [[COPY1]], 0, 1 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i16]], %subreg.hsub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i16 [[FIRST_REG]], 1, killed [[COPY2]] - ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i16 [[LD0_1]], 2, killed [[COPY3]] - ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i16 [[LD0_2]], 3, killed [[COPY4]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr16 = LDRHui [[COPY5]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.hsub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i16 [[SECOND_REG]], 1, killed [[COPY6]] - ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i16 [[LD1_1]], 2, killed [[COPY7]] - ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i16 [[LD1_2]], 3, killed [[COPY8]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_3]], [[LD1_3]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:gpr64common = COPY $x5 - %6:gpr64common = COPY $x6 - %7:gpr64common = COPY $x7 - %8:gpr64common = COPY $x8 - %9:fpr16 = LDRHroX %0, killed %1, 0, 1 - %10:fpr128 = SUBREG_TO_REG 0, killed %9, %subreg.hsub - %11:fpr128 = LD1i16 %10, 1, killed %2 - %12:fpr128 = LD1i16 %11, 2, killed %3 - %13:fpr128 = LD1i16 %12, 3, killed %4 - %14:fpr128 = LD1i16 %13, 4, killed %5 - %15:fpr128 = LD1i16 %14, 5, killed %6 - %16:fpr128 = LD1i16 %15, 6, killed %7 - %17:fpr128 = LD1i16 %16, 7, killed %8 - $q0 = COPY %17 - RET_ReallyLR implicit $q0 - ---- -name: split_loads_to_fpr128_i16_ui -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8 - - ; CHECK-LABEL: name: split_loads_to_fpr128_i16_ui - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8 - ; CHECK-NEXT: [[LD_i16:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i16]], %subreg.hsub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i16 [[FIRST_REG]], 1, killed [[COPY1]] - ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i16 [[LD0_1]], 2, killed [[COPY2]] - ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i16 [[LD0_2]], 3, killed [[COPY3]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr16 = LDRHui [[COPY4]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.hsub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i16 [[SECOND_REG]], 1, killed [[COPY5]] - ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i16 [[LD1_1]], 2, killed [[COPY6]] - ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i16 [[LD1_2]], 3, killed [[COPY7]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_3]], [[LD1_3]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:gpr64common = COPY $x5 - %6:gpr64common = COPY $x6 - %7:gpr64common = COPY $x7 - %8:gpr64common = COPY $x8 - %9:fpr16 = LDRHui %0, 0 - %10:fpr128 = SUBREG_TO_REG 0, killed %9, %subreg.hsub - %11:fpr128 = LD1i16 %10, 1, killed %1 - %12:fpr128 = LD1i16 %11, 2, killed %2 - %13:fpr128 = LD1i16 %12, 3, killed %3 - %14:fpr128 = LD1i16 %13, 4, killed %4 - %15:fpr128 = LD1i16 %14, 5, killed %5 - %16:fpr128 = LD1i16 %15, 6, killed %6 - %17:fpr128 = LD1i16 %16, 7, killed %7 - $q0 = COPY %17 - RET_ReallyLR implicit $q0 - ---- -name: split_loads_to_fpr128_i8 -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16 - - ; CHECK-LABEL: name: split_loads_to_fpr128_i8 - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr64common = COPY $x9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr64common = COPY $x10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr64common = COPY $x11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr64common = COPY $x12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr64common = COPY $x13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gpr64common = COPY $x14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gpr64common = COPY $x15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gpr64common = COPY $x16 - ; CHECK-NEXT: [[LD_i8:%[0-9]+]]:fpr8 = LDRBroX [[COPY]], killed [[COPY1]], 0, 0 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i8]], %subreg.bsub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i8 [[FIRST_REG]], 1, killed [[COPY2]] - ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i8 [[LD0_1]], 2, killed [[COPY3]] - ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i8 [[LD0_2]], 3, killed [[COPY4]] - ; CHECK-NEXT: [[LD0_4:%[0-9]+]]:fpr128 = LD1i8 [[LD0_3]], 4, killed [[COPY5]] - ; CHECK-NEXT: [[LD0_5:%[0-9]+]]:fpr128 = LD1i8 [[LD0_4]], 5, killed [[COPY6]] - ; CHECK-NEXT: [[LD0_6:%[0-9]+]]:fpr128 = LD1i8 [[LD0_5]], 6, killed [[COPY7]] - ; CHECK-NEXT: [[LD0_7:%[0-9]+]]:fpr128 = LD1i8 [[LD0_6]], 7, killed [[COPY8]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr8 = LDRBui [[COPY9]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.bsub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i8 [[SECOND_REG]], 1, killed [[COPY10]] - ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i8 [[LD1_1]], 2, killed [[COPY11]] - ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i8 [[LD1_2]], 3, killed [[COPY12]] - ; CHECK-NEXT: [[LD1_4:%[0-9]+]]:fpr128 = LD1i8 [[LD1_3]], 4, killed [[COPY13]] - ; CHECK-NEXT: [[LD1_5:%[0-9]+]]:fpr128 = LD1i8 [[LD1_4]], 5, killed [[COPY14]] - ; CHECK-NEXT: [[LD1_6:%[0-9]+]]:fpr128 = LD1i8 [[LD1_5]], 6, killed [[COPY15]] - ; CHECK-NEXT: [[LD1_7:%[0-9]+]]:fpr128 = LD1i8 [[LD1_6]], 7, killed [[COPY16]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_7]], [[LD1_7]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:gpr64common = COPY $x5 - %6:gpr64common = COPY $x6 - %7:gpr64common = COPY $x7 - %8:gpr64common = COPY $x8 - %9:gpr64common = COPY $x9 - %10:gpr64common = COPY $x10 - %11:gpr64common = COPY $x11 - %12:gpr64common = COPY $x12 - %13:gpr64common = COPY $x13 - %14:gpr64common = COPY $x14 - %15:gpr64common = COPY $x15 - %16:gpr64common = COPY $x16 - %17:fpr8 = LDRBroX %0, killed %1, 0, 0 - %18:fpr128 = SUBREG_TO_REG 0, killed %17, %subreg.bsub - %19:fpr128 = LD1i8 %18, 1, killed %2 - %20:fpr128 = LD1i8 %19, 2, killed %3 - %21:fpr128 = LD1i8 %20, 3, killed %4 - %22:fpr128 = LD1i8 %21, 4, killed %5 - %23:fpr128 = LD1i8 %22, 5, killed %6 - %24:fpr128 = LD1i8 %23, 6, killed %7 - %25:fpr128 = LD1i8 %24, 7, killed %8 - %26:fpr128 = LD1i8 %25, 8, killed %9 - %27:fpr128 = LD1i8 %26, 9, killed %10 - %28:fpr128 = LD1i8 %27, 10, killed %11 - %29:fpr128 = LD1i8 %28, 11, killed %12 - %30:fpr128 = LD1i8 %29, 12, killed %13 - %31:fpr128 = LD1i8 %30, 13, killed %14 - %32:fpr128 = LD1i8 %31, 14, killed %15 - %33:fpr128 = LD1i8 %32, 15, killed %16 - $q0 = COPY %33 - RET_ReallyLR implicit $q0 - ---- -name: negative_pattern_missing_lanes -body: | - bb.0.entry: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: negative_pattern_missing_lanes - ; CHECK: [[LD1:%.*]]:fpr128 = LDRQui $x1, 0 - ; CHECK-NEXT: [[LD2:%.*]]:fpr128 = LD1i32 [[LD1]] - - %0:gpr64common = COPY $x0 - %1:fpr128 = LDRQui $x1, 0 - %2:fpr128 = LD1i32 %1, 3, %0 - $q0 = COPY %2 - RET_ReallyLR implicit $q0 - ---- -name: out_of_order_lanes -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4 - - ; CHECK-LABEL: name: out_of_order_lanes - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub - ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY3]] - ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0 - ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub - ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]] - ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]] - ; CHECK-NEXT: $q0 = COPY [[ZIP]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:fpr32 = LDRSroX %0, killed %1, 0, 1 - %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub - %7:fpr128 = LD1i32 %6, 2, killed %2 - %8:fpr128 = LD1i32 %7, 1, killed %3 - %9:fpr128 = LD1i32 %8, 3, killed %4 - $q0 = COPY %9 - RET_ReallyLR implicit $q0 - ---- -name: negative_pattern_no_subreg_to_reg -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3 - - ; CHECK-LABEL: name: negative_pattern_no_subreg_to_reg - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[INITIAL_VEC:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 - ; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[INITIAL_VEC]], 1, killed [[COPY1]] - ; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY2]] - ; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY3]] - ; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:fpr128 = LDRQui %0, 0 - %5:fpr128 = LD1i32 %4, 1, killed %1 - %6:fpr128 = LD1i32 %5, 2, killed %2 - %7:fpr128 = LD1i32 %6, 3, killed %3 - $q0 = COPY %7 - RET_ReallyLR implicit $q0 - ---- -name: negative_pattern_multiple_users -body: | - bb.0.entry: - liveins: $x0, $x1, $x2, $x3, $x4 - - ; CHECK-LABEL: name: negative_pattern_multiple_users - ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 - ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1 - ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub - ; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]] - ; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY3]] - ; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY4]] - ; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]] - ; CHECK-NEXT: $q1 = COPY [[LD_LANE_2]] - ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 - %0:gpr64common = COPY $x0 - %1:gpr64common = COPY $x1 - %2:gpr64common = COPY $x2 - %3:gpr64common = COPY $x3 - %4:gpr64common = COPY $x4 - %5:fpr32 = LDRSroX %0, killed %1, 0, 1 - %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub - %7:fpr128 = LD1i32 %6, 1, killed %2 - %8:fpr128 = LD1i32 %7, 2, killed %3 - %9:fpr128 = LD1i32 %8, 3, killed %4 - $q0 = COPY %9 - $q1 = COPY %8 - RET_ReallyLR implicit $q0, implicit $q1 diff --git a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll index 7706ca9..9fab3d1 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-- -o - < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-- -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-- -global-isel -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Verify that we can fold csneg/csel into csinc instruction. @@ -8,12 +9,20 @@ target triple = "aarch64-unknown-linux-gnu" ; char csinc1 (char a, char b) { return !a ? b+1 : b+3; } define i8 @csinc1(i8 %a, i8 %b) local_unnamed_addr #0 { -; CHECK-LABEL: csinc1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: tst w0, #0xff -; CHECK-NEXT: add w8, w1, #3 -; CHECK-NEXT: csinc w0, w8, w1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc1: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tst w0, #0xff +; CHECK-SD-NEXT: add w8, w1, #3 +; CHECK-SD-NEXT: csinc w0, w8, w1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: tst w0, #0xff +; CHECK-GI-NEXT: csinc w8, w8, wzr, ne +; CHECK-GI-NEXT: add w0, w8, w1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i8 %a, 0 %cond.v = select i1 %tobool.not, i8 1, i8 3 @@ -23,12 +32,20 @@ entry: ; short csinc2 (short a, short b) { return !a ? b+1 : b+3; } define i16 @csinc2(i16 %a, i16 %b) local_unnamed_addr #0 { -; CHECK-LABEL: csinc2: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: tst w0, #0xffff -; CHECK-NEXT: add w8, w1, #3 -; CHECK-NEXT: csinc w0, w8, w1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc2: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tst w0, #0xffff +; CHECK-SD-NEXT: add w8, w1, #3 +; CHECK-SD-NEXT: csinc w0, w8, w1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc2: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: tst w0, #0xffff +; CHECK-GI-NEXT: csinc w8, w8, wzr, ne +; CHECK-GI-NEXT: add w0, w8, w1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i16 %a, 0 %cond.v = select i1 %tobool.not, i16 1, i16 3 @@ -38,12 +55,20 @@ entry: ; int csinc3 (int a, int b) { return !a ? b+1 : b+3; } define i32 @csinc3(i32 %a, i32 %b) local_unnamed_addr #0 { -; CHECK-LABEL: csinc3: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: add w8, w1, #3 -; CHECK-NEXT: csinc w0, w8, w1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc3: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: add w8, w1, #3 +; CHECK-SD-NEXT: csinc w0, w8, w1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc3: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: csinc w8, w8, wzr, ne +; CHECK-GI-NEXT: add w0, w8, w1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i32 %a, 0 %cond.v = select i1 %tobool.not, i32 1, i32 3 @@ -53,12 +78,20 @@ entry: ; long long csinc4 (long long a, long long b) { return !a ? b+1 : b+3; } define i64 @csinc4(i64 %a, i64 %b) local_unnamed_addr #0 { -; CHECK-LABEL: csinc4: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: add x8, x1, #3 -; CHECK-NEXT: csinc x0, x8, x1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc4: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: add x8, x1, #3 +; CHECK-SD-NEXT: csinc x0, x8, x1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc4: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: csinc x8, x8, xzr, ne +; CHECK-GI-NEXT: add x0, x8, x1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i64 %a, 0 %cond.v = select i1 %tobool.not, i64 1, i64 3 @@ -68,12 +101,21 @@ entry: ; long long csinc8 (long long a, long long b) { return a ? b-1 : b+1; } define i64 @csinc8(i64 %a, i64 %b) { -; CHECK-LABEL: csinc8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub x8, x1, #1 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csinc x0, x8, x1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub x8, x1, #1 +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: csinc x0, x8, x1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: sbfx x8, x8, #0, #1 +; CHECK-GI-NEXT: orr x8, x8, #0x1 +; CHECK-GI-NEXT: add x0, x8, x1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i64 %a, 0 %cond.v = select i1 %tobool.not, i64 1, i64 -1 @@ -83,15 +125,26 @@ entry: ; long long csinc9 (long long a, long long b) { return a ? b+1 : b-1; } define i64 @csinc9(i64 %a, i64 %b) { -; CHECK-LABEL: csinc9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub x8, x1, #1 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csinc x0, x8, x1, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: csinc9: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub x8, x1, #1 +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: csinc x0, x8, x1, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: csinc9: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: sbfx x8, x8, #0, #1 +; CHECK-GI-NEXT: orr x8, x8, #0x1 +; CHECK-GI-NEXT: add x0, x8, x1 +; CHECK-GI-NEXT: ret entry: %tobool.not = icmp eq i64 %a, 0 %cond.v = select i1 %tobool.not, i64 -1, i64 1 %cond = add nsw i64 %cond.v, %b ret i64 %cond } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll index ff7872c..83530049a 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -87,46 +87,17 @@ entry: } define void @memset_10_zeroval_volatile(ptr %dst) { -; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile: -; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr -; GISel-WITHOUT-MOPS-O0-NEXT: bl memset -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISel-WITHOUT-MOPS-O0-NEXT: ret -; -; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile: -; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa -; GISel-WITHOUT-MOPS-O3-NEXT: bl memset -; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISel-WITHOUT-MOPS-O3-NEXT: ret -; -; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile: -; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 -; GISel-MOPS-O0-NEXT: mov x9, xzr -; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9 -; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9 -; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9 -; GISel-MOPS-O0-NEXT: ret +; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: +; GISel-WITHOUT-MOPS: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0] +; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; GISel-WITHOUT-MOPS-NEXT: ret ; -; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile: -; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr -; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr -; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr -; GISel-MOPS-O3-NEXT: ret +; GISel-MOPS-LABEL: memset_10_zeroval_volatile: +; GISel-MOPS: // %bb.0: // %entry +; GISel-MOPS-NEXT: str xzr, [x0] +; GISel-MOPS-NEXT: strh wzr, [x0, #8] +; GISel-MOPS-NEXT: ret ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry @@ -490,43 +461,46 @@ entry: define void @memset_10_volatile(ptr %dst, i32 %value) { ; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_volatile: ; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 -; GISel-WITHOUT-MOPS-O0-NEXT: bl memset -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: // implicit-def: $x8 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, w1 +; GISel-WITHOUT-MOPS-O0-NEXT: and x8, x8, #0xff +; GISel-WITHOUT-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101 +; GISel-WITHOUT-MOPS-O0-NEXT: mul x8, x8, x9 +; GISel-WITHOUT-MOPS-O0-NEXT: str x8, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8 +; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8] ; GISel-WITHOUT-MOPS-O0-NEXT: ret ; ; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_volatile: ; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa -; GISel-WITHOUT-MOPS-O3-NEXT: bl memset -; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 +; GISel-WITHOUT-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101 +; GISel-WITHOUT-MOPS-O3-NEXT: and x9, x1, #0xff +; GISel-WITHOUT-MOPS-O3-NEXT: mul x8, x9, x8 +; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: strh w8, [x0, #8] ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 -; GISel-MOPS-O0-NEXT: // implicit-def: $x9 -; GISel-MOPS-O0-NEXT: mov w9, w1 -; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9 -; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9 -; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9 +; GISel-MOPS-O0-NEXT: // implicit-def: $x8 +; GISel-MOPS-O0-NEXT: mov w8, w1 +; GISel-MOPS-O0-NEXT: and x8, x8, #0xff +; GISel-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101 +; GISel-MOPS-O0-NEXT: mul x8, x8, x9 +; GISel-MOPS-O0-NEXT: str x8, [x0] +; GISel-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8 +; GISel-MOPS-O0-NEXT: strh w8, [x0, #8] ; GISel-MOPS-O0-NEXT: ret ; ; GISel-MOPS-O3-LABEL: memset_10_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 -; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1 -; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 -; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1 +; GISel-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101 +; GISel-MOPS-O3-NEXT: and x9, x1, #0xff +; GISel-MOPS-O3-NEXT: mul x8, x9, x8 +; GISel-MOPS-O3-NEXT: str x8, [x0] +; GISel-MOPS-O3-NEXT: strh w8, [x0, #8] ; GISel-MOPS-O3-NEXT: ret ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile: @@ -905,43 +879,21 @@ entry: } define void @memcpy_10_volatile(ptr %dst, ptr %src, i32 %value) { -; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_10_volatile: -; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 -; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISel-WITHOUT-MOPS-O0-NEXT: ret -; -; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_10_volatile: -; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa -; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy -; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISel-WITHOUT-MOPS-O3-NEXT: ret -; -; GISel-MOPS-O0-LABEL: memcpy_10_volatile: -; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 -; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8! -; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8! -; GISel-MOPS-O0-NEXT: cpyfe [x0]!, [x1]!, x8! -; GISel-MOPS-O0-NEXT: ret +; GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile: +; GISel-WITHOUT-MOPS: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; GISel-WITHOUT-MOPS-NEXT: ret ; -; GISel-MOPS-O3-LABEL: memcpy_10_volatile: -; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8! -; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! -; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! -; GISel-MOPS-O3-NEXT: ret +; GISel-MOPS-LABEL: memcpy_10_volatile: +; GISel-MOPS: // %bb.0: // %entry +; GISel-MOPS-NEXT: ldr x8, [x1] +; GISel-MOPS-NEXT: str x8, [x0] +; GISel-MOPS-NEXT: ldrh w8, [x1, #8] +; GISel-MOPS-NEXT: strh w8, [x0, #8] +; GISel-MOPS-NEXT: ret ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry @@ -1736,40 +1688,34 @@ entry: define void @memmove_10_volatile(ptr %dst, ptr %src, i32 %value) { ; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10_volatile: ; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 -; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: ldr x9, [x1] +; GISel-WITHOUT-MOPS-O0-NEXT: ldrh w8, [x1, #8] +; GISel-WITHOUT-MOPS-O0-NEXT: str x9, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8] ; GISel-WITHOUT-MOPS-O0-NEXT: ret ; ; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10_volatile: ; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry -; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 -; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa -; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove -; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O3-NEXT: ldr x8, [x1] +; GISel-WITHOUT-MOPS-O3-NEXT: ldrh w9, [x1, #8] +; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: strh w9, [x0, #8] ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memmove_10_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 -; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8! -; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8! -; GISel-MOPS-O0-NEXT: cpye [x0]!, [x1]!, x8! +; GISel-MOPS-O0-NEXT: ldr x9, [x1] +; GISel-MOPS-O0-NEXT: ldrh w8, [x1, #8] +; GISel-MOPS-O0-NEXT: str x9, [x0] +; GISel-MOPS-O0-NEXT: strh w8, [x0, #8] ; GISel-MOPS-O0-NEXT: ret ; ; GISel-MOPS-O3-LABEL: memmove_10_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa -; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8! -; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! -; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! +; GISel-MOPS-O3-NEXT: ldr x8, [x1] +; GISel-MOPS-O3-NEXT: ldrh w9, [x1, #8] +; GISel-MOPS-O3-NEXT: str x8, [x0] +; GISel-MOPS-O3-NEXT: strh w9, [x0, #8] ; GISel-MOPS-O3-NEXT: ret ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile: diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll index e31c9a0..113eb14 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -263,3 +263,110 @@ entry: %conv = zext i1 %cmp to i8 ret i8 %conv } + +; Test ANDS. +define i32 @test1_ands(i32 %a) { +; CHECK-LABEL: test1_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w0, #0x3ffc00 +; CHECK-NEXT: ands w8, w8, #0xffe007ff +; CHECK-NEXT: csel w0, w0, w8, eq +; CHECK-NEXT: ret +entry: + %ands = and i32 %a, 2098176 + %c = icmp eq i32 %ands, 0 + %r = select i1 %c, i32 %a, i32 %ands + ret i32 %r +} + +; This constant should not be split because it can be handled by one mov. +define i32 @test2_ands(i32 %a) { +; CHECK-LABEL: test2_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #135 // =0x87 +; CHECK-NEXT: ands w8, w0, w8 +; CHECK-NEXT: csel w0, w0, w8, eq +; CHECK-NEXT: ret +entry: + %ands = and i32 %a, 135 + %c = icmp eq i32 %ands, 0 + %r = select i1 %c, i32 %a, i32 %ands + ret i32 %r +} + +; This constant should not be split because the split immediate is not valid +; bitmask immediate. +define i32 @test3_ands(i32 %a) { +; CHECK-LABEL: test3_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #1024 // =0x400 +; CHECK-NEXT: movk w8, #33, lsl #16 +; CHECK-NEXT: ands w8, w0, w8 +; CHECK-NEXT: csel w0, w0, w8, eq +; CHECK-NEXT: ret +entry: + %ands = and i32 %a, 2163712 + %c = icmp eq i32 %ands, 0 + %r = select i1 %c, i32 %a, i32 %ands + ret i32 %r +} + +define i64 @test4_ands(i64 %a) { +; CHECK-LABEL: test4_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and x8, x0, #0x3ffc00 +; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff +; CHECK-NEXT: csel x0, x0, x8, eq +; CHECK-NEXT: ret +entry: + %ands = and i64 %a, 2098176 + %c = icmp eq i64 %ands, 0 + %r = select i1 %c, i64 %a, i64 %ands + ret i64 %r +} + +define i64 @test5_ands(i64 %a) { +; CHECK-LABEL: test5_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and x8, x0, #0x3ffffc000 +; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff +; CHECK-NEXT: csel x0, x0, x8, eq +; CHECK-NEXT: ret +entry: + %ands = and i64 %a, 8589950976 + %c = icmp eq i64 %ands, 0 + %r = select i1 %c, i64 %a, i64 %ands + ret i64 %r +} + +; This constant should not be split because it can be handled by one mov. +define i64 @test6_ands(i64 %a) { +; CHECK-LABEL: test6_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #135 // =0x87 +; CHECK-NEXT: ands x8, x0, x8 +; CHECK-NEXT: csel x0, x0, x8, eq +; CHECK-NEXT: ret +entry: + %ands = and i64 %a, 135 + %c = icmp eq i64 %ands, 0 + %r = select i1 %c, i64 %a, i64 %ands + ret i64 %r +} + +; This constant should not be split because the split immediate is not valid +; bitmask immediate. +define i64 @test7_ands(i64 %a) { +; CHECK-LABEL: test7_ands: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #1024 // =0x400 +; CHECK-NEXT: movk w8, #33, lsl #16 +; CHECK-NEXT: ands x8, x0, x8 +; CHECK-NEXT: csel x0, x0, x8, eq +; CHECK-NEXT: ret +entry: + %ands = and i64 %a, 2163712 + %c = icmp eq i64 %ands, 0 + %r = select i1 %c, i64 %a, i64 %ands + ret i64 %r +} diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index ac7cb1f..7524782 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w9, w1, w8 -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w8 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sub w9, w8, w0 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w8, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x8, x9, x8, gt -; CHECK-NEXT: neg x0, x8 +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x8, x9, x8, gt -; CHECK-NEXT: neg x0, x8 +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -200,8 +188,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x8, x8, x10, lt ; CHECK-NEXT: csel x9, x9, x11, lt ; CHECK-NEXT: negs x0, x8 @@ -222,8 +209,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x8, x8, x10, lt ; CHECK-NEXT: csel x9, x9, x11, lt ; CHECK-NEXT: negs x0, x8 @@ -361,9 +347,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w8, w9, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %cmp = icmp sge i32 %a, %b %ab = sub i32 %a, %b @@ -375,9 +360,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, lt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -389,14 +373,12 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbc x8, x1, x3 -; CHECK-NEXT: subs x9, x2, x0 -; CHECK-NEXT: sbc x10, x3, x1 -; CHECK-NEXT: subs x11, x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x0, x11, x9, lt -; CHECK-NEXT: csel x1, x8, x10, lt +; CHECK-NEXT: subs x8, x2, x0 +; CHECK-NEXT: sbc x9, x3, x1 +; CHECK-NEXT: subs x10, x0, x2 +; CHECK-NEXT: sbcs x11, x1, x3 +; CHECK-NEXT: csel x0, x10, x8, lt +; CHECK-NEXT: csel x1, x11, x9, lt ; CHECK-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index 62db30f..bbdb116 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w9, w1, w8 ; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: csel w0, w8, w9, gt +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sub w9, w8, w0 ; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: csel w0, w8, w9, gt +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, le ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, le ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -183,8 +177,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x0, x8, x10, lt ; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret @@ -202,8 +195,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x0, x8, x10, lt ; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret @@ -250,9 +242,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -263,9 +254,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, le ; CHECK-NEXT: ret %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) @@ -279,8 +269,7 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x0, x8, x10, lt ; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret @@ -327,9 +316,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b @@ -341,9 +329,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, le ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = sub i64 %a, %b @@ -358,8 +345,7 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x0, x8, x10, lt ; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret @@ -576,9 +562,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, le ; CHECK-NEXT: ret %cmp = icmp sgt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -590,9 +575,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, le ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b @@ -607,8 +591,7 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbc x9, x1, x3 ; CHECK-NEXT: subs x10, x2, x0 -; CHECK-NEXT: sbc x11, x3, x1 -; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: sbcs x11, x3, x1 ; CHECK-NEXT: csel x0, x8, x10, lt ; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 2118816..d07f099a 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w9, w1, w8 -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: csel w8, w8, w9, hi -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w8 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: sub w9, w8, w0 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: csel w8, w8, w9, hi -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w8, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x8, x9, x8, hi -; CHECK-NEXT: neg x0, x8 +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x8, x9, x8, hi -; CHECK-NEXT: neg x0, x8 +; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -363,9 +351,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w8, w9, hs +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %cmp = icmp uge i32 %a, %b %ab = sub i32 %a, %b @@ -377,9 +364,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, lo +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %cmp = icmp ult i64 %a, %b %ab = sub i64 %a, %b @@ -391,14 +377,12 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbc x8, x1, x3 -; CHECK-NEXT: subs x9, x2, x0 -; CHECK-NEXT: sbc x10, x3, x1 -; CHECK-NEXT: subs x11, x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x0, x11, x9, lo -; CHECK-NEXT: csel x1, x8, x10, lo +; CHECK-NEXT: subs x8, x2, x0 +; CHECK-NEXT: sbc x9, x3, x1 +; CHECK-NEXT: subs x10, x0, x2 +; CHECK-NEXT: sbcs x11, x1, x3 +; CHECK-NEXT: csel x0, x10, x8, lo +; CHECK-NEXT: csel x1, x11, x9, lo ; CHECK-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 4585de9..1045ee2 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w9, w1, w8 ; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: csel w0, w8, w9, hi +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: sub w9, w8, w0 ; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: csel w0, w8, w9, hi +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, hi +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ls ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, hi +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ls ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -252,9 +246,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -265,9 +258,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, hi +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ls ; CHECK-NEXT: ret %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) @@ -330,9 +322,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %cmp = icmp ult i32 %a, %b %ab = sub i32 %a, %b @@ -344,9 +335,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, hi +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ls ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = sub i64 %a, %b @@ -437,9 +427,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: subs w9, w0, w1 -; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ls ; CHECK-NEXT: ret %cmp = icmp ugt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -451,9 +440,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x1, x0 -; CHECK-NEXT: subs x9, x0, x1 -; CHECK-NEXT: csel x0, x9, x8, hi +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ls ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AArch64/add-extract.ll b/llvm/test/CodeGen/AArch64/add-extract.ll index 67c9f74..923bf08 100644 --- a/llvm/test/CodeGen/AArch64/add-extract.ll +++ b/llvm/test/CodeGen/AArch64/add-extract.ll @@ -1,13 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind { -; CHECK-LABEL: add_i64_ext_load: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_i64_ext_load: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: add d0, d0, d1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_i64_ext_load: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: ldr x8, [x0] +; CHECK-GI-NEXT: add x0, x9, x8 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = load i64, ptr %B %c = add i64 %a, %b @@ -15,12 +23,19 @@ define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind { } define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind { -; CHECK-LABEL: sub_i64_ext_load: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sub d0, d0, d1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sub_i64_ext_load: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: sub d0, d0, d1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sub_i64_ext_load: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: ldr x8, [x0] +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = load i64, ptr %B %c = sub i64 %a, %b @@ -28,12 +43,20 @@ define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind { } define void @add_i64_ext_load_store(<1 x i64> %A, ptr %B) nounwind { -; CHECK-LABEL: add_i64_ext_load_store: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_i64_ext_load_store: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: add d0, d0, d1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_i64_ext_load_store: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: ldr x8, [x0] +; CHECK-GI-NEXT: add x8, x9, x8 +; CHECK-GI-NEXT: str x8, [x0] +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = load i64, ptr %B %c = add i64 %a, %b @@ -55,11 +78,18 @@ define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind { } define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind { -; CHECK-LABEL: add_i64_ext_ext: -; CHECK: // %bb.0: -; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_i64_ext_ext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add d0, d0, d1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_i64_ext_ext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: add x0, x8, x9 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = extractelement <1 x i64> %B, i32 0 %c = add i64 %a, %b @@ -67,13 +97,20 @@ define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind { } define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind { -; CHECK-LABEL: add_i32_ext_load: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: add w0, w9, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_i32_ext_load: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: ldr w8, [x0] +; CHECK-SD-NEXT: add w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_i32_ext_load: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: add w0, w9, w8 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i32> %A, i32 0 %b = load i32, ptr %B %c = add i32 %a, %b @@ -81,13 +118,22 @@ define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind { } define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind { -; CHECK-LABEL: add_i64_ext_ext_test1: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: add d0, d0, d2 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_i64_ext_ext_test1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: add d0, d0, d1 +; CHECK-SD-NEXT: add d0, d0, d2 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_i64_ext_ext_test1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, v1.d[1] +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: fmov x10, d1 +; CHECK-GI-NEXT: add x9, x9, x10 +; CHECK-GI-NEXT: add x0, x9, x8 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = extractelement <2 x i64> %B, i32 0 %c = extractelement <2 x i64> %B, i32 1 @@ -97,13 +143,22 @@ define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind { } define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind { -; CHECK-LABEL: sub_i64_ext_ext_test1: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: sub d0, d0, d1 -; CHECK-NEXT: sub d0, d0, d2 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sub_i64_ext_ext_test1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: sub d0, d0, d1 +; CHECK-SD-NEXT: sub d0, d0, d2 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sub_i64_ext_ext_test1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, v1.d[1] +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: fmov x10, d1 +; CHECK-GI-NEXT: sub x9, x9, x10 +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret %a = extractelement <1 x i64> %A, i32 0 %b = extractelement <2 x i64> %B, i32 0 %c = extractelement <2 x i64> %B, i32 1 diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll index 3a4955c..bb0d38a 100644 --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -1,50 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu -verify-machineinstrs | FileCheck %s - -; Note that this should be refactored (for efficiency if nothing else) -; when the PCS is implemented so we don't have to worry about the -; loads and stores. - -@var_i32 = global i32 42 -@var2_i32 = global i32 43 -@var_i64 = global i64 0 +; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Add pure 12-bit immediates: -define void @add_small() { -; CHECK-LABEL: add_small: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var_i32 -; CHECK-NEXT: adrp x9, :got:var_i64 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] -; CHECK-NEXT: ldr x9, [x9, :got_lo12:var_i64] -; CHECK-NEXT: ldr w10, [x8] -; CHECK-NEXT: ldr x11, [x9] -; CHECK-NEXT: add w10, w10, #4095 -; CHECK-NEXT: add x11, x11, #52 -; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: str x11, [x9] -; CHECK-NEXT: ret - - %val32 = load i32, ptr @var_i32 +define i32 @add_small_i32(i32 %val32) { +; CHECK-LABEL: add_small_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w0, w0, #4095 +; CHECK-NEXT: ret %newval32 = add i32 %val32, 4095 - store i32 %newval32, ptr @var_i32 + ret i32 %newval32 +} - %val64 = load i64, ptr @var_i64 +define i64 @add_small_i64(i64 %val64) { +; CHECK-LABEL: add_small_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add x0, x0, #52 +; CHECK-NEXT: ret %newval64 = add i64 %val64, 52 - store i64 %newval64, ptr @var_i64 - - ret void + ret i64 %newval64 } -; Make sure we grab the imm variant when the register operand -; can be implicitly zero-extend. -; We used to generate something horrible like this: -; wA = ldrb -; xB = ldimm 12 -; xC = add xB, wA, uxtb -; whereas this can be achieved with: -; wA = ldrb -; xC = add xA, #12 ; <- xA implicitly zero extend wA. define void @add_small_imm(ptr %p, ptr %q, i32 %b, ptr %addr) { ; CHECK-LABEL: add_small_imm: ; CHECK: // %bb.0: // %entry @@ -55,98 +31,71 @@ define void @add_small_imm(ptr %p, ptr %q, i32 %b, ptr %addr) { ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret entry: - %t = load i8, ptr %p %promoted = zext i8 %t to i64 %zextt = zext i8 %t to i32 %add = add nuw i32 %zextt, %b - %add2 = add nuw i64 %promoted, 12 store i32 %add, ptr %addr - store i64 %add2, ptr %q ret void } ; Add 12-bit immediates, shifted left by 12 bits -define void @add_med() { -; CHECK-LABEL: add_med: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var_i32 -; CHECK-NEXT: adrp x9, :got:var_i64 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] -; CHECK-NEXT: ldr x9, [x9, :got_lo12:var_i64] -; CHECK-NEXT: ldr w10, [x8] -; CHECK-NEXT: ldr x11, [x9] -; CHECK-NEXT: add w10, w10, #3567, lsl #12 // =14610432 -; CHECK-NEXT: add x11, x11, #4095, lsl #12 // =16773120 -; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: str x11, [x9] -; CHECK-NEXT: ret - - %val32 = load i32, ptr @var_i32 +define i32 @add_med_i32(i32 %val32) { +; CHECK-LABEL: add_med_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w0, w0, #3567, lsl #12 // =14610432 +; CHECK-NEXT: ret %newval32 = add i32 %val32, 14610432 ; =0xdef000 - store i32 %newval32, ptr @var_i32 + ret i32 %newval32 +} - %val64 = load i64, ptr @var_i64 +define i64 @add_med_i64(i64 %val64) { +; CHECK-LABEL: add_med_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add x0, x0, #4095, lsl #12 // =16773120 +; CHECK-NEXT: ret %newval64 = add i64 %val64, 16773120 ; =0xfff000 - store i64 %newval64, ptr @var_i64 - - ret void + ret i64 %newval64 } ; Subtract 12-bit immediates -define void @sub_small() { -; CHECK-LABEL: sub_small: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var_i32 -; CHECK-NEXT: adrp x9, :got:var_i64 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] -; CHECK-NEXT: ldr x9, [x9, :got_lo12:var_i64] -; CHECK-NEXT: ldr w10, [x8] -; CHECK-NEXT: ldr x11, [x9] -; CHECK-NEXT: sub w10, w10, #4095 -; CHECK-NEXT: sub x11, x11, #52 -; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: str x11, [x9] -; CHECK-NEXT: ret - - %val32 = load i32, ptr @var_i32 +define i32 @sub_small_i32(i32 %val32) { +; CHECK-LABEL: sub_small_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w0, w0, #4095 +; CHECK-NEXT: ret %newval32 = sub i32 %val32, 4095 - store i32 %newval32, ptr @var_i32 + ret i32 %newval32 +} - %val64 = load i64, ptr @var_i64 +define i64 @sub_small_i64(i64 %val64) { +; CHECK-LABEL: sub_small_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x0, x0, #52 +; CHECK-NEXT: ret %newval64 = sub i64 %val64, 52 - store i64 %newval64, ptr @var_i64 - - ret void + ret i64 %newval64 } ; Subtract 12-bit immediates, shifted left by 12 bits -define void @sub_med() { -; CHECK-LABEL: sub_med: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var_i32 -; CHECK-NEXT: adrp x9, :got:var_i64 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] -; CHECK-NEXT: ldr x9, [x9, :got_lo12:var_i64] -; CHECK-NEXT: ldr w10, [x8] -; CHECK-NEXT: ldr x11, [x9] -; CHECK-NEXT: sub w10, w10, #3567, lsl #12 // =14610432 -; CHECK-NEXT: sub x11, x11, #4095, lsl #12 // =16773120 -; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: str x11, [x9] -; CHECK-NEXT: ret - - %val32 = load i32, ptr @var_i32 +define i32 @sub_med_i32(i32 %val32) { +; CHECK-LABEL: sub_med_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w0, w0, #3567, lsl #12 // =14610432 +; CHECK-NEXT: ret %newval32 = sub i32 %val32, 14610432 ; =0xdef000 - store i32 %newval32, ptr @var_i32 + ret i32 %newval32 +} - %val64 = load i64, ptr @var_i64 +define i64 @sub_med_i64(i64 %val64) { +; CHECK-LABEL: sub_med_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x0, x0, #4095, lsl #12 // =16773120 +; CHECK-NEXT: ret %newval64 = sub i64 %val64, 16773120 ; =0xfff000 - store i64 %newval64, ptr @var_i64 - - ret void + ret i64 %newval64 } define i64 @add_two_parts_imm_i64(i64 %a) { @@ -261,10 +210,10 @@ define void @add_in_loop(i32 %0) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w19, #43690 // =0xaaaa ; CHECK-NEXT: movk w19, #170, lsl #16 -; CHECK-NEXT: .LBB15_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB19_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add w0, w0, w19 ; CHECK-NEXT: bl foox -; CHECK-NEXT: b .LBB15_1 +; CHECK-NEXT: b .LBB19_1 br label %2 2: %3 = phi i32 [ %0, %1 ], [ %5, %2 ] @@ -273,75 +222,103 @@ define void @add_in_loop(i32 %0) { br label %2 } -define void @testing() { -; CHECK-LABEL: testing: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var_i32 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] -; CHECK-NEXT: ldr w9, [x8] -; CHECK-NEXT: cmp w9, #4095 -; CHECK-NEXT: b.ne .LBB16_6 -; CHECK-NEXT: // %bb.1: // %test2 -; CHECK-NEXT: adrp x10, :got:var2_i32 -; CHECK-NEXT: add w11, w9, #1 -; CHECK-NEXT: ldr x10, [x10, :got_lo12:var2_i32] -; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: ldr w10, [x10] -; CHECK-NEXT: cmp w10, #3567, lsl #12 // =14610432 -; CHECK-NEXT: b.lo .LBB16_6 -; CHECK-NEXT: // %bb.2: // %test3 -; CHECK-NEXT: add w11, w9, #2 -; CHECK-NEXT: cmp w9, #123 -; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.lt .LBB16_6 -; CHECK-NEXT: // %bb.3: // %test4 -; CHECK-NEXT: add w11, w9, #3 -; CHECK-NEXT: cmp w10, #321 -; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.gt .LBB16_6 -; CHECK-NEXT: // %bb.4: // %test5 -; CHECK-NEXT: add w11, w9, #4 -; CHECK-NEXT: cmn w10, #443 -; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.ge .LBB16_6 -; CHECK-NEXT: // %bb.5: // %test6 -; CHECK-NEXT: add w9, w9, #5 -; CHECK-NEXT: str w9, [x8] -; CHECK-NEXT: .LBB16_6: // %common.ret -; CHECK-NEXT: ret - %val = load i32, ptr @var_i32 - %val2 = load i32, ptr @var2_i32 +define void @testing(ptr %var_i32, ptr %var2_i32) { +; CHECK-SD-LABEL: testing: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr w8, [x0] +; CHECK-SD-NEXT: cmp w8, #4095 +; CHECK-SD-NEXT: b.ne .LBB20_6 +; CHECK-SD-NEXT: // %bb.1: // %test2 +; CHECK-SD-NEXT: ldr w9, [x1] +; CHECK-SD-NEXT: add w10, w8, #1 +; CHECK-SD-NEXT: str w10, [x0] +; CHECK-SD-NEXT: cmp w9, #3567, lsl #12 // =14610432 +; CHECK-SD-NEXT: b.lo .LBB20_6 +; CHECK-SD-NEXT: // %bb.2: // %test3 +; CHECK-SD-NEXT: add w10, w8, #2 +; CHECK-SD-NEXT: cmp w8, #123 +; CHECK-SD-NEXT: str w10, [x0] +; CHECK-SD-NEXT: b.lt .LBB20_6 +; CHECK-SD-NEXT: // %bb.3: // %test4 +; CHECK-SD-NEXT: add w10, w8, #3 +; CHECK-SD-NEXT: cmp w9, #321 +; CHECK-SD-NEXT: str w10, [x0] +; CHECK-SD-NEXT: b.gt .LBB20_6 +; CHECK-SD-NEXT: // %bb.4: // %test5 +; CHECK-SD-NEXT: add w10, w8, #4 +; CHECK-SD-NEXT: cmn w9, #443 +; CHECK-SD-NEXT: str w10, [x0] +; CHECK-SD-NEXT: b.ge .LBB20_6 +; CHECK-SD-NEXT: // %bb.5: // %test6 +; CHECK-SD-NEXT: add w8, w8, #5 +; CHECK-SD-NEXT: str w8, [x0] +; CHECK-SD-NEXT: .LBB20_6: // %common.ret +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testing: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: cmp w8, #4095 +; CHECK-GI-NEXT: b.ne .LBB20_6 +; CHECK-GI-NEXT: // %bb.1: // %test2 +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: add w10, w8, #1 +; CHECK-GI-NEXT: str w10, [x0] +; CHECK-GI-NEXT: cmp w9, #3567, lsl #12 // =14610432 +; CHECK-GI-NEXT: b.lo .LBB20_6 +; CHECK-GI-NEXT: // %bb.2: // %test3 +; CHECK-GI-NEXT: add w10, w8, #2 +; CHECK-GI-NEXT: cmp w8, #123 +; CHECK-GI-NEXT: str w10, [x0] +; CHECK-GI-NEXT: b.lt .LBB20_6 +; CHECK-GI-NEXT: // %bb.3: // %test4 +; CHECK-GI-NEXT: add w10, w8, #3 +; CHECK-GI-NEXT: cmp w9, #321 +; CHECK-GI-NEXT: str w10, [x0] +; CHECK-GI-NEXT: b.gt .LBB20_6 +; CHECK-GI-NEXT: // %bb.4: // %test5 +; CHECK-GI-NEXT: add w10, w8, #4 +; CHECK-GI-NEXT: cmn w9, #444 +; CHECK-GI-NEXT: str w10, [x0] +; CHECK-GI-NEXT: b.gt .LBB20_6 +; CHECK-GI-NEXT: // %bb.5: // %test6 +; CHECK-GI-NEXT: add w8, w8, #5 +; CHECK-GI-NEXT: str w8, [x0] +; CHECK-GI-NEXT: .LBB20_6: // %common.ret +; CHECK-GI-NEXT: ret + %val = load i32, ptr %var_i32 + %val2 = load i32, ptr %var2_i32 %cmp_pos_small = icmp ne i32 %val, 4095 br i1 %cmp_pos_small, label %ret, label %test2 test2: %newval2 = add i32 %val, 1 - store i32 %newval2, ptr @var_i32 + store i32 %newval2, ptr %var_i32 %cmp_pos_big = icmp ult i32 %val2, 14610432 br i1 %cmp_pos_big, label %ret, label %test3 test3: %newval3 = add i32 %val, 2 - store i32 %newval3, ptr @var_i32 + store i32 %newval3, ptr %var_i32 %cmp_pos_slt = icmp slt i32 %val, 123 br i1 %cmp_pos_slt, label %ret, label %test4 test4: %newval4 = add i32 %val, 3 - store i32 %newval4, ptr @var_i32 + store i32 %newval4, ptr %var_i32 %cmp_pos_sgt = icmp sgt i32 %val2, 321 br i1 %cmp_pos_sgt, label %ret, label %test5 test5: %newval5 = add i32 %val, 4 - store i32 %newval5, ptr @var_i32 + store i32 %newval5, ptr %var_i32 %cmp_neg_uge = icmp sgt i32 %val2, -444 br i1 %cmp_neg_uge, label %ret, label %test6 test6: %newval6 = add i32 %val, 5 - store i32 %newval6, ptr @var_i32 + store i32 %newval6, ptr %var_i32 ret void ret: @@ -371,15 +348,26 @@ define i1 @sadd_add(i32 %a, i32 %b, ptr %p) { declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) define i1 @uadd_add(i8 %a, i8 %b, ptr %p) { -; CHECK-LABEL: uadd_add: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 // =0xff -; CHECK-NEXT: bic w8, w8, w0 -; CHECK-NEXT: add w8, w8, w1, uxtb -; CHECK-NEXT: lsr w0, w8, #8 -; CHECK-NEXT: add w8, w8, #1 -; CHECK-NEXT: strb w8, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uadd_add: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: bic w8, w8, w0 +; CHECK-SD-NEXT: add w8, w8, w1, uxtb +; CHECK-SD-NEXT: lsr w0, w8, #8 +; CHECK-SD-NEXT: add w8, w8, #1 +; CHECK-SD-NEXT: strb w8, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uadd_add: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvn w8, w0 +; CHECK-GI-NEXT: and w9, w1, #0xff +; CHECK-GI-NEXT: add w8, w9, w8, uxtb +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: add w8, w8, #1 +; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: strb w8, [x2] +; CHECK-GI-NEXT: ret %nota = xor i8 %a, -1 %a0 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %nota, i8 %b) %e0 = extractvalue {i8, i1} %a0, 0 @@ -521,29 +509,48 @@ define i1 @reject_non_eqne_csinc(i32 %0) { } define i32 @accept_csel(i32 %0) { -; CHECK-LABEL: accept_csel: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w9, w0, #273, lsl #12 // =1118208 -; CHECK-NEXT: mov w8, #17 // =0x11 -; CHECK-NEXT: cmp w9, #273 -; CHECK-NEXT: mov w9, #11 // =0xb -; CHECK-NEXT: csel w0, w9, w8, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: accept_csel: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w9, w0, #273, lsl #12 // =1118208 +; CHECK-SD-NEXT: mov w8, #17 // =0x11 +; CHECK-SD-NEXT: cmp w9, #273 +; CHECK-SD-NEXT: mov w9, #11 // =0xb +; CHECK-SD-NEXT: csel w0, w9, w8, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: accept_csel: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub w8, w0, #273, lsl #12 // =1118208 +; CHECK-GI-NEXT: mov w9, #17 // =0x11 +; CHECK-GI-NEXT: mov w10, #11 // =0xb +; CHECK-GI-NEXT: cmp w8, #273 +; CHECK-GI-NEXT: csel w0, w10, w9, eq +; CHECK-GI-NEXT: ret %2 = icmp eq i32 %0, 1118481 %3 = select i1 %2, i32 11, i32 17 ret i32 %3 } define i32 @reject_non_eqne_csel(i32 %0) { -; CHECK-LABEL: reject_non_eqne_csel: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4369 // =0x1111 -; CHECK-NEXT: mov w9, #11 // =0xb -; CHECK-NEXT: movk w8, #17, lsl #16 -; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: mov w8, #17 // =0x11 -; CHECK-NEXT: csel w0, w9, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: reject_non_eqne_csel: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4369 // =0x1111 +; CHECK-SD-NEXT: mov w9, #11 // =0xb +; CHECK-SD-NEXT: movk w8, #17, lsl #16 +; CHECK-SD-NEXT: cmp w0, w8 +; CHECK-SD-NEXT: mov w8, #17 // =0x11 +; CHECK-SD-NEXT: csel w0, w9, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: reject_non_eqne_csel: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #4369 // =0x1111 +; CHECK-GI-NEXT: mov w9, #17 // =0x11 +; CHECK-GI-NEXT: mov w10, #11 // =0xb +; CHECK-GI-NEXT: movk w8, #17, lsl #16 +; CHECK-GI-NEXT: cmp w0, w8 +; CHECK-GI-NEXT: csel w0, w10, w9, lo +; CHECK-GI-NEXT: ret %2 = icmp ult i32 %0, 1118481 %3 = select i1 %2, i32 11, i32 17 ret i32 %3 @@ -556,10 +563,10 @@ define void @accept_branch(i32 %0) { ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w0, #291, lsl #12 // =1191936 ; CHECK-NEXT: cmp w8, #1110 -; CHECK-NEXT: b.eq .LBB32_2 +; CHECK-NEXT: b.eq .LBB36_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB36_2: ; CHECK-NEXT: b fooy %2 = icmp ne i32 %0, 1193046 br i1 %2, label %4, label %3 @@ -576,10 +583,10 @@ define void @reject_non_eqne_branch(i32 %0) { ; CHECK-NEXT: mov w8, #13398 // =0x3456 ; CHECK-NEXT: movk w8, #18, lsl #16 ; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: b.le .LBB33_2 +; CHECK-NEXT: b.le .LBB37_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB37_2: ; CHECK-NEXT: b fooy %2 = icmp sgt i32 %0, 1193046 br i1 %2, label %4, label %3 @@ -591,25 +598,45 @@ define void @reject_non_eqne_branch(i32 %0) { } define i32 @reject_multiple_usages(i32 %0) { -; CHECK-LABEL: reject_multiple_usages: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4369 // =0x1111 -; CHECK-NEXT: mov w9, #3 // =0x3 -; CHECK-NEXT: mov w10, #17 // =0x11 -; CHECK-NEXT: movk w8, #17, lsl #16 -; CHECK-NEXT: mov w11, #12 // =0xc -; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: mov w8, #9 // =0x9 -; CHECK-NEXT: csel w8, w8, w9, eq -; CHECK-NEXT: csel w9, w11, w10, hi -; CHECK-NEXT: mov w10, #53312 // =0xd040 -; CHECK-NEXT: movk w10, #2, lsl #16 -; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: mov w9, #26304 // =0x66c0 -; CHECK-NEXT: cmp w0, w10 -; CHECK-NEXT: movk w9, #1433, lsl #16 -; CHECK-NEXT: csel w0, w8, w9, hi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: reject_multiple_usages: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4369 // =0x1111 +; CHECK-SD-NEXT: mov w9, #3 // =0x3 +; CHECK-SD-NEXT: mov w10, #17 // =0x11 +; CHECK-SD-NEXT: movk w8, #17, lsl #16 +; CHECK-SD-NEXT: mov w11, #12 // =0xc +; CHECK-SD-NEXT: cmp w0, w8 +; CHECK-SD-NEXT: mov w8, #9 // =0x9 +; CHECK-SD-NEXT: csel w8, w8, w9, eq +; CHECK-SD-NEXT: csel w9, w11, w10, hi +; CHECK-SD-NEXT: mov w10, #53312 // =0xd040 +; CHECK-SD-NEXT: movk w10, #2, lsl #16 +; CHECK-SD-NEXT: add w8, w8, w9 +; CHECK-SD-NEXT: mov w9, #26304 // =0x66c0 +; CHECK-SD-NEXT: cmp w0, w10 +; CHECK-SD-NEXT: movk w9, #1433, lsl #16 +; CHECK-SD-NEXT: csel w0, w8, w9, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: reject_multiple_usages: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #4369 // =0x1111 +; CHECK-GI-NEXT: mov w9, #3 // =0x3 +; CHECK-GI-NEXT: mov w10, #9 // =0x9 +; CHECK-GI-NEXT: movk w8, #17, lsl #16 +; CHECK-GI-NEXT: mov w11, #12 // =0xc +; CHECK-GI-NEXT: cmp w0, w8 +; CHECK-GI-NEXT: mov w8, #17 // =0x11 +; CHECK-GI-NEXT: csel w9, w10, w9, eq +; CHECK-GI-NEXT: csel w8, w11, w8, hi +; CHECK-GI-NEXT: mov w10, #53312 // =0xd040 +; CHECK-GI-NEXT: movk w10, #2, lsl #16 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: mov w9, #26304 // =0x66c0 +; CHECK-GI-NEXT: movk w9, #1433, lsl #16 +; CHECK-GI-NEXT: cmp w0, w10 +; CHECK-GI-NEXT: csel w0, w8, w9, hi +; CHECK-GI-NEXT: ret %2 = icmp eq i32 %0, 1118481 %3 = icmp ugt i32 %0, 1118481 %4 = select i1 %2, i32 9, i32 3 @@ -629,12 +656,12 @@ define dso_local i32 @neigh_periodic_work_tbl_1() { ; CHECK-NEXT: add x8, x8, :lo12:neigh_periodic_work_tbl_1 ; CHECK-NEXT: add x8, x8, #18, lsl #12 // =73728 ; CHECK-NEXT: cmn x8, #1272 -; CHECK-NEXT: b.mi .LBB35_2 +; CHECK-NEXT: b.mi .LBB39_2 ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB35_2: // %for.cond +; CHECK-NEXT: .LBB39_2: // %for.cond ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: b .LBB35_2 +; CHECK-NEXT: b .LBB39_2 entry: %cmp = icmp slt i64 add (i64 ptrtoint (ptr @neigh_periodic_work_tbl_1 to i64), i64 75000), 0 br i1 %cmp, label %for.cond, label %if.end @@ -654,15 +681,15 @@ define dso_local i32 @_extract_crng_crng() { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, _extract_crng_crng ; CHECK-NEXT: add x8, x8, :lo12:_extract_crng_crng -; CHECK-NEXT: tbnz x8, #63, .LBB36_2 +; CHECK-NEXT: tbnz x8, #63, .LBB40_2 ; CHECK-NEXT: // %bb.1: // %lor.lhs.false ; CHECK-NEXT: adrp x9, jiffies ; CHECK-NEXT: ldrsw x9, [x9, :lo12:jiffies] ; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: add x8, x8, #18, lsl #12 // =73728 ; CHECK-NEXT: cmn x8, #1272 -; CHECK-NEXT: b.pl .LBB36_3 -; CHECK-NEXT: .LBB36_2: // %if.then +; CHECK-NEXT: b.pl .LBB40_3 +; CHECK-NEXT: .LBB40_2: // %if.then ; CHECK-NEXT: adrp x8, primary_crng ; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng] ; CHECK-NEXT: cmp w8, #0 @@ -670,7 +697,7 @@ define dso_local i32 @_extract_crng_crng() { ; CHECK-NEXT: add x8, x8, :lo12:input_pool ; CHECK-NEXT: csel x0, xzr, x8, eq ; CHECK-NEXT: b crng_reseed -; CHECK-NEXT: .LBB36_3: // %if.end +; CHECK-NEXT: .LBB40_3: // %if.end ; CHECK-NEXT: ret entry: %cmp2 = icmp slt ptr @_extract_crng_crng, null @@ -694,11 +721,18 @@ if.end: ; preds = %if.then, %lor.lhs.f ; ((X << C) - Y) + Z --> (Z - Y) + (X << C) define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, lsl #3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, lsl #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsl w8, w0, #3 +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w8, w2 +; CHECK-GI-NEXT: ret %shl = shl i32 %x, 3 %sub = sub i32 %shl, %y %add = add i32 %sub, %z @@ -707,11 +741,18 @@ define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) { ; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C) define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_lshr: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, lsr #3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_lshr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, lsr #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_lshr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsr w8, w0, #3 +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w8, w2 +; CHECK-GI-NEXT: ret %lshr = lshr i32 %x, 3 %sub = sub i32 %lshr, %y %add = add i32 %sub, %z @@ -720,11 +761,18 @@ define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) { ; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C) define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_ashr: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, asr #3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_ashr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, asr #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_ashr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: asr w8, w0, #3 +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w8, w2 +; CHECK-GI-NEXT: ret %ashr = ashr i32 %x, 3 %sub = sub i32 %ashr, %y %add = add i32 %sub, %z @@ -733,11 +781,19 @@ define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) { ; ((sext X) - Y) + Z --> (Z - Y) + (sext X) define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) { -; CHECK-LABEL: commute_subop0_sext: -; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: add x0, x8, w0, sxtw -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_sext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub x8, x2, x1 +; CHECK-SD-NEXT: add x0, x8, w0, sxtw +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_sext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-GI-NEXT: sxtw x8, w0 +; CHECK-GI-NEXT: sub x8, x8, x1 +; CHECK-GI-NEXT: add x0, x8, x2 +; CHECK-GI-NEXT: ret %sext = sext i32 %x to i64 %sub = sub i64 %sext, %y %add = add i64 %sub, %z @@ -746,11 +802,18 @@ define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) { ; ((sext_inreg X) - Y) + Z --> (Z - Y) + (sext_inreg X) define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) { -; CHECK-LABEL: commute_subop0_sext_inreg: -; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: add x0, x8, w0, sxth -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_sext_inreg: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub x8, x2, x1 +; CHECK-SD-NEXT: add x0, x8, w0, sxth +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_sext_inreg: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxth x8, w0 +; CHECK-GI-NEXT: sub x8, x8, x1 +; CHECK-GI-NEXT: add x0, x8, x2 +; CHECK-GI-NEXT: ret %shl = shl i64 %x, 48 %ashr = ashr i64 %shl, 48 %sub = sub i64 %ashr, %y @@ -760,11 +823,18 @@ define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) { ; ((zext X) - Y) + Z --> (Z - Y) + (zext X) define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_zext: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, uxth -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_zext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, uxth +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_zext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w8, w2 +; CHECK-GI-NEXT: ret %zext = zext i16 %x to i32 %sub = sub i32 %zext, %y %add = add i32 %sub, %z @@ -774,14 +844,25 @@ define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) { ; ((anyext X) - Y) + Z --> (Z - Y) + (anyext X) define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) { -; CHECK-LABEL: commute_subop0_anyext: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #111 // =0x6f -; CHECK-NEXT: sub w9, w2, w1 -; CHECK-NEXT: madd w8, w0, w8, w9 -; CHECK-NEXT: lsl w8, w8, #3 -; CHECK-NEXT: sub w0, w8, #1776 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_anyext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #111 // =0x6f +; CHECK-SD-NEXT: sub w9, w2, w1 +; CHECK-SD-NEXT: madd w8, w0, w8, w9 +; CHECK-SD-NEXT: lsl w8, w8, #3 +; CHECK-SD-NEXT: sub w0, w8, #1776 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_anyext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #111 // =0x6f +; CHECK-GI-NEXT: add w9, w1, #222 +; CHECK-GI-NEXT: mul w8, w0, w8 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: sub w8, w8, w9, uxth +; CHECK-GI-NEXT: add w8, w8, w2 +; CHECK-GI-NEXT: lsl w0, w8, #3 +; CHECK-GI-NEXT: ret %aa = mul i16 %a, 111 %bb = add i16 %b, 222 %a_32 = zext i16 %aa to i32 @@ -795,11 +876,18 @@ define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) { ; ((X and C) - Y) + Z --> (Z - Y) + (X and C) define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_and: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, uxtb -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_and: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, uxtb +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_and: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xff +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w8, w2 +; CHECK-GI-NEXT: ret %and = and i32 %x, 255 %sub = sub i32 %and, %y %add = add i32 %sub, %z @@ -808,11 +896,18 @@ define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) { ; Z + ((X << C) - Y) --> (Z - Y) + (X << C) define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_cadd: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w2, w1 -; CHECK-NEXT: add w0, w8, w0, lsl #3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_cadd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w2, w1 +; CHECK-SD-NEXT: add w0, w8, w0, lsl #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_cadd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsl w8, w0, #3 +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w0, w2, w8 +; CHECK-GI-NEXT: ret %shl = shl i32 %x, 3 %sub = sub i32 %shl, %y %add = add i32 %z, %sub @@ -821,11 +916,18 @@ define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) { ; Y + ((X << C) - X) --> (Y - X) + (X << C) define i32 @commute_subop0_mul(i32 %x, i32 %y) { -; CHECK-LABEL: commute_subop0_mul: -; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: add w0, w8, w0, lsl #3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_mul: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub w8, w1, w0 +; CHECK-SD-NEXT: add w0, w8, w0, lsl #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_mul: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsl w8, w0, #3 +; CHECK-GI-NEXT: sub w8, w8, w0 +; CHECK-GI-NEXT: add w0, w8, w1 +; CHECK-GI-NEXT: ret %mul = mul i32 %x, 7 %add = add i32 %mul, %y ret i32 %add @@ -863,13 +965,22 @@ define i32 @commute_subop0_zshiftc_oneuse(i32 %x, i32 %y, i32 %z) { } define i32 @commute_subop0_zshiftc(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: commute_subop0_zshiftc: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w2, #2 -; CHECK-NEXT: sub w9, w8, w1 -; CHECK-NEXT: add w9, w9, w0, lsl #3 -; CHECK-NEXT: eor w0, w8, w9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: commute_subop0_zshiftc: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w8, w2, #2 +; CHECK-SD-NEXT: sub w9, w8, w1 +; CHECK-SD-NEXT: add w9, w9, w0, lsl #3 +; CHECK-SD-NEXT: eor w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: commute_subop0_zshiftc: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: lsl w8, w0, #3 +; CHECK-GI-NEXT: lsl w9, w2, #2 +; CHECK-GI-NEXT: sub w8, w8, w1 +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: eor w0, w9, w8 +; CHECK-GI-NEXT: ret %xshl = shl i32 %x, 3 %sub = sub i32 %xshl, %y %zshl = shl i32 %z, 2 diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll index cbacd17..0e15b94 100644 --- a/llvm/test/CodeGen/AArch64/andcompare.ll +++ b/llvm/test/CodeGen/AArch64/andcompare.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i32 @and_eq_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, eq -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, eq +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -27,21 +27,21 @@ entry: } define i32 @and_eq_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, eq -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, eq +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -51,21 +51,21 @@ entry: } define i32 @and_eq_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, eq -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, eq +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -75,21 +75,21 @@ entry: } define i32 @and_eq_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, eq -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, eq +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -99,21 +99,21 @@ entry: } define i32 @and_eq_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, eq -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, eq +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -123,21 +123,21 @@ entry: } define i32 @and_eq_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, eq -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, eq +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -147,21 +147,21 @@ entry: } define i32 @and_eq_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, eq -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, eq +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -171,21 +171,21 @@ entry: } define i32 @and_eq_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, eq -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, eq +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -195,21 +195,21 @@ entry: } define i32 @and_eq_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, eq -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, eq +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -219,21 +219,21 @@ entry: } define i32 @and_eq_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_eq_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, eq -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_eq_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, eq +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_eq_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -243,21 +243,21 @@ entry: } define i32 @and_ne_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ne -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ne +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -267,21 +267,21 @@ entry: } define i32 @and_ne_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ne -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ne +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -291,21 +291,21 @@ entry: } define i32 @and_ne_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ne -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ne +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -315,21 +315,21 @@ entry: } define i32 @and_ne_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ne -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ne +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -339,21 +339,21 @@ entry: } define i32 @and_ne_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ne -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ne +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -363,21 +363,21 @@ entry: } define i32 @and_ne_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ne -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ne +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -387,21 +387,21 @@ entry: } define i32 @and_ne_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ne -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ne +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -411,21 +411,21 @@ entry: } define i32 @and_ne_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ne -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ne +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -435,21 +435,21 @@ entry: } define i32 @and_ne_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ne -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ne +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -459,21 +459,21 @@ entry: } define i32 @and_ne_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ne_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, ne -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ne_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, ne +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ne_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -483,21 +483,21 @@ entry: } define i32 @and_ult_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -507,21 +507,21 @@ entry: } define i32 @and_ult_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, lo -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, lo +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -531,21 +531,21 @@ entry: } define i32 @and_ult_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, lo -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, lo +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -555,21 +555,21 @@ entry: } define i32 @and_ult_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, lo -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, lo +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -579,21 +579,21 @@ entry: } define i32 @and_ult_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -603,21 +603,21 @@ entry: } define i32 @and_ult_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -627,21 +627,21 @@ entry: } define i32 @and_ult_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -651,21 +651,21 @@ entry: } define i32 @and_ult_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -675,21 +675,21 @@ entry: } define i32 @and_ult_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, lo -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, lo +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -699,21 +699,21 @@ entry: } define i32 @and_ult_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ult_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, lo -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ult_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, lo +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ult_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -723,21 +723,21 @@ entry: } define i32 @and_ule_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ls -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ls +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -747,21 +747,21 @@ entry: } define i32 @and_ule_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ls -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ls +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -771,21 +771,21 @@ entry: } define i32 @and_ule_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ls -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ls +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -795,21 +795,21 @@ entry: } define i32 @and_ule_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ls -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ls +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -819,21 +819,21 @@ entry: } define i32 @and_ule_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ls -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ls +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -843,21 +843,21 @@ entry: } define i32 @and_ule_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ls -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ls +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -867,21 +867,21 @@ entry: } define i32 @and_ule_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ls -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ls +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -891,21 +891,21 @@ entry: } define i32 @and_ule_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ls -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ls +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -915,21 +915,21 @@ entry: } define i32 @and_ule_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ls -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ls +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -939,21 +939,21 @@ entry: } define i32 @and_ule_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ule_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, ls -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ule_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, ls +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ule_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -963,21 +963,21 @@ entry: } define i32 @and_ugt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hi -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hi +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -987,21 +987,21 @@ entry: } define i32 @and_ugt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, hi -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, hi +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1011,21 +1011,21 @@ entry: } define i32 @and_ugt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hi -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hi +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -1035,21 +1035,21 @@ entry: } define i32 @and_ugt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hi -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hi +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -1059,21 +1059,21 @@ entry: } define i32 @and_ugt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hi -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hi +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -1083,21 +1083,21 @@ entry: } define i32 @and_ugt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hi -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hi +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -1107,21 +1107,21 @@ entry: } define i32 @and_ugt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hi -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hi +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -1131,21 +1131,21 @@ entry: } define i32 @and_ugt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hi -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hi +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -1155,21 +1155,21 @@ entry: } define i32 @and_ugt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, hi -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, hi +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -1179,21 +1179,21 @@ entry: } define i32 @and_ugt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_ugt_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, hi -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_ugt_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, hi +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_ugt_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -1203,21 +1203,21 @@ entry: } define i32 @and_uge_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -1227,21 +1227,21 @@ entry: } define i32 @and_uge_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, hs -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, hs +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1251,21 +1251,21 @@ entry: } define i32 @and_uge_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hs -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hs +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -1275,21 +1275,21 @@ entry: } define i32 @and_uge_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hs -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hs +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -1299,21 +1299,21 @@ entry: } define i32 @and_uge_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -1323,21 +1323,21 @@ entry: } define i32 @and_uge_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -1347,21 +1347,21 @@ entry: } define i32 @and_uge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -1371,21 +1371,21 @@ entry: } define i32 @and_uge_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -1395,21 +1395,21 @@ entry: } define i32 @and_uge_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, hs -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, hs +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -1419,21 +1419,21 @@ entry: } define i32 @and_uge_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_uge_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, hs -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_uge_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, hs +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_uge_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -1443,21 +1443,21 @@ entry: } define i32 @and_slt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lt -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lt +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -1467,21 +1467,21 @@ entry: } define i32 @and_slt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, lt -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, lt +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1491,21 +1491,21 @@ entry: } define i32 @and_slt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, lt -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, lt +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -1515,21 +1515,21 @@ entry: } define i32 @and_slt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, lt -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, lt +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -1539,21 +1539,21 @@ entry: } define i32 @and_slt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lt -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lt +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -1563,21 +1563,21 @@ entry: } define i32 @and_slt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lt -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lt +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -1587,21 +1587,21 @@ entry: } define i32 @and_slt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lt -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lt +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -1611,21 +1611,21 @@ entry: } define i32 @and_slt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lt -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lt +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -1635,21 +1635,21 @@ entry: } define i32 @and_slt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, lt -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, lt +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -1659,21 +1659,21 @@ entry: } define i32 @and_slt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_slt_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, lt -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_slt_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, lt +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_slt_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -1683,21 +1683,21 @@ entry: } define i32 @and_sle_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, le -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, le +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -1707,21 +1707,21 @@ entry: } define i32 @and_sle_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, le -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, le +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1731,21 +1731,21 @@ entry: } define i32 @and_sle_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, le -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, le +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -1755,21 +1755,21 @@ entry: } define i32 @and_sle_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, le -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, le +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -1779,21 +1779,21 @@ entry: } define i32 @and_sle_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, le -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, le +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -1803,21 +1803,21 @@ entry: } define i32 @and_sle_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, le -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, le +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -1827,21 +1827,21 @@ entry: } define i32 @and_sle_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, le -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, le +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -1851,21 +1851,21 @@ entry: } define i32 @and_sle_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, le -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, le +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -1875,21 +1875,21 @@ entry: } define i32 @and_sle_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, le -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, le +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -1899,21 +1899,21 @@ entry: } define i32 @and_sle_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sle_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, le -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sle_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, le +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sle_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -1923,21 +1923,21 @@ entry: } define i32 @and_sgt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, gt -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, gt +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -1947,21 +1947,21 @@ entry: } define i32 @and_sgt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, gt -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, gt +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1971,21 +1971,21 @@ entry: } define i32 @and_sgt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, gt -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, gt +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -1995,21 +1995,21 @@ entry: } define i32 @and_sgt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, gt -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, gt +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -2019,21 +2019,21 @@ entry: } define i32 @and_sgt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, gt -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, gt +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -2043,21 +2043,21 @@ entry: } define i32 @and_sgt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, gt -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, gt +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -2067,21 +2067,21 @@ entry: } define i32 @and_sgt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, gt -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, gt +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -2091,21 +2091,21 @@ entry: } define i32 @and_sgt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, gt -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, gt +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -2115,21 +2115,21 @@ entry: } define i32 @and_sgt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, gt -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, gt +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -2139,21 +2139,21 @@ entry: } define i32 @and_sgt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sgt_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, gt -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sgt_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sgt_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, gt +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sgt_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -2163,21 +2163,21 @@ entry: } define i32 @and_sge_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_eq: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ge -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_eq: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ge +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp eq i32 %s2, %s3 @@ -2187,21 +2187,21 @@ entry: } define i32 @and_sge_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_ne: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ge -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_ne: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_ne: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ge +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_ne: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -2211,21 +2211,21 @@ entry: } define i32 @and_sge_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ge -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ge +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -2235,21 +2235,21 @@ entry: } define i32 @and_sge_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, ge -; SDISEL-NEXT: cset w0, ls -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, ge +; CHECK-SD-NEXT: cset w0, ls +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -2259,21 +2259,21 @@ entry: } define i32 @and_sge_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ge -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ge +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -2283,21 +2283,21 @@ entry: } define i32 @and_sge_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ge -; SDISEL-NEXT: cset w0, hs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ge +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -2307,21 +2307,21 @@ entry: } define i32 @and_sge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ge -; SDISEL-NEXT: cset w0, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ge +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -2331,21 +2331,21 @@ entry: } define i32 @and_sge_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, ge -; SDISEL-NEXT: cset w0, le -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, ge +; CHECK-SD-NEXT: cset w0, le +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -2355,21 +2355,21 @@ entry: } define i32 @and_sge_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #4, ge -; SDISEL-NEXT: cset w0, gt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #4, ge +; CHECK-SD-NEXT: cset w0, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 @@ -2379,21 +2379,21 @@ entry: } define i32 @and_sge_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) { -; SDISEL-LABEL: and_sge_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #8, ge -; SDISEL-NEXT: cset w0, ge -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_sge_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ge -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_sge_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #8, ge +; CHECK-SD-NEXT: cset w0, ge +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_sge_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ge +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %c0 = icmp sge i32 %s0, %s1 %c1 = icmp sge i32 %s2, %s3 @@ -2403,19 +2403,19 @@ entry: } define i32 @cmp_to_ands1(i32 %num) { -; SDISEL-LABEL: cmp_to_ands1: -; SDISEL: // %bb.0: -; SDISEL-NEXT: and w8, w0, #0xff -; SDISEL-NEXT: tst w0, #0xfe -; SDISEL-NEXT: csel w0, w8, wzr, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands1: -; GISEL: // %bb.0: -; GISEL-NEXT: and w8, w0, #0xff -; GISEL-NEXT: cmp w8, #1 -; GISEL-NEXT: csel w0, w8, wzr, hi -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: tst w0, #0xfe +; CHECK-SD-NEXT: csel w0, w8, wzr, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xff +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: csel w0, w8, wzr, hi +; CHECK-GI-NEXT: ret %and = and i32 %num, 255 %cmp = icmp ugt i32 %and, 1 %r = select i1 %cmp, i32 %and, i32 0 @@ -2423,19 +2423,19 @@ define i32 @cmp_to_ands1(i32 %num) { } define i32 @cmp_to_ands2(i32 %num) { -; SDISEL-LABEL: cmp_to_ands2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: and w8, w0, #0xfe -; SDISEL-NEXT: tst w0, #0xc0 -; SDISEL-NEXT: csel w0, w8, wzr, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands2: -; GISEL: // %bb.0: -; GISEL-NEXT: and w8, w0, #0xfe -; GISEL-NEXT: cmp w8, #63 -; GISEL-NEXT: csel w0, w8, wzr, hi -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xfe +; CHECK-SD-NEXT: tst w0, #0xc0 +; CHECK-SD-NEXT: csel w0, w8, wzr, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xfe +; CHECK-GI-NEXT: cmp w8, #63 +; CHECK-GI-NEXT: csel w0, w8, wzr, hi +; CHECK-GI-NEXT: ret %and = and i32 %num, 254 %cmp = icmp ugt i32 %and, 63 %r = select i1 %cmp, i32 %and, i32 0 @@ -2443,19 +2443,19 @@ define i32 @cmp_to_ands2(i32 %num) { } define i32 @cmp_to_ands3(i32 %num, i32 %a) { -; SDISEL-LABEL: cmp_to_ands3: -; SDISEL: // %bb.0: -; SDISEL-NEXT: tst w0, #0x10 -; SDISEL-NEXT: csel w0, w1, wzr, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands3: -; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #23 // =0x17 -; GISEL-NEXT: and w8, w0, w8 -; GISEL-NEXT: cmp w8, #7 -; GISEL-NEXT: csel w0, w1, wzr, hi -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w0, #0x10 +; CHECK-SD-NEXT: csel w0, w1, wzr, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #23 // =0x17 +; CHECK-GI-NEXT: and w8, w0, w8 +; CHECK-GI-NEXT: cmp w8, #7 +; CHECK-GI-NEXT: csel w0, w1, wzr, hi +; CHECK-GI-NEXT: ret %and = and i32 %num, 23 %cmp = icmp ugt i32 %and, 7 %r = select i1 %cmp, i32 %a, i32 0 @@ -2463,19 +2463,19 @@ define i32 @cmp_to_ands3(i32 %num, i32 %a) { } define i32 @cmp_to_ands4(i32 %num, i32 %a) { -; SDISEL-LABEL: cmp_to_ands4: -; SDISEL: // %bb.0: -; SDISEL-NEXT: and w8, w0, #0x30 -; SDISEL-NEXT: tst w0, #0x20 -; SDISEL-NEXT: csel w0, w8, w1, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands4: -; GISEL: // %bb.0: -; GISEL-NEXT: and w8, w0, #0x30 -; GISEL-NEXT: cmp w8, #31 -; GISEL-NEXT: csel w0, w8, w1, ls -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0x30 +; CHECK-SD-NEXT: tst w0, #0x20 +; CHECK-SD-NEXT: csel w0, w8, w1, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0x30 +; CHECK-GI-NEXT: cmp w8, #31 +; CHECK-GI-NEXT: csel w0, w8, w1, ls +; CHECK-GI-NEXT: ret %and = and i32 %num, 48 %cmp = icmp ule i32 %and, 31 %r = select i1 %cmp, i32 %and, i32 %a @@ -2483,19 +2483,19 @@ define i32 @cmp_to_ands4(i32 %num, i32 %a) { } define i32 @cmp_to_ands5(i32 %num, i32 %a) { -; SDISEL-LABEL: cmp_to_ands5: -; SDISEL: // %bb.0: -; SDISEL-NEXT: and w8, w0, #0xf8 -; SDISEL-NEXT: tst w0, #0xc0 -; SDISEL-NEXT: csel w0, w8, w1, eq -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands5: -; GISEL: // %bb.0: -; GISEL-NEXT: and w8, w0, #0xf8 -; GISEL-NEXT: cmp w8, #64 -; GISEL-NEXT: csel w0, w8, w1, lo -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands5: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xf8 +; CHECK-SD-NEXT: tst w0, #0xc0 +; CHECK-SD-NEXT: csel w0, w8, w1, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands5: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xf8 +; CHECK-GI-NEXT: cmp w8, #64 +; CHECK-GI-NEXT: csel w0, w8, w1, lo +; CHECK-GI-NEXT: ret %and = and i32 %num, 248 %cmp = icmp ult i32 %and, 64 %r = select i1 %cmp, i32 %and, i32 %a @@ -2503,19 +2503,19 @@ define i32 @cmp_to_ands5(i32 %num, i32 %a) { } define i32 @cmp_to_ands6(i32 %num) { -; SDISEL-LABEL: cmp_to_ands6: -; SDISEL: // %bb.0: -; SDISEL-NEXT: and w8, w0, #0xfe -; SDISEL-NEXT: tst w0, #0xf0 -; SDISEL-NEXT: csel w0, w8, wzr, ne -; SDISEL-NEXT: ret -; -; GISEL-LABEL: cmp_to_ands6: -; GISEL: // %bb.0: -; GISEL-NEXT: and w8, w0, #0xfe -; GISEL-NEXT: cmp w8, #16 -; GISEL-NEXT: csel w0, w8, wzr, hs -; GISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_to_ands6: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xfe +; CHECK-SD-NEXT: tst w0, #0xf0 +; CHECK-SD-NEXT: csel w0, w8, wzr, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmp_to_ands6: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xfe +; CHECK-GI-NEXT: cmp w8, #16 +; CHECK-GI-NEXT: csel w0, w8, wzr, hs +; CHECK-GI-NEXT: ret %and = and i32 %num, 254 %cmp = icmp uge i32 %and, 16 %r = select i1 %cmp, i32 %and, i32 0 @@ -2523,21 +2523,21 @@ define i32 @cmp_to_ands6(i32 %num) { } define i1 @and_fcmp(float %0, float %1) { -; SDISEL-LABEL: and_fcmp: -; SDISEL: // %bb.0: -; SDISEL-NEXT: fcmp s1, s1 -; SDISEL-NEXT: fccmp s0, s0, #0, vs -; SDISEL-NEXT: cset w0, vs -; SDISEL-NEXT: ret -; -; GISEL-LABEL: and_fcmp: -; GISEL: // %bb.0: -; GISEL-NEXT: fcmp s0, #0.0 -; GISEL-NEXT: cset w8, vs -; GISEL-NEXT: fcmp s1, #0.0 -; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-SD-LABEL: and_fcmp: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmp s1, s1 +; CHECK-SD-NEXT: fccmp s0, s0, #0, vs +; CHECK-SD-NEXT: cset w0, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: and_fcmp: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmp s0, #0.0 +; CHECK-GI-NEXT: cset w8, vs +; CHECK-GI-NEXT: fcmp s1, #0.0 +; CHECK-GI-NEXT: cset w9, vs +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret %3 = fcmp uno float %0, 0.000000e+00 %4 = fcmp uno float %1, 0.000000e+00 diff --git a/llvm/test/CodeGen/AArch64/andorbrcompare.ll b/llvm/test/CodeGen/AArch64/andorbrcompare.ll index 951a5cd..5bc06ec 100644 --- a/llvm/test/CodeGen/AArch64/andorbrcompare.ll +++ b/llvm/test/CodeGen/AArch64/andorbrcompare.ll @@ -1,44 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare void @dummy() define i32 @and_eq_ne_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_eq_ne_ult: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #0, ne -; SDISEL-NEXT: b.eq .LBB0_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.lo .LBB0_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB0_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_eq_ne_ult: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #0, ne +; CHECK-SD-NEXT: b.eq .LBB0_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.lo .LBB0_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB0_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_eq_ne_ult: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB0_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.lo .LBB0_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB0_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_eq_ne_ult: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB0_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.lo .LBB0_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB0_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -56,40 +56,40 @@ else: } define i32 @and_ne_ult_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_ne_ult_ule: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #4, lo -; SDISEL-NEXT: b.ne .LBB1_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.ls .LBB1_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB1_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_ne_ult_ule: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #4, lo +; CHECK-SD-NEXT: b.ne .LBB1_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.ls .LBB1_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB1_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_ne_ult_ule: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB1_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.ls .LBB1_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB1_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_ne_ult_ule: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB1_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.ls .LBB1_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB1_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -107,40 +107,40 @@ else: } define i32 @and_ult_ule_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_ult_ule_ugt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #2, ls -; SDISEL-NEXT: b.lo .LBB2_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.hi .LBB2_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB2_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_ult_ule_ugt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #2, ls +; CHECK-SD-NEXT: b.lo .LBB2_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.hi .LBB2_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB2_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_ult_ule_ugt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ls -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB2_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.hi .LBB2_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB2_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_ult_ule_ugt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ls +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB2_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.hi .LBB2_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB2_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -158,40 +158,40 @@ else: } define i32 @and_ule_ugt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_ule_ugt_uge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #2, hi -; SDISEL-NEXT: b.ls .LBB3_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.hs .LBB3_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB3_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_ule_ugt_uge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #2, hi +; CHECK-SD-NEXT: b.ls .LBB3_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.hs .LBB3_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB3_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_ule_ugt_uge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB3_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.hs .LBB3_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB3_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_ule_ugt_uge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB3_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.hs .LBB3_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB3_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -209,40 +209,40 @@ else: } define i32 @and_ugt_uge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_ugt_uge_slt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #0, hs -; SDISEL-NEXT: b.hi .LBB4_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.lt .LBB4_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB4_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_ugt_uge_slt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #0, hs +; CHECK-SD-NEXT: b.hi .LBB4_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.lt .LBB4_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB4_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_ugt_uge_slt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB4_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.lt .LBB4_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB4_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_ugt_uge_slt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB4_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.lt .LBB4_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB4_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -260,40 +260,40 @@ else: } define i32 @and_uge_slt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_uge_slt_sle: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #0, lt -; SDISEL-NEXT: b.hs .LBB5_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.le .LBB5_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB5_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_uge_slt_sle: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #0, lt +; CHECK-SD-NEXT: b.hs .LBB5_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.le .LBB5_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB5_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_uge_slt_sle: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, lt -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB5_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.le .LBB5_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB5_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_uge_slt_sle: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, lt +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB5_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.le .LBB5_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB5_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -311,40 +311,40 @@ else: } define i32 @and_slt_sle_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_slt_sle_sgt: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #0, le -; SDISEL-NEXT: b.lt .LBB6_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.gt .LBB6_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB6_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_slt_sle_sgt: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #0, le +; CHECK-SD-NEXT: b.lt .LBB6_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.gt .LBB6_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB6_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_slt_sle_sgt: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, le -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB6_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.gt .LBB6_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB6_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_slt_sle_sgt: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, le +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB6_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.gt .LBB6_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB6_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -362,40 +362,40 @@ else: } define i32 @and_sle_sgt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) { -; SDISEL-LABEL: and_sle_sgt_sge: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #0, gt -; SDISEL-NEXT: b.le .LBB7_3 -; SDISEL-NEXT: // %bb.1: // %entry -; SDISEL-NEXT: cmp w4, w5 -; SDISEL-NEXT: b.ge .LBB7_3 -; SDISEL-NEXT: // %bb.2: -; SDISEL-NEXT: mov w0, wzr -; SDISEL-NEXT: ret -; SDISEL-NEXT: .LBB7_3: // %if -; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: and_sle_sgt_sge: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #0, gt +; CHECK-SD-NEXT: b.le .LBB7_3 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: cmp w4, w5 +; CHECK-SD-NEXT: b.ge .LBB7_3 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB7_3: // %if +; CHECK-SD-NEXT: mov w0, #1 // =0x1 +; CHECK-SD-NEXT: str w0, [x6] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: and_sle_sgt_sge: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, le -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tbnz w8, #0, .LBB7_3 -; GISEL-NEXT: // %bb.1: // %entry -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: b.ge .LBB7_3 -; GISEL-NEXT: // %bb.2: // %common.ret -; GISEL-NEXT: ret -; GISEL-NEXT: .LBB7_3: // %if -; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: and_sle_sgt_sge: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, le +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, gt +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB7_3 +; CHECK-GI-NEXT: // %bb.1: // %entry +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: b.ge .LBB7_3 +; CHECK-GI-NEXT: // %bb.2: // %common.ret +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB7_3: // %if +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: str w0, [x6] +; CHECK-GI-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 06e957f..a546ffd 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI target triple = "arm64-apple-ios" define i32 @single_same(i32 %a, i32 %b) nounwind ssp { @@ -32,31 +32,31 @@ if.end: ; Different condition codes for the two compares. define i32 @single_different(i32 %a, i32 %b) nounwind ssp { -; SDISEL-LABEL: single_different: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: cmp w0, #6 -; SDISEL-NEXT: ccmp w1, #17, #0, ge -; SDISEL-NEXT: b.eq LBB1_2 -; SDISEL-NEXT: ; %bb.1: ; %if.then -; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; SDISEL-NEXT: bl _foo -; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: LBB1_2: ; %if.end -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: single_different: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w0, #6 +; CHECK-SD-NEXT: ccmp w1, #17, #0, ge +; CHECK-SD-NEXT: b.eq LBB1_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: LBB1_2: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: single_different: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #5 -; GISEL-NEXT: ccmp w1, #17, #0, gt -; GISEL-NEXT: b.eq LBB1_2 -; GISEL-NEXT: ; %bb.1: ; %if.then -; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; GISEL-NEXT: bl _foo -; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: LBB1_2: ; %if.end -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: single_different: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #5 +; CHECK-GI-NEXT: ccmp w1, #17, #0, gt +; CHECK-GI-NEXT: b.eq LBB1_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: LBB1_2: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp sle i32 %a, 5 %cmp1 = icmp ne i32 %b, 17 @@ -73,41 +73,41 @@ if.end: ; Second block clobbers the flags, can't convert (easily). define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { -; SDISEL-LABEL: single_flagclobber: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: cmp w0, #5 -; SDISEL-NEXT: b.eq LBB2_2 -; SDISEL-NEXT: ; %bb.1: ; %lor.lhs.false -; SDISEL-NEXT: lsl w8, w1, #1 -; SDISEL-NEXT: cmp w1, #7 -; SDISEL-NEXT: csinc w8, w8, w1, lt -; SDISEL-NEXT: cmp w8, #16 -; SDISEL-NEXT: b.gt LBB2_3 -; SDISEL-NEXT: LBB2_2: ; %if.then -; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; SDISEL-NEXT: bl _foo -; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: LBB2_3: ; %if.end -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: single_flagclobber: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w0, #5 +; CHECK-SD-NEXT: b.eq LBB2_2 +; CHECK-SD-NEXT: ; %bb.1: ; %lor.lhs.false +; CHECK-SD-NEXT: lsl w8, w1, #1 +; CHECK-SD-NEXT: cmp w1, #7 +; CHECK-SD-NEXT: csinc w8, w8, w1, lt +; CHECK-SD-NEXT: cmp w8, #16 +; CHECK-SD-NEXT: b.gt LBB2_3 +; CHECK-SD-NEXT: LBB2_2: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: LBB2_3: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: single_flagclobber: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #5 -; GISEL-NEXT: b.eq LBB2_2 -; GISEL-NEXT: ; %bb.1: ; %lor.lhs.false -; GISEL-NEXT: lsl w8, w1, #1 -; GISEL-NEXT: cmp w1, #7 -; GISEL-NEXT: csinc w8, w8, w1, lt -; GISEL-NEXT: cmp w8, #17 -; GISEL-NEXT: b.ge LBB2_3 -; GISEL-NEXT: LBB2_2: ; %if.then -; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; GISEL-NEXT: bl _foo -; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: LBB2_3: ; %if.end -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: single_flagclobber: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #5 +; CHECK-GI-NEXT: b.eq LBB2_2 +; CHECK-GI-NEXT: ; %bb.1: ; %lor.lhs.false +; CHECK-GI-NEXT: lsl w8, w1, #1 +; CHECK-GI-NEXT: cmp w1, #7 +; CHECK-GI-NEXT: csinc w8, w8, w1, lt +; CHECK-GI-NEXT: cmp w8, #17 +; CHECK-GI-NEXT: b.ge LBB2_3 +; CHECK-GI-NEXT: LBB2_2: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: LBB2_3: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 br i1 %cmp, label %if.then, label %lor.lhs.false @@ -171,37 +171,37 @@ if.end: ; preds = %if.then, %lor.lhs.f ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are ; safe to speculate. define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { -; SDISEL-LABEL: speculate_division: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: cmp w0, #1 -; SDISEL-NEXT: sdiv w8, w1, w0 -; SDISEL-NEXT: ccmp w8, #16, #0, ge -; SDISEL-NEXT: b.le LBB4_2 -; SDISEL-NEXT: ; %bb.1: ; %if.end -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret -; SDISEL-NEXT: LBB4_2: ; %if.then -; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; SDISEL-NEXT: bl _foo -; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: speculate_division: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w0, #1 +; CHECK-SD-NEXT: sdiv w8, w1, w0 +; CHECK-SD-NEXT: ccmp w8, #16, #0, ge +; CHECK-SD-NEXT: b.le LBB4_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB4_2: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: speculate_division: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #0 -; GISEL-NEXT: sdiv w8, w1, w0 -; GISEL-NEXT: ccmp w8, #17, #0, gt -; GISEL-NEXT: b.lt LBB4_2 -; GISEL-NEXT: ; %bb.1: ; %if.end -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret -; GISEL-NEXT: LBB4_2: ; %if.then -; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; GISEL-NEXT: bl _foo -; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: speculate_division: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: sdiv w8, w1, w0 +; CHECK-GI-NEXT: ccmp w8, #17, #0, gt +; CHECK-GI-NEXT: b.lt LBB4_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB4_2: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %land.lhs.true, label %if.end @@ -221,41 +221,41 @@ if.end: ; Floating point compare. define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { -; SDISEL-LABEL: single_fcmp: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: cmp w0, #1 -; SDISEL-NEXT: scvtf s1, w0 -; SDISEL-NEXT: fdiv s0, s0, s1 -; SDISEL-NEXT: fmov s1, #17.00000000 -; SDISEL-NEXT: fccmp s0, s1, #8, ge -; SDISEL-NEXT: b.ge LBB5_2 -; SDISEL-NEXT: ; %bb.1: ; %if.end -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret -; SDISEL-NEXT: LBB5_2: ; %if.then -; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; SDISEL-NEXT: bl _foo -; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; SDISEL-NEXT: mov w0, #7 ; =0x7 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: single_fcmp: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w0, #1 +; CHECK-SD-NEXT: scvtf s1, w0 +; CHECK-SD-NEXT: fdiv s0, s0, s1 +; CHECK-SD-NEXT: fmov s1, #17.00000000 +; CHECK-SD-NEXT: fccmp s0, s1, #8, ge +; CHECK-SD-NEXT: b.ge LBB5_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB5_2: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: single_fcmp: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #0 -; GISEL-NEXT: scvtf s1, w0 -; GISEL-NEXT: fdiv s0, s0, s1 -; GISEL-NEXT: fmov s1, #17.00000000 -; GISEL-NEXT: fccmp s0, s1, #8, gt -; GISEL-NEXT: b.ge LBB5_2 -; GISEL-NEXT: ; %bb.1: ; %if.end -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret -; GISEL-NEXT: LBB5_2: ; %if.then -; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; GISEL-NEXT: bl _foo -; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; GISEL-NEXT: mov w0, #7 ; =0x7 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: single_fcmp: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: scvtf s1, w0 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: fmov s1, #17.00000000 +; CHECK-GI-NEXT: fccmp s0, s1, #8, gt +; CHECK-GI-NEXT: b.ge LBB5_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB5_2: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %land.lhs.true, label %if.end @@ -499,28 +499,28 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { } define i64 @gccbug(i64 %x0, i64 %x1) { -; SDISEL-LABEL: gccbug: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp x0, #2 -; SDISEL-NEXT: ccmp x0, #4, #4, ne -; SDISEL-NEXT: ccmp x1, #0, #0, eq -; SDISEL-NEXT: mov w8, #1 ; =0x1 -; SDISEL-NEXT: cinc x0, x8, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: gccbug: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmp x0, #2 +; CHECK-SD-NEXT: ccmp x0, #4, #4, ne +; CHECK-SD-NEXT: ccmp x1, #0, #0, eq +; CHECK-SD-NEXT: mov w8, #1 ; =0x1 +; CHECK-SD-NEXT: cinc x0, x8, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: gccbug: -; GISEL: ; %bb.0: -; GISEL-NEXT: cmp x1, #0 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmp x0, #2 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: cmp x0, #4 -; GISEL-NEXT: cset w10, eq -; GISEL-NEXT: orr w9, w10, w9 -; GISEL-NEXT: and w8, w9, w8 -; GISEL-NEXT: and x8, x8, #0x1 -; GISEL-NEXT: add x0, x8, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: gccbug: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: cmp x1, #0 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmp x0, #2 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: cmp x0, #4 +; CHECK-GI-NEXT: cset w10, eq +; CHECK-GI-NEXT: orr w9, w10, w9 +; CHECK-GI-NEXT: and w8, w9, w8 +; CHECK-GI-NEXT: and x8, x8, #0x1 +; CHECK-GI-NEXT: add x0, x8, #1 +; CHECK-GI-NEXT: ret %cmp0 = icmp eq i64 %x1, 0 %cmp1 = icmp eq i64 %x0, 2 %cmp2 = icmp eq i64 %x0, 4 @@ -570,23 +570,23 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) { } define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) { -; SDISEL-LABEL: select_andor32: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, w2 -; SDISEL-NEXT: mov w8, #32 ; =0x20 -; SDISEL-NEXT: ccmp w0, w8, #4, lt -; SDISEL-NEXT: ccmp w0, w1, #0, eq -; SDISEL-NEXT: csel w0, w0, w1, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: select_andor32: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmp w1, w2 +; CHECK-SD-NEXT: mov w8, #32 ; =0x20 +; CHECK-SD-NEXT: ccmp w0, w8, #4, lt +; CHECK-SD-NEXT: ccmp w0, w1, #0, eq +; CHECK-SD-NEXT: csel w0, w0, w1, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: select_andor32: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #32 ; =0x20 -; GISEL-NEXT: cmp w1, w2 -; GISEL-NEXT: ccmp w0, w8, #4, lt -; GISEL-NEXT: ccmp w0, w1, #0, eq -; GISEL-NEXT: csel w0, w0, w1, eq -; GISEL-NEXT: ret +; CHECK-GI-LABEL: select_andor32: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: mov w8, #32 ; =0x20 +; CHECK-GI-NEXT: cmp w1, w2 +; CHECK-GI-NEXT: ccmp w0, w8, #4, lt +; CHECK-GI-NEXT: ccmp w0, w1, #0, eq +; CHECK-GI-NEXT: csel w0, w0, w1, eq +; CHECK-GI-NEXT: ret %c0 = icmp eq i32 %v1, %v2 %c1 = icmp sge i32 %v2, %v3 %c2 = icmp eq i32 %v1, 32 @@ -597,22 +597,22 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) { } define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) { -; SDISEL-LABEL: select_noccmp1: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp x0, #0 -; SDISEL-NEXT: ccmp x0, #13, #4, lt -; SDISEL-NEXT: cset w8, gt -; SDISEL-NEXT: cmp x2, #2 -; SDISEL-NEXT: ccmp x2, #4, #4, lt -; SDISEL-NEXT: csinc w8, w8, wzr, le -; SDISEL-NEXT: cmp w8, #0 -; SDISEL-NEXT: csel x0, xzr, x3, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: select_noccmp1: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: ccmp x0, #13, #4, lt +; CHECK-SD-NEXT: cset w8, gt +; CHECK-SD-NEXT: cmp x2, #2 +; CHECK-SD-NEXT: ccmp x2, #4, #4, lt +; CHECK-SD-NEXT: csinc w8, w8, wzr, le +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: csel x0, xzr, x3, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: select_noccmp1: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov x0, x3 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: select_noccmp1: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: mov x0, x3 +; CHECK-GI-NEXT: ret %c0 = icmp slt i64 %v1, 0 %c1 = icmp sgt i64 %v1, 13 %c2 = icmp slt i64 %v3, 2 @@ -627,28 +627,28 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) { @g = global i32 0 define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { -; SDISEL-LABEL: select_noccmp2: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp x0, #0 -; SDISEL-NEXT: ccmp x0, #13, #0, ge -; SDISEL-NEXT: cset w8, gt -; SDISEL-NEXT: cmp w8, #0 -; SDISEL-NEXT: csel x0, xzr, x3, ne -; SDISEL-NEXT: sbfx w8, w8, #0, #1 -; SDISEL-NEXT: adrp x9, _g@PAGE -; SDISEL-NEXT: str w8, [x9, _g@PAGEOFF] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: select_noccmp2: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: ccmp x0, #13, #0, ge +; CHECK-SD-NEXT: cset w8, gt +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: csel x0, xzr, x3, ne +; CHECK-SD-NEXT: sbfx w8, w8, #0, #1 +; CHECK-SD-NEXT: adrp x9, _g@PAGE +; CHECK-SD-NEXT: str w8, [x9, _g@PAGEOFF] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: select_noccmp2: -; GISEL: ; %bb.0: -; GISEL-NEXT: cmp x0, #14 -; GISEL-NEXT: cset w8, hs -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: csel x0, xzr, x3, ne -; GISEL-NEXT: sbfx w8, w8, #0, #1 -; GISEL-NEXT: adrp x9, _g@PAGE -; GISEL-NEXT: str w8, [x9, _g@PAGEOFF] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: select_noccmp2: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: cmp x0, #14 +; CHECK-GI-NEXT: cset w8, hs +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csel x0, xzr, x3, ne +; CHECK-GI-NEXT: sbfx w8, w8, #0, #1 +; CHECK-GI-NEXT: adrp x9, _g@PAGE +; CHECK-GI-NEXT: str w8, [x9, _g@PAGEOFF] +; CHECK-GI-NEXT: ret %c0 = icmp slt i64 %v1, 0 %c1 = icmp sgt i64 %v1, 13 %or = or i1 %c0, %c1 @@ -661,33 +661,33 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { ; The following is not possible to implement with a single cmp;ccmp;csel ; sequence. define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) { -; SDISEL-LABEL: select_noccmp3: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w0, #0 -; SDISEL-NEXT: ccmp w0, #13, #0, ge -; SDISEL-NEXT: cset w8, gt -; SDISEL-NEXT: cmp w0, #22 -; SDISEL-NEXT: mov w9, #44 ; =0x2c -; SDISEL-NEXT: ccmp w0, w9, #0, ge -; SDISEL-NEXT: csel w8, wzr, w8, le -; SDISEL-NEXT: cmp w0, #99 -; SDISEL-NEXT: mov w9, #77 ; =0x4d -; SDISEL-NEXT: ccmp w0, w9, #4, ne -; SDISEL-NEXT: cset w9, eq -; SDISEL-NEXT: tst w8, w9 -; SDISEL-NEXT: csel w0, w1, w2, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: select_noccmp3: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: ccmp w0, #13, #0, ge +; CHECK-SD-NEXT: cset w8, gt +; CHECK-SD-NEXT: cmp w0, #22 +; CHECK-SD-NEXT: mov w9, #44 ; =0x2c +; CHECK-SD-NEXT: ccmp w0, w9, #0, ge +; CHECK-SD-NEXT: csel w8, wzr, w8, le +; CHECK-SD-NEXT: cmp w0, #99 +; CHECK-SD-NEXT: mov w9, #77 ; =0x4d +; CHECK-SD-NEXT: ccmp w0, w9, #4, ne +; CHECK-SD-NEXT: cset w9, eq +; CHECK-SD-NEXT: tst w8, w9 +; CHECK-SD-NEXT: csel w0, w1, w2, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: select_noccmp3: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #99 ; =0x63 -; GISEL-NEXT: sub w9, w0, #45 -; GISEL-NEXT: cmp w0, #77 -; GISEL-NEXT: ccmp w0, w8, #4, ne -; GISEL-NEXT: ccmn w9, #23, #2, eq -; GISEL-NEXT: ccmp w0, #14, #0, lo -; GISEL-NEXT: csel w0, w1, w2, hs -; GISEL-NEXT: ret +; CHECK-GI-LABEL: select_noccmp3: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: mov w8, #99 ; =0x63 +; CHECK-GI-NEXT: sub w9, w0, #45 +; CHECK-GI-NEXT: cmp w0, #77 +; CHECK-GI-NEXT: ccmp w0, w8, #4, ne +; CHECK-GI-NEXT: ccmn w9, #23, #2, eq +; CHECK-GI-NEXT: ccmp w0, #14, #0, lo +; CHECK-GI-NEXT: csel w0, w1, w2, hs +; CHECK-GI-NEXT: ret %c0 = icmp slt i32 %v0, 0 %c1 = icmp sgt i32 %v0, 13 %c2 = icmp slt i32 %v0, 22 @@ -864,27 +864,27 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3 ; Verify that we correctly promote f16. define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 { -; SDISEL-LABEL: half_select_and_olt_oge: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: fcvt s1, h1 -; SDISEL-NEXT: fcvt s0, h0 -; SDISEL-NEXT: fcmp s0, s1 -; SDISEL-NEXT: fcvt s0, h3 -; SDISEL-NEXT: fcvt s1, h2 -; SDISEL-NEXT: fccmp s1, s0, #8, mi -; SDISEL-NEXT: csel w0, w0, w1, ge -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: half_select_and_olt_oge: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: fcvt s1, h1 +; CHECK-SD-NEXT: fcvt s0, h0 +; CHECK-SD-NEXT: fcmp s0, s1 +; CHECK-SD-NEXT: fcvt s0, h3 +; CHECK-SD-NEXT: fcvt s1, h2 +; CHECK-SD-NEXT: fccmp s1, s0, #8, mi +; CHECK-SD-NEXT: csel w0, w0, w1, ge +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: half_select_and_olt_oge: -; GISEL: ; %bb.0: -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: fcvt s1, h1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcvt s3, h3 -; GISEL-NEXT: fcmp s0, s1 -; GISEL-NEXT: fccmp s2, s3, #8, mi -; GISEL-NEXT: csel w0, w0, w1, ge -; GISEL-NEXT: ret +; CHECK-GI-LABEL: half_select_and_olt_oge: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvt s1, h1 +; CHECK-GI-NEXT: fcvt s2, h2 +; CHECK-GI-NEXT: fcvt s3, h3 +; CHECK-GI-NEXT: fcmp s0, s1 +; CHECK-GI-NEXT: fccmp s2, s3, #8, mi +; CHECK-GI-NEXT: csel w0, w0, w1, ge +; CHECK-GI-NEXT: ret %c0 = fcmp olt half %v0, %v1 %c1 = fcmp oge half %v2, %v3 %cr = and i1 %c1, %c0 @@ -893,29 +893,29 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 } define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 { -; SDISEL-LABEL: half_select_and_olt_one: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: fcvt s1, h1 -; SDISEL-NEXT: fcvt s0, h0 -; SDISEL-NEXT: fcmp s0, s1 -; SDISEL-NEXT: fcvt s0, h3 -; SDISEL-NEXT: fcvt s1, h2 -; SDISEL-NEXT: fccmp s1, s0, #4, mi -; SDISEL-NEXT: fccmp s1, s0, #1, ne -; SDISEL-NEXT: csel w0, w0, w1, vc -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: half_select_and_olt_one: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: fcvt s1, h1 +; CHECK-SD-NEXT: fcvt s0, h0 +; CHECK-SD-NEXT: fcmp s0, s1 +; CHECK-SD-NEXT: fcvt s0, h3 +; CHECK-SD-NEXT: fcvt s1, h2 +; CHECK-SD-NEXT: fccmp s1, s0, #4, mi +; CHECK-SD-NEXT: fccmp s1, s0, #1, ne +; CHECK-SD-NEXT: csel w0, w0, w1, vc +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: half_select_and_olt_one: -; GISEL: ; %bb.0: -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: fcvt s1, h1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcvt s3, h3 -; GISEL-NEXT: fcmp s0, s1 -; GISEL-NEXT: fccmp s2, s3, #4, mi -; GISEL-NEXT: fccmp s2, s3, #1, ne -; GISEL-NEXT: csel w0, w0, w1, vc -; GISEL-NEXT: ret +; CHECK-GI-LABEL: half_select_and_olt_one: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: fcvt s1, h1 +; CHECK-GI-NEXT: fcvt s2, h2 +; CHECK-GI-NEXT: fcvt s3, h3 +; CHECK-GI-NEXT: fcmp s0, s1 +; CHECK-GI-NEXT: fccmp s2, s3, #4, mi +; CHECK-GI-NEXT: fccmp s2, s3, #1, ne +; CHECK-GI-NEXT: csel w0, w0, w1, vc +; CHECK-GI-NEXT: ret %c0 = fcmp olt half %v0, %v1 %c1 = fcmp one half %v2, %v3 %cr = and i1 %c1, %c0 @@ -926,51 +926,51 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 ; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead. define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 { -; SDISEL-LABEL: f128_select_and_olt_oge: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: sub sp, sp, #80 -; SDISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill -; SDISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill -; SDISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill -; SDISEL-NEXT: mov x19, x1 -; SDISEL-NEXT: mov x20, x0 -; SDISEL-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill -; SDISEL-NEXT: bl ___lttf2 -; SDISEL-NEXT: cmp w0, #0 -; SDISEL-NEXT: cset w21, lt -; SDISEL-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload -; SDISEL-NEXT: bl ___getf2 -; SDISEL-NEXT: cmp w0, #0 -; SDISEL-NEXT: cset w8, ge -; SDISEL-NEXT: tst w8, w21 -; SDISEL-NEXT: csel w0, w20, w19, ne -; SDISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload -; SDISEL-NEXT: add sp, sp, #80 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: f128_select_and_olt_oge: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #80 +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-SD-NEXT: mov x19, x1 +; CHECK-SD-NEXT: mov x20, x0 +; CHECK-SD-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill +; CHECK-SD-NEXT: bl ___lttf2 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: cset w21, lt +; CHECK-SD-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload +; CHECK-SD-NEXT: bl ___getf2 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: cset w8, ge +; CHECK-SD-NEXT: tst w8, w21 +; CHECK-SD-NEXT: csel w0, w20, w19, ne +; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #80 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: f128_select_and_olt_oge: -; GISEL: ; %bb.0: -; GISEL-NEXT: sub sp, sp, #80 -; GISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill -; GISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill -; GISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill -; GISEL-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill -; GISEL-NEXT: mov x19, x0 -; GISEL-NEXT: mov x20, x1 -; GISEL-NEXT: bl ___lttf2 -; GISEL-NEXT: mov x21, x0 -; GISEL-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload -; GISEL-NEXT: bl ___getf2 -; GISEL-NEXT: cmp w21, #0 -; GISEL-NEXT: ccmp w0, #0, #8, lt -; GISEL-NEXT: csel w0, w19, w20, ge -; GISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload -; GISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload -; GISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload -; GISEL-NEXT: add sp, sp, #80 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: f128_select_and_olt_oge: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: sub sp, sp, #80 +; CHECK-GI-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl ___lttf2 +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload +; CHECK-GI-NEXT: bl ___getf2 +; CHECK-GI-NEXT: cmp w21, #0 +; CHECK-GI-NEXT: ccmp w0, #0, #8, lt +; CHECK-GI-NEXT: csel w0, w19, w20, ge +; CHECK-GI-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #80 +; CHECK-GI-NEXT: ret %c0 = fcmp olt fp128 %v0, %v1 %c1 = fcmp oge fp128 %v2, %v3 %cr = and i1 %c1, %c0 @@ -1048,46 +1048,46 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { ; This test is trying to test that multiple ccmp's don't get created in a way ; that they would have multiple uses. It doesn't seem to. define i32 @multiccmp(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 { -; SDISEL-LABEL: multiccmp: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill -; SDISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill -; SDISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; SDISEL-NEXT: mov x19, x5 -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: cset w20, gt -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: cset w21, ne -; SDISEL-NEXT: tst w20, w21 -; SDISEL-NEXT: csel w0, w5, w4, ne -; SDISEL-NEXT: bl _callee -; SDISEL-NEXT: tst w20, w21 -; SDISEL-NEXT: csel w0, w0, w19, ne -; SDISEL-NEXT: bl _callee -; SDISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: multiccmp: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-SD-NEXT: mov x19, x5 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w20, gt +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: cset w21, ne +; CHECK-SD-NEXT: tst w20, w21 +; CHECK-SD-NEXT: csel w0, w5, w4, ne +; CHECK-SD-NEXT: bl _callee +; CHECK-SD-NEXT: tst w20, w21 +; CHECK-SD-NEXT: csel w0, w0, w19, ne +; CHECK-SD-NEXT: bl _callee +; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: multiccmp: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; GISEL-NEXT: mov x19, x5 -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w20, w8, w9 -; GISEL-NEXT: tst w20, #0x1 -; GISEL-NEXT: csel w0, w5, w4, ne -; GISEL-NEXT: bl _callee -; GISEL-NEXT: tst w20, #0x1 -; GISEL-NEXT: csel w0, w0, w19, ne -; GISEL-NEXT: bl _callee -; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; GISEL-NEXT: ret +; CHECK-GI-LABEL: multiccmp: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-GI-NEXT: mov x19, x5 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w20, w8, w9 +; CHECK-GI-NEXT: tst w20, #0x1 +; CHECK-GI-NEXT: csel w0, w5, w4, ne +; CHECK-GI-NEXT: bl _callee +; CHECK-GI-NEXT: tst w20, #0x1 +; CHECK-GI-NEXT: csel w0, w0, w19, ne +; CHECK-GI-NEXT: bl _callee +; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1100,57 +1100,57 @@ entry: } define i32 @multiccmp2(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 { -; SDISEL-LABEL: multiccmp2: -; SDISEL: ; %bb.0: ; %entry -; SDISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill -; SDISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill -; SDISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; SDISEL-NEXT: mov x19, x5 -; SDISEL-NEXT: mov x20, x3 -; SDISEL-NEXT: mov x21, x0 -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: cset w8, gt -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: cset w22, ne -; SDISEL-NEXT: tst w8, w22 -; SDISEL-NEXT: csel w0, w5, w4, ne -; SDISEL-NEXT: bl _callee -; SDISEL-NEXT: cmp w21, w20 -; SDISEL-NEXT: cset w8, eq -; SDISEL-NEXT: tst w22, w8 -; SDISEL-NEXT: csel w0, w0, w19, ne -; SDISEL-NEXT: bl _callee -; SDISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload -; SDISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: multiccmp2: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; CHECK-SD-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-SD-NEXT: mov x19, x5 +; CHECK-SD-NEXT: mov x20, x3 +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w8, gt +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: cset w22, ne +; CHECK-SD-NEXT: tst w8, w22 +; CHECK-SD-NEXT: csel w0, w5, w4, ne +; CHECK-SD-NEXT: bl _callee +; CHECK-SD-NEXT: cmp w21, w20 +; CHECK-SD-NEXT: cset w8, eq +; CHECK-SD-NEXT: tst w22, w8 +; CHECK-SD-NEXT: csel w0, w0, w19, ne +; CHECK-SD-NEXT: bl _callee +; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: multiccmp2: -; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill -; GISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill -; GISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; GISEL-NEXT: mov x19, x0 -; GISEL-NEXT: mov x20, x3 -; GISEL-NEXT: mov x21, x5 -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w22, ne -; GISEL-NEXT: and w8, w8, w22 -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: csel w0, w5, w4, ne -; GISEL-NEXT: bl _callee -; GISEL-NEXT: cmp w19, w20 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: and w8, w22, w8 -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: csel w0, w0, w21, ne -; GISEL-NEXT: bl _callee -; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload -; GISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload -; GISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload -; GISEL-NEXT: ret +; CHECK-GI-LABEL: multiccmp2: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; CHECK-GI-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x3 +; CHECK-GI-NEXT: mov x21, x5 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w22, ne +; CHECK-GI-NEXT: and w8, w8, w22 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csel w0, w5, w4, ne +; CHECK-GI-NEXT: bl _callee +; CHECK-GI-NEXT: cmp w19, w20 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: and w8, w22, w8 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csel w0, w0, w21, ne +; CHECK-GI-NEXT: bl _callee +; CHECK-GI-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c0 = icmp sgt i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -1168,21 +1168,21 @@ entry: declare i32 @callee(i32) define i1 @cmp_and_negative_const(i32 %0, i32 %1) { -; SDISEL-LABEL: cmp_and_negative_const: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmn w0, #1 -; SDISEL-NEXT: ccmn w1, #2, #0, eq -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_and_negative_const: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmn w0, #1 +; CHECK-SD-NEXT: ccmn w1, #2, #0, eq +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_and_negative_const: -; GISEL: ; %bb.0: -; GISEL-NEXT: cmn w0, #1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmn w1, #2 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_and_negative_const: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: cmn w0, #1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmn w1, #2 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret %3 = icmp eq i32 %0, -1 %4 = icmp eq i32 %1, -2 %5 = and i1 %3, %4 @@ -1190,21 +1190,21 @@ define i1 @cmp_and_negative_const(i32 %0, i32 %1) { } define i1 @cmp_or_negative_const(i32 %a, i32 %b) { -; SDISEL-LABEL: cmp_or_negative_const: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmn w0, #1 -; SDISEL-NEXT: ccmn w1, #2, #4, ne -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_or_negative_const: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: cmn w0, #1 +; CHECK-SD-NEXT: ccmn w1, #2, #4, ne +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_or_negative_const: -; GISEL: ; %bb.0: -; GISEL-NEXT: cmn w0, #1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: cmn w1, #2 -; GISEL-NEXT: cset w9, eq -; GISEL-NEXT: orr w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_or_negative_const: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: cmn w0, #1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: cmn w1, #2 +; CHECK-GI-NEXT: cset w9, eq +; CHECK-GI-NEXT: orr w0, w8, w9 +; CHECK-GI-NEXT: ret %cmp = icmp eq i32 %a, -1 %cmp1 = icmp eq i32 %b, -2 %or.cond = or i1 %cmp, %cmp1 diff --git a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll index ce35810..60c48bf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s ; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s define void @foo_2d(ptr %src) { @@ -130,9 +130,9 @@ for.end: ; preds = %for.body ; CHECK: fnmadd h0, h0, h1, h2 define half @test0(half %a, half %b, half %c) { entry: - %0 = fmul half %a, %b - %mul = fsub half -0.000000e+00, %0 - %sub1 = fsub half %mul, %c + %0 = fmul contract half %a, %b + %mul = fsub contract half -0.000000e+00, %0 + %sub1 = fsub contract half %mul, %c ret half %sub1 } @@ -140,9 +140,9 @@ entry: ; CHECK: fnmadd s0, s0, s1, s2 define float @test1(float %a, float %b, float %c) { entry: - %0 = fmul float %a, %b - %mul = fsub float -0.000000e+00, %0 - %sub1 = fsub float %mul, %c + %0 = fmul contract float %a, %b + %mul = fsub contract float -0.000000e+00, %0 + %sub1 = fsub contract float %mul, %c ret float %sub1 } @@ -150,9 +150,9 @@ entry: ; CHECK: fnmadd d0, d0, d1, d2 define double @test2(double %a, double %b, double %c) { entry: - %0 = fmul double %a, %b - %mul = fsub double -0.000000e+00, %0 - %sub1 = fsub double %mul, %c + %0 = fmul contract double %a, %b + %mul = fsub contract double -0.000000e+00, %0 + %sub1 = fsub contract double %mul, %c ret double %sub1 } diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll index 9dfc8df..9666c5c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll @@ -136,3 +136,18 @@ entry: %0 = load i64, ptr %arrayidx, align 8 ret i64 %0 } + +define <2 x i64> @loadv2i64_shr1(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: loadv2i64_shr1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: ldr q0, [x2, x8, lsl #4] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 1 + %arrayidx = getelementptr inbounds <2 x i64>, ptr %table, i64 %shr + %0 = load <2 x i64>, ptr %arrayidx, align 16 + ret <2 x i64> %0 +} diff --git a/llvm/test/CodeGen/AArch64/arm64-this-return.ll b/llvm/test/CodeGen/AArch64/arm64-this-return.ll index a497ba2..7dd47ac 100644 --- a/llvm/test/CodeGen/AArch64/arm64-this-return.ll +++ b/llvm/test/CodeGen/AArch64/arm64-this-return.ll @@ -148,7 +148,7 @@ define ptr @E_ctor_base(ptr %this, i32 %x) { ; GISEL-MIR: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; GISEL-MIR: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; GISEL-MIR: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s64) + ; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GISEL-MIR: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; GISEL-MIR: $x0 = COPY [[PTR_ADD]](p0) ; GISEL-MIR: $w1 = COPY [[COPY1]](s32) diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index 937a17c..07400bb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -1,12 +1,50 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+aes | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for pmull8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64 define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: smull.8h v0, v0, v1 +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -19,7 +57,7 @@ define <4 x i32> @smull4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: smull.4s v0, v0, v1 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -32,7 +70,7 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: smull.2d v0, v0, v1 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -49,7 +87,7 @@ define <8 x i16> @umull8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: umull.8h v0, v0, v1 +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -62,7 +100,7 @@ define <4 x i32> @umull4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: umull.4s v0, v0, v1 +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -75,7 +113,7 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: umull.2d v0, v0, v1 +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -92,7 +130,7 @@ define <4 x i32> @sqdmull4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqdmull.4s v0, v0, v1 +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -105,7 +143,7 @@ define <2 x i64> @sqdmull2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqdmull.2d v0, v0, v1 +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -114,12 +152,19 @@ define <2 x i64> @sqdmull2d(ptr %A, ptr %B) nounwind { } define <4 x i32> @sqdmull2_4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sqdmull2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: sqdmull.4s v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmull2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ldr d1, [x1, #8] +; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmull2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: sqdmull2 v0.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -129,12 +174,19 @@ define <4 x i32> @sqdmull2_4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @sqdmull2_2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sqdmull2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ldr d1, [x1, #8] -; CHECK-NEXT: sqdmull.2d v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmull2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ldr d1, [x1, #8] +; CHECK-SD-NEXT: sqdmull v0.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmull2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: sqdmull2 v0.2d, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> @@ -152,7 +204,7 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: pmull.8h v0, v0, v1 +; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -167,7 +219,7 @@ define <4 x i16> @sqdmulh_4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqdmulh.4h v0, v0, v1 +; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -180,7 +232,7 @@ define <8 x i16> @sqdmulh_8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqdmulh.8h v0, v0, v1 +; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -193,7 +245,7 @@ define <2 x i32> @sqdmulh_2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqdmulh.2s v0, v0, v1 +; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -206,7 +258,7 @@ define <4 x i32> @sqdmulh_4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqdmulh.4s v0, v0, v1 +; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -241,7 +293,7 @@ define <4 x i16> @sqrdmulh_4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrdmulh.4h v0, v0, v1 +; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -254,7 +306,7 @@ define <8 x i16> @sqrdmulh_8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrdmulh.8h v0, v0, v1 +; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -267,7 +319,7 @@ define <2 x i32> @sqrdmulh_2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrdmulh.2s v0, v0, v1 +; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -280,7 +332,7 @@ define <4 x i32> @sqrdmulh_4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrdmulh.4s v0, v0, v1 +; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -289,15 +341,23 @@ define <4 x i32> @sqrdmulh_4s(ptr %A, ptr %B) nounwind { } define i32 @sqrdmulh_1s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: sqrdmulh_1s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: sqrdmulh s0, s0, s1 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqrdmulh_1s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr w8, [x0] +; CHECK-SD-NEXT: ldr w9, [x1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: sqrdmulh s0, s0, s1 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqrdmulh_1s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: ldr s1, [x1] +; CHECK-GI-NEXT: sqrdmulh s0, s0, s1 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %tmp1 = load i32, ptr %A %tmp2 = load i32, ptr %B %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2) @@ -315,7 +375,7 @@ define <2 x float> @fmulx_2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmulx.2s v0, v0, v1 +; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -328,7 +388,7 @@ define <4 x float> @fmulx_4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: fmulx.4s v0, v0, v1 +; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -341,7 +401,7 @@ define <2 x double> @fmulx_2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: fmulx.2d v0, v0, v1 +; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -359,7 +419,7 @@ define <4 x i32> @smlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlal.4s v0, v1, v2 +; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -375,7 +435,7 @@ define <2 x i64> @smlal2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlal.2d v0, v1, v2 +; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -386,14 +446,24 @@ define <2 x i64> @smlal2d(ptr %A, ptr %B, ptr %C) nounwind { } define void @smlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: smlal8h_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: smlal.8h v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: smlal.8h v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: smlal8h_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v3.16b, #1 +; CHECK-SD-NEXT: smlal v3.8h, v0.8b, v2.8b +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: smlal v3.8h, v1.8b, v0.8b +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: smlal8h_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvn v3.8b, v2.8b +; CHECK-GI-NEXT: smull v1.8h, v1.8b, v3.8b +; CHECK-GI-NEXT: movi v3.16b, #1 +; CHECK-GI-NEXT: smlal v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: add v0.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3) %add.1 = add <8 x i16> %smull.1, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257> @@ -404,15 +474,26 @@ define void @smlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, < } define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: smlal2d_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 // =0x101 -; CHECK-NEXT: dup.2d v3, x8 -; CHECK-NEXT: smlal.2d v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: smlal.2d v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: smlal2d_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #257 // =0x101 +; CHECK-SD-NEXT: dup v3.2d, x8 +; CHECK-SD-NEXT: smlal v3.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: smlal v3.2d, v1.2s, v0.2s +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: smlal2d_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvn v3.8b, v2.8b +; CHECK-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-GI-NEXT: smull v1.2d, v1.2s, v3.2s +; CHECK-GI-NEXT: smlal v1.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] +; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> %smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3) %add.1 = add <2 x i64> %smull.1, <i64 257, i64 257> @@ -428,7 +509,7 @@ define <4 x i32> @smlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlsl.4s v0, v1, v2 +; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -444,7 +525,7 @@ define <2 x i64> @smlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlsl.2d v0, v1, v2 +; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -457,10 +538,10 @@ define <2 x i64> @smlsl2d(ptr %A, ptr %B, ptr %C) nounwind { define void @smlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; CHECK-LABEL: smlsl8h_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: smlsl.8h v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: smlsl.8h v3, v1, v0 +; CHECK-NEXT: movi v3.16b, #1 +; CHECK-NEXT: smlsl v3.8h, v0.8b, v2.8b +; CHECK-NEXT: mvn v0.8b, v2.8b +; CHECK-NEXT: smlsl v3.8h, v1.8b, v0.8b ; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -473,15 +554,25 @@ define void @smlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, < } define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: smlsl2d_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 // =0x101 -; CHECK-NEXT: dup.2d v3, x8 -; CHECK-NEXT: smlsl.2d v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: smlsl.2d v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: smlsl2d_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #257 // =0x101 +; CHECK-SD-NEXT: dup v3.2d, x8 +; CHECK-SD-NEXT: smlsl v3.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: smlsl v3.2d, v1.2s, v0.2s +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: smlsl2d_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI31_0 +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI31_0] +; CHECK-GI-NEXT: smlsl v3.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: mvn v0.8b, v2.8b +; CHECK-GI-NEXT: smlsl v3.2d, v1.2s, v0.2s +; CHECK-GI-NEXT: str q3, [x0] +; CHECK-GI-NEXT: ret %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> %smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3) %sub.1 = sub <2 x i64> <i64 257, i64 257>, %smull.1 @@ -502,7 +593,7 @@ define <4 x i32> @sqdmlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlal.4s v0, v1, v2 +; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -518,7 +609,7 @@ define <2 x i64> @sqdmlal2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlal.2d v0, v1, v2 +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -529,13 +620,21 @@ define <2 x i64> @sqdmlal2d(ptr %A, ptr %B, ptr %C) nounwind { } define <4 x i32> @sqdmlal2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sqdmlal2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sqdmlal.4s v0, v1, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x2] +; CHECK-SD-NEXT: ldr d1, [x0, #8] +; CHECK-SD-NEXT: ldr d2, [x1, #8] +; CHECK-SD-NEXT: sqdmlal v0.4s, v1.4h, v2.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q2, [x1] +; CHECK-GI-NEXT: ldr q0, [x2] +; CHECK-GI-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -547,13 +646,21 @@ define <4 x i32> @sqdmlal2_4s(ptr %A, ptr %B, ptr %C) nounwind { } define <2 x i64> @sqdmlal2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sqdmlal2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sqdmlal.2d v0, v1, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x2] +; CHECK-SD-NEXT: ldr d1, [x0, #8] +; CHECK-SD-NEXT: ldr d2, [x1, #8] +; CHECK-SD-NEXT: sqdmlal v0.2d, v1.2s, v2.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q2, [x1] +; CHECK-GI-NEXT: ldr q0, [x2] +; CHECK-GI-NEXT: sqdmlal2 v0.2d, v1.4s, v2.4s +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -570,7 +677,7 @@ define <4 x i32> @sqdmlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlsl.4s v0, v1, v2 +; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -586,7 +693,7 @@ define <2 x i64> @sqdmlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlsl.2d v0, v1, v2 +; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -597,13 +704,21 @@ define <2 x i64> @sqdmlsl2d(ptr %A, ptr %B, ptr %C) nounwind { } define <4 x i32> @sqdmlsl2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sqdmlsl2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sqdmlsl.4s v0, v1, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x2] +; CHECK-SD-NEXT: ldr d1, [x0, #8] +; CHECK-SD-NEXT: ldr d2, [x1, #8] +; CHECK-SD-NEXT: sqdmlsl v0.4s, v1.4h, v2.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q2, [x1] +; CHECK-GI-NEXT: ldr q0, [x2] +; CHECK-GI-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.8h +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -615,13 +730,21 @@ define <4 x i32> @sqdmlsl2_4s(ptr %A, ptr %B, ptr %C) nounwind { } define <2 x i64> @sqdmlsl2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; CHECK-LABEL: sqdmlsl2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: ldr d1, [x0, #8] -; CHECK-NEXT: ldr d2, [x1, #8] -; CHECK-NEXT: sqdmlsl.2d v0, v1, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x2] +; CHECK-SD-NEXT: ldr d1, [x0, #8] +; CHECK-SD-NEXT: ldr d2, [x1, #8] +; CHECK-SD-NEXT: sqdmlsl v0.2d, v1.2s, v2.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q2, [x1] +; CHECK-GI-NEXT: ldr q0, [x2] +; CHECK-GI-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.4s +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -638,7 +761,7 @@ define <4 x i32> @umlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlal.4s v0, v1, v2 +; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -654,7 +777,7 @@ define <2 x i64> @umlal2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlal.2d v0, v1, v2 +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -665,14 +788,24 @@ define <2 x i64> @umlal2d(ptr %A, ptr %B, ptr %C) nounwind { } define void @umlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: umlal8h_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: umlal.8h v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: umlal.8h v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlal8h_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v3.16b, #1 +; CHECK-SD-NEXT: umlal v3.8h, v0.8b, v2.8b +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: umlal v3.8h, v1.8b, v0.8b +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlal8h_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvn v3.8b, v2.8b +; CHECK-GI-NEXT: umull v1.8h, v1.8b, v3.8b +; CHECK-GI-NEXT: movi v3.16b, #1 +; CHECK-GI-NEXT: umlal v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: add v0.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3) %add.1 = add <8 x i16> %umull.1, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257> @@ -683,15 +816,26 @@ define void @umlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, < } define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: umlal2d_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 // =0x101 -; CHECK-NEXT: dup.2d v3, x8 -; CHECK-NEXT: umlal.2d v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: umlal.2d v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlal2d_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #257 // =0x101 +; CHECK-SD-NEXT: dup v3.2d, x8 +; CHECK-SD-NEXT: umlal v3.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: umlal v3.2d, v1.2s, v0.2s +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlal2d_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvn v3.8b, v2.8b +; CHECK-GI-NEXT: adrp x8, .LCPI43_0 +; CHECK-GI-NEXT: umull v1.2d, v1.2s, v3.2s +; CHECK-GI-NEXT: umlal v1.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI43_0] +; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> %umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3) %add.1 = add <2 x i64> %umull.1, <i64 257, i64 257> @@ -707,7 +851,7 @@ define <4 x i32> @umlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlsl.4s v0, v1, v2 +; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -723,7 +867,7 @@ define <2 x i64> @umlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlsl.2d v0, v1, v2 +; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -736,10 +880,10 @@ define <2 x i64> @umlsl2d(ptr %A, ptr %B, ptr %C) nounwind { define void @umlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; CHECK-LABEL: umlsl8h_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: umlsl.8h v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: umlsl.8h v3, v1, v0 +; CHECK-NEXT: movi v3.16b, #1 +; CHECK-NEXT: umlsl v3.8h, v0.8b, v2.8b +; CHECK-NEXT: mvn v0.8b, v2.8b +; CHECK-NEXT: umlsl v3.8h, v1.8b, v0.8b ; CHECK-NEXT: str q3, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -752,15 +896,25 @@ define void @umlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, < } define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: umlsl2d_chain_with_constant: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 // =0x101 -; CHECK-NEXT: dup.2d v3, x8 -; CHECK-NEXT: umlsl.2d v3, v0, v2 -; CHECK-NEXT: mvn.8b v0, v2 -; CHECK-NEXT: umlsl.2d v3, v1, v0 -; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlsl2d_chain_with_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #257 // =0x101 +; CHECK-SD-NEXT: dup v3.2d, x8 +; CHECK-SD-NEXT: umlsl v3.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: mvn v0.8b, v2.8b +; CHECK-SD-NEXT: umlsl v3.2d, v1.2s, v0.2s +; CHECK-SD-NEXT: str q3, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlsl2d_chain_with_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI47_0] +; CHECK-GI-NEXT: umlsl v3.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: mvn v0.8b, v2.8b +; CHECK-GI-NEXT: umlsl v3.2d, v1.2s, v0.2s +; CHECK-GI-NEXT: str q3, [x0] +; CHECK-GI-NEXT: ret %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> %umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3) %add.1 = sub <2 x i64> <i64 257, i64 257>, %umull.1 @@ -776,7 +930,7 @@ define <2 x float> @fmla_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmla.2s v0, v2, v1 +; CHECK-NEXT: fmla v0.2s, v2.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -791,7 +945,7 @@ define <4 x float> @fmla_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmla.4s v0, v2, v1 +; CHECK-NEXT: fmla v0.4s, v2.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -806,7 +960,7 @@ define <2 x double> @fmla_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmla.2d v0, v2, v1 +; CHECK-NEXT: fmla v0.2d, v2.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -825,7 +979,7 @@ define <2 x float> @fmls_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmls.2s v0, v1, v2 +; CHECK-NEXT: fmls v0.2s, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -841,7 +995,7 @@ define <4 x float> @fmls_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.4s v0, v1, v2 +; CHECK-NEXT: fmls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -857,7 +1011,7 @@ define <2 x double> @fmls_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.2d v0, v1, v2 +; CHECK-NEXT: fmls v0.2d, v1.2d, v2.2d ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -873,7 +1027,7 @@ define <2 x float> @fmls_commuted_neg_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmls.2s v0, v1, v2 +; CHECK-NEXT: fmls v0.2s, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -889,7 +1043,7 @@ define <4 x float> @fmls_commuted_neg_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.4s v0, v1, v2 +; CHECK-NEXT: fmls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -905,7 +1059,7 @@ define <2 x double> @fmls_commuted_neg_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.2d v0, v1, v2 +; CHECK-NEXT: fmls v0.2d, v1.2d, v2.2d ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -919,7 +1073,7 @@ define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> ; CHECK-LABEL: fmls_indexed_2s: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: fmls.2s v0, v2, v1[0] +; CHECK-NEXT: fmls v0.2s, v2.2s, v1.s[0] ; CHECK-NEXT: ret entry: %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c @@ -931,7 +1085,7 @@ entry: define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp { ; CHECK-LABEL: fmls_indexed_4s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmls.4s v0, v2, v1[0] +; CHECK-NEXT: fmls v0.4s, v2.4s, v1.s[0] ; CHECK-NEXT: ret entry: %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c @@ -943,7 +1097,7 @@ entry: define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp { ; CHECK-LABEL: fmls_indexed_2d: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmls.2d v0, v2, v1[0] +; CHECK-NEXT: fmls v0.2d, v2.2d, v1.d[0] ; CHECK-NEXT: ret entry: %0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c @@ -956,7 +1110,7 @@ define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float ; CHECK-LABEL: fmla_indexed_scalar_2s: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s2 killed $s2 def $d2 -; CHECK-NEXT: fmla.2s v0, v1, v2 +; CHECK-NEXT: fmla v0.2s, v1.2s, v2.2s ; CHECK-NEXT: ret entry: %v1 = insertelement <2 x float> undef, float %c, i32 0 @@ -969,7 +1123,7 @@ define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float ; CHECK-LABEL: fmla_indexed_scalar_4s: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: fmla.4s v0, v1, v2[0] +; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0] ; CHECK-NEXT: ret entry: %v1 = insertelement <4 x float> undef, float %c, i32 0 @@ -984,7 +1138,7 @@ define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, do ; CHECK-LABEL: fmla_indexed_scalar_2d: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmla.2d v0, v1, v2[0] +; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0] ; CHECK-NEXT: ret entry: %v1 = insertelement <2 x double> undef, double %c, i32 0 @@ -997,7 +1151,7 @@ define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x ; CHECK-LABEL: fmls_indexed_2s_strict: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: fmls.2s v0, v2, v1[0] +; CHECK-NEXT: fmls v0.2s, v2.2s, v1.s[0] ; CHECK-NEXT: ret entry: %0 = fneg <2 x float> %c @@ -1009,7 +1163,7 @@ entry: define <4 x float> @fmls_indexed_4s_strict(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp strictfp { ; CHECK-LABEL: fmls_indexed_4s_strict: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmls.4s v0, v2, v1[0] +; CHECK-NEXT: fmls v0.4s, v2.4s, v1.s[0] ; CHECK-NEXT: ret entry: %0 = fneg <4 x float> %c @@ -1021,7 +1175,7 @@ entry: define <2 x double> @fmls_indexed_2d_strict(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp strictfp { ; CHECK-LABEL: fmls_indexed_2d_strict: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmls.2d v0, v2, v1[0] +; CHECK-NEXT: fmls v0.2d, v2.2d, v1.d[0] ; CHECK-NEXT: ret entry: %0 = fneg <2 x double> %c @@ -1034,7 +1188,7 @@ define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b ; CHECK-LABEL: fmla_indexed_scalar_2s_strict: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: fmla.2s v0, v1, v2[0] +; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0] ; CHECK-NEXT: ret entry: %v1 = insertelement <2 x float> undef, float %c, i32 0 @@ -1047,7 +1201,7 @@ define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b ; CHECK-LABEL: fmla_indexed_scalar_4s_strict: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: fmla.4s v0, v1, v2[0] +; CHECK-NEXT: fmla v0.4s, v1.4s, v2.s[0] ; CHECK-NEXT: ret entry: %v1 = insertelement <4 x float> undef, float %c, i32 0 @@ -1062,7 +1216,7 @@ define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> ; CHECK-LABEL: fmla_indexed_scalar_2d_strict: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmla.2d v0, v1, v2[0] +; CHECK-NEXT: fmla v0.2d, v1.2d, v2.d[0] ; CHECK-NEXT: ret entry: %v1 = insertelement <2 x double> undef, double %c, i32 0 @@ -1081,7 +1235,7 @@ define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: mul_4h: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mul.4h v0, v0, v1[1] +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = mul <4 x i16> %A, %tmp3 @@ -1091,7 +1245,7 @@ define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind { define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: mul_8h: ; CHECK: // %bb.0: -; CHECK-NEXT: mul.8h v0, v0, v1[1] +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %tmp4 = mul <8 x i16> %A, %tmp3 @@ -1102,7 +1256,7 @@ define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: mul_2s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mul.2s v0, v0, v1[1] +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = mul <2 x i32> %A, %tmp3 @@ -1112,7 +1266,7 @@ define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind { define <4 x i32> @mul_4s(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: mul_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: mul.4s v0, v0, v1[1] +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = mul <4 x i32> %A, %tmp3 @@ -1120,17 +1274,29 @@ define <4 x i32> @mul_4s(<4 x i32> %A, <4 x i32> %B) nounwind { } define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { -; CHECK-LABEL: mul_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov x10, d1 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: mov.d x8, v1[1] -; CHECK-NEXT: mov.d x9, v0[1] -; CHECK-NEXT: mul x10, x11, x10 -; CHECK-NEXT: mul x8, x9, x8 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: mov.d v0[1], x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mul_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov x10, d1 +; CHECK-SD-NEXT: fmov x11, d0 +; CHECK-SD-NEXT: mov x8, v1.d[1] +; CHECK-SD-NEXT: mov x9, v0.d[1] +; CHECK-SD-NEXT: mul x10, x11, x10 +; CHECK-SD-NEXT: mul x8, x9, x8 +; CHECK-SD-NEXT: fmov d0, x10 +; CHECK-SD-NEXT: mov v0.d[1], x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mul_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x10, d0 +; CHECK-GI-NEXT: fmov x11, d1 +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov x9, v1.d[1] +; CHECK-GI-NEXT: mul x10, x10, x11 +; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: fmov d0, x10 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret %tmp1 = mul <2 x i64> %A, %B ret <2 x i64> %tmp1 } @@ -1139,7 +1305,7 @@ define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind { ; CHECK-LABEL: fmul_lane_2s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: fmul.2s v0, v0, v1[1] +; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1> %tmp4 = fmul <2 x float> %A, %tmp3 @@ -1149,7 +1315,7 @@ define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind { define <4 x float> @fmul_lane_4s(<4 x float> %A, <4 x float> %B) nounwind { ; CHECK-LABEL: fmul_lane_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul.4s v0, v0, v1[1] +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = fmul <4 x float> %A, %tmp3 @@ -1159,7 +1325,7 @@ define <4 x float> @fmul_lane_4s(<4 x float> %A, <4 x float> %B) nounwind { define <2 x double> @fmul_lane_2d(<2 x double> %A, <2 x double> %B) nounwind { ; CHECK-LABEL: fmul_lane_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul.2d v0, v0, v1[1] +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1> %tmp4 = fmul <2 x double> %A, %tmp3 @@ -1169,7 +1335,7 @@ define <2 x double> @fmul_lane_2d(<2 x double> %A, <2 x double> %B) nounwind { define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmul_lane_s: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul.s s0, s0, v1[3] +; CHECK-NEXT: fmul s0, s0, v1.s[3] ; CHECK-NEXT: ret %B = extractelement <4 x float> %vec, i32 3 %res = fmul float %A, %B @@ -1179,7 +1345,7 @@ define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind { define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmul_lane_d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul.d d0, d0, v1[1] +; CHECK-NEXT: fmul d0, d0, v1.d[1] ; CHECK-NEXT: ret %B = extractelement <2 x double> %vec, i32 1 %res = fmul double %A, %B @@ -1192,7 +1358,7 @@ define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind { ; CHECK-LABEL: fmulx_lane_2s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: fmulx.2s v0, v0, v1[1] +; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %A, <2 x float> %tmp3) @@ -1202,7 +1368,7 @@ define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind { define <4 x float> @fmulx_lane_4s(<4 x float> %A, <4 x float> %B) nounwind { ; CHECK-LABEL: fmulx_lane_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: fmulx.4s v0, v0, v1[1] +; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %A, <4 x float> %tmp3) @@ -1212,7 +1378,7 @@ define <4 x float> @fmulx_lane_4s(<4 x float> %A, <4 x float> %B) nounwind { define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind { ; CHECK-LABEL: fmulx_lane_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmulx.2d v0, v0, v1[1] +; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %A, <2 x double> %tmp3) @@ -1223,7 +1389,7 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: sqdmulh_lane_4h: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmulh.4h v0, v0, v1[1] +; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3) @@ -1233,7 +1399,7 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind { define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: sqdmulh_lane_8h: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmulh.8h v0, v0, v1[1] +; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3) @@ -1244,7 +1410,7 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: sqdmulh_lane_2s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmulh.2s v0, v0, v1[1] +; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3) @@ -1254,7 +1420,7 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind { define <4 x i32> @sqdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: sqdmulh_lane_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmulh.4s v0, v0, v1[1] +; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3) @@ -1265,7 +1431,7 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: sqdmulh_lane_1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: sqdmulh.s s0, s1, v0[1] +; CHECK-NEXT: sqdmulh s0, s1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp1 = extractelement <4 x i32> %B, i32 1 @@ -1277,7 +1443,7 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: sqrdmulh_lane_4h: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqrdmulh.4h v0, v0, v1[1] +; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3) @@ -1287,7 +1453,7 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind { define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: sqrdmulh_lane_8h: ; CHECK: // %bb.0: -; CHECK-NEXT: sqrdmulh.8h v0, v0, v1[1] +; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3) @@ -1298,7 +1464,7 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: sqrdmulh_lane_2s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqrdmulh.2s v0, v0, v1[1] +; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3) @@ -1308,7 +1474,7 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind { define <4 x i32> @sqrdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: sqrdmulh_lane_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: sqrdmulh.4s v0, v0, v1[1] +; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3) @@ -1319,7 +1485,7 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: sqrdmulh_lane_1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: sqrdmulh.s s0, s1, v0[1] +; CHECK-NEXT: sqrdmulh s0, s1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp1 = extractelement <4 x i32> %B, i32 1 @@ -1331,7 +1497,7 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: sqdmull_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmull.4s v0, v0, v1[1] +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp3) @@ -1342,7 +1508,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: sqdmull_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmull.2d v0, v0, v1[1] +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp3) @@ -1350,10 +1516,16 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind { } define <4 x i32> @sqdmull2_lane_4s(<8 x i16> %A, <8 x i16> %B) nounwind { -; CHECK-LABEL: sqdmull2_lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull2.4s v0, v0, v1[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmull2_lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmull2_lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.h[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -1361,10 +1533,16 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x i16> %A, <8 x i16> %B) nounwind { } define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind { -; CHECK-LABEL: sqdmull2_lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull2.2d v0, v0, v1[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmull2_lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmull2_lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -1375,7 +1553,7 @@ define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: umull_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umull.4s v0, v0, v1[1] +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp3) @@ -1386,7 +1564,7 @@ define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: umull_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umull.2d v0, v0, v1[1] +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp3) @@ -1397,7 +1575,7 @@ define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind { ; CHECK-LABEL: smull_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smull.4s v0, v0, v1[1] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp3) @@ -1408,7 +1586,7 @@ define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: smull_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smull.2d v0, v0, v1[1] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[1] ; CHECK-NEXT: ret %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp3) @@ -1419,8 +1597,8 @@ define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi ; CHECK-LABEL: smlal_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smlal.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: smlal v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1432,8 +1610,8 @@ define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi ; CHECK-LABEL: smlal_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smlal.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: smlal v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1445,8 +1623,8 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun ; CHECK-LABEL: sqdmlal_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmlal.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1458,8 +1636,8 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) noun ; CHECK-LABEL: sqdmlal_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmlal.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: sqdmlal v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1468,11 +1646,18 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) noun } define <4 x i32> @sqdmlal2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: sqdmlal2_lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlal2 v2.4s, v0.8h, v1.h[1] +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: sqdmlal v0.4s, v3.4h, v1.h[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -1481,11 +1666,18 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nou } define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: sqdmlal2_lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlal2 v2.2d, v0.4s, v1.s[1] +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: sqdmlal v0.2d, v3.2s, v1.s[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -1499,7 +1691,7 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlal.h s2, h1, v0[1] +; CHECK-NEXT: sqdmlal s2, h1, v0.h[1] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 @@ -1517,7 +1709,7 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlsl.h s2, h1, v0[1] +; CHECK-NEXT: sqdmlsl s2, h1, v0.h[1] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 @@ -1530,15 +1722,24 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: sqadd_lane1_sqdmull4s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull.4s v0, v0, v1 -; CHECK-NEXT: mov.s w8, v0[1] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: sqadd s0, s0, s1 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqadd_lane1_sqdmull4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: sqadd s0, s0, s1 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqadd_lane1_sqdmull4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: mov s0, v0.s[1] +; CHECK-GI-NEXT: sqadd s0, s1, s0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) %prod = extractelement <4 x i32> %prod.vec, i32 1 %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod) @@ -1546,15 +1747,24 @@ define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { } define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: sqsub_lane1_sqdmull4s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull.4s v0, v0, v1 -; CHECK-NEXT: mov.s w8, v0[1] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: sqsub s0, s0, s1 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqsub_lane1_sqdmull4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: sqsub s0, s0, s1 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqsub_lane1_sqdmull4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: mov s0, v0.s[1] +; CHECK-GI-NEXT: sqsub s0, s1, s0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) %prod = extractelement <4 x i32> %prod.vec, i32 1 %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod) @@ -1567,7 +1777,7 @@ define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-NEXT: fmov d1, x0 ; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlal.s d1, s2, v0[1] +; CHECK-NEXT: sqdmlal d1, s2, v0.s[1] ; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 @@ -1584,7 +1794,7 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-NEXT: fmov d1, x0 ; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlsl.s d1, s2, v0[1] +; CHECK-NEXT: sqdmlsl d1, s2, v0.s[1] ; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 @@ -1599,8 +1809,8 @@ define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi ; CHECK-LABEL: umlal_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umlal.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: umlal v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1612,8 +1822,8 @@ define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi ; CHECK-LABEL: umlal_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umlal.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: umlal v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1626,8 +1836,8 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi ; CHECK-LABEL: smlsl_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smlsl.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: smlsl v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1639,8 +1849,8 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi ; CHECK-LABEL: smlsl_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: smlsl.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: smlsl v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1652,8 +1862,8 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun ; CHECK-LABEL: sqdmlsl_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmlsl.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: sqdmlsl v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1665,8 +1875,8 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) noun ; CHECK-LABEL: sqdmlsl_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: sqdmlsl.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: sqdmlsl v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1675,11 +1885,18 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) noun } define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind { -; CHECK-LABEL: sqdmlsl2_lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlsl2 v2.4s, v0.8h, v1.h[1] +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: sqdmlsl v0.4s, v3.4h, v1.h[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -1688,11 +1905,18 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nou } define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind { -; CHECK-LABEL: sqdmlsl2_lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlsl2 v2.2d, v0.4s, v1.s[1] +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: sqdmlsl v0.2d, v3.2s, v1.s[1] +; CHECK-GI-NEXT: ret %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -1704,8 +1928,8 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi ; CHECK-LABEL: umlsl_lane_4s: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umlsl.4s v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: umlsl v2.4s, v0.4h, v1.h[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4) @@ -1717,8 +1941,8 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi ; CHECK-LABEL: umlsl_lane_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umlsl.2d v2, v0, v1[1] -; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: umlsl v2.2d, v0.2s, v1.s[1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4) @@ -1748,7 +1972,7 @@ define double @fmulxd(double %a, double %b) nounwind { define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmulxs_lane: ; CHECK: // %bb.0: -; CHECK-NEXT: fmulx.s s0, s0, v1[3] +; CHECK-NEXT: fmulx s0, s0, v1.s[3] ; CHECK-NEXT: ret %b = extractelement <4 x float> %vec, i32 3 %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind @@ -1758,7 +1982,7 @@ define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmulxd_lane: ; CHECK: // %bb.0: -; CHECK-NEXT: fmulx.d d0, d0, v1[1] +; CHECK-NEXT: fmulx d0, d0, v1.d[1] ; CHECK-NEXT: ret %b = extractelement <2 x double> %vec, i32 1 %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind @@ -1772,7 +1996,7 @@ declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECK-LABEL: smull2_8h_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2.8h v0, v0, v1 +; CHECK-NEXT: smull2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: ret %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -1783,7 +2007,7 @@ define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECK-LABEL: foo0: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2.8h v0, v0, v1 +; CHECK-NEXT: smull2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp = bitcast <16 x i8> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1798,7 +2022,7 @@ define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind { define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind { ; CHECK-LABEL: foo1: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2.4s v0, v0, v1 +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp = bitcast <8 x i16> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1813,7 +2037,7 @@ define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind { define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind { ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2.2d v0, v0, v1 +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp = bitcast <4 x i32> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1828,7 +2052,7 @@ define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind { define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECK-LABEL: foo3: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2.8h v0, v0, v1 +; CHECK-NEXT: umull2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp = bitcast <16 x i8> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1843,7 +2067,7 @@ define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind { define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind { ; CHECK-LABEL: foo4: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2.4s v0, v0, v1 +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp = bitcast <8 x i16> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1858,7 +2082,7 @@ define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind { define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind { ; CHECK-LABEL: foo5: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2.2d v0, v0, v1 +; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp = bitcast <4 x i32> %a to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -1871,11 +2095,18 @@ define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind { } define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { -; CHECK-LABEL: foo6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smull2.4s v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: foo6: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: smull2 v0.4s, v1.8h, v2.h[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: foo6: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v2.h[1] +; CHECK-GI-NEXT: ret entry: %0 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -1889,7 +2120,7 @@ define <4 x i32> @foo6a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readn ; CHECK-LABEL: foo6a: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smull.4s v0, v1, v2[1] +; CHECK-NEXT: smull v0.4s, v1.4h, v2.h[1] ; CHECK-NEXT: ret entry: %0 = bitcast <8 x i16> %b to <2 x i64> @@ -1901,11 +2132,18 @@ entry: } define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { -; CHECK-LABEL: foo7: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smull2.2d v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: foo7: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: smull2 v0.2d, v1.4s, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: foo7: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: smull v0.2d, v0.2s, v2.s[1] +; CHECK-GI-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -1919,7 +2157,7 @@ define <2 x i64> @foo7a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readn ; CHECK-LABEL: foo7a: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smull.2d v0, v1, v2[1] +; CHECK-NEXT: smull v0.2d, v1.2s, v2.s[1] ; CHECK-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> @@ -1932,11 +2170,18 @@ entry: define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { -; CHECK-LABEL: foo8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umull2.4s v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: foo8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: umull2 v0.4s, v1.8h, v2.h[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: foo8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: umull v0.4s, v0.4h, v2.h[1] +; CHECK-GI-NEXT: ret entry: %0 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -1950,7 +2195,7 @@ define <4 x i32> @foo8a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readn ; CHECK-LABEL: foo8a: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umull.4s v0, v1, v2[1] +; CHECK-NEXT: umull v0.4s, v1.4h, v2.h[1] ; CHECK-NEXT: ret entry: %0 = bitcast <8 x i16> %b to <2 x i64> @@ -1962,11 +2207,18 @@ entry: } define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { -; CHECK-LABEL: foo9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umull2.2d v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: foo9: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: umull2 v0.2d, v1.4s, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: foo9: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: umull v0.2d, v0.2s, v2.s[1] +; CHECK-GI-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -1980,7 +2232,7 @@ define <2 x i64> @foo9a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readn ; CHECK-LABEL: foo9a: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umull.2d v0, v1, v2[1] +; CHECK-NEXT: umull v0.2d, v1.2s, v2.s[1] ; CHECK-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> @@ -1994,7 +2246,7 @@ entry: define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { ; CHECK-LABEL: bar0: ; CHECK: // %bb.0: -; CHECK-NEXT: smlal2.8h v0, v1, v2 +; CHECK-NEXT: smlal2 v0.8h, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp = bitcast <16 x i8> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2010,7 +2262,7 @@ define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { ; CHECK-LABEL: bar1: ; CHECK: // %bb.0: -; CHECK-NEXT: smlal2.4s v0, v1, v2 +; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.8h ; CHECK-NEXT: ret %tmp = bitcast <8 x i16> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2026,7 +2278,7 @@ define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { define <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { ; CHECK-LABEL: bar2: ; CHECK: // %bb.0: -; CHECK-NEXT: smlal2.2d v0, v1, v2 +; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2042,7 +2294,7 @@ define <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { ; CHECK-LABEL: bar3: ; CHECK: // %bb.0: -; CHECK-NEXT: umlal2.8h v0, v1, v2 +; CHECK-NEXT: umlal2 v0.8h, v1.16b, v2.16b ; CHECK-NEXT: ret %tmp = bitcast <16 x i8> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2058,7 +2310,7 @@ define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { ; CHECK-LABEL: bar4: ; CHECK: // %bb.0: -; CHECK-NEXT: umlal2.4s v0, v1, v2 +; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.8h ; CHECK-NEXT: ret %tmp = bitcast <8 x i16> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2074,7 +2326,7 @@ define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { ; CHECK-LABEL: bar5: ; CHECK: // %bb.0: -; CHECK-NEXT: umlal2.2d v0, v1, v2 +; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2088,11 +2340,18 @@ define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { } define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { -; CHECK-LABEL: mlal2_1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smlal2.4s v0, v1, v2[3] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mlal2_1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: smlal2 v0.4s, v1.8h, v2.h[3] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mlal2_1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: dup v2.8h, v2.h[3] +; CHECK-GI-NEXT: smlal2 v0.4s, v1.8h, v2.8h +; CHECK-GI-NEXT: ret %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> %tmp = bitcast <8 x i16> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2106,11 +2365,18 @@ define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { } define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { -; CHECK-LABEL: mlal2_2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: smlal2.2d v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mlal2_2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: smlal2 v0.2d, v1.4s, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mlal2_2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: dup v2.4s, v2.s[1] +; CHECK-GI-NEXT: smlal2 v0.2d, v1.4s, v2.4s +; CHECK-GI-NEXT: ret %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %tmp = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2124,11 +2390,18 @@ define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { } define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { -; CHECK-LABEL: mlal2_4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umlal2.4s v0, v1, v2[2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mlal2_4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: umlal2 v0.4s, v1.8h, v2.h[2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mlal2_4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: dup v2.8h, v2.h[2] +; CHECK-GI-NEXT: umlal2 v0.4s, v1.8h, v2.8h +; CHECK-GI-NEXT: ret %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> %tmp = bitcast <8 x i16> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2142,11 +2415,18 @@ define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { } define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { -; CHECK-LABEL: mlal2_5: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umlal2.2d v0, v1, v2[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mlal2_5: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mlal2_5: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: dup v2.4s, v2.s[0] +; CHECK-GI-NEXT: umlal2 v0.2d, v1.4s, v2.4s +; CHECK-GI-NEXT: ret %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer %tmp = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> @@ -2164,7 +2444,7 @@ define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone s ; CHECK-LABEL: vmulq_n_f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: fmul.2d v0, v0, v1[0] +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0] ; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x double> undef, double %y, i32 0 @@ -2177,7 +2457,7 @@ define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp ; CHECK-LABEL: vmulq_n_f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 -; CHECK-NEXT: fmul.4s v0, v0, v1[0] +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0] ; CHECK-NEXT: ret entry: %vecinit.i = insertelement <4 x float> undef, float %y, i32 0 @@ -2192,7 +2472,7 @@ define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp { ; CHECK-LABEL: vmul_n_f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 -; CHECK-NEXT: fmul.2s v0, v0, v1[0] +; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0] ; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x float> undef, float %y, i32 0 @@ -2204,7 +2484,7 @@ entry: define <4 x i16> @vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp { ; CHECK-LABEL: vmla_laneq_s16_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mla.4h v0, v1, v2[6] +; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[6] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> @@ -2216,7 +2496,7 @@ entry: define <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp { ; CHECK-LABEL: vmla_laneq_s32_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mla.2s v0, v1, v2[3] +; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3> @@ -2226,10 +2506,16 @@ entry: } define <8 x i16> @not_really_vmlaq_laneq_s16_test(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone ssp { -; CHECK-LABEL: not_really_vmlaq_laneq_s16_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mla.8h v0, v1, v2[5] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: not_really_vmlaq_laneq_s16_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mla v0.8h, v1.8h, v2.h[5] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: not_really_vmlaq_laneq_s16_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ext v2.16b, v2.16b, v0.16b, #8 +; CHECK-GI-NEXT: mla v0.8h, v1.8h, v2.h[1] +; CHECK-GI-NEXT: ret entry: %shuffle1 = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %shuffle2 = shufflevector <4 x i16> %shuffle1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> @@ -2239,10 +2525,16 @@ entry: } define <4 x i32> @not_really_vmlaq_laneq_s32_test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone ssp { -; CHECK-LABEL: not_really_vmlaq_laneq_s32_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mla.4s v0, v1, v2[3] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: not_really_vmlaq_laneq_s32_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mla v0.4s, v1.4s, v2.s[3] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: not_really_vmlaq_laneq_s32_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ext v2.16b, v2.16b, v0.16b, #8 +; CHECK-GI-NEXT: mla v0.4s, v1.4s, v2.s[1] +; CHECK-GI-NEXT: ret entry: %shuffle1 = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %shuffle2 = shufflevector <2 x i32> %shuffle1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -2254,7 +2546,7 @@ entry: define <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { ; CHECK-LABEL: vmull_laneq_s16_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: smull.4s v0, v0, v1[6] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[6] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> @@ -2265,7 +2557,7 @@ entry: define <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { ; CHECK-LABEL: vmull_laneq_s32_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: smull.2d v0, v0, v1[2] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[2] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> @@ -2275,7 +2567,7 @@ entry: define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { ; CHECK-LABEL: vmull_laneq_u16_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: umull.4s v0, v0, v1[6] +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[6] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> @@ -2286,7 +2578,7 @@ entry: define <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { ; CHECK-LABEL: vmull_laneq_u32_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: umull.2d v0, v0, v1[2] +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[2] ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> @@ -2297,8 +2589,8 @@ entry: define <4 x i32> @vmull_low_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { ; CHECK-LABEL: vmull_low_n_s16_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: dup.4h v0, w0 -; CHECK-NEXT: smull.4s v0, v1, v0 +; CHECK-NEXT: dup v0.4h, w0 +; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h ; CHECK-NEXT: ret entry: %conv = trunc i32 %d to i16 @@ -2314,11 +2606,18 @@ entry: } define <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { -; CHECK-LABEL: vmull_high_n_s16_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: dup.8h v0, w0 -; CHECK-NEXT: smull2.4s v0, v1, v0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmull_high_n_s16_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: dup v0.8h, w0 +; CHECK-SD-NEXT: smull2 v0.4s, v1.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmull_high_n_s16_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: dup v1.4h, w0 +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %conv = trunc i32 %d to i16 %0 = bitcast <8 x i16> %b to <2 x i64> @@ -2333,11 +2632,18 @@ entry: } define <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { -; CHECK-LABEL: vmull_high_n_s32_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: dup.4s v0, w0 -; CHECK-NEXT: smull2.2d v0, v1, v0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmull_high_n_s32_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: dup v0.4s, w0 +; CHECK-SD-NEXT: smull2 v0.2d, v1.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmull_high_n_s32_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: dup v1.2s, w0 +; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-GI-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -2349,11 +2655,18 @@ entry: } define <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { -; CHECK-LABEL: vmull_high_n_u16_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: dup.8h v0, w0 -; CHECK-NEXT: umull2.4s v0, v1, v0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmull_high_n_u16_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: dup v0.8h, w0 +; CHECK-SD-NEXT: umull2 v0.4s, v1.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmull_high_n_u16_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: dup v1.4h, w0 +; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %conv = trunc i32 %d to i16 %0 = bitcast <8 x i16> %b to <2 x i64> @@ -2368,11 +2681,18 @@ entry: } define <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { -; CHECK-LABEL: vmull_high_n_u32_test: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: dup.4s v0, w0 -; CHECK-NEXT: umull2.2d v0, v1, v0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmull_high_n_u32_test: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: dup v0.4s, w0 +; CHECK-SD-NEXT: umull2 v0.2d, v1.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmull_high_n_u32_test: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v1.d[1] +; CHECK-GI-NEXT: dup v1.2s, w0 +; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-GI-NEXT: ret entry: %0 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> @@ -2384,10 +2704,17 @@ entry: } define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: vmul_built_dup_test: -; CHECK: // %bb.0: -; CHECK-NEXT: mul.4s v0, v0, v1[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmul_built_dup_test: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul v0.4s, v0.4s, v1.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmul_built_dup_test: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov s1, v1.s[1] +; CHECK-GI-NEXT: dup v1.4s, v1.s[0] +; CHECK-GI-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %vget_lane = extractelement <4 x i32> %b, i32 1 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 @@ -2398,11 +2725,19 @@ define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) { } define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: vmul_built_dup_fromsmall_test: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mul.4h v0, v0, v1[3] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmul_built_dup_fromsmall_test: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[3] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmul_built_dup_fromsmall_test: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h1, v1.h[3] +; CHECK-GI-NEXT: dup v1.4h, v1.h[0] +; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret %vget_lane = extractelement <4 x i16> %b, i32 3 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 @@ -2413,11 +2748,18 @@ define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) { } define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: vmulq_built_dup_fromsmall_test: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mul.8h v0, v0, v1[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: vmulq_built_dup_fromsmall_test: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mul v0.8h, v0.8h, v1.h[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: vmulq_built_dup_fromsmall_test: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: dup v1.8h, v1.h[0] +; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %vget_lane = extractelement <4 x i16> %b, i32 0 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 @@ -2434,7 +2776,7 @@ define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) { define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: mull_from_two_extracts: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull2.2d v0, v0, v1 +; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> @@ -2446,7 +2788,7 @@ define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: mlal_from_two_extracts: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.2d v0, v1, v2 +; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.4s ; CHECK-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> @@ -2459,8 +2801,8 @@ define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x define <2 x i64> @mull_from_extract_dup_low(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: mull_from_extract_dup_low: ; CHECK: // %bb.0: -; CHECK-NEXT: dup.2s v1, w0 -; CHECK-NEXT: sqdmull.2d v0, v0, v1 +; CHECK-NEXT: dup v1.2s, w0 +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 @@ -2472,11 +2814,18 @@ define <2 x i64> @mull_from_extract_dup_low(<4 x i32> %lhs, i32 %rhs) { } define <2 x i64> @mull_from_extract_dup_high(<4 x i32> %lhs, i32 %rhs) { -; CHECK-LABEL: mull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.4s v1, w0 -; CHECK-NEXT: sqdmull2.2d v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.4s, w0 +; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.2s, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 @@ -2489,8 +2838,8 @@ define <2 x i64> @mull_from_extract_dup_high(<4 x i32> %lhs, i32 %rhs) { define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { ; CHECK-LABEL: pmull_from_extract_dup_low: ; CHECK: // %bb.0: -; CHECK-NEXT: dup.8b v1, w0 -; CHECK-NEXT: pmull.8h v0, v0, v1 +; CHECK-NEXT: dup v1.8b, w0 +; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -2504,8 +2853,8 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { ; CHECK-LABEL: pmull_from_extract_dup_high: ; CHECK: // %bb.0: -; CHECK-NEXT: dup.16b v1, w0 -; CHECK-NEXT: pmull2.8h v0, v0, v1 +; CHECK-NEXT: dup v1.16b, w0 +; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -2520,8 +2869,8 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) ; CHECK-LABEL: pmull_from_extract_duplane_low: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup.8b v1, v1[0] -; CHECK-NEXT: pmull.8h v0, v0, v1 +; CHECK-NEXT: dup v1.8b, v1.b[0] +; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -2534,8 +2883,8 @@ define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) ; CHECK-LABEL: pmull_from_extract_duplane_high: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup.16b v1, v1[0] -; CHECK-NEXT: pmull2.8h v0, v0, v1 +; CHECK-NEXT: dup v1.16b, v1.b[0] +; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -2547,7 +2896,7 @@ define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) define <2 x i64> @sqdmull_from_extract_duplane_low(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: sqdmull_from_extract_duplane_low: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull.2d v0, v0, v1[0] +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0] ; CHECK-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2557,10 +2906,16 @@ define <2 x i64> @sqdmull_from_extract_duplane_low(<4 x i32> %lhs, <4 x i32> %rh } define <2 x i64> @sqdmull_from_extract_duplane_high(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK-LABEL: sqdmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmull2.2d v0, v0, v1[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[0] +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2571,7 +2926,7 @@ define <2 x i64> @sqdmull_from_extract_duplane_high(<4 x i32> %lhs, <4 x i32> %r define <2 x i64> @sqdmlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: sqdmlal_from_extract_duplane_low: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal.2d v0, v1, v2[0] +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0] ; CHECK-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2582,10 +2937,16 @@ define <2 x i64> @sqdmlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> % } define <2 x i64> @sqdmlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK-LABEL: sqdmlal_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.2d v0, v1, v2[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0] +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2597,7 +2958,7 @@ define <2 x i64> @sqdmlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> define <2 x i64> @umlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: umlal_from_extract_duplane_low: ; CHECK: // %bb.0: -; CHECK-NEXT: umlal.2d v0, v1, v2[0] +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0] ; CHECK-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2608,10 +2969,16 @@ define <2 x i64> @umlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lh } define <2 x i64> @umlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK-LABEL: umlal_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: umlal2.2d v0, v1, v2[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlal_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlal_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.s[0] +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> @@ -2623,7 +2990,7 @@ define <2 x i64> @umlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %l define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) { ; CHECK-LABEL: scalar_fmla_from_extract_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmla.s s0, s1, v2[3] +; CHECK-NEXT: fmla s0, s1, v2.s[3] ; CHECK-NEXT: ret %rhs = extractelement <4 x float> %rvec, i32 3 %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) @@ -2631,11 +2998,18 @@ define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x floa } define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) { -; CHECK-LABEL: scalar_fmla_from_extract_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmla.s s0, s1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scalar_fmla_from_extract_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: fmla s0, s1, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scalar_fmla_from_extract_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: mov s2, v2.s[1] +; CHECK-GI-NEXT: fmadd s0, s1, s2, s0 +; CHECK-GI-NEXT: ret %rhs = extractelement <2 x float> %rvec, i32 1 %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) ret float %res @@ -2644,7 +3018,7 @@ define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x floa define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) { ; CHECK-LABEL: scalar_fmls_from_extract_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmls.s s0, s1, v2[3] +; CHECK-NEXT: fmls s0, s1, v2.s[3] ; CHECK-NEXT: ret %rhs.scal = extractelement <4 x float> %rvec, i32 3 %rhs = fsub float -0.0, %rhs.scal @@ -2656,7 +3030,7 @@ define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x floa ; CHECK-LABEL: scalar_fmls_from_extract_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmls.s s0, s1, v2[1] +; CHECK-NEXT: fmls s0, s1, v2.s[1] ; CHECK-NEXT: ret %rhs.scal = extractelement <2 x float> %rvec, i32 1 %rhs = fsub float -0.0, %rhs.scal @@ -2669,7 +3043,7 @@ declare float @llvm.fma.f32(float, float, float) define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) { ; CHECK-LABEL: scalar_fmla_from_extract_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmla.d d0, d1, v2[1] +; CHECK-NEXT: fmla d0, d1, v2.d[1] ; CHECK-NEXT: ret %rhs = extractelement <2 x double> %rvec, i32 1 %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum) @@ -2679,7 +3053,7 @@ define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x d define double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) { ; CHECK-LABEL: scalar_fmls_from_extract_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmls.d d0, d1, v2[1] +; CHECK-NEXT: fmls d0, d1, v2.d[1] ; CHECK-NEXT: ret %rhs.scal = extractelement <2 x double> %rvec, i32 1 %rhs = fsub double -0.0, %rhs.scal @@ -2692,7 +3066,7 @@ declare double @llvm.fma.f64(double, double, double) define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) { ; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmls.2s v0, v1, v2[3] +; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[3] ; CHECK-NEXT: ret %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3> @@ -2704,7 +3078,7 @@ define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 ; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmls.2s v0, v1, v2[1] +; CHECK-NEXT: fmls v0.2s, v1.2s, v2.s[1] ; CHECK-NEXT: ret %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <2 x i32> <i32 1, i32 1> @@ -2715,7 +3089,7 @@ define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) { ; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmls.4s v0, v1, v2[3] +; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[3] ; CHECK-NEXT: ret %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> @@ -2727,7 +3101,7 @@ define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 ; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: fmls.4s v0, v1, v2[1] +; CHECK-NEXT: fmls v0.4s, v1.4s, v2.s[1] ; CHECK-NEXT: ret %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -2738,7 +3112,7 @@ define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) { ; CHECK-LABEL: fmls_with_fneg_before_extract_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmls.2d v0, v1, v2[1] +; CHECK-NEXT: fmls v0.2d, v1.2d, v2.d[1] ; CHECK-NEXT: ret %rhs_neg = fsub <2 x double> <double -0.0, double -0.0>, %rhs %splat = shufflevector <2 x double> %rhs_neg, <2 x double> undef, <2 x i32> <i32 1, i32 1> @@ -2770,7 +3144,7 @@ define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind { ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlal.h s2, h0, v1[0] +; CHECK-NEXT: sqdmlal s2, h0, v1.h[0] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 @@ -2801,7 +3175,7 @@ define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind { ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlsl.h s2, h0, v1[0] +; CHECK-NEXT: sqdmlsl s2, h0, v1.h[0] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 @@ -2831,7 +3205,7 @@ define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x1 ; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull.1q v0, v1, v0 +; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d ; CHECK-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val @@ -2840,7 +3214,7 @@ define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { ; CHECK-LABEL: test_pmull_high_64: ; CHECK: // %bb.0: -; CHECK-NEXT: pmull2.1q v0, v0, v1 +; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d ; CHECK-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 @@ -2851,15 +3225,23 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind { -; CHECK-LABEL: test_mul_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: mul x8, x9, x8 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_mul_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fmov x8, d1 +; CHECK-SD-NEXT: fmov x9, d0 +; CHECK-SD-NEXT: mul x8, x9, x8 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_mul_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: mul x8, x8, x9 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %prod = mul <1 x i64> %lhs, %rhs ret <1 x i64> %prod } @@ -2867,7 +3249,7 @@ define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind { define <4 x i32> @sqdmlal4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) { ; CHECK-LABEL: sqdmlal4s_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal.4s v0, v1, v2 +; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %v2) %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp) @@ -2877,7 +3259,7 @@ define <4 x i32> @sqdmlal4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) { define <2 x i64> @sqdmlal2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) { ; CHECK-LABEL: sqdmlal2d_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal.2d v0, v1, v2 +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %v2) %sum = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp) @@ -2887,7 +3269,7 @@ define <2 x i64> @sqdmlal2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) { define <4 x i32> @sqdmlal2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) { ; CHECK-LABEL: sqdmlal2_4s_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.4s v0, v1, v2 +; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -2899,7 +3281,7 @@ define <4 x i32> @sqdmlal2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) define <2 x i64> @sqdmlal2_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-LABEL: sqdmlal2_2d_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.2d v0, v1, v2 +; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3> %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 2, i32 3> @@ -2912,7 +3294,7 @@ define <4 x i32> @sqdmlal_lane_4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> % ; CHECK-LABEL: sqdmlal_lane_4s_lib: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sqdmlal.4s v0, v1, v2[3] +; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3] ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v2, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %tmp1 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %tmp0) @@ -2924,7 +3306,7 @@ define <2 x i64> @sqdmlal_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> % ; CHECK-LABEL: sqdmlal_lane_2d_lib: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sqdmlal.2d v0, v1, v2[1] +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1] ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i32> %v2, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp1 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %tmp0) @@ -2933,10 +3315,16 @@ define <2 x i64> @sqdmlal_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> % } define <4 x i32> @sqdmlal2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) { -; CHECK-LABEL: sqdmlal2_lane_4s_lib: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.4s v0, v1, v2[7] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_lane_4s_lib: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[7] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_lane_4s_lib: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[7] +; CHECK-GI-NEXT: ret %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7> %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1) @@ -2945,10 +3333,16 @@ define <4 x i32> @sqdmlal2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> } define <2 x i64> @sqdmlal2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) { -; CHECK-LABEL: sqdmlal2_lane_2d_lib: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlal2.2d v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal2_lane_2d_lib: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal2_lane_2d_lib: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1] +; CHECK-GI-NEXT: ret %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3> %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1) @@ -2959,7 +3353,7 @@ define <2 x i64> @sqdmlal2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> define <4 x i32> @sqdmlsl4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) { ; CHECK-LABEL: sqdmlsl4s_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl.4s v0, v1, v2 +; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.4h ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %v2) %sum = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp) @@ -2969,7 +3363,7 @@ define <4 x i32> @sqdmlsl4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) { define <2 x i64> @sqdmlsl2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) { ; CHECK-LABEL: sqdmlsl2d_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl.2d v0, v1, v2 +; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.2s ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %v2) %sum = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp) @@ -2979,7 +3373,7 @@ define <2 x i64> @sqdmlsl2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) { define <4 x i32> @sqdmlsl2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) { ; CHECK-LABEL: sqdmlsl2_4s_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.4s v0, v1, v2 +; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.8h ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -2991,7 +3385,7 @@ define <4 x i32> @sqdmlsl2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) define <2 x i64> @sqdmlsl2_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-LABEL: sqdmlsl2_2d_lib: ; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.2d v0, v1, v2 +; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.4s ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3> %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 2, i32 3> @@ -3004,7 +3398,7 @@ define <4 x i32> @sqdmlsl_lane_4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> % ; CHECK-LABEL: sqdmlsl_lane_4s_lib: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sqdmlsl.4s v0, v1, v2[3] +; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3] ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v2, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %tmp1 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %tmp0) @@ -3016,7 +3410,7 @@ define <2 x i64> @sqdmlsl_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> % ; CHECK-LABEL: sqdmlsl_lane_2d_lib: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sqdmlsl.2d v0, v1, v2[1] +; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1] ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i32> %v2, <2 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp1 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %tmp0) @@ -3025,10 +3419,16 @@ define <2 x i64> @sqdmlsl_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> % } define <4 x i32> @sqdmlsl2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) { -; CHECK-LABEL: sqdmlsl2_lane_4s_lib: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.4s v0, v1, v2[7] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_lane_4s_lib: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[7] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_lane_4s_lib: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[7] +; CHECK-GI-NEXT: ret %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7> %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1) @@ -3037,10 +3437,16 @@ define <4 x i32> @sqdmlsl2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> } define <2 x i64> @sqdmlsl2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) { -; CHECK-LABEL: sqdmlsl2_lane_2d_lib: -; CHECK: // %bb.0: -; CHECK-NEXT: sqdmlsl2.2d v0, v1, v2[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl2_lane_2d_lib: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl2_lane_2d_lib: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1] +; CHECK-GI-NEXT: ret %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3> %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 1, i32 1> %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1) diff --git a/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir b/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir new file mode 100644 index 0000000..23ac67c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir @@ -0,0 +1,98 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s + + +--- +name: BSL_COPY +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + + + ; CHECK-LABEL: name: BSL_COPY + ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $q2 = ORRv16i8 killed renamable $q20, killed renamable $q20 + ; CHECK-NEXT: renamable $q2 = BSLv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0 + ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1 + ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2 + ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3 + ; CHECK-NEXT: RET undef $lr, implicit $q22 + renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + $q22 = ORRv16i8 $q0, killed $q0 + $q23 = ORRv16i8 $q1, killed $q1 + $q24 = ORRv16i8 $q2, killed $q2 + $q25 = ORRv16i8 $q3, killed $q3 + RET_ReallyLR implicit $q22 +... +--- +name: BSL +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + + ; CHECK-LABEL: name: BSL + ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $q2 = BSLv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0 + ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1 + ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2 + ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3 + ; CHECK-NEXT: RET undef $lr, implicit $q22 + renamable $q2 = BSPv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + $q22 = ORRv16i8 $q0, killed $q0 + $q23 = ORRv16i8 $q1, killed $q1 + $q24 = ORRv16i8 $q2, killed $q2 + $q25 = ORRv16i8 $q3, killed $q3 + RET_ReallyLR implicit $q22 +... +--- +name: BIF +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + + ; CHECK-LABEL: name: BIF + ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $q2 = BIFv16i8 renamable $q2, renamable $q6, killed renamable $q20, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0 + ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1 + ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2 + ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3 + ; CHECK-NEXT: RET undef $lr, implicit $q22 + renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q2, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + $q22 = ORRv16i8 $q0, killed $q0 + $q23 = ORRv16i8 $q1, killed $q1 + $q24 = ORRv16i8 $q2, killed $q2 + $q25 = ORRv16i8 $q3, killed $q3 + RET_ReallyLR implicit $q22 +... +--- +name: BIT +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + + ; CHECK-LABEL: name: BIT + ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $q2 = BITv16i8 renamable $q2, renamable $q21, killed renamable $q20, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0 + ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1 + ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2 + ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3 + ; CHECK-NEXT: RET undef $lr, implicit $q22 + renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q21, renamable $q2, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3 + $q22 = ORRv16i8 $q0, killed $q0 + $q23 = ORRv16i8 $q1, killed $q1 + $q24 = ORRv16i8 $q2, killed $q2 + $q25 = ORRv16i8 $q3, killed $q3 + RET_ReallyLR implicit $q22 +... diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll index 4b816df..3620444 100644 --- a/llvm/test/CodeGen/AArch64/cmp-chains.ll +++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Ensure chains of comparisons produce chains of `ccmp` ; (x0 < x1) && (x2 > x3) define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) { -; SDISEL-LABEL: cmp_and2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: cset w0, hi -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_and2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_and2: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_and2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret %5 = icmp ult i32 %0, %1 %6 = icmp ugt i32 %2, %3 %7 = select i1 %5, i1 %6, i1 false @@ -30,25 +30,25 @@ define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) { ; (x0 < x1) && (x2 > x3) && (x4 != x5) define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { -; SDISEL-LABEL: cmp_and3: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, lo -; SDISEL-NEXT: ccmp w4, w5, #4, hi -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_and3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, lo +; CHECK-SD-NEXT: ccmp w4, w5, #4, hi +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_and3: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_and3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret %7 = icmp ult i32 %0, %1 %8 = icmp ugt i32 %2, %3 %9 = select i1 %7, i1 %8, i1 false @@ -60,29 +60,29 @@ define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { ; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7) define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) { -; SDISEL-LABEL: cmp_and4: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w2, w3 -; SDISEL-NEXT: ccmp w0, w1, #2, hi -; SDISEL-NEXT: ccmp w4, w5, #4, lo -; SDISEL-NEXT: ccmp w6, w7, #0, ne -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_and4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w2, w3 +; CHECK-SD-NEXT: ccmp w0, w1, #2, hi +; CHECK-SD-NEXT: ccmp w4, w5, #4, lo +; CHECK-SD-NEXT: ccmp w6, w7, #0, ne +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_and4: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w8, hi -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: cset w10, ne -; GISEL-NEXT: cmp w6, w7 -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: cset w11, eq -; GISEL-NEXT: and w9, w10, w11 -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_and4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: cset w10, ne +; CHECK-GI-NEXT: cmp w6, w7 +; CHECK-GI-NEXT: and w8, w8, w9 +; CHECK-GI-NEXT: cset w11, eq +; CHECK-GI-NEXT: and w9, w10, w11 +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret %9 = icmp ugt i32 %2, %3 %10 = icmp ult i32 %0, %1 %11 = select i1 %9, i1 %10, i1 false @@ -96,22 +96,22 @@ define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 ; (x0 < x1) || (x2 > x3) define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) { -; SDISEL-LABEL: cmp_or2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #0, hs -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_or2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #0, hs +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_or2: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: and w0, w8, #0x1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_or2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: and w0, w8, #0x1 +; CHECK-GI-NEXT: ret %5 = icmp ult i32 %0, %1 %6 = icmp ne i32 %2, %3 %7 = select i1 %5, i1 true, i1 %6 @@ -121,26 +121,26 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) { ; (x0 < x1) || (x2 > x3) || (x4 != x5) define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { -; SDISEL-LABEL: cmp_or3: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hs -; SDISEL-NEXT: ccmp w4, w5, #0, ls -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_or3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hs +; CHECK-SD-NEXT: ccmp w4, w5, #0, ls +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_or3: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: and w0, w8, #0x1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_or3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: and w0, w8, #0x1 +; CHECK-GI-NEXT: ret %7 = icmp ult i32 %0, %1 %8 = icmp ugt i32 %2, %3 %9 = select i1 %7, i1 true, i1 %8 @@ -152,30 +152,30 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { ; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7) define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) { -; SDISEL-LABEL: cmp_or4: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w1 -; SDISEL-NEXT: ccmp w2, w3, #2, hs -; SDISEL-NEXT: ccmp w4, w5, #0, ls -; SDISEL-NEXT: ccmp w6, w7, #4, eq -; SDISEL-NEXT: cset w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: cmp_or4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: ccmp w2, w3, #2, hs +; CHECK-SD-NEXT: ccmp w4, w5, #0, ls +; CHECK-SD-NEXT: ccmp w6, w7, #4, eq +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: cmp_or4: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w1 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w3 -; GISEL-NEXT: cset w9, hi -; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: cset w10, ne -; GISEL-NEXT: cmp w6, w7 -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: cset w11, eq -; GISEL-NEXT: orr w9, w10, w11 -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: and w0, w8, #0x1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: cmp_or4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w3 +; CHECK-GI-NEXT: cset w9, hi +; CHECK-GI-NEXT: cmp w4, w5 +; CHECK-GI-NEXT: cset w10, ne +; CHECK-GI-NEXT: cmp w6, w7 +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: cset w11, eq +; CHECK-GI-NEXT: orr w9, w10, w11 +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: and w0, w8, #0x1 +; CHECK-GI-NEXT: ret %9 = icmp ult i32 %0, %1 %10 = icmp ugt i32 %2, %3 %11 = select i1 %9, i1 true, i1 %10 @@ -189,22 +189,22 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 ; (x0 != 0) || (x1 != 0) define i32 @true_or2(i32 %0, i32 %1) { -; SDISEL-LABEL: true_or2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w0, w1 -; SDISEL-NEXT: cmp w8, #0 -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: true_or2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w0, w1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: true_or2: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w1, #0 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: and w0, w8, #0x1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: true_or2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w1, #0 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: and w0, w8, #0x1 +; CHECK-GI-NEXT: ret %3 = icmp ne i32 %0, 0 %4 = icmp ne i32 %1, 0 %5 = select i1 %3, i1 true, i1 %4 @@ -214,26 +214,26 @@ define i32 @true_or2(i32 %0, i32 %1) { ; (x0 != 0) || (x1 != 0) || (x2 != 0) define i32 @true_or3(i32 %0, i32 %1, i32 %2) { -; SDISEL-LABEL: true_or3: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w0, w1 -; SDISEL-NEXT: orr w8, w8, w2 -; SDISEL-NEXT: cmp w8, #0 -; SDISEL-NEXT: cset w0, ne -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: true_or3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w0, w1 +; CHECK-SD-NEXT: orr w8, w8, w2 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: true_or3: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: cmp w1, #0 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: cmp w2, #0 -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: cset w9, ne -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: and w0, w8, #0x1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: true_or3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: cset w8, ne +; CHECK-GI-NEXT: cmp w1, #0 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: cmp w2, #0 +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: cset w9, ne +; CHECK-GI-NEXT: orr w8, w8, w9 +; CHECK-GI-NEXT: and w0, w8, #0x1 +; CHECK-GI-NEXT: ret %4 = icmp ne i32 %0, 0 %5 = icmp ne i32 %1, 0 %6 = select i1 %4, i1 true, i1 %5 @@ -260,22 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) { ; (b > -(d | 1) && a < c) define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #4, lt -; SDISEL-NEXT: csel w0, w1, w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #4, lt +; CHECK-SD-NEXT: csel w0, w1, w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #4, lt -; GISEL-NEXT: csel w0, w1, w0, gt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #4, lt +; CHECK-GI-NEXT: csel w0, w1, w0, gt +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -287,22 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b >u -(d | 1) && a < c) define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp_u: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #0, lt -; SDISEL-NEXT: csel w0, w1, w0, hi -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp_u: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #0, lt +; CHECK-SD-NEXT: csel w0, w1, w0, hi +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp_u: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #0, lt -; GISEL-NEXT: csel w0, w1, w0, hi -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp_u: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #0, lt +; CHECK-GI-NEXT: csel w0, w1, w0, hi +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp ugt i32 %b, %negd @@ -314,22 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u < c) define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp_ua: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #4, lo -; SDISEL-NEXT: csel w0, w1, w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp_ua: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #4, lo +; CHECK-SD-NEXT: csel w0, w1, w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp_ua: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #4, lo -; GISEL-NEXT: csel w0, w1, w0, gt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp_ua: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #4, lo +; CHECK-GI-NEXT: csel w0, w1, w0, gt +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -341,19 +341,19 @@ define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b <= -3 && a > c) define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) { -; SDISEL-LABEL: neg_range_int_2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, #4, #4, gt -; SDISEL-NEXT: csel w0, w1, w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, #4, #4, gt +; CHECK-SD-NEXT: csel w0, w1, w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_2: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: ccmn w1, #3, #8, gt -; GISEL-NEXT: csel w0, w1, w0, ge -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: ccmn w1, #3, #8, gt +; CHECK-GI-NEXT: csel w0, w1, w0, ge +; CHECK-GI-NEXT: ret %cmp = icmp sge i32 %b, -3 %cmp1 = icmp sgt i32 %a, %c %or.cond = and i1 %cmp, %cmp1 @@ -363,22 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) { ; (b < -(d | 1) && a >= c) define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #0, ge -; SDISEL-NEXT: csel w0, w1, w0, lt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #0, ge +; CHECK-SD-NEXT: csel w0, w1, w0, lt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp2: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #0, ge -; GISEL-NEXT: csel w0, w1, w0, lt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #0, ge +; CHECK-GI-NEXT: csel w0, w1, w0, lt +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp slt i32 %b, %negd @@ -390,22 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b <u -(d | 1) && a > c) define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp_u2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #2, gt -; SDISEL-NEXT: csel w0, w1, w0, lo -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp_u2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #2, gt +; CHECK-SD-NEXT: csel w0, w1, w0, lo +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp_u2: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #2, gt -; GISEL-NEXT: csel w0, w1, w0, lo -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp_u2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #2, gt +; CHECK-GI-NEXT: csel w0, w1, w0, lo +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp ult i32 %b, %negd @@ -417,22 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u > c) define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp_ua2: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #4, hi -; SDISEL-NEXT: csel w0, w1, w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp_ua2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #4, hi +; CHECK-SD-NEXT: csel w0, w1, w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp_ua2: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #4, hi -; GISEL-NEXT: csel w0, w1, w0, gt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp_ua2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #4, hi +; CHECK-GI-NEXT: csel w0, w1, w0, gt +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -444,22 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) { ; (b > -(d | 1) && a u == c) define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) { -; SDISEL-LABEL: neg_range_int_comp_ua3: -; SDISEL: // %bb.0: -; SDISEL-NEXT: orr w8, w3, #0x1 -; SDISEL-NEXT: cmp w0, w2 -; SDISEL-NEXT: ccmn w1, w8, #4, eq -; SDISEL-NEXT: csel w0, w1, w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_comp_ua3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: orr w8, w3, #0x1 +; CHECK-SD-NEXT: cmp w0, w2 +; CHECK-SD-NEXT: ccmn w1, w8, #4, eq +; CHECK-SD-NEXT: csel w0, w1, w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_comp_ua3: -; GISEL: // %bb.0: -; GISEL-NEXT: orr w8, w3, #0x1 -; GISEL-NEXT: cmp w0, w2 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: ccmp w1, w8, #4, eq -; GISEL-NEXT: csel w0, w1, w0, gt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_comp_ua3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: orr w8, w3, #0x1 +; CHECK-GI-NEXT: cmp w0, w2 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: ccmp w1, w8, #4, eq +; CHECK-GI-NEXT: csel w0, w1, w0, gt +; CHECK-GI-NEXT: ret %dor = or i32 %d, 1 %negd = sub i32 0, %dor %cmp = icmp sgt i32 %b, %negd @@ -471,26 +471,26 @@ define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) { ; -(a | 1) > (b | 3) && a < c define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) { -; SDISEL-LABEL: neg_range_int_c: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: orr w8, w0, #0x1 -; SDISEL-NEXT: orr w9, w1, #0x3 -; SDISEL-NEXT: cmn w9, w8 -; SDISEL-NEXT: ccmp w2, w0, #2, lo -; SDISEL-NEXT: cset w0, lo -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: neg_range_int_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w0, #0x1 +; CHECK-SD-NEXT: orr w9, w1, #0x3 +; CHECK-SD-NEXT: cmn w9, w8 +; CHECK-SD-NEXT: ccmp w2, w0, #2, lo +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: neg_range_int_c: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: orr w8, w0, #0x1 -; GISEL-NEXT: orr w9, w1, #0x3 -; GISEL-NEXT: neg w8, w8 -; GISEL-NEXT: cmp w9, w8 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: cmp w2, w0 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: and w0, w8, w9 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: neg_range_int_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: orr w8, w0, #0x1 +; CHECK-GI-NEXT: orr w9, w1, #0x3 +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: cmp w9, w8 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: cmp w2, w0 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: and w0, w8, w9 +; CHECK-GI-NEXT: ret entry: %or = or i32 %a, 1 %sub = sub i32 0, %or diff --git a/llvm/test/CodeGen/AArch64/combine-and-like.ll b/llvm/test/CodeGen/AArch64/combine-and-like.ll index 15770c2..ea1359b 100644 --- a/llvm/test/CodeGen/AArch64/combine-and-like.ll +++ b/llvm/test/CodeGen/AArch64/combine-and-like.ll @@ -4,7 +4,6 @@ define i32 @f(i32 %a0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %1 = lshr i32 %a0, 2147483647 %2 = add i32 %1, 2147483647 diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll index 13434fa..7686740 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll @@ -203,93 +203,89 @@ define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c) ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 +; CHECK-NEXT: ldr s17, [sp, #40] +; CHECK-NEXT: add x10, sp, #56 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: ldr s17, [sp, #32] -; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5 ; CHECK-NEXT: add x9, sp, #48 -; CHECK-NEXT: add x10, sp, #64 ; CHECK-NEXT: mov v1.s[1], v3.s[0] +; CHECK-NEXT: ldr s3, [sp, #32] +; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 ; CHECK-NEXT: mov v0.s[1], v2.s[0] +; CHECK-NEXT: ld1 { v17.s }[1], [x10] +; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5 +; CHECK-NEXT: ldr s16, [sp, #8] ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 -; CHECK-NEXT: add x11, sp, #72 -; CHECK-NEXT: ld1 { v17.s }[1], [x9] -; CHECK-NEXT: ldr s18, [x10] -; CHECK-NEXT: add x9, sp, #80 -; CHECK-NEXT: add x10, sp, #56 -; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6 +; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #72 ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7 -; CHECK-NEXT: ldr s16, [sp, #8] -; CHECK-NEXT: ldr s3, [sp, #96] -; CHECK-NEXT: ld1 { v18.s }[1], [x9] -; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6 ; CHECK-NEXT: ldr s2, [sp] +; CHECK-NEXT: ld1 { v16.s }[1], [x10] +; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: ldr s20, [sp, #136] ; CHECK-NEXT: mov v1.s[2], v5.s[0] -; CHECK-NEXT: ldr s5, [sp, #40] +; CHECK-NEXT: ld1 { v17.s }[2], [x9] +; CHECK-NEXT: add x9, sp, #64 +; CHECK-NEXT: ldr s5, [sp, #96] +; CHECK-NEXT: ld1 { v3.s }[2], [x9] ; CHECK-NEXT: mov v0.s[2], v4.s[0] +; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: ldr s4, [sp, #104] +; CHECK-NEXT: ldr s19, [sp, #192] ; CHECK-NEXT: ld1 { v5.s }[1], [x10] -; CHECK-NEXT: ldr s19, [x11] +; CHECK-NEXT: add x10, sp, #80 +; CHECK-NEXT: ld1 { v17.s }[3], [x9] +; CHECK-NEXT: mov v1.s[3], v7.s[0] +; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: ld1 { v3.s }[3], [x10] +; CHECK-NEXT: ld1 { v4.s }[1], [x9] +; CHECK-NEXT: ldr s7, [sp, #128] ; CHECK-NEXT: add x10, sp, #144 -; CHECK-NEXT: zip1 v4.2d, v17.2d, v18.2d -; CHECK-NEXT: add x11, sp, #160 -; CHECK-NEXT: ldr s18, [sp, #136] -; CHECK-NEXT: ld1 { v19.s }[1], [x9] ; CHECK-NEXT: mov v0.s[3], v6.s[0] -; CHECK-NEXT: ldr s6, [sp, #128] -; CHECK-NEXT: mov v1.s[3], v7.s[0] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: ldr s7, [sp, #104] -; CHECK-NEXT: ld1 { v16.s }[1], [x9] -; CHECK-NEXT: add x9, sp, #112 -; CHECK-NEXT: ld1 { v6.s }[1], [x10] -; CHECK-NEXT: zip1 v5.2d, v5.2d, v19.2d -; CHECK-NEXT: add x10, sp, #120 -; CHECK-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: ld1 { v7.s }[1], [x10] -; CHECK-NEXT: ldr s17, [x11] -; CHECK-NEXT: add x9, sp, #176 -; CHECK-NEXT: add x10, sp, #16 -; CHECK-NEXT: add x11, sp, #168 -; CHECK-NEXT: ld1 { v17.s }[1], [x9] -; CHECK-NEXT: ld1 { v2.s }[1], [x10] -; CHECK-NEXT: add x9, sp, #152 -; CHECK-NEXT: fmul v19.4s, v5.4s, v1.4s -; CHECK-NEXT: fmul v20.4s, v7.4s, v16.4s -; CHECK-NEXT: fmul v16.4s, v3.4s, v16.4s -; CHECK-NEXT: fmul v1.4s, v4.4s, v1.4s -; CHECK-NEXT: ld1 { v18.s }[1], [x9] -; CHECK-NEXT: ldr s21, [x11] -; CHECK-NEXT: zip1 v6.2d, v6.2d, v17.2d -; CHECK-NEXT: ldr s17, [sp, #192] -; CHECK-NEXT: add x9, sp, #184 +; CHECK-NEXT: ld1 { v2.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: fmul v6.4s, v17.4s, v1.4s +; CHECK-NEXT: fmul v18.4s, v4.4s, v16.4s +; CHECK-NEXT: fmul v16.4s, v5.4s, v16.4s +; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s ; CHECK-NEXT: add x10, sp, #208 -; CHECK-NEXT: ld1 { v21.s }[1], [x9] +; CHECK-NEXT: ld1 { v7.s }[2], [x9] +; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: ld1 { v19.s }[1], [x10] +; CHECK-NEXT: ld1 { v20.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: fneg v6.4s, v6.4s +; CHECK-NEXT: fneg v18.4s, v18.4s +; CHECK-NEXT: fmla v16.4s, v2.4s, v4.4s +; CHECK-NEXT: fmla v1.4s, v0.4s, v17.4s +; CHECK-NEXT: ld1 { v7.s }[3], [x9] +; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: ld1 { v20.s }[2], [x9] +; CHECK-NEXT: ldr s4, [sp, #200] ; CHECK-NEXT: add x9, sp, #216 -; CHECK-NEXT: fneg v19.4s, v19.4s -; CHECK-NEXT: fneg v20.4s, v20.4s -; CHECK-NEXT: fmla v16.4s, v2.4s, v7.4s -; CHECK-NEXT: fmla v1.4s, v0.4s, v5.4s -; CHECK-NEXT: ld1 { v17.s }[1], [x10] -; CHECK-NEXT: ldr s5, [sp, #200] -; CHECK-NEXT: zip1 v7.2d, v18.2d, v21.2d -; CHECK-NEXT: ld1 { v5.s }[1], [x9] -; CHECK-NEXT: fmla v19.4s, v0.4s, v4.4s -; CHECK-NEXT: fmla v20.4s, v2.4s, v3.4s -; CHECK-NEXT: fsub v0.4s, v6.4s, v1.4s -; CHECK-NEXT: fsub v1.4s, v17.4s, v16.4s -; CHECK-NEXT: fadd v2.4s, v7.4s, v19.4s -; CHECK-NEXT: fadd v3.4s, v5.4s, v20.4s +; CHECK-NEXT: fmla v6.4s, v0.4s, v3.4s +; CHECK-NEXT: fmla v18.4s, v2.4s, v5.4s +; CHECK-NEXT: ld1 { v4.s }[1], [x9] +; CHECK-NEXT: fsub v0.4s, v7.4s, v1.4s +; CHECK-NEXT: fsub v1.4s, v19.4s, v16.4s +; CHECK-NEXT: ld1 { v20.s }[3], [x10] +; CHECK-NEXT: fadd v2.4s, v4.4s, v18.4s +; CHECK-NEXT: fadd v3.4s, v20.4s, v6.4s ; CHECK-NEXT: ext v4.16b, v0.16b, v1.16b, #12 -; CHECK-NEXT: ext v5.16b, v2.16b, v3.16b, #12 -; CHECK-NEXT: trn2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ext v5.16b, v3.16b, v2.16b, #12 +; CHECK-NEXT: trn2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ext v4.16b, v0.16b, v4.16b, #12 -; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8 +; CHECK-NEXT: ext v5.16b, v3.16b, v5.16b, #8 ; CHECK-NEXT: rev64 v4.4s, v4.4s -; CHECK-NEXT: trn2 v3.4s, v4.4s, v5.4s -; CHECK-NEXT: zip2 v4.4s, v0.4s, v2.4s -; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ext v1.16b, v3.16b, v1.16b, #8 -; CHECK-NEXT: mov v4.d[1], v3.d[0] +; CHECK-NEXT: trn2 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: zip2 v4.4s, v0.4s, v3.4s +; CHECK-NEXT: zip1 v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ext v1.16b, v2.16b, v1.16b, #8 +; CHECK-NEXT: mov v4.d[1], v2.d[0] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: stp q4, q1, [x8, #16] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index e6f27b9..acf15f1 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -186,9 +186,8 @@ define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %p ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ld1 { v0.s }[1], [x1] -; CHECK-NEXT: ldr s1, [x2] -; CHECK-NEXT: ld1 { v1.s }[1], [x3] -; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ld1 { v0.s }[2], [x2] +; CHECK-NEXT: ld1 { v0.s }[3], [x3] ; CHECK-NEXT: ret %A = load <4 x i8>, ptr %ptrA %B = load <4 x i8>, ptr %ptrB diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll index d444713..9f4b3e2 100644 --- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll +++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll @@ -19,11 +19,11 @@ ; function, constant pools for this constant should not have `.unlikely` suffix. ;; Constant pools for function @cold_func. -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 -; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .section .rodata.cst8.unlikely.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI0_1: ; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 @@ -58,7 +58,7 @@ ; CHECK-NEXT: .word 3 // 0x3 ; CHECK-NEXT: .word 5 // 0x5 ; CHECK-NEXT: .word 7 // 0x7 -; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI1_2: ; CHECK-NEXT: .word 442 // 0x1ba @@ -67,11 +67,11 @@ ; CHECK-NEXT: .word 0 // 0x0 ;; Constant pools for function @hot_func -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI2_0: ; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 -; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI2_1: ; CHECK-NEXT: .word 0 // 0x0 diff --git a/llvm/test/CodeGen/AArch64/dag-combine-select.ll b/llvm/test/CodeGen/AArch64/dag-combine-select.ll index 56208f1..02b0077 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-select.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-select.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple arm64-none-eabi -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc -mtriple arm64-none-eabi -global-isel -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple arm64-none-eabi -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple arm64-none-eabi -global-isel -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI @out = internal global i32 0, align 4 ; Ensure that we transform select(C0, x, select(C1, x, y)) towards ; select(C0 | C1, x, y) so we can use CMP;CCMP for the implementation. define i32 @test0(i32 %v0, i32 %v1, i32 %v2) { -; SDISEL-LABEL: test0: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, #7 -; SDISEL-NEXT: ccmp w1, #0, #0, ne -; SDISEL-NEXT: csel w0, w1, w2, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, #7 +; CHECK-SD-NEXT: ccmp w1, #0, #0, ne +; CHECK-SD-NEXT: csel w0, w1, w2, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test0: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, #7 -; GISEL-NEXT: csel w8, w1, w2, eq -; GISEL-NEXT: cmp w1, #0 -; GISEL-NEXT: csel w0, w1, w8, gt -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #7 +; CHECK-GI-NEXT: csel w8, w1, w2, eq +; CHECK-GI-NEXT: cmp w1, #0 +; CHECK-GI-NEXT: csel w0, w1, w8, gt +; CHECK-GI-NEXT: ret %cmp1 = icmp eq i32 %v0, 7 %cmp2 = icmp sgt i32 %v1, 0 %sel0 = select i1 %cmp1, i32 %v1, i32 %v2 @@ -32,36 +32,36 @@ define i32 @test0(i32 %v0, i32 %v1, i32 %v2) { ; sequences. This case should be transformed to select(C0, select(C1, x, y), y) ; anyway to get CSE effects. define void @test1(i32 %bitset, i32 %val0, i32 %val1) { -; SDISEL-LABEL: test1: -; SDISEL: // %bb.0: -; SDISEL-NEXT: cmp w0, #7 -; SDISEL-NEXT: adrp x9, out -; SDISEL-NEXT: csel w8, w1, w2, eq -; SDISEL-NEXT: cmp w8, #13 -; SDISEL-NEXT: csel w8, w1, w2, lo -; SDISEL-NEXT: cmp w0, #42 -; SDISEL-NEXT: csel w10, w1, w8, eq -; SDISEL-NEXT: str w8, [x9, :lo12:out] -; SDISEL-NEXT: str w10, [x9, :lo12:out] -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, #7 +; CHECK-SD-NEXT: adrp x9, out +; CHECK-SD-NEXT: csel w8, w1, w2, eq +; CHECK-SD-NEXT: cmp w8, #13 +; CHECK-SD-NEXT: csel w8, w1, w2, lo +; CHECK-SD-NEXT: cmp w0, #42 +; CHECK-SD-NEXT: csel w10, w1, w8, eq +; CHECK-SD-NEXT: str w8, [x9, :lo12:out] +; CHECK-SD-NEXT: str w10, [x9, :lo12:out] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test1: -; GISEL: // %bb.0: -; GISEL-NEXT: cmp w0, #7 -; GISEL-NEXT: csel w8, w1, w2, eq -; GISEL-NEXT: cmp w8, #13 -; GISEL-NEXT: cset w8, lo -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: csel w9, w1, w2, ne -; GISEL-NEXT: cmp w0, #42 -; GISEL-NEXT: cset w10, eq -; GISEL-NEXT: orr w8, w10, w8 -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: adrp x8, out -; GISEL-NEXT: csel w10, w1, w2, ne -; GISEL-NEXT: str w9, [x8, :lo12:out] -; GISEL-NEXT: str w10, [x8, :lo12:out] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #7 +; CHECK-GI-NEXT: csel w8, w1, w2, eq +; CHECK-GI-NEXT: cmp w8, #13 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csel w9, w1, w2, ne +; CHECK-GI-NEXT: cmp w0, #42 +; CHECK-GI-NEXT: cset w10, eq +; CHECK-GI-NEXT: orr w8, w10, w8 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: adrp x8, out +; CHECK-GI-NEXT: csel w10, w1, w2, ne +; CHECK-GI-NEXT: str w9, [x8, :lo12:out] +; CHECK-GI-NEXT: str w10, [x8, :lo12:out] +; CHECK-GI-NEXT: ret %cmp1 = icmp eq i32 %bitset, 7 %cond = select i1 %cmp1, i32 %val0, i32 %val1 %cmp5 = icmp ult i32 %cond, 13 diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll index 3fbcd10..3db588b 100644 --- a/llvm/test/CodeGen/AArch64/fcsel-zero.ll +++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll @@ -2,8 +2,8 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s -define float @foeq(float %a, float %b) #0 { - %t = fcmp oeq float %a, 0.0 +define float @foeq(float %a, float %b) { + %t = fcmp nsz oeq float %a, 0.0 %v = select i1 %t, float 0.0, float %b ret float %v ; CHECK-LABEL: foeq @@ -11,8 +11,8 @@ define float @foeq(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq } -define float @fueq(float %a, float %b) #0 { - %t = fcmp ueq float %a, 0.0 +define float @fueq(float %a, float %b) { + %t = fcmp nsz ueq float %a, 0.0 %v = select i1 %t, float 0.0, float %b ret float %v ; CHECK-LABEL: fueq @@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs } -define float @fone(float %a, float %b) #0 { - %t = fcmp one float %a, 0.0 +define float @fone(float %a, float %b) { + %t = fcmp nsz one float %a, 0.0 %v = select i1 %t, float %b, float 0.0 ret float %v ; CHECK-LABEL: fone @@ -31,8 +31,8 @@ define float @fone(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt } -define float @fune(float %a, float %b) #0 { - %t = fcmp une float %a, 0.0 +define float @fune(float %a, float %b) { + %t = fcmp nsz une float %a, 0.0 %v = select i1 %t, float %b, float 0.0 ret float %v ; CHECK-LABEL: fune @@ -40,8 +40,8 @@ define float @fune(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne } -define double @doeq(double %a, double %b) #0 { - %t = fcmp oeq double %a, 0.0 +define double @doeq(double %a, double %b) { + %t = fcmp nsz oeq double %a, 0.0 %v = select i1 %t, double 0.0, double %b ret double %v ; CHECK-LABEL: doeq @@ -49,8 +49,8 @@ define double @doeq(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq } -define double @dueq(double %a, double %b) #0 { - %t = fcmp ueq double %a, 0.0 +define double @dueq(double %a, double %b) { + %t = fcmp nsz ueq double %a, 0.0 %v = select i1 %t, double 0.0, double %b ret double %v ; CHECK-LABEL: dueq @@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs } -define double @done(double %a, double %b) #0 { - %t = fcmp one double %a, 0.0 +define double @done(double %a, double %b) { + %t = fcmp nsz one double %a, 0.0 %v = select i1 %t, double %b, double 0.0 ret double %v ; CHECK-LABEL: done @@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt } -define double @dune(double %a, double %b) #0 { - %t = fcmp une double %a, 0.0 +define double @dune(double %a, double %b) { + %t = fcmp nsz une double %a, 0.0 %v = select i1 %t, double %b, double 0.0 ret double %v ; CHECK-LABEL: dune ; CHECK: fcmp [[R:d[0-9]+]], #0.0 ; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne } - -attributes #0 = { nounwind "unsafe-fp-math"="true" } - diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll index 4906e2e..c6b8e41 100644 --- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll +++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll @@ -1431,7 +1431,6 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) { ; FULLFP16-NEXT: add x9, sp, #16 ; FULLFP16-NEXT: // kill: def $h3 killed $h3 def $q3 ; FULLFP16-NEXT: // kill: def $h4 killed $h4 def $q4 -; FULLFP16-NEXT: add x10, sp, #40 ; FULLFP16-NEXT: // kill: def $h5 killed $h5 def $q5 ; FULLFP16-NEXT: // kill: def $h6 killed $h6 def $q6 ; FULLFP16-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -1440,30 +1439,30 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) { ; FULLFP16-NEXT: ld1 { v1.h }[1], [x9] ; FULLFP16-NEXT: add x9, sp, #24 ; FULLFP16-NEXT: mov v0.h[2], v2.h[0] +; FULLFP16-NEXT: ldr h2, [sp] ; FULLFP16-NEXT: ld1 { v1.h }[2], [x9] ; FULLFP16-NEXT: add x9, sp, #32 +; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h ; FULLFP16-NEXT: mov v0.h[3], v3.h[0] ; FULLFP16-NEXT: ld1 { v1.h }[3], [x9] -; FULLFP16-NEXT: ldr h2, [x10] -; FULLFP16-NEXT: add x9, sp, #48 +; FULLFP16-NEXT: add x9, sp, #40 ; FULLFP16-NEXT: ldr h3, [sp, #72] -; FULLFP16-NEXT: ld1 { v2.h }[1], [x9] -; FULLFP16-NEXT: add x9, sp, #56 +; FULLFP16-NEXT: ld1 { v1.h }[4], [x9] +; FULLFP16-NEXT: add x9, sp, #48 ; FULLFP16-NEXT: fminnm v3.8h, v3.8h, v3.8h ; FULLFP16-NEXT: mov v0.h[4], v4.h[0] -; FULLFP16-NEXT: ld1 { v2.h }[2], [x9] -; FULLFP16-NEXT: add x9, sp, #64 +; FULLFP16-NEXT: ld1 { v1.h }[5], [x9] +; FULLFP16-NEXT: add x9, sp, #56 +; FULLFP16-NEXT: fmaxnm v2.8h, v2.8h, v3.8h ; FULLFP16-NEXT: mov v0.h[5], v5.h[0] -; FULLFP16-NEXT: ld1 { v2.h }[3], [x9] -; FULLFP16-NEXT: zip1 v1.2d, v1.2d, v2.2d -; FULLFP16-NEXT: ldr h2, [sp] +; FULLFP16-NEXT: ld1 { v1.h }[6], [x9] +; FULLFP16-NEXT: add x9, sp, #64 +; FULLFP16-NEXT: str h2, [x8, #16] ; FULLFP16-NEXT: mov v0.h[6], v6.h[0] -; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h +; FULLFP16-NEXT: ld1 { v1.h }[7], [x9] ; FULLFP16-NEXT: fminnm v1.8h, v1.8h, v1.8h ; FULLFP16-NEXT: mov v0.h[7], v7.h[0] -; FULLFP16-NEXT: fmaxnm v2.8h, v2.8h, v3.8h ; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v0.8h -; FULLFP16-NEXT: str h2, [x8, #16] ; FULLFP16-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; FULLFP16-NEXT: str q0, [x8] ; FULLFP16-NEXT: ret @@ -2013,7 +2012,6 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) { ; FULLFP16-NEXT: add x9, sp, #16 ; FULLFP16-NEXT: // kill: def $h3 killed $h3 def $q3 ; FULLFP16-NEXT: // kill: def $h4 killed $h4 def $q4 -; FULLFP16-NEXT: add x10, sp, #40 ; FULLFP16-NEXT: // kill: def $h5 killed $h5 def $q5 ; FULLFP16-NEXT: // kill: def $h6 killed $h6 def $q6 ; FULLFP16-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -2022,30 +2020,30 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) { ; FULLFP16-NEXT: ld1 { v1.h }[1], [x9] ; FULLFP16-NEXT: add x9, sp, #24 ; FULLFP16-NEXT: mov v0.h[2], v2.h[0] +; FULLFP16-NEXT: ldr h2, [sp] ; FULLFP16-NEXT: ld1 { v1.h }[2], [x9] ; FULLFP16-NEXT: add x9, sp, #32 +; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h ; FULLFP16-NEXT: mov v0.h[3], v3.h[0] ; FULLFP16-NEXT: ld1 { v1.h }[3], [x9] -; FULLFP16-NEXT: ldr h2, [x10] -; FULLFP16-NEXT: add x9, sp, #48 +; FULLFP16-NEXT: add x9, sp, #40 ; FULLFP16-NEXT: ldr h3, [sp, #72] -; FULLFP16-NEXT: ld1 { v2.h }[1], [x9] -; FULLFP16-NEXT: add x9, sp, #56 +; FULLFP16-NEXT: ld1 { v1.h }[4], [x9] +; FULLFP16-NEXT: add x9, sp, #48 ; FULLFP16-NEXT: fminnm v3.8h, v3.8h, v3.8h ; FULLFP16-NEXT: mov v0.h[4], v4.h[0] -; FULLFP16-NEXT: ld1 { v2.h }[2], [x9] -; FULLFP16-NEXT: add x9, sp, #64 +; FULLFP16-NEXT: ld1 { v1.h }[5], [x9] +; FULLFP16-NEXT: add x9, sp, #56 +; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v3.8h ; FULLFP16-NEXT: mov v0.h[5], v5.h[0] -; FULLFP16-NEXT: ld1 { v2.h }[3], [x9] -; FULLFP16-NEXT: zip1 v1.2d, v1.2d, v2.2d -; FULLFP16-NEXT: ldr h2, [sp] +; FULLFP16-NEXT: ld1 { v1.h }[6], [x9] +; FULLFP16-NEXT: add x9, sp, #64 +; FULLFP16-NEXT: str h2, [x8, #16] ; FULLFP16-NEXT: mov v0.h[6], v6.h[0] -; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h +; FULLFP16-NEXT: ld1 { v1.h }[7], [x9] ; FULLFP16-NEXT: fminnm v1.8h, v1.8h, v1.8h ; FULLFP16-NEXT: mov v0.h[7], v7.h[0] -; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v3.8h ; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v0.8h -; FULLFP16-NEXT: str h2, [x8, #16] ; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v1.8h ; FULLFP16-NEXT: str q0, [x8] ; FULLFP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll index 1b98954..b056460 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half) declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half) @@ -27,18 +27,18 @@ declare half @llvm.aarch64.neon.frecpx.f16(half) declare half @llvm.aarch64.neon.frecpe.f16(half) define dso_local i16 @t2(half %a) { -; SDISEL-LABEL: t2: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, #0.0 -; SDISEL-NEXT: csetm w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t2: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, #0.0 +; CHECK-SD-NEXT: csetm w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t2: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, #0.0 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t2: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, #0.0 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp oeq half %a, 0xH0000 %vceqz = sext i1 %0 to i16 @@ -46,18 +46,18 @@ entry: } define dso_local i16 @t3(half %a) { -; SDISEL-LABEL: t3: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, #0.0 -; SDISEL-NEXT: csetm w0, ge -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t3: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, #0.0 +; CHECK-SD-NEXT: csetm w0, ge +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t3: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, #0.0 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t3: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, #0.0 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp oge half %a, 0xH0000 %vcgez = sext i1 %0 to i16 @@ -65,18 +65,18 @@ entry: } define dso_local i16 @t4(half %a) { -; SDISEL-LABEL: t4: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, #0.0 -; SDISEL-NEXT: csetm w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t4: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, #0.0 +; CHECK-SD-NEXT: csetm w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t4: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, #0.0 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t4: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, #0.0 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp ogt half %a, 0xH0000 %vcgtz = sext i1 %0 to i16 @@ -84,18 +84,18 @@ entry: } define dso_local i16 @t5(half %a) { -; SDISEL-LABEL: t5: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, #0.0 -; SDISEL-NEXT: csetm w0, ls -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t5: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, #0.0 +; CHECK-SD-NEXT: csetm w0, ls +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t5: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, #0.0 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t5: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, #0.0 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp ole half %a, 0xH0000 %vclez = sext i1 %0 to i16 @@ -103,18 +103,18 @@ entry: } define dso_local i16 @t6(half %a) { -; SDISEL-LABEL: t6: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, #0.0 -; SDISEL-NEXT: csetm w0, mi -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t6: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, #0.0 +; CHECK-SD-NEXT: csetm w0, mi +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t6: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, #0.0 -; GISEL-NEXT: cset w8, mi -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t6: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, #0.0 +; CHECK-GI-NEXT: cset w8, mi +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp olt half %a, 0xH0000 %vcltz = sext i1 %0 to i16 @@ -172,15 +172,15 @@ entry: } define dso_local i16 @t16(half %a) { -; SDISEL-LABEL: t16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcvtzs w0, h0 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w0, h0 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcvtzu w0, h0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %0 = fptoui half %a to i16 ret i16 %0 diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll index 5b08ef2..da70599 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL -; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare half @llvm.aarch64.sisd.fabd.f16(half, half) @@ -35,18 +35,18 @@ entry: } define dso_local i16 @t_vceqh_f16(half %a, half %b) { -; SDISEL-LABEL: t_vceqh_f16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, h1 -; SDISEL-NEXT: csetm w0, eq -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t_vceqh_f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, h1 +; CHECK-SD-NEXT: csetm w0, eq +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t_vceqh_f16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, h1 -; GISEL-NEXT: cset w8, eq -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t_vceqh_f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, h1 +; CHECK-GI-NEXT: cset w8, eq +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp oeq half %a, %b %vcmpd = sext i1 %0 to i16 @@ -54,18 +54,18 @@ entry: } define dso_local i16 @t_vcgeh_f16(half %a, half %b) { -; SDISEL-LABEL: t_vcgeh_f16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, h1 -; SDISEL-NEXT: csetm w0, ge -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t_vcgeh_f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, h1 +; CHECK-SD-NEXT: csetm w0, ge +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t_vcgeh_f16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, h1 -; GISEL-NEXT: cset w8, ge -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t_vcgeh_f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, h1 +; CHECK-GI-NEXT: cset w8, ge +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp oge half %a, %b %vcmpd = sext i1 %0 to i16 @@ -73,18 +73,18 @@ entry: } define dso_local i16 @t_vcgth_f16(half %a, half %b) { -; SDISEL-LABEL: t_vcgth_f16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, h1 -; SDISEL-NEXT: csetm w0, gt -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t_vcgth_f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, h1 +; CHECK-SD-NEXT: csetm w0, gt +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t_vcgth_f16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, h1 -; GISEL-NEXT: cset w8, gt -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t_vcgth_f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, h1 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp ogt half %a, %b %vcmpd = sext i1 %0 to i16 @@ -92,18 +92,18 @@ entry: } define dso_local i16 @t_vcleh_f16(half %a, half %b) { -; SDISEL-LABEL: t_vcleh_f16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, h1 -; SDISEL-NEXT: csetm w0, ls -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t_vcleh_f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, h1 +; CHECK-SD-NEXT: csetm w0, ls +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t_vcleh_f16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, h1 -; GISEL-NEXT: cset w8, ls -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t_vcleh_f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, h1 +; CHECK-GI-NEXT: cset w8, ls +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp ole half %a, %b %vcmpd = sext i1 %0 to i16 @@ -111,18 +111,18 @@ entry: } define dso_local i16 @t_vclth_f16(half %a, half %b) { -; SDISEL-LABEL: t_vclth_f16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fcmp h0, h1 -; SDISEL-NEXT: csetm w0, mi -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: t_vclth_f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcmp h0, h1 +; CHECK-SD-NEXT: csetm w0, mi +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: t_vclth_f16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: fcmp h0, h1 -; GISEL-NEXT: cset w8, mi -; GISEL-NEXT: sbfx w0, w8, #0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: t_vclth_f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcmp h0, h1 +; CHECK-GI-NEXT: cset w8, mi +; CHECK-GI-NEXT: sbfx w0, w8, #0, #1 +; CHECK-GI-NEXT: ret entry: %0 = fcmp olt half %a, %b %vcmpd = sext i1 %0 to i16 @@ -187,18 +187,18 @@ declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1 declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1 define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) { -; SDISEL-LABEL: test_vcvth_n_f16_s16_1: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fmov s0, w0 -; SDISEL-NEXT: scvtf h0, h0, #1 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test_vcvth_n_f16_s16_1: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: scvtf h0, h0, #1 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test_vcvth_n_f16_s16_1: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxth w8, w0 -; GISEL-NEXT: fmov s0, w8 -; GISEL-NEXT: scvtf h0, h0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvth_n_f16_s16_1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sxth w8, w0 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: scvtf h0, h0, #1 +; CHECK-GI-NEXT: ret entry: %sext = sext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1) @@ -206,18 +206,18 @@ entry: } define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) { -; SDISEL-LABEL: test_vcvth_n_f16_s16_16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fmov s0, w0 -; SDISEL-NEXT: scvtf h0, h0, #16 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test_vcvth_n_f16_s16_16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: scvtf h0, h0, #16 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test_vcvth_n_f16_s16_16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxth w8, w0 -; GISEL-NEXT: fmov s0, w8 -; GISEL-NEXT: scvtf h0, h0, #16 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvth_n_f16_s16_16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sxth w8, w0 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: scvtf h0, h0, #16 +; CHECK-GI-NEXT: ret entry: %sext = sext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16) @@ -315,18 +315,18 @@ entry: } define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) { -; SDISEL-LABEL: test_vcvth_n_f16_u16_1: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fmov s0, w0 -; SDISEL-NEXT: ucvtf h0, h0, #1 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test_vcvth_n_f16_u16_1: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: ucvtf h0, h0, #1 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test_vcvth_n_f16_u16_1: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: fmov s0, w8 -; GISEL-NEXT: ucvtf h0, h0, #1 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvth_n_f16_u16_1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: ucvtf h0, h0, #1 +; CHECK-GI-NEXT: ret entry: %0 = zext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1) @@ -334,18 +334,18 @@ entry: } define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) { -; SDISEL-LABEL: test_vcvth_n_f16_u16_16: -; SDISEL: // %bb.0: // %entry -; SDISEL-NEXT: fmov s0, w0 -; SDISEL-NEXT: ucvtf h0, h0, #16 -; SDISEL-NEXT: ret +; CHECK-SD-LABEL: test_vcvth_n_f16_u16_16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: ucvtf h0, h0, #16 +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: test_vcvth_n_f16_u16_16: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: fmov s0, w8 -; GISEL-NEXT: ucvtf h0, h0, #16 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvth_n_f16_u16_16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: ucvtf h0, h0, #16 +; CHECK-GI-NEXT: ret entry: %0 = zext i16 %a to i32 %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16) diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index ae2ef26..4c28c90 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -2509,88 +2509,87 @@ define <7 x i32> @fshl_v7i32(<7 x i32> %a, <7 x i32> %b, <7 x i32> %c) { ; ; CHECK-GI-LABEL: fshl_v7i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr s17, [sp, #48] -; CHECK-GI-NEXT: add x8, sp, #56 -; CHECK-GI-NEXT: add x9, sp, #64 +; CHECK-GI-NEXT: ldr s3, [sp, #48] +; CHECK-GI-NEXT: ldr s20, [sp, #56] +; CHECK-GI-NEXT: add x9, sp, #56 ; CHECK-GI-NEXT: ldr s4, [sp, #48] -; CHECK-GI-NEXT: ldr s21, [sp, #56] -; CHECK-GI-NEXT: mov w10, #-1 // =0xffffffff -; CHECK-GI-NEXT: ld1 { v17.s }[1], [x8] -; CHECK-GI-NEXT: ldr s20, [x9] -; CHECK-GI-NEXT: add x8, sp, #72 -; CHECK-GI-NEXT: mov v4.s[1], v21.s[0] +; CHECK-GI-NEXT: ldr s7, [sp, #80] +; CHECK-GI-NEXT: mov w12, #-1 // =0xffffffff +; CHECK-GI-NEXT: ldr s21, [sp, #88] +; CHECK-GI-NEXT: mov v3.s[1], v20.s[0] +; CHECK-GI-NEXT: fmov s20, w12 +; CHECK-GI-NEXT: ld1 { v4.s }[1], [x9] +; CHECK-GI-NEXT: ldr s17, [sp] +; CHECK-GI-NEXT: add x13, sp, #64 +; CHECK-GI-NEXT: mov v7.s[1], v21.s[0] ; CHECK-GI-NEXT: fmov s21, w7 -; CHECK-GI-NEXT: ldr s6, [sp] -; CHECK-GI-NEXT: ld1 { v20.s }[1], [x8] ; CHECK-GI-NEXT: ldr s19, [sp, #64] -; CHECK-GI-NEXT: ldr s7, [sp, #80] -; CHECK-GI-NEXT: ldr s22, [sp, #88] -; CHECK-GI-NEXT: mov w9, #31 // =0x1f -; CHECK-GI-NEXT: mov w11, #1 // =0x1 -; CHECK-GI-NEXT: mov v21.s[1], v6.s[0] -; CHECK-GI-NEXT: fmov s6, w9 +; CHECK-GI-NEXT: mov w11, #31 // =0x1f +; CHECK-GI-NEXT: mov v20.s[1], w12 ; CHECK-GI-NEXT: ldr s18, [sp, #96] -; CHECK-GI-NEXT: zip1 v17.2d, v17.2d, v20.2d -; CHECK-GI-NEXT: fmov s20, w10 -; CHECK-GI-NEXT: mov v7.s[1], v22.s[0] -; CHECK-GI-NEXT: mov v4.s[2], v19.s[0] -; CHECK-GI-NEXT: fmov s19, w11 +; CHECK-GI-NEXT: ld1 { v4.s }[2], [x13] +; CHECK-GI-NEXT: mov w13, #1 // =0x1 +; CHECK-GI-NEXT: mov v3.s[2], v19.s[0] +; CHECK-GI-NEXT: mov v21.s[1], v17.s[0] +; CHECK-GI-NEXT: fmov s17, w11 +; CHECK-GI-NEXT: fmov s19, w13 ; CHECK-GI-NEXT: fmov s23, w0 -; CHECK-GI-NEXT: mov v6.s[1], w9 -; CHECK-GI-NEXT: fmov s24, w9 -; CHECK-GI-NEXT: ldr s2, [sp, #8] -; CHECK-GI-NEXT: mov v20.s[1], w10 +; CHECK-GI-NEXT: fmov s24, w11 +; CHECK-GI-NEXT: ldr s6, [sp, #8] ; CHECK-GI-NEXT: ldr s0, [sp, #24] ; CHECK-GI-NEXT: ldr s5, [sp, #32] -; CHECK-GI-NEXT: mov v19.s[1], w11 ; CHECK-GI-NEXT: mov v7.s[2], v18.s[0] +; CHECK-GI-NEXT: mov v17.s[1], w11 +; CHECK-GI-NEXT: mov v19.s[1], w13 +; CHECK-GI-NEXT: mov v20.s[2], w12 ; CHECK-GI-NEXT: ldr s16, [sp, #72] ; CHECK-GI-NEXT: mov v23.s[1], w1 ; CHECK-GI-NEXT: ldr s18, [sp, #80] -; CHECK-GI-NEXT: mov v21.s[2], v2.s[0] -; CHECK-GI-NEXT: mov v24.s[1], w9 +; CHECK-GI-NEXT: mov v21.s[2], v6.s[0] +; CHECK-GI-NEXT: mov v24.s[1], w11 ; CHECK-GI-NEXT: mov v0.s[1], v5.s[0] -; CHECK-GI-NEXT: fmov s5, w4 -; CHECK-GI-NEXT: mov v20.s[2], w10 -; CHECK-GI-NEXT: add x8, sp, #88 +; CHECK-GI-NEXT: fmov s6, w4 +; CHECK-GI-NEXT: add x10, sp, #88 ; CHECK-GI-NEXT: movi v22.4s, #31 -; CHECK-GI-NEXT: mov v4.s[3], v16.s[0] -; CHECK-GI-NEXT: mov v6.s[2], w9 -; CHECK-GI-NEXT: mov v19.s[2], w11 -; CHECK-GI-NEXT: ldr s1, [sp, #16] -; CHECK-GI-NEXT: ldr s3, [sp, #40] -; CHECK-GI-NEXT: ld1 { v18.s }[1], [x8] +; CHECK-GI-NEXT: mov v3.s[3], v16.s[0] +; CHECK-GI-NEXT: mov v17.s[2], w11 +; CHECK-GI-NEXT: mov v19.s[2], w13 +; CHECK-GI-NEXT: ldr s2, [sp, #16] +; CHECK-GI-NEXT: ldr s1, [sp, #40] +; CHECK-GI-NEXT: ld1 { v18.s }[1], [x10] +; CHECK-GI-NEXT: eor v5.16b, v7.16b, v20.16b ; CHECK-GI-NEXT: mov v23.s[2], w2 -; CHECK-GI-NEXT: mov v5.s[1], w5 -; CHECK-GI-NEXT: add x8, sp, #96 -; CHECK-GI-NEXT: eor v2.16b, v7.16b, v20.16b -; CHECK-GI-NEXT: mov v21.s[3], v1.s[0] -; CHECK-GI-NEXT: mov v24.s[2], w9 -; CHECK-GI-NEXT: mov v0.s[2], v3.s[0] -; CHECK-GI-NEXT: bic v1.16b, v22.16b, v4.16b -; CHECK-GI-NEXT: ld1 { v18.s }[2], [x8] +; CHECK-GI-NEXT: mov v6.s[1], w5 +; CHECK-GI-NEXT: add x8, sp, #72 +; CHECK-GI-NEXT: add x9, sp, #96 +; CHECK-GI-NEXT: mov v21.s[3], v2.s[0] +; CHECK-GI-NEXT: mov v24.s[2], w11 +; CHECK-GI-NEXT: mov v0.s[2], v1.s[0] +; CHECK-GI-NEXT: ld1 { v4.s }[3], [x8] +; CHECK-GI-NEXT: bic v2.16b, v22.16b, v3.16b +; CHECK-GI-NEXT: ld1 { v18.s }[2], [x9] +; CHECK-GI-NEXT: and v1.16b, v5.16b, v17.16b ; CHECK-GI-NEXT: neg v3.4s, v19.4s -; CHECK-GI-NEXT: and v4.16b, v17.16b, v22.16b -; CHECK-GI-NEXT: and v2.16b, v2.16b, v6.16b ; CHECK-GI-NEXT: mov v23.s[3], w3 -; CHECK-GI-NEXT: mov v5.s[2], w6 -; CHECK-GI-NEXT: ushr v6.4s, v21.4s, #1 -; CHECK-GI-NEXT: neg v1.4s, v1.4s +; CHECK-GI-NEXT: mov v6.s[2], w6 +; CHECK-GI-NEXT: and v4.16b, v4.16b, v22.16b +; CHECK-GI-NEXT: ushr v5.4s, v21.4s, #1 +; CHECK-GI-NEXT: neg v2.4s, v2.4s ; CHECK-GI-NEXT: and v7.16b, v18.16b, v24.16b +; CHECK-GI-NEXT: neg v1.4s, v1.4s ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v3.4s -; CHECK-GI-NEXT: neg v2.4s, v2.4s ; CHECK-GI-NEXT: ushl v3.4s, v23.4s, v4.4s -; CHECK-GI-NEXT: ushl v1.4s, v6.4s, v1.4s -; CHECK-GI-NEXT: ushl v4.4s, v5.4s, v7.4s -; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-GI-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-GI-NEXT: ushl v2.4s, v5.4s, v2.4s +; CHECK-GI-NEXT: ushl v4.4s, v6.4s, v7.4s +; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: orr v1.16b, v3.16b, v2.16b ; CHECK-GI-NEXT: orr v0.16b, v4.16b, v0.16b ; CHECK-GI-NEXT: mov s2, v1.s[1] ; CHECK-GI-NEXT: mov s3, v1.s[2] ; CHECK-GI-NEXT: mov s4, v1.s[3] -; CHECK-GI-NEXT: fmov w0, s1 ; CHECK-GI-NEXT: mov s5, v0.s[1] ; CHECK-GI-NEXT: mov s6, v0.s[2] +; CHECK-GI-NEXT: fmov w0, s1 ; CHECK-GI-NEXT: fmov w4, s0 ; CHECK-GI-NEXT: fmov w1, s2 ; CHECK-GI-NEXT: fmov w2, s3 diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll index 0f208f8..374def5 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll +++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s ; Check there's no assert in spilling from implicit-def operands on an ; IMPLICIT_DEF. @@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8 ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl _fprintf ; CHECK-NEXT: brk #0x1 diff --git a/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll b/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll new file mode 100644 index 0000000..c4a027c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -tail-dup-pred-size=2 -tail-dup-succ-size=2 -o - %s | FileCheck %s + +target triple = "arm64-apple-macosx13.0.0" + +@opcode.targets = local_unnamed_addr constant [6 x ptr] [ptr blockaddress(@test_interp, %op1.bb), ptr blockaddress(@test_interp, %op6.bb), ptr blockaddress(@test_interp, %loop.header), ptr blockaddress(@test_interp, %op2.bb), ptr blockaddress(@test_interp, %op4.bb), ptr blockaddress(@test_interp, %op5.bb)] + +define void @test_interp(ptr %frame, ptr %dst) { +; CHECK-LABEL: test_interp: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x24, x23, [sp, #-64]! ; 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -24 +; CHECK-NEXT: .cfi_offset w20, -32 +; CHECK-NEXT: .cfi_offset w21, -40 +; CHECK-NEXT: .cfi_offset w22, -48 +; CHECK-NEXT: .cfi_offset w23, -56 +; CHECK-NEXT: .cfi_offset w24, -64 +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x21, _opcode.targets@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: add x21, x21, _opcode.targets@PAGEOFF +; CHECK-NEXT: mov x22, xzr +; CHECK-NEXT: add x8, x21, xzr, lsl #3 +; CHECK-NEXT: mov x19, x1 +; CHECK-NEXT: mov x20, x0 +; CHECK-NEXT: add x23, x22, #1 +; CHECK-NEXT: br x8 +; CHECK-NEXT: Ltmp0: ; Block address taken +; CHECK-NEXT: LBB0_1: ; %loop.header +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x8, x21, x23, lsl #3 +; CHECK-NEXT: mov x20, xzr +; CHECK-NEXT: mov x22, xzr +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: br x8 +; CHECK-NEXT: Ltmp1: ; Block address taken +; CHECK-NEXT: LBB0_2: ; %op1.bb +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: str xzr, [x19] +; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: ldr x0, [x20, #-8]! +; CHECK-NEXT: ldr x9, [x0, #8] +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ldr x8, [x9, #48] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: add x8, x21, x23, lsl #3 +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: br x8 +; CHECK-NEXT: Ltmp2: ; Block address taken +; CHECK-NEXT: LBB0_3: ; %op2.bb +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x8, x21, x23, lsl #3 +; CHECK-NEXT: mov x20, xzr +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: str x22, [x19] +; CHECK-NEXT: mov x22, xzr +; CHECK-NEXT: br x8 +; CHECK-NEXT: Ltmp3: ; Block address taken +; CHECK-NEXT: LBB0_4: ; %op4.bb +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: str x22, [x19] +; CHECK-NEXT: add x10, x21, x23, lsl #3 +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: ldur x8, [x22, #12] +; CHECK-NEXT: ldur x9, [x20, #-8] +; CHECK-NEXT: add x22, x22, #20 +; CHECK-NEXT: stp x8, x9, [x20, #-8] +; CHECK-NEXT: add x20, x20, #8 +; CHECK-NEXT: br x10 +; CHECK-NEXT: Ltmp4: ; Block address taken +; CHECK-NEXT: LBB0_5: ; %op5.bb +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: str x22, [x19] +; CHECK-NEXT: add x10, x21, x23, lsl #3 +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: ldur x8, [x22, #12] +; CHECK-NEXT: ldur x9, [x20, #-8] +; CHECK-NEXT: add x22, x22, #20 +; CHECK-NEXT: stp x8, x9, [x20, #-8] +; CHECK-NEXT: add x20, x20, #8 +; CHECK-NEXT: br x10 +; CHECK-NEXT: Ltmp5: ; Block address taken +; CHECK-NEXT: LBB0_6: ; %op6.bb +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr x0, [x20, #-8]! +; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: ldr x9, [x0, #8] +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ldr x8, [x9, #48] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: add x8, x21, x23, lsl #3 +; CHECK-NEXT: add x23, x23, #1 +; CHECK-NEXT: br x8 +; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %op1.bb ], [ %iv.next, %op2.bb ], [ %iv.next, %op4.bb ], [ %iv.next, %op5.bb ], [ %iv.next, %op6.bb ], [ %iv.next, %loop.header ] + %stack.pointer = phi ptr [ %frame, %entry ], [ %stack.8, %op1.bb ], [ null, %op2.bb ], [ %stack.next, %op4.bb ], [ %stack.next.2, %op5.bb ], [ %stack.4, %op6.bb ], [ null, %loop.header ] + %next.instr = phi ptr [ null, %entry ], [ %next.instr, %op1.bb ], [ null, %op2.bb ], [ %next.instr.20, %op4.bb ], [ %next.instr.21, %op5.bb ], [ %next.instr, %op6.bb ], [ null, %loop.header ] + %iv.next = add i64 %iv, 1 + %next_op = getelementptr [6 x ptr], ptr @opcode.targets, i64 0, i64 %iv + indirectbr ptr %next_op, [label %op1.bb, label %op6.bb, label %loop.header, label %op2.bb, label %op4.bb, label %op5.bb] + +op1.bb: + store ptr null, ptr %dst, align 8 + %stack.8 = getelementptr i8, ptr %stack.pointer, i64 -8 + %l.0 = load ptr, ptr %stack.8, align 8 + store i64 1, ptr %l.0, align 8 + %gep.0 = getelementptr i8, ptr %l.0, i64 8 + %l.1 = load ptr, ptr %gep.0, align 8 + %gep.1 = getelementptr i8, ptr %l.1, i64 48 + %l.2 = load ptr, ptr %gep.1, align 8 + tail call void %l.2(ptr nonnull %l.0) + br label %loop.header + +op2.bb: + store ptr %next.instr, ptr %dst, align 8 + br label %loop.header + +op4.bb: + store ptr %next.instr, ptr %dst, align 8 + %next.instr.20 = getelementptr i8, ptr %next.instr, i64 20 + %stack.2 = getelementptr i8, ptr %stack.pointer, i64 -8 + %l.3 = load ptr, ptr %stack.2, align 8 + %next.instr.12 = getelementptr i8, ptr %next.instr, i64 12 + %next.instr.12.val = load ptr, ptr %next.instr.12, align 2 + store ptr %next.instr.12.val, ptr %stack.2, align 8 + store ptr %l.3, ptr %stack.pointer, align 8 + %stack.next = getelementptr i8, ptr %stack.pointer, i64 8 + br label %loop.header + +op5.bb: + store ptr %next.instr, ptr %dst, align 8 + %next.instr.21 = getelementptr i8, ptr %next.instr, i64 20 + %stack.3 = getelementptr i8, ptr %stack.pointer, i64 -8 + %l.4 = load ptr, ptr %stack.3, align 8 + %next.instr.2 = getelementptr i8, ptr %next.instr, i64 12 + %next.instr.2.val = load ptr, ptr %next.instr.2, align 2 + store ptr %next.instr.2.val, ptr %stack.3, align 8 + store ptr %l.4, ptr %stack.pointer, align 8 + %stack.next.2 = getelementptr i8, ptr %stack.pointer, i64 8 + br label %loop.header + +op6.bb: + %stack.4 = getelementptr i8, ptr %stack.pointer, i64 -8 + %l.5 = load ptr, ptr %stack.4, align 8 + store i64 1, ptr %l.5, align 8 + %gep.5 = getelementptr i8, ptr %l.5, i64 8 + %l.6 = load ptr, ptr %gep.5, align 8 + %gep.6 = getelementptr i8, ptr %l.6, i64 48 + %l.7 = load ptr, ptr %gep.6, align 8 + tail call void %l.7(ptr nonnull %l.5) + br label %loop.header +} diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll index 4e1876d..2213aa1 100644 --- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll +++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll @@ -700,14 +700,13 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; CHECK-NEXT: ldr s1, [sp, #44] ; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: mov v2.s[3], v0.s[0] ; CHECK-NEXT: ld1 { v1.s }[1], [x19] -; CHECK-NEXT: ldr s0, [x20] -; CHECK-NEXT: ld1 { v0.s }[1], [x21] +; CHECK-NEXT: mov v2.s[3], v0.s[0] +; CHECK-NEXT: ld1 { v1.s }[2], [x20] ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp x30, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: zip1 v1.2d, v1.2d, v0.2d ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ld1 { v1.s }[3], [x21] +; CHECK-NEXT: ldp x30, x21, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret ; @@ -873,11 +872,10 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: ldr s0, [sp, #28] ; CHECK-NEXT: ld1 { v0.s }[1], [x19] -; CHECK-NEXT: ldr s1, [x20] -; CHECK-NEXT: ld1 { v1.s }[1], [x21] +; CHECK-NEXT: ld1 { v0.s }[2], [x20] ; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[3], [x21] ; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll new file mode 100644 index 0000000..1a83930 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; load zero-extended i32, bitcast to f64 +define double @_Z9load_u64_from_u32_testPj(ptr %n){ +; CHECK-LABEL: _Z9load_u64_from_u32_testPj: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ret +entry: + %0 = load i32, ptr %n, align 4 + %conv = zext i32 %0 to i64 + %1 = bitcast i64 %conv to double + ret double %1 +} + +; load zero-extended i16, bitcast to f64 +define double @_Z9load_u64_from_u16_testPj(ptr %n){ +; CHECK-LABEL: _Z9load_u64_from_u16_testPj: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ret +entry: + %0 = load i16, ptr %n, align 2 + %conv = zext i16 %0 to i64 + %1 = bitcast i64 %conv to double + ret double %1 +} + +; load zero-extended i8, bitcast to f64 +define double @_Z16load_u64_from_u8Ph(ptr %n){ +; CHECK-LABEL: _Z16load_u64_from_u8Ph: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ret +entry: + %0 = load i8, ptr %n, align 1 + %conv = zext i8 %0 to i64 + %1 = bitcast i64 %conv to double + ret double %1 +} + +; load zero-extended i16, bitcast to f32 +define float @_Z17load_u32_from_u16Pt(ptr %n){ +; CHECK-LABEL: _Z17load_u32_from_u16Pt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ret +entry: + %0 = load i16, ptr %n, align 2 + %conv = zext i16 %0 to i32 + %1 = bitcast i32 %conv to float + ret float %1 +} + +; load zero-extended i8, bitcast to f32 +define float @_Z16load_u32_from_u8Ph(ptr %n){ +; CHECK-LABEL: _Z16load_u32_from_u8Ph: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ret +entry: + %0 = load i8, ptr %n, align 1 + %conv = zext i8 %0 to i32 + %1 = bitcast i32 %conv to float + ret float %1 +} + +; load zero-extended i8, bitcast to f16 +define half @_Z16load_u16_from_u8Ph(ptr %n){ +; CHECK-LABEL: _Z16load_u16_from_u8Ph: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NEXT: ret +entry: + %0 = load i8, ptr %n, align 1 + %conv = zext i8 %0 to i16 + %1 = bitcast i16 %conv to half + ret half %1 +} + diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll index 9912c7a..81f13b8 100644 --- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI @var1_32 = global i32 0 @var2_32 = global i32 0 @@ -243,26 +244,48 @@ define void @logical_64bit() minsize { } define void @flag_setting() { -; CHECK-LABEL: flag_setting: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, :got:var1_64 -; CHECK-NEXT: adrp x10, :got:var2_64 -; CHECK-NEXT: ldr x8, [x8, :got_lo12:var1_64] -; CHECK-NEXT: ldr x10, [x10, :got_lo12:var2_64] -; CHECK-NEXT: ldr x9, [x8] -; CHECK-NEXT: ldr x10, [x10] -; CHECK-NEXT: tst x9, x10 -; CHECK-NEXT: b.gt .LBB2_4 -; CHECK-NEXT: // %bb.1: // %test2 -; CHECK-NEXT: tst x9, x10, lsl #63 -; CHECK-NEXT: b.lt .LBB2_4 -; CHECK-NEXT: // %bb.2: // %test3 -; CHECK-NEXT: tst x9, x10, asr #12 -; CHECK-NEXT: b.gt .LBB2_4 -; CHECK-NEXT: // %bb.3: // %other_exit -; CHECK-NEXT: str x9, [x8] -; CHECK-NEXT: .LBB2_4: // %common.ret -; CHECK-NEXT: ret +; CHECK-SD-LABEL: flag_setting: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, :got:var1_64 +; CHECK-SD-NEXT: adrp x10, :got:var2_64 +; CHECK-SD-NEXT: ldr x8, [x8, :got_lo12:var1_64] +; CHECK-SD-NEXT: ldr x10, [x10, :got_lo12:var2_64] +; CHECK-SD-NEXT: ldr x9, [x8] +; CHECK-SD-NEXT: ldr x10, [x10] +; CHECK-SD-NEXT: tst x9, x10 +; CHECK-SD-NEXT: b.gt .LBB2_4 +; CHECK-SD-NEXT: // %bb.1: // %test2 +; CHECK-SD-NEXT: tst x9, x10, lsl #63 +; CHECK-SD-NEXT: b.lt .LBB2_4 +; CHECK-SD-NEXT: // %bb.2: // %test3 +; CHECK-SD-NEXT: tst x9, x10, asr #12 +; CHECK-SD-NEXT: b.gt .LBB2_4 +; CHECK-SD-NEXT: // %bb.3: // %other_exit +; CHECK-SD-NEXT: str x9, [x8] +; CHECK-SD-NEXT: .LBB2_4: // %common.ret +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: flag_setting: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, :got:var1_64 +; CHECK-GI-NEXT: adrp x10, :got:var2_64 +; CHECK-GI-NEXT: ldr x8, [x8, :got_lo12:var1_64] +; CHECK-GI-NEXT: ldr x10, [x10, :got_lo12:var2_64] +; CHECK-GI-NEXT: ldr x9, [x8] +; CHECK-GI-NEXT: ldr x10, [x10] +; CHECK-GI-NEXT: tst x9, x10 +; CHECK-GI-NEXT: b.gt .LBB2_4 +; CHECK-GI-NEXT: // %bb.1: // %test2 +; CHECK-GI-NEXT: tst x9, x10, lsl #63 +; CHECK-GI-NEXT: b.lt .LBB2_4 +; CHECK-GI-NEXT: // %bb.2: // %test3 +; CHECK-GI-NEXT: asr x10, x10, #12 +; CHECK-GI-NEXT: tst x10, x9 +; CHECK-GI-NEXT: b.gt .LBB2_4 +; CHECK-GI-NEXT: // %bb.3: // %other_exit +; CHECK-GI-NEXT: str x9, [x8] +; CHECK-GI-NEXT: .LBB2_4: // %common.ret +; CHECK-GI-NEXT: ret %val1 = load i64, ptr @var1_64 %val2 = load i64, ptr @var2_64 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir index 525f6dd..184c9ef 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir @@ -1,14 +1,11 @@ -# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE -# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s # fadd without the reassoc flags can be reassociate only when unsafe fp math is # enabled. # CHECK-LABEL: name: fadd_no_reassoc # CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr --- name: fadd_no_reassoc alignment: 4 @@ -49,10 +46,9 @@ body: | # the reassoc flag is ignored. # CHECK-LABEL: name: fadd_reassoc # CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr + --- name: fadd_reassoc alignment: 4 @@ -92,10 +88,8 @@ body: | # Check that flags on the instructions are preserved after reassociation. # CHECK-LABEL: name: fadd_flags # CHECK: [[ADD1:%[0-9]+]]:fpr32 = nnan ninf nsz FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nsz FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = nsz FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr --- name: fadd_flags alignment: 4 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index ec61fee..65afd92 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -1,29 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s ; Incremental updates of the instruction depths should be enough for this test ; case. -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \ -; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 \ +; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s ; Verify that the first two adds are independent regardless of how the inputs are ; commuted. The destination registers are used as source registers for the third add. define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds1: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %t1, %x3 @@ -44,110 +36,110 @@ define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3) } define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds1_reassoc: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: ret - %t0 = fadd reassoc float %x0, %x1 - %t1 = fadd reassoc float %t0, %x2 - %t2 = fadd reassoc float %t1, %x3 +; CHECK-LABEL: reassociate_adds1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 ret float %t2 } define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds2: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds2: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds2: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %t1, %x3 ret float %t2 } +define float @reassociate_adds2_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds2_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %t1, %x3 + ret float %t2 +} + define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds3: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds3: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds3: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds3_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds3_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds4: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds4: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds4: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds4_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds4_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not ; produced because that would cost more compile time. define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { -; CHECK-STD-LABEL: reassociate_adds5: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: fadd s0, s0, s4 -; CHECK-STD-NEXT: fadd s0, s0, s5 -; CHECK-STD-NEXT: fadd s0, s0, s6 -; CHECK-STD-NEXT: fadd s0, s0, s7 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds5: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s4, s5 -; CHECK-UNSAFE-NEXT: fadd s1, s1, s6 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s7 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds5: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: fadd s0, s0, s5 +; CHECK-NEXT: fadd s0, s0, s6 +; CHECK-NEXT: fadd s0, s0, s7 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %t1, %x3 @@ -158,141 +150,198 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa ret float %t6 } +define float @reassociate_adds5_reassoc(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { +; CHECK-LABEL: reassociate_adds5_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s4, s5 +; CHECK-NEXT: fadd s1, s1, s6 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s7 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 + %t3 = fadd reassoc nsz float %t2, %x4 + %t4 = fadd reassoc nsz float %t3, %x5 + %t5 = fadd reassoc nsz float %t4, %x6 + %t6 = fadd reassoc nsz float %t5, %x7 + ret float %t6 +} + ; Verify that we only need two associative operations to reassociate the operands. ; Also, we should reassociate such that the result of the high latency division ; is used by the final 'add' rather than reassociating the %x3 operand with the ; division. The latter reassociation would not improve anything. define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds6: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds6: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds6: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = fdiv float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds6_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds6_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that scalar single-precision multiplies are reassociated. define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_muls1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv s0, s0, s1 -; CHECK-STD-NEXT: fmul s0, s2, s0 -; CHECK-STD-NEXT: fmul s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1 -; CHECK-UNSAFE-NEXT: fmul s1, s3, s2 -; CHECK-UNSAFE-NEXT: fmul s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls1: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmul s0, s2, s0 +; CHECK-NEXT: fmul s0, s3, s0 +; CHECK-NEXT: ret %t0 = fdiv float %x0, %x1 %t1 = fmul float %x2, %t0 %t2 = fmul float %x3, %t1 ret float %t2 } +define float @reassociate_muls1_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_muls1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmul s1, s3, s2 +; CHECK-NEXT: fmul s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz float %x0, %x1 + %t1 = fmul reassoc nsz float %x2, %t0 + %t2 = fmul reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that scalar double-precision adds are reassociated. define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) { -; CHECK-STD-LABEL: reassociate_adds_double: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv d0, d0, d1 -; CHECK-STD-NEXT: fadd d0, d2, d0 -; CHECK-STD-NEXT: fadd d0, d3, d0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_double: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1 -; CHECK-UNSAFE-NEXT: fadd d1, d3, d2 -; CHECK-UNSAFE-NEXT: fadd d0, d1, d0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fadd d0, d2, d0 +; CHECK-NEXT: fadd d0, d3, d0 +; CHECK-NEXT: ret %t0 = fdiv double %x0, %x1 %t1 = fadd double %x2, %t0 %t2 = fadd double %x3, %t1 ret double %t2 } +define double @reassociate_adds_double_reassoc(double %x0, double %x1, double %x2, double %x3) { +; CHECK-LABEL: reassociate_adds_double_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fadd d1, d3, d2 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %x2, %t0 + %t2 = fadd reassoc nsz double %x3, %t1 + ret double %t2 +} + ; Verify that scalar double-precision multiplies are reassociated. define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) { -; CHECK-STD-LABEL: reassociate_muls_double: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv d0, d0, d1 -; CHECK-STD-NEXT: fmul d0, d2, d0 -; CHECK-STD-NEXT: fmul d0, d3, d0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_double: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1 -; CHECK-UNSAFE-NEXT: fmul d1, d3, d2 -; CHECK-UNSAFE-NEXT: fmul d0, d1, d0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmul d0, d2, d0 +; CHECK-NEXT: fmul d0, d3, d0 +; CHECK-NEXT: ret %t0 = fdiv double %x0, %x1 %t1 = fmul double %x2, %t0 %t2 = fmul double %x3, %t1 ret double %t2 } +define double @reassociate_muls_double_reassoc(double %x0, double %x1, double %x2, double %x3) { +; CHECK-LABEL: reassociate_muls_double_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmul d1, d3, d2 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz double %x0, %x1 + %t1 = fmul reassoc nsz double %x2, %t0 + %t2 = fmul reassoc nsz double %x3, %t1 + ret double %t2 +} + ; Verify that scalar half-precision adds are reassociated. define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) { -; CHECK-STD-LABEL: reassociate_adds_half: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv h0, h0, h1 -; CHECK-STD-NEXT: fadd h0, h2, h0 -; CHECK-STD-NEXT: fadd h0, h3, h0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_half: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fadd h1, h3, h2 -; CHECK-UNSAFE-NEXT: fadd h0, h1, h0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_half: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fadd h0, h2, h0 +; CHECK-NEXT: fadd h0, h3, h0 +; CHECK-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fadd half %x2, %t0 %t2 = fadd half %x3, %t1 ret half %t2 } +define half @reassociate_adds_half_reassoc(half %x0, half %x1, half %x2, half %x3) { +; CHECK-LABEL: reassociate_adds_half_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fadd h1, h3, h2 +; CHECK-NEXT: fadd h0, h1, h0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz half %x0, %x1 + %t1 = fadd reassoc nsz half %x2, %t0 + %t2 = fadd reassoc nsz half %x3, %t1 + ret half %t2 +} + ; Verify that scalar half-precision multiplies are reassociated. define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) { -; CHECK-STD-LABEL: reassociate_muls_half: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv h0, h0, h1 -; CHECK-STD-NEXT: fmul h0, h2, h0 -; CHECK-STD-NEXT: fmul h0, h3, h0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_half: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fmul h1, h3, h2 -; CHECK-UNSAFE-NEXT: fmul h0, h1, h0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_half: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmul h0, h2, h0 +; CHECK-NEXT: fmul h0, h3, h0 +; CHECK-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fmul half %x2, %t0 %t2 = fmul half %x3, %t1 ret half %t2 } +define half @reassociate_muls_half_reassoc(half %x0, half %x1, half %x2, half %x3) { +; CHECK-LABEL: reassociate_muls_half_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmul h1, h3, h2 +; CHECK-NEXT: fmul h0, h1, h0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz half %x0, %x1 + %t1 = fmul reassoc nsz half %x2, %t0 + %t2 = fmul reassoc nsz half %x3, %t1 + ret half %t2 +} + ; Verify that scalar integer adds are reassociated. define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { @@ -365,173 +414,222 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; Verify that we reassociate vector instructions too. define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds1: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %t0, %x2 %t2 = fadd <4 x float> %t1, %x3 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds1_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds2: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds2: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds2: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %x2, %t0 %t2 = fadd <4 x float> %t1, %x3 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds2_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds2_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds3: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds3: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds3: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %t0, %x2 %t2 = fadd <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds3_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds3_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds4: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds4: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds4: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %x2, %t0 %t2 = fadd <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds4_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds4_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + ; Verify that 64-bit vector half-precision adds are reassociated. define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) { -; CHECK-STD-LABEL: reassociate_adds_v4f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h -; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h -; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h -; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h -; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h +; CHECK-NEXT: fadd v0.4h, v3.4h, v0.4h +; CHECK-NEXT: ret %t0 = fadd <4 x half> %x0, %x1 %t1 = fadd <4 x half> %x2, %t0 %t2 = fadd <4 x half> %x3, %t1 ret <4 x half> %t2 } +define <4 x half> @reassociate_adds_v4f16_reassoc(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) { +; CHECK-LABEL: reassociate_adds_v4f16_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: fadd v1.4h, v3.4h, v2.4h +; CHECK-NEXT: fadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x half> %x0, %x1 + %t1 = fadd reassoc nsz <4 x half> %x2, %t0 + %t2 = fadd reassoc nsz <4 x half> %x3, %t1 + ret <4 x half> %t2 +} + ; Verify that 128-bit vector half-precision multiplies are reassociated. define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v8f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h -; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h -; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h -; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h -; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: fmul v0.8h, v2.8h, v0.8h +; CHECK-NEXT: fmul v0.8h, v3.8h, v0.8h +; CHECK-NEXT: ret %t0 = fadd <8 x half> %x0, %x1 %t1 = fmul <8 x half> %x2, %t0 %t2 = fmul <8 x half> %x3, %t1 ret <8 x half> %t2 } +define <8 x half> @reassociate_muls_v8f16_reassoc(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) { +; CHECK-LABEL: reassociate_muls_v8f16_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: fmul v1.8h, v3.8h, v2.8h +; CHECK-NEXT: fmul v0.8h, v1.8h, v0.8h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <8 x half> %x0, %x1 + %t1 = fmul reassoc nsz <8 x half> %x2, %t0 + %t2 = fmul reassoc nsz <8 x half> %x3, %t1 + ret <8 x half> %t2 +} + ; Verify that 128-bit vector single-precision multiplies are reassociated. define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v4f32: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fmul v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fmul v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fmul <4 x float> %x2, %t0 %t2 = fmul <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @reassociate_muls_v4f32_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: reassociate_muls_v4f32_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fmul reassoc nsz <4 x float> %x2, %t0 + %t2 = fmul reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + ; Verify that 128-bit vector double-precision multiplies are reassociated. define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v2f64: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.2d, v0.2d, v1.2d -; CHECK-STD-NEXT: fmul v0.2d, v2.2d, v0.2d -; CHECK-STD-NEXT: fmul v0.2d, v3.2d, v0.2d -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d -; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d -; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d +; CHECK-NEXT: fmul v0.2d, v3.2d, v0.2d +; CHECK-NEXT: ret %t0 = fadd <2 x double> %x0, %x1 %t1 = fmul <2 x double> %x2, %t0 %t2 = fmul <2 x double> %x3, %t1 ret <2 x double> %t2 } +define <2 x double> @reassociate_muls_v2f64_reassoc(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { +; CHECK-LABEL: reassociate_muls_v2f64_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v1.2d, v3.2d, v2.2d +; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <2 x double> %x0, %x1 + %t1 = fmul reassoc nsz <2 x double> %x2, %t0 + %t2 = fmul reassoc nsz <2 x double> %x3, %t1 + ret <2 x double> %t2 +} + + ; Verify that vector integer arithmetic operations are reassociated. define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) { @@ -606,65 +704,83 @@ define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> ; Verify that scalable vector FP arithmetic operations are reassociated. define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) { -; CHECK-STD-LABEL: reassociate_adds_nxv4f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd z0.h, z0.h, z1.h -; CHECK-STD-NEXT: fadd z0.h, z2.h, z0.h -; CHECK-STD-NEXT: fadd z0.h, z3.h, z0.h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd z0.h, z0.h, z1.h -; CHECK-UNSAFE-NEXT: fadd z1.h, z3.h, z2.h -; CHECK-UNSAFE-NEXT: fadd z0.h, z1.h, z0.h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fadd z0.h, z2.h, z0.h +; CHECK-NEXT: fadd z0.h, z3.h, z0.h +; CHECK-NEXT: ret %t0 = fadd reassoc <vscale x 8 x half> %x0, %x1 %t1 = fadd reassoc <vscale x 8 x half> %x2, %t0 %t2 = fadd reassoc <vscale x 8 x half> %x3, %t1 ret <vscale x 8 x half> %t2 } +define <vscale x 8 x half> @reassociate_adds_nxv4f16_nsz(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) { +; CHECK-LABEL: reassociate_adds_nxv4f16_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fadd z1.h, z3.h, z2.h +; CHECK-NEXT: fadd z0.h, z1.h, z0.h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <vscale x 8 x half> %x0, %x1 + %t1 = fadd reassoc nsz <vscale x 8 x half> %x2, %t0 + %t2 = fadd reassoc nsz <vscale x 8 x half> %x3, %t1 + ret <vscale x 8 x half> %t2 +} + define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) { -; CHECK-STD-LABEL: reassociate_adds_nxv4f32: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s -; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s -; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s -; CHECK-UNSAFE-NEXT: fadd z1.s, z3.s, z2.s -; CHECK-UNSAFE-NEXT: fadd z0.s, z1.s, z0.s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fadd z0.s, z2.s, z0.s +; CHECK-NEXT: fadd z0.s, z3.s, z0.s +; CHECK-NEXT: ret %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1 %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0 %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1 ret <vscale x 4 x float> %t2 } +define <vscale x 4 x float> @reassociate_adds_nxv4f32_nsz(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) { +; CHECK-LABEL: reassociate_adds_nxv4f32_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fadd z1.s, z3.s, z2.s +; CHECK-NEXT: fadd z0.s, z1.s, z0.s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <vscale x 4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <vscale x 4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <vscale x 4 x float> %x3, %t1 + ret <vscale x 4 x float> %t2 +} + define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) { -; CHECK-STD-LABEL: reassociate_muls_nxv2f64: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d -; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d -; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d -; CHECK-UNSAFE-NEXT: fmul z1.d, z3.d, z2.d -; CHECK-UNSAFE-NEXT: fmul z0.d, z1.d, z0.d -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: fmul z0.d, z2.d, z0.d +; CHECK-NEXT: fmul z0.d, z3.d, z0.d +; CHECK-NEXT: ret %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1 %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0 %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1 ret <vscale x 2 x double> %t2 } +define <vscale x 2 x double> @reassociate_muls_nxv2f64_nsz(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) { +; CHECK-LABEL: reassociate_muls_nxv2f64_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: fmul z1.d, z3.d, z2.d +; CHECK-NEXT: fmul z0.d, z1.d, z0.d +; CHECK-NEXT: ret + %t0 = fmul reassoc nsz <vscale x 2 x double> %x0, %x1 + %t1 = fmul reassoc nsz <vscale x 2 x double> %x2, %t0 + %t2 = fmul reassoc nsz <vscale x 2 x double> %x3, %t1 + ret <vscale x 2 x double> %t2 +} + ; Verify that scalable vector integer arithmetic operations are reassociated. define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) { @@ -753,55 +869,30 @@ define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vsca declare double @bar() define double @reassociate_adds_from_calls() { -; CHECK-STD-LABEL: reassociate_adds_from_calls: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill -; CHECK-STD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-STD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill -; CHECK-STD-NEXT: .cfi_def_cfa_offset 32 -; CHECK-STD-NEXT: .cfi_offset w30, -8 -; CHECK-STD-NEXT: .cfi_offset b8, -16 -; CHECK-STD-NEXT: .cfi_offset b9, -24 -; CHECK-STD-NEXT: .cfi_offset b10, -32 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d8, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d9, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d10, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fadd d1, d8, d9 -; CHECK-STD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-STD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-STD-NEXT: fadd d1, d1, d10 -; CHECK-STD-NEXT: fadd d0, d1, d0 -; CHECK-STD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill -; CHECK-UNSAFE-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-UNSAFE-NEXT: str x30, [sp, #24] // 8-byte Folded Spill -; CHECK-UNSAFE-NEXT: .cfi_def_cfa_offset 32 -; CHECK-UNSAFE-NEXT: .cfi_offset w30, -8 -; CHECK-UNSAFE-NEXT: .cfi_offset b8, -16 -; CHECK-UNSAFE-NEXT: .cfi_offset b9, -24 -; CHECK-UNSAFE-NEXT: .cfi_offset b10, -32 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d8, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d9, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d10, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fadd d1, d8, d9 -; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-UNSAFE-NEXT: fadd d0, d10, d0 -; CHECK-UNSAFE-NEXT: fadd d0, d1, d0 -; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_from_calls: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d1, d1, d10 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret %x0 = call double @bar() %x1 = call double @bar() %x2 = call double @bar() @@ -812,6 +903,41 @@ define double @reassociate_adds_from_calls() { ret double %t2 } +define double @reassociate_adds_from_calls_reassoc() { +; CHECK-LABEL: reassociate_adds_from_calls_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d0, d10, d0 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %x0 = call reassoc nsz double @bar() + %x1 = call reassoc nsz double @bar() + %x2 = call reassoc nsz double @bar() + %x3 = call reassoc nsz double @bar() + %t0 = fadd reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %t0, %x2 + %t2 = fadd reassoc nsz double %t1, %x3 + ret double %t2 +} + define double @already_reassociated() { ; CHECK-LABEL: already_reassociated: ; CHECK: // %bb.0: @@ -846,3 +972,38 @@ define double @already_reassociated() { %t2 = fadd double %t0, %t1 ret double %t2 } + +define double @already_reassociated_reassoc() { +; CHECK-LABEL: already_reassociated_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d0, d10, d0 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %x0 = call reassoc nsz double @bar() + %x1 = call reassoc nsz double @bar() + %x2 = call reassoc nsz double @bar() + %x3 = call reassoc nsz double @bar() + %t0 = fadd reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %x2, %x3 + %t2 = fadd reassoc nsz double %t0, %t1 + ret double %t2 +} diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.mir b/llvm/test/CodeGen/AArch64/machine-combiner.mir index b967aaa..a0e1280 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \ +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 \ # RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \ # RUN: -machine-combiner-verify-pattern-order=true -verify-machineinstrs -o - %s | FileCheck %s --- @@ -36,8 +36,8 @@ body: | %6 = ADDWrr %3, killed %5 %7 = SCVTFUWDri killed %6, implicit $fpcr ; CHECK: FMADDDrrr %7, %7, %0, implicit $fpcr - %8 = FMULDrr %7, %7, implicit $fpcr - %9 = FADDDrr %0, killed %8, implicit $fpcr + %8 = contract FMULDrr %7, %7, implicit $fpcr + %9 = contract FADDDrr %0, killed %8, implicit $fpcr $d0 = COPY %9 RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index bbdce7c..15c1dff 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -13,10 +13,9 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: scalar_i32_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w9, w1, w0 -; CHECK-NEXT: subs w10, w0, w1 +; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: csel w9, w10, w9, gt +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -35,10 +34,9 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: scalar_i32_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w9, w1, w0 -; CHECK-NEXT: subs w10, w0, w1 +; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: csel w9, w10, w9, hi +; CHECK-NEXT: cneg w9, w9, ls ; CHECK-NEXT: cneg w8, w8, ls ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -61,11 +59,9 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1 -; CHECK-NEXT: sub w10, w1, w9 +; CHECK-NEXT: subs w10, w9, w1 +; CHECK-NEXT: cneg w10, w10, le ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: subs w11, w9, w1 -; CHECK-NEXT: csel w10, w11, w10, gt ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret @@ -86,11 +82,9 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: sub w10, w9, w0 -; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: subs w9, w0, w9 -; CHECK-NEXT: csel w9, w9, w10, gt +; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -112,11 +106,9 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: ldr w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: sub w11, w10, w9 -; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: subs w10, w9, w10 -; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret @@ -142,10 +134,9 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK-LABEL: scalar_i64_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x9, x1, x0 -; CHECK-NEXT: subs x10, x0, x1 +; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: csel x9, x10, x9, gt +; CHECK-NEXT: cneg x9, x9, le ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -164,10 +155,9 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK-LABEL: scalar_i64_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x9, x1, x0 -; CHECK-NEXT: subs x10, x0, x1 +; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: csel x9, x10, x9, hi +; CHECK-NEXT: cneg x9, x9, ls ; CHECK-NEXT: cneg x8, x8, ls ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -190,11 +180,9 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: sub x10, x1, x9 +; CHECK-NEXT: subs x10, x9, x1 +; CHECK-NEXT: cneg x10, x10, le ; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: subs x11, x9, x1 -; CHECK-NEXT: csel x10, x11, x10, gt ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 ; CHECK-NEXT: ret @@ -215,11 +203,9 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cmp x0, x9 -; CHECK-NEXT: sub x10, x9, x0 -; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: subs x9, x0, x9 -; CHECK-NEXT: csel x9, x9, x10, gt +; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 ; CHECK-NEXT: ret @@ -241,11 +227,9 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: ldr x10, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: sub x11, x10, x9 -; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: subs x10, x9, x10 -; CHECK-NEXT: csel x10, x10, x11, gt +; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neg-abs.ll b/llvm/test/CodeGen/AArch64/neg-abs.ll index 9be0d1a..35cafe5 100644 --- a/llvm/test/CodeGen/AArch64/neg-abs.ll +++ b/llvm/test/CodeGen/AArch64/neg-abs.ll @@ -1,15 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs \ -; RUN: -mtriple=aarch64-unknown-unknown < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i64 @llvm.abs.i64(i64, i1 immarg) define i64 @neg_abs64(i64 %x) { -; CHECK-LABEL: neg_abs64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: cneg x0, x0, pl -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_abs64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: cneg x0, x0, pl +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_abs64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cneg x8, x0, le +; CHECK-GI-NEXT: neg x0, x8 +; CHECK-GI-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) %neg = sub nsw i64 0, %abs ret i64 %neg @@ -18,11 +25,18 @@ define i64 @neg_abs64(i64 %x) { declare i32 @llvm.abs.i32(i32, i1 immarg) define i32 @neg_abs32(i32 %x) { -; CHECK-LABEL: neg_abs32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cneg w0, w0, pl -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_abs32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: cneg w0, w0, pl +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_abs32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: cneg w8, w0, le +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) %neg = sub nsw i32 0, %abs ret i32 %neg @@ -31,12 +45,20 @@ define i32 @neg_abs32(i32 %x) { declare i16 @llvm.abs.i16(i16, i1 immarg) define i16 @neg_abs16(i16 %x) { -; CHECK-LABEL: neg_abs16: -; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w8, w0, #15, #1 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_abs16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sbfx w8, w0, #15, #1 +; CHECK-SD-NEXT: eor w9, w0, w8 +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_abs16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxth w8, w0 +; CHECK-GI-NEXT: cmp w8, #0 +; CHECK-GI-NEXT: cneg w8, w0, le +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true) %neg = sub nsw i16 0, %abs ret i16 %neg @@ -46,14 +68,25 @@ define i16 @neg_abs16(i16 %x) { declare i128 @llvm.abs.i128(i128, i1 immarg) define i128 @neg_abs128(i128 %x) { -; CHECK-LABEL: neg_abs128: -; CHECK: // %bb.0: -; CHECK-NEXT: asr x8, x1, #63 -; CHECK-NEXT: eor x9, x0, x8 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: subs x0, x8, x9 -; CHECK-NEXT: sbc x1, x8, x10 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_abs128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: asr x8, x1, #63 +; CHECK-SD-NEXT: eor x9, x0, x8 +; CHECK-SD-NEXT: eor x10, x1, x8 +; CHECK-SD-NEXT: subs x0, x8, x9 +; CHECK-SD-NEXT: sbc x1, x8, x10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_abs128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: asr x8, x1, #63 +; CHECK-GI-NEXT: adds x9, x0, x8 +; CHECK-GI-NEXT: adc x10, x1, x8 +; CHECK-GI-NEXT: eor x9, x9, x8 +; CHECK-GI-NEXT: eor x8, x10, x8 +; CHECK-GI-NEXT: negs x0, x9 +; CHECK-GI-NEXT: ngc x1, x8 +; CHECK-GI-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) %neg = sub nsw i128 0, %abs ret i128 %neg @@ -62,46 +95,76 @@ define i128 @neg_abs128(i128 %x) { define i64 @abs64(i64 %x) { -; CHECK-LABEL: abs64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: cneg x0, x0, mi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: abs64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: cneg x0, x0, mi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: abs64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cneg x0, x0, le +; CHECK-GI-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) ret i64 %abs } define i32 @abs32(i32 %x) { -; CHECK-LABEL: abs32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: cneg w0, w0, mi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: abs32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: cneg w0, w0, mi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: abs32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: cneg w0, w0, le +; CHECK-GI-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs } define i16 @abs16(i16 %x) { -; CHECK-LABEL: abs16: -; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cneg w0, w8, mi -; CHECK-NEXT: ret +; CHECK-SD-LABEL: abs16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxth w8, w0 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cneg w0, w8, mi +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: abs16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxth w8, w0 +; CHECK-GI-NEXT: cmp w8, #0 +; CHECK-GI-NEXT: cneg w0, w0, le +; CHECK-GI-NEXT: ret %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true) ret i16 %abs } define i128 @abs128(i128 %x) { -; CHECK-LABEL: abs128: -; CHECK: // %bb.0: -; CHECK-NEXT: asr x8, x1, #63 -; CHECK-NEXT: eor x9, x0, x8 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: subs x0, x9, x8 -; CHECK-NEXT: sbc x1, x10, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: abs128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: asr x8, x1, #63 +; CHECK-SD-NEXT: eor x9, x0, x8 +; CHECK-SD-NEXT: eor x10, x1, x8 +; CHECK-SD-NEXT: subs x0, x9, x8 +; CHECK-SD-NEXT: sbc x1, x10, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: abs128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: asr x8, x1, #63 +; CHECK-GI-NEXT: adds x9, x0, x8 +; CHECK-GI-NEXT: adc x10, x1, x8 +; CHECK-GI-NEXT: eor x0, x9, x8 +; CHECK-GI-NEXT: eor x1, x10, x8 +; CHECK-GI-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) ret i128 %abs } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/neg-selects.ll b/llvm/test/CodeGen/AArch64/neg-selects.ll index 4ef1633..b643ee7 100644 --- a/llvm/test/CodeGen/AArch64/neg-selects.ll +++ b/llvm/test/CodeGen/AArch64/neg-selects.ll @@ -1,12 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-none-elf %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i32 @neg_select_neg(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: neg_select_neg: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_select_neg: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: csel w0, w0, w1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_select_neg: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: neg w9, w0 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csneg w8, w9, w1, ne +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %nega = sub i32 0, %a %negb = sub i32 0, %b %sel = select i1 %bb, i32 %nega, i32 %negb @@ -15,11 +25,20 @@ define i32 @neg_select_neg(i32 %a, i32 %b, i1 %bb) { } define i32 @negneg_select_nega(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: negneg_select_nega: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csneg w0, w1, w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: negneg_select_nega: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: csneg w0, w1, w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: negneg_select_nega: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csneg w8, w1, w0, eq +; CHECK-GI-NEXT: neg w8, w8 +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %nega = sub i32 0, %a %sel = select i1 %bb, i32 %nega, i32 %b %nsel = sub i32 0, %sel @@ -28,11 +47,19 @@ define i32 @negneg_select_nega(i32 %a, i32 %b, i1 %bb) { } define i32 @neg_select_nega(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: neg_select_nega: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csneg w0, w0, w1, ne -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_select_nega: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: csneg w0, w0, w1, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_select_nega: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csneg w8, w1, w0, eq +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %nega = sub i32 0, %a %sel = select i1 %bb, i32 %nega, i32 %b %res = sub i32 0, %sel @@ -40,11 +67,19 @@ define i32 @neg_select_nega(i32 %a, i32 %b, i1 %bb) { } define i32 @neg_select_negb(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: neg_select_negb: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csneg w0, w1, w0, eq -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_select_negb: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: csneg w0, w1, w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_select_negb: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csneg w8, w0, w1, ne +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %negb = sub i32 0, %b %sel = select i1 %bb, i32 %a, i32 %negb %res = sub i32 0, %sel @@ -52,28 +87,47 @@ define i32 @neg_select_negb(i32 %a, i32 %b, i1 %bb) { } define i32 @neg_select_ab(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: neg_select_ab: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csel w8, w0, w1, ne -; CHECK-NEXT: neg w0, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_select_ab: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: csel w8, w0, w1, ne +; CHECK-SD-NEXT: neg w0, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_select_ab: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: csel w8, w0, w1, ne +; CHECK-GI-NEXT: neg w0, w8 +; CHECK-GI-NEXT: ret %sel = select i1 %bb, i32 %a, i32 %b %res = sub i32 0, %sel ret i32 %res } define i32 @neg_select_nega_with_use(i32 %a, i32 %b, i1 %bb) { -; CHECK-LABEL: neg_select_nega_with_use: -; CHECK: // %bb.0: -; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: csneg w9, w1, w0, eq -; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: neg_select_nega_with_use: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst w2, #0x1 +; CHECK-SD-NEXT: neg w8, w0 +; CHECK-SD-NEXT: csneg w9, w1, w0, eq +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neg_select_nega_with_use: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w2, #0x1 +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: neg w8, w0 +; CHECK-GI-NEXT: csneg w9, w1, w0, eq +; CHECK-GI-NEXT: sub w0, w8, w9 +; CHECK-GI-NEXT: ret %nega = sub i32 0, %a %sel = select i1 %bb, i32 %nega, i32 %b %nsel = sub i32 0, %sel %res = add i32 %nsel, %nega ret i32 %res } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll index cf09a46..584caa30 100644 --- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll @@ -1,13 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=cortex-a65 < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=cortex-a65ae < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-e1 < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n1 < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n2 < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=ampere1 < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=ampere1a < %s | FileCheck %s -; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=ampere1b < %s | FileCheck %s +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) declare <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) @@ -56,10 +49,17 @@ entry: define <2 x i32> @test_vdot_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 { -; CHECK-LABEL: test_vdot_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.8b +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2 %ret = add <2 x i32> %vdot1.i, %a @@ -67,10 +67,17 @@ entry: } define <4 x i32> @test_vdotq_u32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 { -; CHECK-LABEL: test_vdotq_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.16b +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2 %ret = add <4 x i32> %vdot1.i, %a @@ -78,10 +85,17 @@ entry: } define <2 x i32> @test_vdot_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 { -; CHECK-LABEL: test_vdot_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.8b +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2 %ret = add <2 x i32> %vdot1.i, %a @@ -89,10 +103,17 @@ entry: } define <4 x i32> @test_vdotq_s32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 { -; CHECK-LABEL: test_vdotq_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.16b +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2 %ret = add <4 x i32> %vdot1.i, %a @@ -156,11 +177,19 @@ entry: define <2 x i32> @test_vdot_lane_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK-LABEL: test_vdot_lane_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_lane_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_lane_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1] +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <8 x i8> %c to <2 x i32> %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1> @@ -171,11 +200,19 @@ entry: } define <4 x i32> @test_vdotq_lane_u32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK-LABEL: test_vdotq_lane_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_lane_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_lane_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1] +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <8 x i8> %c to <2 x i32> %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -186,10 +223,17 @@ entry: } define <2 x i32> @test_vdot_laneq_u32_zero(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) { -; CHECK-LABEL: test_vdot_laneq_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_laneq_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_laneq_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1] +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <16 x i8> %c to <4 x i32> %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1> @@ -200,10 +244,17 @@ entry: } define <4 x i32> @test_vdotq_laneq_u32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK-LABEL: test_vdotq_laneq_u32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_laneq_u32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_laneq_u32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1] +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <16 x i8> %c to <4 x i32> %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -270,11 +321,19 @@ entry: define <2 x i32> @test_vdot_lane_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK-LABEL: test_vdot_lane_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_lane_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_lane_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1] +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <8 x i8> %c to <2 x i32> %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1> @@ -285,11 +344,19 @@ entry: } define <4 x i32> @test_vdotq_lane_s32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK-LABEL: test_vdotq_lane_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_lane_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_lane_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1] +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <8 x i8> %c to <2 x i32> %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -300,10 +367,17 @@ entry: } define <2 x i32> @test_vdot_laneq_s32_zero(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) { -; CHECK-LABEL: test_vdot_laneq_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdot_laneq_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdot_laneq_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1] +; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <16 x i8> %c to <4 x i32> %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1> @@ -314,10 +388,17 @@ entry: } define <4 x i32> @test_vdotq_laneq_s32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK-LABEL: test_vdotq_laneq_s32_zero: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vdotq_laneq_s32_zero: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vdotq_laneq_s32_zero: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1] +; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %.cast = bitcast <16 x i8> %c to <4 x i32> %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -326,3 +407,6 @@ entry: %ret = add <4 x i32> %vdot1.i, %a ret <4 x i32> %ret } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll index 9443004..4f0c408 100644 --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -6810,200 +6810,195 @@ define i32 @test_sdot_v48i8_double_nomla(<48 x i8> %a, <48 x i8> %b, <48 x i8> % ; CHECK-SD-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: .cfi_offset w29, -16 -; CHECK-SD-NEXT: ldr b0, [sp, #208] +; CHECK-SD-NEXT: ldr b5, [sp, #208] ; CHECK-SD-NEXT: add x8, sp, #216 -; CHECK-SD-NEXT: add x9, sp, #272 -; CHECK-SD-NEXT: ldr b2, [sp, #80] +; CHECK-SD-NEXT: fmov s0, w0 ; CHECK-SD-NEXT: ldr b4, [sp, #976] -; CHECK-SD-NEXT: ldr b6, [sp, #720] -; CHECK-SD-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-SD-NEXT: add x9, sp, #984 +; CHECK-SD-NEXT: add x12, sp, #328 +; CHECK-SD-NEXT: ld1 { v5.b }[1], [x8] ; CHECK-SD-NEXT: add x8, sp, #224 -; CHECK-SD-NEXT: fmov s16, w0 -; CHECK-SD-NEXT: ldr b17, [sp, #848] -; CHECK-SD-NEXT: add x10, sp, #24 -; CHECK-SD-NEXT: movi v19.2d, #0000000000000000 -; CHECK-SD-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-SD-NEXT: movi v1.16b, #1 +; CHECK-SD-NEXT: mov v0.b[1], w1 +; CHECK-SD-NEXT: ld1 { v4.b }[1], [x9] +; CHECK-SD-NEXT: movi v3.2d, #0000000000000000 +; CHECK-SD-NEXT: add x11, sp, #992 +; CHECK-SD-NEXT: ldr b6, [sp, #720] +; CHECK-SD-NEXT: ldr b7, [sp, #80] +; CHECK-SD-NEXT: ld1 { v5.b }[2], [x8] ; CHECK-SD-NEXT: add x8, sp, #232 -; CHECK-SD-NEXT: mov v16.b[1], w1 -; CHECK-SD-NEXT: ld1 { v0.b }[3], [x8] +; CHECK-SD-NEXT: add x13, sp, #88 +; CHECK-SD-NEXT: ld1 { v4.b }[2], [x11] +; CHECK-SD-NEXT: ld1 { v7.b }[1], [x13] +; CHECK-SD-NEXT: add x13, sp, #856 +; CHECK-SD-NEXT: mov v0.b[2], w2 +; CHECK-SD-NEXT: add x14, sp, #1008 +; CHECK-SD-NEXT: add x15, sp, #872 +; CHECK-SD-NEXT: ld1 { v5.b }[3], [x8] ; CHECK-SD-NEXT: add x8, sp, #240 -; CHECK-SD-NEXT: mov v16.b[2], w2 -; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: add x16, sp, #888 +; CHECK-SD-NEXT: add x10, sp, #16 +; CHECK-SD-NEXT: add x9, sp, #24 +; CHECK-SD-NEXT: add x11, sp, #40 +; CHECK-SD-NEXT: movi v2.2d, #0000000000000000 +; CHECK-SD-NEXT: ld1 { v5.b }[4], [x8] ; CHECK-SD-NEXT: add x8, sp, #248 -; CHECK-SD-NEXT: mov v16.b[3], w3 -; CHECK-SD-NEXT: ld1 { v0.b }[5], [x8] +; CHECK-SD-NEXT: mov v0.b[3], w3 +; CHECK-SD-NEXT: ld1 { v5.b }[5], [x8] ; CHECK-SD-NEXT: add x8, sp, #256 -; CHECK-SD-NEXT: ld1 { v0.b }[6], [x8] +; CHECK-SD-NEXT: mov v0.b[4], w4 +; CHECK-SD-NEXT: ld1 { v5.b }[6], [x8] ; CHECK-SD-NEXT: add x8, sp, #264 -; CHECK-SD-NEXT: mov v16.b[4], w4 -; CHECK-SD-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-SD-NEXT: ldr b1, [x9] +; CHECK-SD-NEXT: mov v0.b[5], w5 +; CHECK-SD-NEXT: ld1 { v5.b }[7], [x8] +; CHECK-SD-NEXT: add x8, sp, #272 +; CHECK-SD-NEXT: ld1 { v5.b }[8], [x8] ; CHECK-SD-NEXT: add x8, sp, #280 -; CHECK-SD-NEXT: add x9, sp, #88 -; CHECK-SD-NEXT: mov v16.b[5], w5 -; CHECK-SD-NEXT: ld1 { v1.b }[1], [x8] +; CHECK-SD-NEXT: mov v0.b[6], w6 +; CHECK-SD-NEXT: ld1 { v5.b }[9], [x8] ; CHECK-SD-NEXT: add x8, sp, #288 -; CHECK-SD-NEXT: ld1 { v1.b }[2], [x8] +; CHECK-SD-NEXT: mov v0.b[7], w7 +; CHECK-SD-NEXT: ld1 { v5.b }[10], [x8] ; CHECK-SD-NEXT: add x8, sp, #296 -; CHECK-SD-NEXT: mov v16.b[6], w6 -; CHECK-SD-NEXT: ld1 { v1.b }[3], [x8] +; CHECK-SD-NEXT: ld1 { v0.b }[8], [x10] +; CHECK-SD-NEXT: add x10, sp, #128 +; CHECK-SD-NEXT: ld1 { v5.b }[11], [x8] ; CHECK-SD-NEXT: add x8, sp, #304 -; CHECK-SD-NEXT: mov v16.b[7], w7 -; CHECK-SD-NEXT: ld1 { v1.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v0.b }[9], [x9] +; CHECK-SD-NEXT: add x9, sp, #136 +; CHECK-SD-NEXT: ld1 { v5.b }[12], [x8] ; CHECK-SD-NEXT: add x8, sp, #312 -; CHECK-SD-NEXT: ld1 { v1.b }[5], [x8] +; CHECK-SD-NEXT: ld1 { v5.b }[13], [x8] ; CHECK-SD-NEXT: add x8, sp, #320 -; CHECK-SD-NEXT: ld1 { v1.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #328 -; CHECK-SD-NEXT: ld1 { v1.b }[7], [x8] -; CHECK-SD-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-SD-NEXT: add x8, sp, #96 -; CHECK-SD-NEXT: add x9, sp, #144 -; CHECK-SD-NEXT: ld1 { v2.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #104 -; CHECK-SD-NEXT: zip1 v0.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: movi v1.16b, #1 -; CHECK-SD-NEXT: ld1 { v2.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #112 -; CHECK-SD-NEXT: ld1 { v2.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #120 -; CHECK-SD-NEXT: ld1 { v2.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #128 -; CHECK-SD-NEXT: ld1 { v2.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #136 -; CHECK-SD-NEXT: ld1 { v2.b }[7], [x8] -; CHECK-SD-NEXT: ldr b3, [x9] +; CHECK-SD-NEXT: ld1 { v5.b }[14], [x8] +; CHECK-SD-NEXT: add x8, sp, #32 +; CHECK-SD-NEXT: ld1 { v0.b }[10], [x8] +; CHECK-SD-NEXT: add x8, sp, #144 +; CHECK-SD-NEXT: ld1 { v5.b }[15], [x12] +; CHECK-SD-NEXT: add x12, sp, #728 +; CHECK-SD-NEXT: ld1 { v6.b }[1], [x12] +; CHECK-SD-NEXT: add x12, sp, #1000 +; CHECK-SD-NEXT: ld1 { v0.b }[11], [x11] +; CHECK-SD-NEXT: ld1 { v4.b }[3], [x12] +; CHECK-SD-NEXT: add x12, sp, #736 +; CHECK-SD-NEXT: add x11, sp, #920 +; CHECK-SD-NEXT: sdot v3.4s, v5.16b, v1.16b +; CHECK-SD-NEXT: ldr b5, [sp, #848] +; CHECK-SD-NEXT: ld1 { v6.b }[2], [x12] +; CHECK-SD-NEXT: add x12, sp, #48 +; CHECK-SD-NEXT: ld1 { v5.b }[1], [x13] +; CHECK-SD-NEXT: add x13, sp, #744 +; CHECK-SD-NEXT: ld1 { v4.b }[4], [x14] +; CHECK-SD-NEXT: add x14, sp, #96 +; CHECK-SD-NEXT: ld1 { v0.b }[12], [x12] +; CHECK-SD-NEXT: ld1 { v6.b }[3], [x13] +; CHECK-SD-NEXT: add x13, sp, #864 +; CHECK-SD-NEXT: ld1 { v7.b }[2], [x14] +; CHECK-SD-NEXT: add x14, sp, #1016 +; CHECK-SD-NEXT: ld1 { v5.b }[2], [x13] +; CHECK-SD-NEXT: add x13, sp, #752 +; CHECK-SD-NEXT: ld1 { v4.b }[5], [x14] +; CHECK-SD-NEXT: add x14, sp, #104 +; CHECK-SD-NEXT: ld1 { v6.b }[4], [x13] +; CHECK-SD-NEXT: add x13, sp, #1024 +; CHECK-SD-NEXT: ld1 { v7.b }[3], [x14] +; CHECK-SD-NEXT: ld1 { v5.b }[3], [x15] +; CHECK-SD-NEXT: add x15, sp, #760 +; CHECK-SD-NEXT: add x14, sp, #112 +; CHECK-SD-NEXT: ld1 { v4.b }[6], [x13] +; CHECK-SD-NEXT: add x13, sp, #880 +; CHECK-SD-NEXT: ld1 { v6.b }[5], [x15] +; CHECK-SD-NEXT: add x15, sp, #1032 +; CHECK-SD-NEXT: ld1 { v7.b }[4], [x14] +; CHECK-SD-NEXT: ld1 { v5.b }[4], [x13] +; CHECK-SD-NEXT: add x14, sp, #768 +; CHECK-SD-NEXT: add x13, sp, #120 +; CHECK-SD-NEXT: ld1 { v4.b }[7], [x15] +; CHECK-SD-NEXT: add x15, sp, #1040 +; CHECK-SD-NEXT: ld1 { v6.b }[6], [x14] +; CHECK-SD-NEXT: ld1 { v7.b }[5], [x13] +; CHECK-SD-NEXT: add x13, sp, #776 +; CHECK-SD-NEXT: ld1 { v5.b }[5], [x16] +; CHECK-SD-NEXT: add x14, sp, #1048 +; CHECK-SD-NEXT: ld1 { v4.b }[8], [x15] +; CHECK-SD-NEXT: add x15, sp, #896 +; CHECK-SD-NEXT: ld1 { v6.b }[7], [x13] +; CHECK-SD-NEXT: ld1 { v7.b }[6], [x10] +; CHECK-SD-NEXT: add x10, sp, #784 +; CHECK-SD-NEXT: ld1 { v5.b }[6], [x15] +; CHECK-SD-NEXT: add x13, sp, #1056 +; CHECK-SD-NEXT: ld1 { v4.b }[9], [x14] +; CHECK-SD-NEXT: add x14, sp, #904 +; CHECK-SD-NEXT: ld1 { v6.b }[8], [x10] +; CHECK-SD-NEXT: ld1 { v7.b }[7], [x9] +; CHECK-SD-NEXT: add x9, sp, #792 +; CHECK-SD-NEXT: ld1 { v5.b }[7], [x14] +; CHECK-SD-NEXT: add x10, sp, #1064 +; CHECK-SD-NEXT: ld1 { v4.b }[10], [x13] +; CHECK-SD-NEXT: add x13, sp, #912 +; CHECK-SD-NEXT: ld1 { v6.b }[9], [x9] +; CHECK-SD-NEXT: ld1 { v7.b }[8], [x8] +; CHECK-SD-NEXT: add x9, sp, #800 +; CHECK-SD-NEXT: ld1 { v5.b }[8], [x13] ; CHECK-SD-NEXT: add x8, sp, #152 -; CHECK-SD-NEXT: add x9, sp, #984 -; CHECK-SD-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-SD-NEXT: add x8, sp, #160 -; CHECK-SD-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #168 -; CHECK-SD-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #176 -; CHECK-SD-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #184 -; CHECK-SD-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #192 -; CHECK-SD-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #200 -; CHECK-SD-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-SD-NEXT: ld1 { v4.b }[1], [x9] -; CHECK-SD-NEXT: add x8, sp, #992 -; CHECK-SD-NEXT: add x9, sp, #1040 -; CHECK-SD-NEXT: ld1 { v4.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #1000 -; CHECK-SD-NEXT: zip1 v2.2d, v2.2d, v3.2d -; CHECK-SD-NEXT: ld1 { v4.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #1008 -; CHECK-SD-NEXT: ld1 { v4.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #1016 -; CHECK-SD-NEXT: ld1 { v4.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #1024 -; CHECK-SD-NEXT: ld1 { v4.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #1032 -; CHECK-SD-NEXT: ld1 { v4.b }[7], [x8] -; CHECK-SD-NEXT: ldr b5, [x9] -; CHECK-SD-NEXT: add x8, sp, #1048 -; CHECK-SD-NEXT: add x9, sp, #728 -; CHECK-SD-NEXT: ld1 { v5.b }[1], [x8] -; CHECK-SD-NEXT: add x8, sp, #1056 -; CHECK-SD-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #1064 -; CHECK-SD-NEXT: ld1 { v5.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #1072 -; CHECK-SD-NEXT: ld1 { v5.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #1080 -; CHECK-SD-NEXT: ld1 { v5.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #1088 -; CHECK-SD-NEXT: ld1 { v5.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #1096 -; CHECK-SD-NEXT: ld1 { v5.b }[7], [x8] -; CHECK-SD-NEXT: ld1 { v6.b }[1], [x9] -; CHECK-SD-NEXT: add x8, sp, #736 -; CHECK-SD-NEXT: add x9, sp, #784 -; CHECK-SD-NEXT: ld1 { v6.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #744 -; CHECK-SD-NEXT: zip1 v4.2d, v4.2d, v5.2d -; CHECK-SD-NEXT: movi v5.2d, #0000000000000000 -; CHECK-SD-NEXT: ld1 { v6.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #752 -; CHECK-SD-NEXT: sdot v19.4s, v4.16b, v1.16b -; CHECK-SD-NEXT: sdot v5.4s, v0.16b, v1.16b -; CHECK-SD-NEXT: ld1 { v6.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #760 -; CHECK-SD-NEXT: ld1 { v6.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #768 -; CHECK-SD-NEXT: ld1 { v6.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #776 -; CHECK-SD-NEXT: ld1 { v6.b }[7], [x8] -; CHECK-SD-NEXT: ldr b7, [x9] -; CHECK-SD-NEXT: add x8, sp, #792 -; CHECK-SD-NEXT: add x9, sp, #856 -; CHECK-SD-NEXT: ld1 { v7.b }[1], [x8] -; CHECK-SD-NEXT: add x8, sp, #800 -; CHECK-SD-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #808 -; CHECK-SD-NEXT: ld1 { v7.b }[3], [x8] +; CHECK-SD-NEXT: ld1 { v4.b }[11], [x10] +; CHECK-SD-NEXT: add x10, sp, #1072 +; CHECK-SD-NEXT: ld1 { v6.b }[10], [x9] +; CHECK-SD-NEXT: ld1 { v7.b }[9], [x8] +; CHECK-SD-NEXT: add x9, sp, #808 +; CHECK-SD-NEXT: ld1 { v5.b }[9], [x11] +; CHECK-SD-NEXT: add x8, sp, #56 +; CHECK-SD-NEXT: ld1 { v4.b }[12], [x10] +; CHECK-SD-NEXT: add x10, sp, #160 +; CHECK-SD-NEXT: ld1 { v0.b }[13], [x8] +; CHECK-SD-NEXT: ld1 { v6.b }[11], [x9] +; CHECK-SD-NEXT: add x9, sp, #928 +; CHECK-SD-NEXT: ld1 { v7.b }[10], [x10] +; CHECK-SD-NEXT: add x10, sp, #1080 +; CHECK-SD-NEXT: ld1 { v5.b }[10], [x9] ; CHECK-SD-NEXT: add x8, sp, #816 -; CHECK-SD-NEXT: ld1 { v7.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #824 -; CHECK-SD-NEXT: ld1 { v7.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #832 -; CHECK-SD-NEXT: ld1 { v7.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #840 -; CHECK-SD-NEXT: ld1 { v7.b }[7], [x8] -; CHECK-SD-NEXT: ld1 { v17.b }[1], [x9] -; CHECK-SD-NEXT: add x8, sp, #864 -; CHECK-SD-NEXT: add x9, sp, #16 -; CHECK-SD-NEXT: ld1 { v16.b }[8], [x9] -; CHECK-SD-NEXT: add x9, sp, #912 -; CHECK-SD-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #872 -; CHECK-SD-NEXT: zip1 v0.2d, v6.2d, v7.2d -; CHECK-SD-NEXT: ld1 { v16.b }[9], [x10] -; CHECK-SD-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #880 -; CHECK-SD-NEXT: sdot v19.4s, v0.16b, v1.16b -; CHECK-SD-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #888 -; CHECK-SD-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-SD-NEXT: add x8, sp, #896 -; CHECK-SD-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #904 -; CHECK-SD-NEXT: ld1 { v17.b }[7], [x8] -; CHECK-SD-NEXT: ldr b18, [x9] -; CHECK-SD-NEXT: add x8, sp, #920 -; CHECK-SD-NEXT: ld1 { v18.b }[1], [x8] -; CHECK-SD-NEXT: add x8, sp, #32 -; CHECK-SD-NEXT: ld1 { v16.b }[10], [x8] -; CHECK-SD-NEXT: add x8, sp, #928 -; CHECK-SD-NEXT: ld1 { v18.b }[2], [x8] -; CHECK-SD-NEXT: add x8, sp, #40 -; CHECK-SD-NEXT: ld1 { v16.b }[11], [x8] +; CHECK-SD-NEXT: ld1 { v4.b }[13], [x10] +; CHECK-SD-NEXT: add x9, sp, #168 +; CHECK-SD-NEXT: add x10, sp, #176 +; CHECK-SD-NEXT: ld1 { v6.b }[12], [x8] ; CHECK-SD-NEXT: add x8, sp, #936 -; CHECK-SD-NEXT: ld1 { v18.b }[3], [x8] -; CHECK-SD-NEXT: add x8, sp, #48 -; CHECK-SD-NEXT: ld1 { v16.b }[12], [x8] -; CHECK-SD-NEXT: add x8, sp, #944 -; CHECK-SD-NEXT: ld1 { v18.b }[4], [x8] -; CHECK-SD-NEXT: add x8, sp, #56 -; CHECK-SD-NEXT: ld1 { v16.b }[13], [x8] -; CHECK-SD-NEXT: add x8, sp, #952 -; CHECK-SD-NEXT: ld1 { v18.b }[5], [x8] +; CHECK-SD-NEXT: ld1 { v7.b }[11], [x9] +; CHECK-SD-NEXT: add x9, sp, #1088 +; CHECK-SD-NEXT: ld1 { v5.b }[11], [x8] ; CHECK-SD-NEXT: add x8, sp, #64 -; CHECK-SD-NEXT: ld1 { v16.b }[14], [x8] +; CHECK-SD-NEXT: ld1 { v4.b }[14], [x9] +; CHECK-SD-NEXT: add x9, sp, #824 +; CHECK-SD-NEXT: ld1 { v0.b }[14], [x8] +; CHECK-SD-NEXT: ld1 { v6.b }[13], [x9] +; CHECK-SD-NEXT: add x9, sp, #944 +; CHECK-SD-NEXT: ld1 { v7.b }[12], [x10] +; CHECK-SD-NEXT: add x10, sp, #1096 +; CHECK-SD-NEXT: ld1 { v5.b }[12], [x9] +; CHECK-SD-NEXT: add x8, sp, #832 +; CHECK-SD-NEXT: ld1 { v4.b }[15], [x10] +; CHECK-SD-NEXT: add x9, sp, #184 +; CHECK-SD-NEXT: add x10, sp, #72 +; CHECK-SD-NEXT: ld1 { v6.b }[14], [x8] +; CHECK-SD-NEXT: add x8, sp, #952 +; CHECK-SD-NEXT: ld1 { v7.b }[13], [x9] +; CHECK-SD-NEXT: ld1 { v5.b }[13], [x8] +; CHECK-SD-NEXT: add x8, sp, #840 +; CHECK-SD-NEXT: ld1 { v0.b }[15], [x10] +; CHECK-SD-NEXT: sdot v2.4s, v4.16b, v1.16b +; CHECK-SD-NEXT: add x9, sp, #192 +; CHECK-SD-NEXT: ld1 { v6.b }[15], [x8] ; CHECK-SD-NEXT: add x8, sp, #960 -; CHECK-SD-NEXT: ld1 { v18.b }[6], [x8] -; CHECK-SD-NEXT: add x8, sp, #72 -; CHECK-SD-NEXT: ld1 { v16.b }[15], [x8] -; CHECK-SD-NEXT: add x8, sp, #968 -; CHECK-SD-NEXT: ld1 { v18.b }[7], [x8] -; CHECK-SD-NEXT: sdot v5.4s, v16.16b, v1.16b -; CHECK-SD-NEXT: zip1 v0.2d, v17.2d, v18.2d -; CHECK-SD-NEXT: sdot v5.4s, v2.16b, v1.16b -; CHECK-SD-NEXT: sdot v19.4s, v0.16b, v1.16b -; CHECK-SD-NEXT: add v0.4s, v5.4s, v19.4s +; CHECK-SD-NEXT: ld1 { v7.b }[14], [x9] +; CHECK-SD-NEXT: ld1 { v5.b }[14], [x8] +; CHECK-SD-NEXT: sdot v3.4s, v0.16b, v1.16b +; CHECK-SD-NEXT: add x8, sp, #200 +; CHECK-SD-NEXT: add x9, sp, #968 +; CHECK-SD-NEXT: sdot v2.4s, v6.16b, v1.16b +; CHECK-SD-NEXT: ld1 { v7.b }[15], [x8] +; CHECK-SD-NEXT: ld1 { v5.b }[15], [x9] +; CHECK-SD-NEXT: sdot v3.4s, v7.16b, v1.16b +; CHECK-SD-NEXT: sdot v2.4s, v5.16b, v1.16b +; CHECK-SD-NEXT: add v0.4s, v3.4s, v2.4s ; CHECK-SD-NEXT: addv s0, v0.4s ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll index f7a87ae..f8ba150 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -683,43 +683,41 @@ define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) { ; ; CHECK-BE-LABEL: test_stnp_v17f32: ; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1 -; CHECK-BE-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-BE-NEXT: // kill: def $s4 killed $s4 def $q4 -; CHECK-BE-NEXT: // kill: def $s5 killed $s5 def $q5 -; CHECK-BE-NEXT: add x8, sp, #12 -; CHECK-BE-NEXT: add x9, sp, #20 +; CHECK-BE-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-BE-NEXT: ldr s16, [sp, #36] -; CHECK-BE-NEXT: mov v0.s[1], v1.s[0] -; CHECK-BE-NEXT: ldr s1, [sp, #4] +; CHECK-BE-NEXT: // kill: def $s5 killed $s5 def $q5 +; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-BE-NEXT: ldr s17, [sp, #4] +; CHECK-BE-NEXT: add x8, sp, #44 ; CHECK-BE-NEXT: mov v4.s[1], v5.s[0] -; CHECK-BE-NEXT: add x10, sp, #52 +; CHECK-BE-NEXT: mov v0.s[1], v1.s[0] ; CHECK-BE-NEXT: // kill: def $s6 killed $s6 def $q6 ; CHECK-BE-NEXT: // kill: def $s2 killed $s2 def $q2 ; CHECK-BE-NEXT: // kill: def $s7 killed $s7 def $q7 ; CHECK-BE-NEXT: // kill: def $s3 killed $s3 def $q3 -; CHECK-BE-NEXT: ld1 { v1.s }[1], [x8] -; CHECK-BE-NEXT: ldr s5, [x9] -; CHECK-BE-NEXT: add x8, sp, #28 -; CHECK-BE-NEXT: add x9, sp, #44 -; CHECK-BE-NEXT: ld1 { v5.s }[1], [x8] -; CHECK-BE-NEXT: ld1 { v16.s }[1], [x9] -; CHECK-BE-NEXT: ldr s17, [x10] -; CHECK-BE-NEXT: add x8, sp, #60 +; CHECK-BE-NEXT: ldr s1, [sp, #68] +; CHECK-BE-NEXT: ld1 { v16.s }[1], [x8] +; CHECK-BE-NEXT: add x8, sp, #12 +; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8] +; CHECK-BE-NEXT: add x8, sp, #52 +; CHECK-BE-NEXT: str s1, [x0, #64] +; CHECK-BE-NEXT: ld1 { v16.s }[2], [x8] +; CHECK-BE-NEXT: add x8, sp, #20 ; CHECK-BE-NEXT: mov v4.s[2], v6.s[0] ; CHECK-BE-NEXT: mov v0.s[2], v2.s[0] -; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8] -; CHECK-BE-NEXT: ldr s2, [sp, #68] -; CHECK-BE-NEXT: add x8, x0, #32 -; CHECK-BE-NEXT: zip1 v1.2d, v1.2d, v5.2d -; CHECK-BE-NEXT: add x9, x0, #48 -; CHECK-BE-NEXT: str s2, [x0, #64] -; CHECK-BE-NEXT: zip1 v5.2d, v16.2d, v17.2d +; CHECK-BE-NEXT: ld1 { v17.s }[2], [x8] +; CHECK-BE-NEXT: add x8, sp, #60 +; CHECK-BE-NEXT: ld1 { v16.s }[3], [x8] +; CHECK-BE-NEXT: add x8, sp, #28 +; CHECK-BE-NEXT: ld1 { v17.s }[3], [x8] ; CHECK-BE-NEXT: mov v4.s[3], v7.s[0] +; CHECK-BE-NEXT: add x8, x0, #48 ; CHECK-BE-NEXT: mov v0.s[3], v3.s[0] -; CHECK-BE-NEXT: st1 { v1.4s }, [x8] +; CHECK-BE-NEXT: st1 { v16.4s }, [x8] +; CHECK-BE-NEXT: add x8, x0, #32 +; CHECK-BE-NEXT: st1 { v17.4s }, [x8] ; CHECK-BE-NEXT: add x8, x0, #16 -; CHECK-BE-NEXT: st1 { v5.4s }, [x9] ; CHECK-BE-NEXT: st1 { v4.4s }, [x8] ; CHECK-BE-NEXT: st1 { v0.4s }, [x0] ; CHECK-BE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll index 05f4fb1..a6cb712 100644 --- a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll +++ b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll @@ -40,3 +40,10 @@ define void @test_optsize() optsize { ; CHECK-LABEL: test_optsize ; CHECK-NEXT: .p2align 2 + +define void @test_minsize() minsize { + ret void +} + +; CHECK-LABEL: test_minsize +; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll index 2a77d4d..4206c0bc 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll @@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: sub sp, sp, #208 ; CHECK-NEXT: mov w8, #10 ; =0xa ; CHECK-NEXT: mov w9, #9 ; =0x9 -; CHECK-NEXT: mov w10, #8 ; =0x8 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: stp x9, x8, [sp, #24] -; CHECK-NEXT: mov w8, #7 ; =0x7 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: mov w9, #6 ; =0x6 -; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: mov w8, #7 ; =0x7 ; CHECK-NEXT: mov w1, #2 ; =0x2 ; CHECK-NEXT: mov w2, #3 ; =0x3 ; CHECK-NEXT: mov w3, #4 ; =0x4 @@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill -; CHECK-NEXT: stp x8, x10, [sp, #8] -; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: stp x9, x8, [sp] ; CHECK-NEXT: bl _callee ; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/ptrauth-isel.ll b/llvm/test/CodeGen/AArch64/ptrauth-isel.ll new file mode 100644 index 0000000..7011b94 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ptrauth-isel.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -stop-after=finalize-isel -global-isel=0 \ +; RUN: | FileCheck %s --check-prefixes=DAGISEL +; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -stop-after=finalize-isel -global-isel=1 -global-isel-abort=1 \ +; RUN: | FileCheck %s --check-prefixes=GISEL +; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -stop-after=finalize-isel -global-isel=0 \ +; RUN: | FileCheck %s --check-prefixes=DAGISEL +; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -stop-after=finalize-isel -global-isel=1 -global-isel-abort=1 \ +; RUN: | FileCheck %s --check-prefixes=GISEL + +; Check MIR produced by the instruction selector to validate properties that +; cannot be reliably tested by only inspecting the final asm output. + +@discvar = dso_local global i64 0 + +; Make sure the components of blend(addr, imm) and integer constants are +; recognized and passed to PAC pseudo via separate operands to prevent +; substitution of the immediate modifier. +; +; MIR output of the instruction selector is inspected, as it is hard to reliably +; distinguish MOVKXi immediately followed by a pseudo from a standalone pseudo +; instruction carrying address and immediate modifiers in its separate operands +; by only observing the final asm output. + +define i64 @small_imm_disc_optimized(i64 %addr) { + ; DAGISEL-LABEL: name: small_imm_disc_optimized + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42 + ; DAGISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: $x0 = COPY [[PAC]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: small_imm_disc_optimized + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42 + ; GISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 42) + ret i64 %signed +} + +; Without optimization, MOVi64imm may be used for small i64 constants as well. +define i64 @small_imm_disc_non_optimized(i64 %addr) noinline optnone { + ; DAGISEL-LABEL: name: small_imm_disc_non_optimized + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY killed [[COPY]] + ; DAGISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42 + ; DAGISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY1]], 2, 42, killed $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[PAC]] + ; DAGISEL-NEXT: $x0 = COPY [[COPY2]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: small_imm_disc_non_optimized + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 42 + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 42) + ret i64 %signed +} + +define i64 @large_imm_disc_wreg(i64 %addr) { + ; DAGISEL-LABEL: name: large_imm_disc_wreg + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 12345678 + ; DAGISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[SUBREG_TO_REG]], implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: $x0 = COPY [[PAC]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: large_imm_disc_wreg + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 12345678 + ; GISEL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, [[SUBREG_TO_REG]], implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 12345678) + ret i64 %signed +} + +define i64 @large_imm_disc_xreg(i64 %addr) { + ; DAGISEL-LABEL: name: large_imm_disc_xreg + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 123456789012345 + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[MOVi64imm]], implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: $x0 = COPY [[PAC]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: large_imm_disc_xreg + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 123456789012345 + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, [[MOVi64imm]], implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 123456789012345) + ret i64 %signed +} + +; Make sure blend() is lowered as expected when optimization is disabled. +define i64 @blended_disc_non_optimized(i64 %addr, i64 %addrdisc) noinline optnone { + ; DAGISEL-LABEL: name: blended_disc_non_optimized + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0, $x1 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY killed [[COPY1]] + ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY killed [[COPY]] + ; DAGISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[COPY3]], 42, 48 + ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:gpr64noip = COPY [[MOVKXi]] + ; DAGISEL-NEXT: [[COPY5:%[0-9]+]]:gpr64noip = COPY [[COPY3]] + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY2]], 2, 42, [[COPY5]], implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: [[COPY6:%[0-9]+]]:gpr64all = COPY [[PAC]] + ; DAGISEL-NEXT: $x0 = COPY [[COPY6]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: blended_disc_non_optimized + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0, $x1 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; GISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[COPY1]], 42, 48 + ; GISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64noip = COPY [[COPY1]] + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, [[COPY2]], implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42) + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc) + ret i64 %signed +} + +define i64 @blend_and_sign_same_bb(i64 %addr) { + ; DAGISEL-LABEL: name: blend_and_sign_same_bb + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: liveins: $x0 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar + ; DAGISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar) + ; DAGISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48 + ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64noip = COPY [[LDRXui]] + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed [[COPY1]], implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: $x0 = COPY [[PAC]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: blend_and_sign_same_bb + ; GISEL: bb.1.entry: + ; GISEL-NEXT: liveins: $x0 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar + ; GISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar) + ; GISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48 + ; GISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64noip = COPY [[LDRXui]] + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, [[COPY1]], implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %addrdisc = load i64, ptr @discvar + %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42) + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc) + ret i64 %signed +} + +; In the below test cases both %addrdisc and %disc are computed (i.e. they are +; neither global addresses, nor function arguments) in a different basic block, +; making them harder to express via ISD::PtrAuthGlobalAddress. + +define i64 @blend_and_sign_different_bbs(i64 %addr, i64 %cond) { + ; DAGISEL-LABEL: name: blend_and_sign_different_bbs + ; DAGISEL: bb.0.entry: + ; DAGISEL-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; DAGISEL-NEXT: liveins: $x0, $x1 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; DAGISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar + ; DAGISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar) + ; DAGISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[LDRXui]], 42, 48 + ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64noip = COPY [[MOVKXi]] + ; DAGISEL-NEXT: CBZX [[COPY]], %bb.2 + ; DAGISEL-NEXT: B %bb.1 + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: bb.1.next: + ; DAGISEL-NEXT: successors: %bb.2(0x80000000) + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY [[COPY2]] + ; DAGISEL-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 3866633 /* reguse:GPR64common */, [[COPY3]] + ; DAGISEL-NEXT: {{ $}} + ; DAGISEL-NEXT: bb.2.exit: + ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:gpr64noip = COPY [[LDRXui]] + ; DAGISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY1]], 2, 42, [[COPY4]], implicit-def dead $x16, implicit-def dead $x17 + ; DAGISEL-NEXT: $x0 = COPY [[PAC]] + ; DAGISEL-NEXT: RET_ReallyLR implicit $x0 + ; + ; GISEL-LABEL: name: blend_and_sign_different_bbs + ; GISEL: bb.1.entry: + ; GISEL-NEXT: successors: %bb.2(0x50000000), %bb.3(0x30000000) + ; GISEL-NEXT: liveins: $x0, $x1 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; GISEL-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; GISEL-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @discvar + ; GISEL-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @discvar :: (dereferenceable load (s64) from @discvar) + ; GISEL-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[LDRXui]], 42, 48 + ; GISEL-NEXT: CBZX [[COPY1]], %bb.3 + ; GISEL-NEXT: B %bb.2 + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: bb.2.next: + ; GISEL-NEXT: successors: %bb.3(0x80000000) + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY [[MOVKXi]] + ; GISEL-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 3866633 /* reguse:GPR64common */, [[COPY2]] + ; GISEL-NEXT: {{ $}} + ; GISEL-NEXT: bb.3.exit: + ; GISEL-NEXT: [[COPY3:%[0-9]+]]:gpr64noip = COPY [[LDRXui]] + ; GISEL-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, [[COPY3]], implicit-def dead $x16, implicit-def dead $x17 + ; GISEL-NEXT: $x0 = COPY [[PAC]] + ; GISEL-NEXT: RET_ReallyLR implicit $x0 +entry: + %addrdisc = load i64, ptr @discvar + %disc = call i64 @llvm.ptrauth.blend(i64 %addrdisc, i64 42) + %cond.b = icmp ne i64 %cond, 0 + br i1 %cond.b, label %next, label %exit + +next: + call void asm sideeffect "nop", "r"(i64 %disc) + br label %exit + +exit: + %signed = call i64 @llvm.ptrauth.sign(i64 %addr, i32 2, i64 %disc) + ret i64 %signed +} diff --git a/llvm/test/CodeGen/AArch64/ptrauth-isel.mir b/llvm/test/CodeGen/AArch64/ptrauth-isel.mir new file mode 100644 index 0000000..1a15588 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ptrauth-isel.mir @@ -0,0 +1,205 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -o - %s -mtriple arm64e-apple-darwin -verify-machineinstrs \ +# RUN: -stop-after=finalize-isel -start-before=finalize-isel | FileCheck %s +# RUN: llc -o - %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs \ +# RUN: -stop-after=finalize-isel -start-before=finalize-isel | FileCheck %s + +# This MIR-based test contains several test cases that are hard to implement +# via an LLVM IR input. Most other test cases are in ptrauth-isel.ll file. + +--- | + @globalvar = dso_local global i64 0 + + define i64 @movk_correct_blend(i64 %a, i64 %b) { + entry: + ret i64 0 + } + + define i64 @movk_wrong_shift_amount(i64 %a, i64 %b) { + entry: + ret i64 0 + } + + define i64 @movk_non_immediate_operand(i64 %a, i64 %b) { + entry: + ret i64 0 + } + + define i64 @movi64imm_immediate_operand(i64 %a) { + entry: + ret i64 0 + } + + define i64 @movi64imm_non_immediate_operand(i64 %a) { + entry: + ret i64 0 + } + + define i64 @movi32imm_immediate_operand(i64 %a) { + entry: + ret i64 0 + } + + define i64 @movi32imm_non_immediate_operand(i64 %a) { + entry: + ret i64 0 + } +... +--- +name: movk_correct_blend +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movk_correct_blend + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[COPY1]], 42, 48 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64noip = COPY [[COPY1]] + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed [[COPY2]], implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64noip = MOVKXi %1, 42, 48 + %3:gpr64 = PAC %0, 2, 0, killed %2, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: movk_wrong_shift_amount +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movk_wrong_shift_amount + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[COPY1]], 42, 0 + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[MOVKXi]], implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64noip = MOVKXi %1, 42, 0 + %3:gpr64 = PAC %0, 2, 0, killed %2, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: movk_non_immediate_operand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movk_non_immediate_operand + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVKXi:%[0-9]+]]:gpr64noip = MOVKXi [[COPY1]], target-flags(aarch64-pageoff, aarch64-nc) @globalvar, 48 + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[MOVKXi]], implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64noip = MOVKXi %1, target-flags(aarch64-pageoff, aarch64-nc) @globalvar, 48 + %3:gpr64 = PAC %0, 2, 0, killed %2, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: movi64imm_immediate_operand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movi64imm_immediate_operand + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm 42 + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64noip = MOVi64imm 42 + %2:gpr64 = PAC %0, 2, 0, killed %1, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... +--- +name: movi64imm_non_immediate_operand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movi64imm_non_immediate_operand + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64noip = MOVi64imm target-flags(aarch64-pageoff, aarch64-nc) @globalvar + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[MOVi64imm]], implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64noip = MOVi64imm target-flags(aarch64-pageoff, aarch64-nc) @globalvar + %2:gpr64 = PAC %0, 2, 0, killed %1, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %2 + RET_ReallyLR implicit $x0 +... +--- +name: movi32imm_immediate_operand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movi32imm_immediate_operand + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 42, killed $noreg, implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 42 + %2:gpr64noip = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = PAC %0, 2, 0, killed %2, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... +--- +name: movi32imm_non_immediate_operand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: movi32imm_non_immediate_operand + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm target-flags(aarch64-pageoff, aarch64-nc) @globalvar + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64noip = SUBREG_TO_REG 0, killed [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[PAC:%[0-9]+]]:gpr64 = PAC [[COPY]], 2, 0, killed [[SUBREG_TO_REG]], implicit-def dead $x16, implicit-def dead $x17 + ; CHECK-NEXT: $x0 = COPY [[PAC]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm target-flags(aarch64-pageoff, aarch64-nc) @globalvar + %2:gpr64noip = SUBREG_TO_REG 0, killed %1, %subreg.sub_32 + %3:gpr64 = PAC %0, 2, 0, killed %2, implicit-def dead $x16, implicit-def dead $x17 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/reassocmls.ll b/llvm/test/CodeGen/AArch64/reassocmls.ll index acbf9fc..0909fbf 100644 --- a/llvm/test/CodeGen/AArch64/reassocmls.ll +++ b/llvm/test/CodeGen/AArch64/reassocmls.ll @@ -1,12 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s +; RUN: llc -mtriple=aarch64-none-elf -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -mattr=+sve2 -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for smlsl_nxv8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_nxv8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mls_nxv8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mla_nxv8i16 define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { -; CHECK-LABEL: smlsl_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: smsubl x8, w4, w3, x0 -; CHECK-NEXT: smsubl x0, w2, w1, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: smlsl_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: smsubl x8, w4, w3, x0 +; CHECK-SD-NEXT: smsubl x0, w2, w1, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: smull x8, w2, w1 +; CHECK-GI-NEXT: smaddl x8, w4, w3, x8 +; CHECK-GI-NEXT: sub x0, x0, x8 +; CHECK-GI-NEXT: ret %be = sext i32 %b to i64 %ce = sext i32 %c to i64 %de = sext i32 %d to i64 @@ -19,11 +32,18 @@ define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { } define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { -; CHECK-LABEL: umlsl_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: umsubl x8, w4, w3, x0 -; CHECK-NEXT: umsubl x0, w2, w1, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlsl_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: umsubl x8, w4, w3, x0 +; CHECK-SD-NEXT: umsubl x0, w2, w1, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: umull x8, w2, w1 +; CHECK-GI-NEXT: umaddl x8, w4, w3, x8 +; CHECK-GI-NEXT: sub x0, x0, x8 +; CHECK-GI-NEXT: ret %be = zext i32 %b to i64 %ce = zext i32 %c to i64 %de = zext i32 %d to i64 @@ -36,11 +56,18 @@ define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { } define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) { -; CHECK-LABEL: mls_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: msub x8, x4, x3, x0 -; CHECK-NEXT: msub x0, x2, x1, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mls_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: msub x8, x4, x3, x0 +; CHECK-SD-NEXT: msub x0, x2, x1, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mls_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul x8, x2, x1 +; CHECK-GI-NEXT: madd x8, x4, x3, x8 +; CHECK-GI-NEXT: sub x0, x0, x8 +; CHECK-GI-NEXT: ret %m1.neg = mul i64 %c, %b %m2.neg = mul i64 %e, %d %reass.add = add i64 %m2.neg, %m1.neg @@ -49,11 +76,18 @@ define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) { } define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) { -; CHECK-LABEL: mls_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: msub w8, w4, w3, w0 -; CHECK-NEXT: msub w0, w2, w1, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mls_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: msub w8, w4, w3, w0 +; CHECK-SD-NEXT: msub w0, w2, w1, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mls_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w2, w1 +; CHECK-GI-NEXT: madd w8, w4, w3, w8 +; CHECK-GI-NEXT: sub w0, w0, w8 +; CHECK-GI-NEXT: ret %m1.neg = mul i16 %c, %b %m2.neg = mul i16 %e, %d %reass.add = add i16 %m2.neg, %m1.neg @@ -91,12 +125,20 @@ define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) { } define i64 @umlsl_i64_muls(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { -; CHECK-LABEL: umlsl_i64_muls: -; CHECK: // %bb.0: -; CHECK-NEXT: umull x8, w2, w3 -; CHECK-NEXT: umsubl x8, w4, w3, x8 -; CHECK-NEXT: umsubl x0, w2, w1, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlsl_i64_muls: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: umull x8, w2, w3 +; CHECK-SD-NEXT: umsubl x8, w4, w3, x8 +; CHECK-SD-NEXT: umsubl x0, w2, w1, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_i64_muls: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: umull x8, w2, w1 +; CHECK-GI-NEXT: umull x9, w2, w3 +; CHECK-GI-NEXT: umaddl x8, w4, w3, x8 +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret %be = zext i32 %b to i64 %ce = zext i32 %c to i64 %de = zext i32 %d to i64 @@ -110,13 +152,21 @@ define i64 @umlsl_i64_muls(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { } define i64 @umlsl_i64_uses(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) { -; CHECK-LABEL: umlsl_i64_uses: -; CHECK: // %bb.0: -; CHECK-NEXT: umull x8, w4, w3 -; CHECK-NEXT: umaddl x8, w2, w1, x8 -; CHECK-NEXT: sub x9, x0, x8 -; CHECK-NEXT: and x0, x8, x9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlsl_i64_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: umull x8, w4, w3 +; CHECK-SD-NEXT: umaddl x8, w2, w1, x8 +; CHECK-SD-NEXT: sub x9, x0, x8 +; CHECK-SD-NEXT: and x0, x8, x9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_i64_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: umull x8, w2, w1 +; CHECK-GI-NEXT: umaddl x8, w4, w3, x8 +; CHECK-GI-NEXT: sub x9, x0, x8 +; CHECK-GI-NEXT: and x0, x8, x9 +; CHECK-GI-NEXT: ret %be = zext i32 %b to i64 %ce = zext i32 %c to i64 %de = zext i32 %d to i64 @@ -175,11 +225,18 @@ define i64 @mla_i64_mul(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) { define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) { -; CHECK-LABEL: smlsl_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: smlsl v0.8h, v4.8b, v3.8b -; CHECK-NEXT: smlsl v0.8h, v2.8b, v1.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: smlsl_v8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: smlsl v0.8h, v4.8b, v3.8b +; CHECK-SD-NEXT: smlsl v0.8h, v2.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: smull v1.8h, v2.8b, v1.8b +; CHECK-GI-NEXT: smlal v1.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %be = sext <8 x i8> %b to <8 x i16> %ce = sext <8 x i8> %c to <8 x i16> %de = sext <8 x i8> %d to <8 x i16> @@ -192,11 +249,18 @@ define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> % } define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) { -; CHECK-LABEL: umlsl_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: umlsl v0.8h, v4.8b, v3.8b -; CHECK-NEXT: umlsl v0.8h, v2.8b, v1.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: umlsl_v8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: umlsl v0.8h, v4.8b, v3.8b +; CHECK-SD-NEXT: umlsl v0.8h, v2.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: umull v1.8h, v2.8b, v1.8b +; CHECK-GI-NEXT: umlal v1.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %be = zext <8 x i8> %b to <8 x i16> %ce = zext <8 x i8> %c to <8 x i16> %de = zext <8 x i8> %d to <8 x i16> @@ -209,11 +273,18 @@ define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> % } define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) { -; CHECK-LABEL: mls_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: mls v0.8h, v4.8h, v3.8h -; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mls_v8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mls v0.8h, v4.8h, v3.8h +; CHECK-SD-NEXT: mls v0.8h, v2.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mls_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul v1.8h, v2.8h, v1.8h +; CHECK-GI-NEXT: mla v1.8h, v4.8h, v3.8h +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %m1.neg = mul <8 x i16> %c, %b %m2.neg = mul <8 x i16> %e, %d %reass.add = add <8 x i16> %m2.neg, %m1.neg @@ -236,12 +307,20 @@ define <8 x i16> @mla_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> } define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) { -; CHECK-LABEL: mls_v8i16_C: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.8h, #10 -; CHECK-NEXT: mls v0.8h, v4.8h, v3.8h -; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mls_v8i16_C: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.8h, #10 +; CHECK-SD-NEXT: mls v0.8h, v4.8h, v3.8h +; CHECK-SD-NEXT: mls v0.8h, v2.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mls_v8i16_C: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul v0.8h, v2.8h, v1.8h +; CHECK-GI-NEXT: movi v1.8h, #10 +; CHECK-GI-NEXT: mla v0.8h, v4.8h, v3.8h +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %m1.neg = mul <8 x i16> %c, %b %m2.neg = mul <8 x i16> %e, %d %reass.add = add <8 x i16> %m2.neg, %m1.neg @@ -250,13 +329,21 @@ define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16 } define <8 x i16> @mla_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) { -; CHECK-LABEL: mla_v8i16_C: -; CHECK: // %bb.0: -; CHECK-NEXT: mul v1.8h, v2.8h, v1.8h -; CHECK-NEXT: movi v0.8h, #10 -; CHECK-NEXT: mla v1.8h, v4.8h, v3.8h -; CHECK-NEXT: add v0.8h, v1.8h, v0.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mla_v8i16_C: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul v1.8h, v2.8h, v1.8h +; CHECK-SD-NEXT: movi v0.8h, #10 +; CHECK-SD-NEXT: mla v1.8h, v4.8h, v3.8h +; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mla_v8i16_C: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul v0.8h, v2.8h, v1.8h +; CHECK-GI-NEXT: movi v1.8h, #10 +; CHECK-GI-NEXT: mla v0.8h, v4.8h, v3.8h +; CHECK-GI-NEXT: add v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %m1.neg = mul <8 x i16> %c, %b %m2.neg = mul <8 x i16> %e, %d %reass.add = add <8 x i16> %m2.neg, %m1.neg diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir new file mode 100644 index 0000000..aecb90a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir @@ -0,0 +1,23 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s +--- +name: test +tracksRegLiveness: true +body: | + bb.0: + liveins: $x1 + ; CHECK-LABEL: name: test + ; CHECK: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x0 = COPY $x1 + ; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1 + ; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0 + %190:gpr64 = COPY killed $x1 + %191:gpr32 = COPY %190.sub_32:gpr64 + %192:gpr32 = ORRWrr $wzr, killed %191:gpr32 + %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32 + $x0 = COPY killed %190:gpr64 + $x1 = COPY killed %193:gpr64all + RET_ReallyLR implicit $x1, implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir index 08fc47d..eb6242c 100644 --- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir +++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir @@ -7,9 +7,18 @@ # CHECK-DBG: ********** JOINING INTERVALS *********** # CHECK-DBG: ********** INTERVALS ********** # CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00 -# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 -# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 +# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 # CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0.entry: +# CHECK-DBG: 16B %0:gpr64sp = ADDXri %stack.0, 0, 0 +# CHECK-DBG: 32B %5:gpr64common = nuw ADDXri %0:gpr64sp, 64, 0 +# CHECK-DBG: 48B undef %3.sub_32:gpr64 = MOVi32imm 64, implicit-def %3:gpr64 +# CHECK-DBG: 80B undef %4.sub_32:gpr64 = MOVi32imm 64, implicit-def %4:gpr64 +# CHECK-DBG: 112B dead %5:gpr64common, dead early-clobber %4:gpr64 = MOPSMemorySetPseudo %5:gpr64common(tied-def 0), %4:gpr64(tied-def 1), %3:gpr64, implicit-def dead $nzcv +# CHECK-DBG: 128B RET_ReallyLR + --- name: test tracksRegLiveness: true @@ -43,9 +52,44 @@ body: | # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r # CHECK-DBG-SAME: weight:0.000000e+00 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi -# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi +# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi # CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 48B bb.1: +# CHECK-DBG: ; predecessors: %bb.0, %bb.7 +# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%) +# CHECK-DBG: 64B bb.2: +# CHECK-DBG: ; predecessors: %bb.1 +# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%) +# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 96B bb.3: +# CHECK-DBG: ; predecessors: %bb.2 +# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%) +# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv +# CHECK-DBG: 160B bb.4: +# CHECK-DBG: ; predecessors: %bb.3 +# CHECK-DBG: successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%) +# CHECK-DBG: 176B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv +# CHECK-DBG: 208B bb.5: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 240B undef %3.sub_32:gpr64 = MOVi32imm 1, implicit-def %3:gpr64 +# CHECK-DBG: 256B B %bb.7 +# CHECK-DBG: 272B bb.6: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 288B %3:gpr64 = COPY $xzr +# CHECK-DBG: 304B bb.7: +# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6 +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 320B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1 +# CHECK-DBG: 352B B %bb.1 --- name: reproducer tracksRegLiveness: true @@ -92,6 +136,42 @@ body: | # CHECK-DBG-SAME: L0000000000000080 [224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@x 3@288B-phi # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0@272r 1@224r 2@80r 3@288B-phi # CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 48B bb.1: +# CHECK-DBG: ; predecessors: %bb.0, %bb.7 +# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%) +# CHECK-DBG: 64B bb.2: +# CHECK-DBG: ; predecessors: %bb.1 +# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%) +# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 96B bb.3: +# CHECK-DBG: ; predecessors: %bb.2 +# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%) +# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv +# CHECK-DBG: 160B bb.4: +# CHECK-DBG: ; predecessors: %bb.3 +# CHECK-DBG: successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%) +# CHECK-DBG: 176B $nzcv = IMPLICIT_DEF +# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv +# CHECK-DBG: 208B bb.5: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 224B %3:gpr64 = IMPLICIT_DEF +# CHECK-DBG: 240B B %bb.7 +# CHECK-DBG: 256B bb.6: +# CHECK-DBG: ; predecessors: %bb.4 +# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%) +# CHECK-DBG: 272B %3:gpr64 = COPY $xzr +# CHECK-DBG: 288B bb.7: +# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6 +# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%) +# CHECK-DBG: 304B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1 +# CHECK-DBG: 336B B %bb.1 + --- name: reproducer2 tracksRegLiveness: true @@ -127,3 +207,78 @@ body: | B %bb.1 ... +# CHECK-DBG: ********** REGISTER COALESCER ********** +# CHECK-DBG: ********** Function: reproducer3 +# CHECK-DBG: ********** JOINING INTERVALS *********** +# CHECK-DBG: ********** INTERVALS ********** +# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r +# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi +# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00 +# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00 +# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00 +# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: L0000000000000080 [128r,256r:0)[304B,320r:0) 0@128r +# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00 +# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00 +# CHECK-DBG: ********** MACHINEINSTRS ********** +# CHECK-DBG: 0B bb.0: +# CHECK-DBG: successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%) +# CHECK-DBG: liveins: $w0, $w1 +# CHECK-DBG: 16B %0:gpr32 = COPY $w1 +# CHECK-DBG: 32B %1:gpr32 = COPY $w0 +# CHECK-DBG: 48B %2:gpr32 = UBFMWri %1:gpr32, 31, 30 +# CHECK-DBG: 64B %3:gpr32 = SUBWrs %2:gpr32, %0:gpr32, 1 +# CHECK-DBG: 80B %4:gpr32 = UBFMWri %3:gpr32, 1, 31 +# CHECK-DBG: 96B %8:gpr32common = MOVi32imm 1 +# CHECK-DBG: 112B undef %7.sub_32:gpr64 = MOVi32imm 1 +# CHECK-DBG: 128B undef %7.sub_32:gpr64 = BFMWri %7.sub_32:gpr64(tied-def 0), %1:gpr32, 31, 30, implicit-def %7:gpr64 +# CHECK-DBG: 176B %8:gpr32common = BFMWri %8:gpr32common(tied-def 0), %4:gpr32, 30, 29 +# CHECK-DBG: 192B dead $wzr = SUBSWri %8:gpr32common, 0, 0, implicit-def $nzcv +# CHECK-DBG: 208B Bcc 2, %bb.2, implicit killed $nzcv +# CHECK-DBG: 224B B %bb.1 +# CHECK-DBG: 240B bb.1: +# CHECK-DBG: ; predecessors: %bb.0 +# CHECK-DBG: 256B %9:gpr64common = UBFMXri %7:gpr64, 62, 61 +# CHECK-DBG: 272B dead $xzr = LDRXui %9:gpr64common, 0 +# CHECK-DBG: 288B RET_ReallyLR +# CHECK-DBG: 304B bb.2: +# CHECK-DBG: ; predecessors: %bb.0 +# CHECK-DBG: 320B $x0 = COPY %7:gpr64 +# CHECK-DBG: 336B RET_ReallyLR implicit $x0 + +--- +name: reproducer3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + %0:gpr32 = COPY killed $w1 + %1:gpr32 = COPY killed $w0 + %3:gpr32 = UBFMWri %1, 31, 30 + %4:gpr32 = SUBWrs killed %3, killed %0, 1 + %5:gpr32 = UBFMWri killed %4, 1, 31 + %6:gpr32 = MOVi32imm 1 + %7:gpr32 = COPY %6 + %7:gpr32 = BFMWri %7, killed %1, 31, 30 + %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32 + %9:gpr32common = COPY killed %6 + %9:gpr32common = BFMWri %9, killed %5, 30, 29 + dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv + Bcc 2, %bb.2, implicit killed $nzcv + B %bb.1 + + bb.1: + %10:gpr64common = UBFMXri killed %8, 62, 61 + dead $xzr = LDRXui killed %10, 0 + RET_ReallyLR + + bb.2: + $x0 = COPY killed %8 + RET_ReallyLR implicit killed $x0 + +... diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll index cd53833..fc5012c 100644 --- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll @@ -23,21 +23,21 @@ entry: %scevgep = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 8, i32 0 %vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep) %ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1 - %fm1 = fmul <4 x float> %f, %ev1 - %av1 = fadd <4 x float> %f, %fm1 + %fm1 = fmul contract <4 x float> %f, %ev1 + %av1 = fadd contract <4 x float> %f, %fm1 %ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0 - %fm2 = fmul <4 x float> %f, %ev2 - %av2 = fadd <4 x float> %f, %fm2 + %fm2 = fmul contract <4 x float> %f, %ev2 + %av2 = fadd contract <4 x float> %f, %fm2 %scevgep2 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 8, i32 0 tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av2, <4 x float> %av1, ptr %scevgep2) %scevgep3 = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 12, i32 0 %vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep3) %ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1 - %fm3 = fmul <4 x float> %f, %ev3 - %av3 = fadd <4 x float> %f, %fm3 + %fm3 = fmul contract <4 x float> %f, %ev3 + %av3 = fadd contract <4 x float> %f, %fm3 %ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0 - %fm4 = fmul <4 x float> %f, %ev4 - %av4 = fadd <4 x float> %f, %fm4 + %fm4 = fmul contract <4 x float> %f, %ev4 + %av4 = fadd contract <4 x float> %f, %fm4 %scevgep4 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 12, i32 0 tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av4, <4 x float> %av3, ptr %scevgep4) ret void @@ -49,6 +49,6 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) #2 ; Function Attrs: nounwind declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr nocapture) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } attributes #2 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AArch64/selectopt-const.ll b/llvm/test/CodeGen/AArch64/selectopt-const.ll index a44c746..fe48dba 100644 --- a/llvm/test/CodeGen/AArch64/selectopt-const.ll +++ b/llvm/test/CodeGen/AArch64/selectopt-const.ll @@ -29,8 +29,8 @@ define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) { ; CHECK-NEXT: csel x10, x9, xzr, lt ; CHECK-NEXT: subs x8, x8, #1 ; CHECK-NEXT: ldr s3, [x4, x10] -; CHECK-NEXT: fcvtzs w10, s3 -; CHECK-NEXT: str w10, [x2], #4 +; CHECK-NEXT: fcvtzs s3, s3 +; CHECK-NEXT: st1 { v3.s }[0], [x2], #4 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll index c63899c..19ac03d 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+sme2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { ; CHECK-LABEL: ld1_x2_i8_z0_z8: diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll index 05241f7..039b621 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+sme2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { ; STRIDED-LABEL: ldnt1_x2_i8_z0_z8: diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll index f73b4bd..e29993d 100644 --- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -2,15 +2,15 @@ ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s -declare float @llvm.sqrt.f32(float) #0 -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0 -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0 -declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0 -declare double @llvm.sqrt.f64(double) #0 -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0 -declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0 +declare float @llvm.sqrt.f32(float) +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) -define float @fsqrt(float %a) #0 { +define float @fsqrt(float %a) { ; FAULT-LABEL: fsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -33,7 +33,7 @@ define float @fsqrt(float %a) #0 { ret float %1 } -define float @fsqrt_ieee_denorms(float %a) #1 { +define float @fsqrt_ieee_denorms(float %a) #0 { ; FAULT-LABEL: fsqrt_ieee_denorms: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -56,7 +56,7 @@ define float @fsqrt_ieee_denorms(float %a) #1 { ret float %1 } -define <2 x float> @f2sqrt(<2 x float> %a) #0 { +define <2 x float> @f2sqrt(<2 x float> %a) { ; FAULT-LABEL: f2sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2s, v0.2s @@ -79,7 +79,7 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 { ret <2 x float> %1 } -define <4 x float> @f4sqrt(<4 x float> %a) #0 { +define <4 x float> @f4sqrt(<4 x float> %a) { ; FAULT-LABEL: f4sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -102,7 +102,7 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 { ret <4 x float> %1 } -define <8 x float> @f8sqrt(<8 x float> %a) #0 { +define <8 x float> @f8sqrt(<8 x float> %a) { ; FAULT-LABEL: f8sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -136,7 +136,7 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 { ret <8 x float> %1 } -define double @dsqrt(double %a) #0 { +define double @dsqrt(double %a) { ; FAULT-LABEL: dsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -162,7 +162,7 @@ define double @dsqrt(double %a) #0 { ret double %1 } -define double @dsqrt_ieee_denorms(double %a) #1 { +define double @dsqrt_ieee_denorms(double %a) #0 { ; FAULT-LABEL: dsqrt_ieee_denorms: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -188,7 +188,7 @@ define double @dsqrt_ieee_denorms(double %a) #1 { ret double %1 } -define <2 x double> @d2sqrt(<2 x double> %a) #0 { +define <2 x double> @d2sqrt(<2 x double> %a) { ; FAULT-LABEL: d2sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -214,7 +214,7 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 { ret <2 x double> %1 } -define <4 x double> @d4sqrt(<4 x double> %a) #0 { +define <4 x double> @d4sqrt(<4 x double> %a) { ; FAULT-LABEL: d4sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -254,7 +254,7 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 { ret <4 x double> %1 } -define float @frsqrt(float %a) #0 { +define float @frsqrt(float %a) { ; FAULT-LABEL: frsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -277,7 +277,7 @@ define float @frsqrt(float %a) #0 { ret float %2 } -define <2 x float> @f2rsqrt(<2 x float> %a) #0 { +define <2 x float> @f2rsqrt(<2 x float> %a) { ; FAULT-LABEL: f2rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2s, v0.2s @@ -300,7 +300,7 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 { ret <2 x float> %2 } -define <4 x float> @f4rsqrt(<4 x float> %a) #0 { +define <4 x float> @f4rsqrt(<4 x float> %a) { ; FAULT-LABEL: f4rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -323,7 +323,7 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 { ret <4 x float> %2 } -define <8 x float> @f8rsqrt(<8 x float> %a) #0 { +define <8 x float> @f8rsqrt(<8 x float> %a) { ; FAULT-LABEL: f8rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -355,7 +355,7 @@ define <8 x float> @f8rsqrt(<8 x float> %a) #0 { ret <8 x float> %2 } -define double @drsqrt(double %a) #0 { +define double @drsqrt(double %a) { ; FAULT-LABEL: drsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -381,7 +381,7 @@ define double @drsqrt(double %a) #0 { ret double %2 } -define <2 x double> @d2rsqrt(<2 x double> %a) #0 { +define <2 x double> @d2rsqrt(<2 x double> %a) { ; FAULT-LABEL: d2rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -462,8 +462,8 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind { ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 -; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: fmul d0, d0, d2 ; CHECK-NEXT: ret %sqrt = call fast double @llvm.sqrt.f64(double %x) %r = fdiv fast double %x, %sqrt @@ -487,8 +487,8 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind { ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d -; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d ; CHECK-NEXT: ret %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt @@ -513,9 +513,9 @@ define double @sqrt_fdiv_common_operand_extra_use(double %x, ptr %p) nounwind { ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: fmul d1, d0, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 -; CHECK-NEXT: fmul d1, d0, d1 ; CHECK-NEXT: fcsel d2, d0, d1, eq ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: str d2, [x0] @@ -671,5 +671,4 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, ptr %p1, ptr %p2, pt ret double %sqrt_fast } -attributes #0 = { "unsafe-fp-math"="true" } -attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" } +attributes #0 = { "denormal-fp-math"="ieee" } diff --git a/llvm/test/CodeGen/AArch64/store-float-conversion.ll b/llvm/test/CodeGen/AArch64/store-float-conversion.ll new file mode 100644 index 0000000..c46801f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/store-float-conversion.ll @@ -0,0 +1,131 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=aarch64 < %s | FileCheck %s + +define void @f32_to_u8(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_u8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: str b0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptoui float %f to i32 + %trunc = trunc i32 %conv to i8 + store i8 %trunc, ptr %dst + ret void +} + +define void @f32_to_s8(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: str b0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %trunc = trunc i32 %conv to i8 + store i8 %trunc, ptr %dst + ret void +} + +define void @f32_to_u16(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptoui float %f to i32 + %trunc = trunc i32 %conv to i16 + store i16 %trunc, ptr %dst + ret void +} + +define void @f32_to_s16(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %trunc = trunc i32 %conv to i16 + store i16 %trunc, ptr %dst + ret void +} + +define void @f32_to_u32(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptoui float %f to i32 + store i32 %conv, ptr %dst + ret void +} + +define void @f32_to_s32(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + store i32 %conv, ptr %dst + ret void +} + +define void @f32_to_s64(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: sxtw x8, w8 +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %ext = sext i32 %conv to i64 + store i64 %ext, ptr %dst + ret void +} + +define void @f64_to_u64(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptoui double %d to i64 + store i64 %conv, ptr %dst + ret void +} + +define void @f64_to_s64(double %d, ptr %dst) { +; CHECK-LABEL: f64_to_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret +entry: + %conv = fptosi double %d to i64 + store i64 %conv, ptr %dst + ret void +} + +define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) { +; CHECK-LABEL: f32_to_i32_multiple_uses: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: strb w8, [x9] +; CHECK-NEXT: ret +entry: + %conv = fptosi float %f to i32 + %trunc = trunc i32 %conv to i8 + store i8 %trunc, ptr %dst + ret i32 %conv +} diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll index 05abfa3..29e94dd6 100644 --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll @@ -268,6 +268,20 @@ define <vscale x 2 x bfloat> @ld1_nxv2bf16(ptr %addr, i64 %off) { ret <vscale x 2 x bfloat> %val } +; Ensure we don't lose the free shift when using indexed addressing. +define <vscale x 2 x bfloat> @ld1_nxv2bf16_double_shift(ptr %addr, i64 %off) { +; CHECK-LABEL: ld1_nxv2bf16_double_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsr x8, x1, #6 +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: ret + %off2 = lshr i64 %off, 6 + %ptr = getelementptr inbounds bfloat, ptr %addr, i64 %off2 + %val = load volatile <vscale x 2 x bfloat>, ptr %ptr + ret <vscale x 2 x bfloat> %val +} + ; LD1W define <vscale x 4 x i32> @ld1_nxv4i32(ptr %addr, i64 %off) { @@ -327,6 +341,20 @@ define <vscale x 2 x float> @ld1_nxv2f32(ptr %addr, i64 %off) { ret <vscale x 2 x float> %val } +; Ensure we don't lose the free shift when using indexed addressing. +define <vscale x 2 x float> @ld1_nxv2f32_double_shift(ptr %addr, i64 %off) { +; CHECK-LABEL: ld1_nxv2f32_double_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsr x8, x1, #6 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ret + %off2 = lshr i64 %off, 6 + %ptr = getelementptr inbounds float, ptr %addr, i64 %off2 + %val = load volatile <vscale x 2 x float>, ptr %ptr + ret <vscale x 2 x float> %val +} + ; LD1D define <vscale x 2 x i64> @ld1_nxv2i64(ptr %addr, i64 %off) { @@ -350,3 +378,17 @@ define <vscale x 2 x double> @ld1_nxv2f64(ptr %addr, i64 %off) { %val = load volatile <vscale x 2 x double>, ptr %ptr ret <vscale x 2 x double> %val } + +; Ensure we don't lose the free shift when using indexed addressing. +define <vscale x 2 x double> @ld1_nxv2f64_double_shift(ptr %addr, i64 %off) { +; CHECK-LABEL: ld1_nxv2f64_double_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsr x8, x1, #6 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] +; CHECK-NEXT: ret + %off2 = lshr i64 %off, 6 + %ptr = getelementptr inbounds double, ptr %addr, i64 %off2 + %val = load volatile <vscale x 2 x double>, ptr %ptr + ret <vscale x 2 x double> %val +} diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index aa0a163..5fc996a 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -63,7 +63,8 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmp s2, #0.0 ; CHECK-NEXT: fcsel s2, s0, s3, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w11, s2 +; CHECK-NEXT: fcvtzs s2, s2 +; CHECK-NEXT: fmov w11, s2 ; CHECK-NEXT: strb w11, [x9], #1 ; CHECK-NEXT: b.ne .LBB0_7 ; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup @@ -178,12 +179,12 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmp s3, s1 ; CHECK-NEXT: fcsel s4, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fcvtzs w11, s2 +; CHECK-NEXT: fcvtzs s2, s2 ; CHECK-NEXT: fcsel s3, s0, s4, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: strb w11, [x9] -; CHECK-NEXT: fcvtzs w12, s3 -; CHECK-NEXT: strb w12, [x9, #1] +; CHECK-NEXT: str b2, [x9] +; CHECK-NEXT: fcvtzs s3, s3 +; CHECK-NEXT: stur b3, [x9, #1] ; CHECK-NEXT: add x9, x9, #2 ; CHECK-NEXT: b.ne .LBB1_6 ; CHECK-NEXT: .LBB1_7: // %for.cond.cleanup @@ -395,19 +396,19 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcsel s4, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: ldr s3, [x8, #8] -; CHECK-NEXT: fcvtzs w11, s2 +; CHECK-NEXT: fcvtzs s2, s2 ; CHECK-NEXT: add x8, x8, #12 ; CHECK-NEXT: fcsel s4, s0, s4, mi ; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: strb w11, [x9] +; CHECK-NEXT: str b2, [x9] ; CHECK-NEXT: fcsel s5, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fcvtzs w12, s4 +; CHECK-NEXT: fcvtzs s4, s4 ; CHECK-NEXT: fcsel s3, s0, s5, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: strb w12, [x9, #1] -; CHECK-NEXT: fcvtzs w13, s3 -; CHECK-NEXT: strb w13, [x9, #2] +; CHECK-NEXT: stur b4, [x9, #1] +; CHECK-NEXT: fcvtzs s3, s3 +; CHECK-NEXT: stur b3, [x9, #2] ; CHECK-NEXT: add x9, x9, #3 ; CHECK-NEXT: b.ne .LBB2_8 ; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup @@ -563,26 +564,26 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmp s3, s1 ; CHECK-NEXT: fcsel s4, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fcvtzs w11, s2 +; CHECK-NEXT: fcvtzs s2, s2 ; CHECK-NEXT: ldp s3, s5, [x8, #8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: fcsel s4, s0, s4, mi ; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: strb w11, [x9] -; CHECK-NEXT: fcvtzs w12, s4 +; CHECK-NEXT: str b2, [x9] +; CHECK-NEXT: fcvtzs s4, s4 ; CHECK-NEXT: fcsel s6, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: fcsel s3, s0, s6, mi ; CHECK-NEXT: fcmp s5, s1 -; CHECK-NEXT: strb w12, [x9, #1] +; CHECK-NEXT: stur b4, [x9, #1] ; CHECK-NEXT: fcsel s6, s1, s5, gt ; CHECK-NEXT: fcmp s5, #0.0 -; CHECK-NEXT: fcvtzs w13, s3 -; CHECK-NEXT: fcsel s2, s0, s6, mi +; CHECK-NEXT: fcvtzs s3, s3 +; CHECK-NEXT: fcsel s5, s0, s6, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: strb w13, [x9, #2] -; CHECK-NEXT: fcvtzs w14, s2 -; CHECK-NEXT: strb w14, [x9, #3] +; CHECK-NEXT: stur b3, [x9, #2] +; CHECK-NEXT: fcvtzs s5, s5 +; CHECK-NEXT: stur b5, [x9, #3] ; CHECK-NEXT: add x9, x9, #4 ; CHECK-NEXT: b.ne .LBB3_6 ; CHECK-NEXT: .LBB3_7: // %for.cond.cleanup diff --git a/llvm/test/CodeGen/AArch64/urem-lkk.ll b/llvm/test/CodeGen/AArch64/urem-lkk.ll index 2212e0a..0dd6685 100644 --- a/llvm/test/CodeGen/AArch64/urem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-lkk.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i32 @fold_urem_positive_odd(i32 %x) { ; CHECK-LABEL: fold_urem_positive_odd: @@ -18,37 +19,54 @@ define i32 @fold_urem_positive_odd(i32 %x) { ret i32 %1 } - define i32 @fold_urem_positive_even(i32 %x) { -; CHECK-LABEL: fold_urem_positive_even: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16323 // =0x3fc3 -; CHECK-NEXT: mov w9, #1060 // =0x424 -; CHECK-NEXT: movk w8, #63310, lsl #16 -; CHECK-NEXT: umull x8, w0, w8 -; CHECK-NEXT: lsr x8, x8, #42 -; CHECK-NEXT: msub w0, w8, w9, w0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fold_urem_positive_even: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16323 // =0x3fc3 +; CHECK-SD-NEXT: mov w9, #1060 // =0x424 +; CHECK-SD-NEXT: movk w8, #63310, lsl #16 +; CHECK-SD-NEXT: umull x8, w0, w8 +; CHECK-SD-NEXT: lsr x8, x8, #42 +; CHECK-SD-NEXT: msub w0, w8, w9, w0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fold_urem_positive_even: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #16323 // =0x3fc3 +; CHECK-GI-NEXT: mov w9, #1060 // =0x424 +; CHECK-GI-NEXT: movk w8, #63310, lsl #16 +; CHECK-GI-NEXT: umull x8, w0, w8 +; CHECK-GI-NEXT: lsr x8, x8, #32 +; CHECK-GI-NEXT: lsr w8, w8, #10 +; CHECK-GI-NEXT: msub w0, w8, w9, w0 +; CHECK-GI-NEXT: ret %1 = urem i32 %x, 1060 ret i32 %1 } - ; Don't fold if we can combine urem with udiv. define i32 @combine_urem_udiv(i32 %x) { -; CHECK-LABEL: combine_urem_udiv: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8969 // =0x2309 -; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: umull x8, w0, w8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: sub w9, w0, w8 -; CHECK-NEXT: add w8, w8, w9, lsr #1 -; CHECK-NEXT: mov w9, #95 // =0x5f -; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: msub w9, w8, w9, w0 -; CHECK-NEXT: add w0, w9, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: combine_urem_udiv: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #8969 // =0x2309 +; CHECK-SD-NEXT: movk w8, #22765, lsl #16 +; CHECK-SD-NEXT: umull x8, w0, w8 +; CHECK-SD-NEXT: lsr x8, x8, #32 +; CHECK-SD-NEXT: sub w9, w0, w8 +; CHECK-SD-NEXT: add w8, w8, w9, lsr #1 +; CHECK-SD-NEXT: mov w9, #95 // =0x5f +; CHECK-SD-NEXT: lsr w8, w8, #6 +; CHECK-SD-NEXT: msub w9, w8, w9, w0 +; CHECK-SD-NEXT: add w0, w9, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: combine_urem_udiv: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #95 // =0x5f +; CHECK-GI-NEXT: udiv w9, w0, w8 +; CHECK-GI-NEXT: msub w8, w9, w8, w0 +; CHECK-GI-NEXT: add w0, w8, w9 +; CHECK-GI-NEXT: ret %1 = urem i32 %x, 95 %2 = udiv i32 %x, 95 %3 = add i32 %1, %2 |