diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/atomic-load-store.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/atomic-load-store.ll | 504 |
1 files changed, 106 insertions, 398 deletions
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 3e7b73a..1173c45 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX2-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX512-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX2-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX512-O0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -50,30 +50,10 @@ define <1 x i8> @atomic_vec1_i8(ptr %x) { ; CHECK-O3-NEXT: movzbl (%rdi), %eax ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_i8: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_i8: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_i8: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_i8: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movb (%rdi), %al -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_i8: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movb (%rdi), %al -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x i8>, ptr %x acquire, align 1 ret <1 x i8> %ret } @@ -84,30 +64,10 @@ define <1 x i16> @atomic_vec1_i16(ptr %x) { ; CHECK-O3-NEXT: movzwl (%rdi), %eax ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_i16: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_i16: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_i16: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movw (%rdi), %ax ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_i16: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movw (%rdi), %ax -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_i16: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movw (%rdi), %ax -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x i16>, ptr %x acquire, align 2 ret <1 x i16> %ret } @@ -119,35 +79,11 @@ define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { ; CHECK-O3-NEXT: movzbl %al, %eax ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax -; CHECK-SSE-O3-NEXT: movzbl %al, %eax -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax -; CHECK-AVX-O3-NEXT: movzbl %al, %eax -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_i8_zext: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: movzbl %al, %eax ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movb (%rdi), %al -; CHECK-SSE-O0-NEXT: movzbl %al, %eax -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movb (%rdi), %al -; CHECK-AVX-O0-NEXT: movzbl %al, %eax -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x i8>, ptr %x acquire, align 1 %zret = zext <1 x i8> %ret to <1 x i32> ret <1 x i32> %zret @@ -160,35 +96,11 @@ define <1 x i64> @atomic_vec1_i16_sext(ptr %x) { ; CHECK-O3-NEXT: movswq %ax, %rax ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: movswq %ax, %rax -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: movswq %ax, %rax -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_i16_sext: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movw (%rdi), %ax ; CHECK-O0-NEXT: movswq %ax, %rax ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movw (%rdi), %ax -; CHECK-SSE-O0-NEXT: movswq %ax, %rax -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movw (%rdi), %ax -; CHECK-AVX-O0-NEXT: movswq %ax, %rax -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x i16>, ptr %x acquire, align 2 %sret = sext <1 x i16> %ret to <1 x i64> ret <1 x i64> %sret @@ -204,12 +116,6 @@ define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) { } define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { -; CHECK-O3-LABEL: atomic_vec1_bfloat: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax @@ -222,15 +128,6 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec1_bfloat: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movw (%rdi), %cx -; CHECK-O0-NEXT: # implicit-def: $eax -; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx @@ -283,30 +180,6 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK-O3-NEXT: popq %rcx ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_ptr: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: pushq %rax -; CHECK-SSE-O3-NEXT: movq %rdi, %rsi -; CHECK-SSE-O3-NEXT: movq %rsp, %rdx -; CHECK-SSE-O3-NEXT: movl $8, %edi -; CHECK-SSE-O3-NEXT: movl $2, %ecx -; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT -; CHECK-SSE-O3-NEXT: movq (%rsp), %rax -; CHECK-SSE-O3-NEXT: popq %rcx -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_ptr: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: pushq %rax -; CHECK-AVX-O3-NEXT: movq %rdi, %rsi -; CHECK-AVX-O3-NEXT: movq %rsp, %rdx -; CHECK-AVX-O3-NEXT: movl $8, %edi -; CHECK-AVX-O3-NEXT: movl $2, %ecx -; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT -; CHECK-AVX-O3-NEXT: movq (%rsp), %rax -; CHECK-AVX-O3-NEXT: popq %rcx -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_ptr: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: pushq %rax @@ -318,41 +191,11 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK-O0-NEXT: movq (%rsp), %rax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_ptr: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: pushq %rax -; CHECK-SSE-O0-NEXT: movq %rdi, %rsi -; CHECK-SSE-O0-NEXT: movl $8, %edi -; CHECK-SSE-O0-NEXT: movq %rsp, %rdx -; CHECK-SSE-O0-NEXT: movl $2, %ecx -; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT -; CHECK-SSE-O0-NEXT: movq (%rsp), %rax -; CHECK-SSE-O0-NEXT: popq %rcx -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_ptr: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: pushq %rax -; CHECK-AVX-O0-NEXT: movq %rdi, %rsi -; CHECK-AVX-O0-NEXT: movl $8, %edi -; CHECK-AVX-O0-NEXT: movq %rsp, %rdx -; CHECK-AVX-O0-NEXT: movl $2, %ecx -; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT -; CHECK-AVX-O0-NEXT: movq (%rsp), %rax -; CHECK-AVX-O0-NEXT: popq %rcx -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 ret <1 x ptr> %ret } define <1 x half> @atomic_vec1_half(ptr %x) { -; CHECK-O3-LABEL: atomic_vec1_half: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec1_half: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax @@ -365,15 +208,6 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec1_half: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movw (%rdi), %cx -; CHECK-O0-NEXT: # implicit-def: $eax -; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec1_half: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx @@ -396,11 +230,6 @@ define <1 x half> @atomic_vec1_half(ptr %x) { } define <1 x float> @atomic_vec1_float(ptr %x) { -; CHECK-O3-LABEL: atomic_vec1_float: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec1_float: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -411,11 +240,6 @@ define <1 x float> @atomic_vec1_float(ptr %x) { ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec1_float: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec1_float: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -430,11 +254,6 @@ define <1 x float> @atomic_vec1_float(ptr %x) { } define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec1_double_align: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec1_double_align: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -445,11 +264,6 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { ; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec1_double_align: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec1_double_align: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -476,30 +290,6 @@ define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { ; CHECK-O3-NEXT: popq %rcx ; CHECK-O3-NEXT: retq ; -; CHECK-SSE-O3-LABEL: atomic_vec1_i64: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: pushq %rax -; CHECK-SSE-O3-NEXT: movq %rdi, %rsi -; CHECK-SSE-O3-NEXT: movq %rsp, %rdx -; CHECK-SSE-O3-NEXT: movl $8, %edi -; CHECK-SSE-O3-NEXT: movl $2, %ecx -; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT -; CHECK-SSE-O3-NEXT: movq (%rsp), %rax -; CHECK-SSE-O3-NEXT: popq %rcx -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec1_i64: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: pushq %rax -; CHECK-AVX-O3-NEXT: movq %rdi, %rsi -; CHECK-AVX-O3-NEXT: movq %rsp, %rdx -; CHECK-AVX-O3-NEXT: movl $8, %edi -; CHECK-AVX-O3-NEXT: movl $2, %ecx -; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT -; CHECK-AVX-O3-NEXT: movq (%rsp), %rax -; CHECK-AVX-O3-NEXT: popq %rcx -; CHECK-AVX-O3-NEXT: retq -; ; CHECK-O0-LABEL: atomic_vec1_i64: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: pushq %rax @@ -511,47 +301,11 @@ define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { ; CHECK-O0-NEXT: movq (%rsp), %rax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec1_i64: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: pushq %rax -; CHECK-SSE-O0-NEXT: movq %rdi, %rsi -; CHECK-SSE-O0-NEXT: movl $8, %edi -; CHECK-SSE-O0-NEXT: movq %rsp, %rdx -; CHECK-SSE-O0-NEXT: movl $2, %ecx -; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT -; CHECK-SSE-O0-NEXT: movq (%rsp), %rax -; CHECK-SSE-O0-NEXT: popq %rcx -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec1_i64: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: pushq %rax -; CHECK-AVX-O0-NEXT: movq %rdi, %rsi -; CHECK-AVX-O0-NEXT: movl $8, %edi -; CHECK-AVX-O0-NEXT: movq %rsp, %rdx -; CHECK-AVX-O0-NEXT: movl $2, %ecx -; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT -; CHECK-AVX-O0-NEXT: movq (%rsp), %rax -; CHECK-AVX-O0-NEXT: popq %rcx -; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x i64>, ptr %x acquire, align 4 ret <1 x i64> %ret } define <1 x double> @atomic_vec1_double(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec1_double: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: pushq %rax -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $8, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-O3-NEXT: popq %rax -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec1_double: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: pushq %rax @@ -576,18 +330,6 @@ define <1 x double> @atomic_vec1_double(ptr %x) nounwind { ; CHECK-AVX-O3-NEXT: popq %rax ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec1_double: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: pushq %rax -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $8, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-O0-NEXT: popq %rax -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec1_double: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: pushq %rax @@ -616,18 +358,6 @@ define <1 x double> @atomic_vec1_double(ptr %x) nounwind { } define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec2_i32: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: pushq %rax -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $8, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-O3-NEXT: popq %rax -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec2_i32: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: pushq %rax @@ -652,18 +382,6 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ; CHECK-AVX-O3-NEXT: popq %rax ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec2_i32: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: pushq %rax -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $8, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; CHECK-O0-NEXT: popq %rax -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec2_i32: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: pushq %rax @@ -692,18 +410,6 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x float> @atomic_vec4_float(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec4_float: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: subq $24, %rsp -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $16, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movaps (%rsp), %xmm0 -; CHECK-O3-NEXT: addq $24, %rsp -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec4_float: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: subq $24, %rsp @@ -728,18 +434,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind { ; CHECK-AVX-O3-NEXT: addq $24, %rsp ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec4_float: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: subq $24, %rsp -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $16, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movaps (%rsp), %xmm0 -; CHECK-O0-NEXT: addq $24, %rsp -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec4_float: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: subq $24, %rsp @@ -768,21 +462,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind { } define <8 x double> @atomic_vec8_double(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec8_double: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: subq $72, %rsp -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $64, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movaps (%rsp), %xmm0 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 -; CHECK-O3-NEXT: addq $72, %rsp -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec8_double: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: subq $72, %rsp @@ -798,20 +477,30 @@ define <8 x double> @atomic_vec8_double(ptr %x) nounwind { ; CHECK-SSE-O3-NEXT: addq $72, %rsp ; CHECK-SSE-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec8_double: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: subq $72, %rsp -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $64, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movapd (%rsp), %xmm0 -; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 -; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 -; CHECK-O0-NEXT: addq $72, %rsp -; CHECK-O0-NEXT: retq +; CHECK-AVX2-O3-LABEL: atomic_vec8_double: +; CHECK-AVX2-O3: # %bb.0: +; CHECK-AVX2-O3-NEXT: subq $72, %rsp +; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX2-O3-NEXT: movl $64, %edi +; CHECK-AVX2-O3-NEXT: movl $2, %ecx +; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0 +; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 +; CHECK-AVX2-O3-NEXT: addq $72, %rsp +; CHECK-AVX2-O3-NEXT: retq +; +; CHECK-AVX512-O3-LABEL: atomic_vec8_double: +; CHECK-AVX512-O3: # %bb.0: +; CHECK-AVX512-O3-NEXT: subq $72, %rsp +; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX512-O3-NEXT: movl $64, %edi +; CHECK-AVX512-O3-NEXT: movl $2, %ecx +; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0 +; CHECK-AVX512-O3-NEXT: addq $72, %rsp +; CHECK-AVX512-O3-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec8_double: ; CHECK-SSE-O0: # %bb.0: @@ -827,24 +516,36 @@ define <8 x double> @atomic_vec8_double(ptr %x) nounwind { ; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 ; CHECK-SSE-O0-NEXT: addq $72, %rsp ; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX2-O0-LABEL: atomic_vec8_double: +; CHECK-AVX2-O0: # %bb.0: +; CHECK-AVX2-O0-NEXT: subq $72, %rsp +; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX2-O0-NEXT: movl $64, %edi +; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX2-O0-NEXT: movl $2, %ecx +; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX2-O0-NEXT: vmovupd (%rsp), %ymm0 +; CHECK-AVX2-O0-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm1 +; CHECK-AVX2-O0-NEXT: addq $72, %rsp +; CHECK-AVX2-O0-NEXT: retq +; +; CHECK-AVX512-O0-LABEL: atomic_vec8_double: +; CHECK-AVX512-O0: # %bb.0: +; CHECK-AVX512-O0-NEXT: subq $72, %rsp +; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX512-O0-NEXT: movl $64, %edi +; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX512-O0-NEXT: movl $2, %ecx +; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX512-O0-NEXT: vmovupd (%rsp), %zmm0 +; CHECK-AVX512-O0-NEXT: addq $72, %rsp +; CHECK-AVX512-O0-NEXT: retq %ret = load atomic <8 x double>, ptr %x acquire, align 4 ret <8 x double> %ret } define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec16_bfloat: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: subq $40, %rsp -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $32, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movaps (%rsp), %xmm0 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O3-NEXT: addq $40, %rsp -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: subq $40, %rsp @@ -870,19 +571,6 @@ define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind { ; CHECK-AVX-O3-NEXT: addq $40, %rsp ; CHECK-AVX-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec16_bfloat: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: subq $40, %rsp -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $32, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movaps (%rsp), %xmm0 -; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O0-NEXT: addq $40, %rsp -; CHECK-O0-NEXT: retq -; ; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat: ; CHECK-SSE-O0: # %bb.0: ; CHECK-SSE-O0-NEXT: subq $40, %rsp @@ -912,21 +600,6 @@ define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind { } define <32 x half> @atomic_vec32_half(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec32_half: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: subq $72, %rsp -; CHECK-O3-NEXT: movq %rdi, %rsi -; CHECK-O3-NEXT: movq %rsp, %rdx -; CHECK-O3-NEXT: movl $64, %edi -; CHECK-O3-NEXT: movl $2, %ecx -; CHECK-O3-NEXT: callq __atomic_load@PLT -; CHECK-O3-NEXT: movaps (%rsp), %xmm0 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 -; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 -; CHECK-O3-NEXT: addq $72, %rsp -; CHECK-O3-NEXT: retq -; ; CHECK-SSE-O3-LABEL: atomic_vec32_half: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: subq $72, %rsp @@ -942,20 +615,30 @@ define <32 x half> @atomic_vec32_half(ptr %x) nounwind { ; CHECK-SSE-O3-NEXT: addq $72, %rsp ; CHECK-SSE-O3-NEXT: retq ; -; CHECK-O0-LABEL: atomic_vec32_half: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: subq $72, %rsp -; CHECK-O0-NEXT: movq %rdi, %rsi -; CHECK-O0-NEXT: movl $64, %edi -; CHECK-O0-NEXT: movq %rsp, %rdx -; CHECK-O0-NEXT: movl $2, %ecx -; CHECK-O0-NEXT: callq __atomic_load@PLT -; CHECK-O0-NEXT: movaps (%rsp), %xmm0 -; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 -; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 -; CHECK-O0-NEXT: addq $72, %rsp -; CHECK-O0-NEXT: retq +; CHECK-AVX2-O3-LABEL: atomic_vec32_half: +; CHECK-AVX2-O3: # %bb.0: +; CHECK-AVX2-O3-NEXT: subq $72, %rsp +; CHECK-AVX2-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX2-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX2-O3-NEXT: movl $64, %edi +; CHECK-AVX2-O3-NEXT: movl $2, %ecx +; CHECK-AVX2-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX2-O3-NEXT: vmovups (%rsp), %ymm0 +; CHECK-AVX2-O3-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 +; CHECK-AVX2-O3-NEXT: addq $72, %rsp +; CHECK-AVX2-O3-NEXT: retq +; +; CHECK-AVX512-O3-LABEL: atomic_vec32_half: +; CHECK-AVX512-O3: # %bb.0: +; CHECK-AVX512-O3-NEXT: subq $72, %rsp +; CHECK-AVX512-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX512-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX512-O3-NEXT: movl $64, %edi +; CHECK-AVX512-O3-NEXT: movl $2, %ecx +; CHECK-AVX512-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX512-O3-NEXT: vmovups (%rsp), %zmm0 +; CHECK-AVX512-O3-NEXT: addq $72, %rsp +; CHECK-AVX512-O3-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec32_half: ; CHECK-SSE-O0: # %bb.0: @@ -971,6 +654,31 @@ define <32 x half> @atomic_vec32_half(ptr %x) nounwind { ; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 ; CHECK-SSE-O0-NEXT: addq $72, %rsp ; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX2-O0-LABEL: atomic_vec32_half: +; CHECK-AVX2-O0: # %bb.0: +; CHECK-AVX2-O0-NEXT: subq $72, %rsp +; CHECK-AVX2-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX2-O0-NEXT: movl $64, %edi +; CHECK-AVX2-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX2-O0-NEXT: movl $2, %ecx +; CHECK-AVX2-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX2-O0-NEXT: vmovups (%rsp), %ymm0 +; CHECK-AVX2-O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 +; CHECK-AVX2-O0-NEXT: addq $72, %rsp +; CHECK-AVX2-O0-NEXT: retq +; +; CHECK-AVX512-O0-LABEL: atomic_vec32_half: +; CHECK-AVX512-O0: # %bb.0: +; CHECK-AVX512-O0-NEXT: subq $72, %rsp +; CHECK-AVX512-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX512-O0-NEXT: movl $64, %edi +; CHECK-AVX512-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX512-O0-NEXT: movl $2, %ecx +; CHECK-AVX512-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX512-O0-NEXT: vmovups (%rsp), %zmm0 +; CHECK-AVX512-O0-NEXT: addq $72, %rsp +; CHECK-AVX512-O0-NEXT: retq %ret = load atomic <32 x half>, ptr %x acquire, align 4 ret <32 x half> %ret } |
