about | summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/X86/avg.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/avg.ll')
-rw-r--r-- llvm/test/CodeGen/X86/avg.ll 74
1 files changed, 49 insertions, 25 deletions
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 0de308a..5152c005 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -728,45 +728,70 @@ define void @avg_v32i8_2(ptr %a, ptr %b) nounwind {
define void @avg_v64i8_2(ptr %a, ptr %b) nounwind {
; SSE2-LABEL: avg_v64i8_2:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps (%rsi), %xmm0
-; SSE2-NEXT: movaps 16(%rsi), %xmm1
-; SSE2-NEXT: movaps 32(%rsi), %xmm2
-; SSE2-NEXT: movaps 48(%rsi), %xmm3
-; SSE2-NEXT: movups %xmm3, (%rax)
-; SSE2-NEXT: movups %xmm2, (%rax)
-; SSE2-NEXT: movups %xmm1, (%rax)
-; SSE2-NEXT: movups %xmm0, (%rax)
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
+; SSE2-NEXT: movdqa 32(%rdi), %xmm2
+; SSE2-NEXT: movdqa 48(%rdi), %xmm3
+; SSE2-NEXT: pavgb (%rsi), %xmm0
+; SSE2-NEXT: pavgb 16(%rsi), %xmm1
+; SSE2-NEXT: pavgb 32(%rsi), %xmm2
+; SSE2-NEXT: pavgb 48(%rsi), %xmm3
+; SSE2-NEXT: movdqu %xmm3, (%rax)
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v64i8_2:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps (%rsi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rsi), %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
-; AVX1-NEXT: vmovups %ymm0, (%rax)
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vpavgb (%rsi), %xmm0, %xmm0
+; AVX1-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2
+; AVX1-NEXT: vpavgb 48(%rsi), %xmm3, %xmm3
+; AVX1-NEXT: vmovdqu %xmm3, (%rax)
+; AVX1-NEXT: vmovdqu %xmm2, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: retq
;
; AVX2-LABEL: avg_v64i8_2:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps (%rsi), %ymm0
-; AVX2-NEXT: vmovaps 32(%rsi), %ymm1
-; AVX2-NEXT: vmovups %ymm1, (%rax)
-; AVX2-NEXT: vmovups %ymm0, (%rax)
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqu %ymm1, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: avg_v64i8_2:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rsi), %zmm0
-; AVX512-NEXT: vmovups %zmm0, (%rax)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: avg_v64i8_2:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX512F-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v64i8_2:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%1 = load <64 x i8>, ptr %a
%2 = load <64 x i8>, ptr %b
%3 = zext <64 x i8> %1 to <64 x i32>
%4 = zext <64 x i8> %2 to <64 x i32>
- %5 = add nuw nsw <64 x i32> %4, %4
+ %5 = add nuw nsw <64 x i32> %3, %4
%6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%8 = trunc <64 x i32> %7 to <64 x i8>
@@ -774,7 +799,6 @@ define void @avg_v64i8_2(ptr %a, ptr %b) nounwind {
ret void
}
-
define void @avg_v4i16_2(ptr %a, ptr %b) nounwind {
; SSE2-LABEL: avg_v4i16_2:
; SSE2: # %bb.0: