Diffstat (limited to 'clang/test/CodeGen/X86')
-rwxr-xr-x  clang/test/CodeGen/X86/amx_movrs_tranpose.c | 53
-rwxr-xr-x  clang/test/CodeGen/X86/amx_movrs_tranpose_api.c | 81
-rwxr-xr-x  clang/test/CodeGen/X86/amx_movrs_transpose_errors.c | 22
-rw-r--r--  clang/test/CodeGen/X86/amx_tf32.c | 5
-rw-r--r--  clang/test/CodeGen/X86/amx_tf32_api.c | 7
-rw-r--r--  clang/test/CodeGen/X86/amx_tf32_errors.c | 8
-rw-r--r--  clang/test/CodeGen/X86/amx_transpose.c | 75
-rw-r--r--  clang/test/CodeGen/X86/amx_transpose_api.c | 114
-rw-r--r--  clang/test/CodeGen/X86/amx_transpose_errors.c | 75
-rw-r--r--  clang/test/CodeGen/X86/avx-builtins-constrained.c | 35
-rw-r--r--  clang/test/CodeGen/X86/avx-builtins.c | 37
-rw-r--r--  clang/test/CodeGen/X86/avx10_2_512ni-builtins.c | 36
-rw-r--r--  clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 8
-rw-r--r--  clang/test/CodeGen/X86/avx10_2ni-builtins.c | 48
-rw-r--r--  clang/test/CodeGen/X86/avx2-builtins.c | 95
-rw-r--r--  clang/test/CodeGen/X86/avx512bf16-builtins.c | 23
-rw-r--r--  clang/test/CodeGen/X86/avx512bitalg-builtins.c | 10
-rw-r--r--  clang/test/CodeGen/X86/avx512bw-builtins.c | 499
-rw-r--r--  clang/test/CodeGen/X86/avx512dq-builtins.c | 125
-rw-r--r--  clang/test/CodeGen/X86/avx512f-builtins.c | 539
-rw-r--r--  clang/test/CodeGen/X86/avx512fp16-builtins-constrained.c | 85
-rw-r--r--  clang/test/CodeGen/X86/avx512fp16-builtins.c | 14
-rw-r--r--  clang/test/CodeGen/X86/avx512vbmi-builtins.c | 295
-rw-r--r--  clang/test/CodeGen/X86/avx512vbmivl-builtin.c | 174
-rw-r--r--  clang/test/CodeGen/X86/avx512vl-builtins.c | 499
-rw-r--r--  clang/test/CodeGen/X86/avx512vlbf16-builtins.c | 41
-rw-r--r--  clang/test/CodeGen/X86/avx512vlbitalg-builtins.c | 14
-rw-r--r--  clang/test/CodeGen/X86/avx512vlbw-builtins.c | 293
-rw-r--r--  clang/test/CodeGen/X86/avx512vldq-builtins.c | 12
-rw-r--r--  clang/test/CodeGen/X86/avx512vlfp16-builtins-constrained.c | 37
-rw-r--r--  clang/test/CodeGen/X86/avx512vlvnni-builtins.c | 24
-rw-r--r--  clang/test/CodeGen/X86/avx512vnni-builtins.c | 12
-rw-r--r--  clang/test/CodeGen/X86/avxvnni-builtins.c | 16
-rw-r--r--  clang/test/CodeGen/X86/avxvnniint16-builtins.c | 24
-rwxr-xr-x  clang/test/CodeGen/X86/f16c-builtins.c | 57
-rw-r--r--  clang/test/CodeGen/X86/fma-builtins.c | 100
-rw-r--r--  clang/test/CodeGen/X86/fma4-builtins.c | 116
-rw-r--r--  clang/test/CodeGen/X86/gfni-builtins.c | 394
-rw-r--r--  clang/test/CodeGen/X86/math-builtins.c | 322
-rw-r--r--  clang/test/CodeGen/X86/mmx-builtins.c | 18
-rw-r--r--  clang/test/CodeGen/X86/mmx-inline-asm-error.c | 4
-rw-r--r--  clang/test/CodeGen/X86/sse-builtins-constrained.c | 5
-rw-r--r--  clang/test/CodeGen/X86/sse-builtins.c | 4
-rw-r--r--  clang/test/CodeGen/X86/sse2-builtins-constrained.c | 37
-rw-r--r--  clang/test/CodeGen/X86/sse2-builtins.c | 45
-rw-r--r--  clang/test/CodeGen/X86/sse3-builtins.c | 2
-rw-r--r--  clang/test/CodeGen/X86/sse41-builtins.c | 10
-rw-r--r--  clang/test/CodeGen/X86/ssse3-builtins.c | 2
48 files changed, 3577 insertions(+), 974 deletions(-)
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose.c b/clang/test/CodeGen/X86/amx_movrs_tranpose.c
deleted file mode 100755
index 192c153..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz0rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz0rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz1rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz1rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c b/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
deleted file mode 100755
index b174cc5..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0rs
- // CHECK: call void @llvm.x86.t2rpntlvwz0rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0rst1
- // CHECK: call void @llvm.x86.t2rpntlvwz0rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0rst1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1rs
- // CHECK: call void @llvm.x86.t2rpntlvwz1rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1rst1
- // CHECK: call void @llvm.x86.t2rpntlvwz1rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1rst1(1, A, B);
-}
-
-void test__tile_2rpntlvwz0rs(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz0rs
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz0rst1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz0rst1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0rst1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rs(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz1rs
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rst1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz1rst1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1rst1(&dst0, &dst1, buf, STRIDE);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c b/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
deleted file mode 100755
index 840b52b..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-transpose -target-feature +amx-movrs \
-// RUN: -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
- _tile_2rpntlvwz0rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
- _tile_2rpntlvwz0rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
- _tile_2rpntlvwz1rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
- _tile_2rpntlvwz1rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c
index 661a9df..54ad6bb 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -10,8 +10,3 @@ void test_tile_mmultf32ps(void) {
_tile_mmultf32ps(1, 2, 3);
}
-void test_tile_tmmultf32ps(void) {
- // CHECK-LABEL: @test_tile_tmmultf32ps(
- // CHECK: call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3)
- _tile_tmmultf32ps(1, 2, 3);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c
index 2ac8489..8f574b7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -18,10 +18,3 @@ void test_tile_mmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
__tile_mmultf32ps(&c, a, b);
}
-void test_tile_tmmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tmmultf32ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttmmultf32ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tmmultf32ps(&c, a, b);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c
index 4502130..f0fdd06 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -13,11 +13,3 @@ void test_tile_mmultf32ps() {
_tile_mmultf32ps(1, 3, 3); // expected-error {{tile arguments must refer to different tiles}}
}
-void test_tile_tmmultf32ps() {
- _tile_tmmultf32ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tmmultf32ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tmmultf32ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose.c b/clang/test/CodeGen/X86/amx_transpose.c
deleted file mode 100644
index 7e88fd8..0000000
--- a/clang/test/CodeGen/X86/amx_transpose.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-transpose \
-// RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -target-feature +avx512f -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0
- // CHECK: call void @llvm.x86.t2rpntlvwz0(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0(1, A, B);
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0t1
- // CHECK: call void @llvm.x86.t2rpntlvwz0t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0t1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1
- // CHECK: call void @llvm.x86.t2rpntlvwz1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1t1
- // CHECK: call void @llvm.x86.t2rpntlvwz1t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1t1(1, A, B);
-}
-
-void test_tile_transposed(void)
-{
- // CHECK-LABEL: @test_tile_transposed
- // CHECK: call void @llvm.x86.ttransposed(i8 1, i8 2)
- _tile_transposed(1, 2);
-}
-
-void test_tile_tdpbf16ps(void)
-{
- // CHECK-LABEL: @test_tile_tdpbf16ps
- // CHECK: call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3)
- _tile_tdpbf16ps(1, 2, 3);
-}
-
-void test_tile_tdpfp16ps(void)
-{
- // CHECK-LABEL: @test_tile_tdpfp16ps
- // CHECK: call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6)
- _tile_tdpfp16ps(4, 5, 6);
-}
-
-void test_tile_tcmmimfp16ps(void) {
- // CHECK-LABEL: @test_tile_tcmmimfp16ps
- // CHECK: call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3)
- _tile_tcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_tcmmrlfp16ps(void) {
- // CHECK-LABEL: @test_tile_tcmmrlfp16ps
- // CHECK: call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3)
- _tile_tcmmrlfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtcmmimfp16ps(void) {
- // CHECK-LABEL: @test_tile_conjtcmmimfp16ps
- // CHECK: call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3)
- _tile_conjtcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtfp16(void) {
- // CHECK-LABEL: @test_tile_conjtfp16
- // CHECK: call void @llvm.x86.tconjtfp16(i8 1, i8 2)
- _tile_conjtfp16(1, 2);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_api.c b/clang/test/CodeGen/X86/amx_transpose_api.c
deleted file mode 100644
index dc3ef51..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_api.c
+++ /dev/null
@@ -1,114 +0,0 @@
-// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f \
-// RUN: -target-feature +amx-transpose -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
-
-#include <immintrin.h>
-
-char buf[2048];
-#define STRIDE 32
-
-char buf2[2048];
-
-void test_tile_2rpntlvwz0(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz0
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz0t1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz0t1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1t1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz1t1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_transposed(__tile1024i dst, __tile1024i src) {
- //CHECK-LABEL: @test_tile_transposed
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttransposed.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_transposed(&dst, src);
-}
-
-void test_tile_tdpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tdpbf16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttdpbf16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tdpbf16ps(&c, a, b);
-}
-
-void test_tile_tdpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tdpfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttdpfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tdpfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tcmmimfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttcmmimfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmrlfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tcmmrlfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttcmmrlfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tcmmrlfp16ps(&c, a, b);
-}
-
-void test_tile_conjtcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_conjtcmmimfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_conjtcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_conjtfp16(__tile1024i dst, __tile1024i src) {
- //CHECK-LABEL: @test_tile_conjtfp16
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.tconjtfp16.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_conjtfp16(&dst, src);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_errors.c b/clang/test/CodeGen/X86/amx_transpose_errors.c
deleted file mode 100644
index 80368c5..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_errors.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-transpose \
-// RUN: -target-feature +avx512f -target-feature +amx-fp16 -target-feature +amx-complex -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-// Transpose
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
- _tile_2rpntlvwz0(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
- _tile_2rpntlvwz0t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
- _tile_2rpntlvwz1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
- _tile_2rpntlvwz1t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tdpbf16ps()
-{
- _tile_tdpbf16ps(8, 2, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 8, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpbf16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpbf16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tdpfp16ps()
-{
- _tile_tdpfp16ps(8, 5, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 8, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 5, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpfp16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_transposed()
-{
- _tile_transposed(8, 2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_transposed(1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tcmmimfp16ps() {
- _tile_tcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tcmmrlfp16ps() {
- _tile_tcmmrlfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtcmmimfp16ps() {
- _tile_conjtcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtfp16() {
- _tile_conjtfp16(16, 2); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_conjtfp16(1, 26); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGen/X86/avx-builtins-constrained.c b/clang/test/CodeGen/X86/avx-builtins-constrained.c
new file mode 100644
index 0000000..428febe
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx-builtins-constrained.c
@@ -0,0 +1,35 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m256 test_mm256_sqrt_ps(__m256 x) {
+ // COMMON-LABEL: test_mm256_sqrt_ps
+ // UNCONSTRAINED: call {{.*}}<8 x float> @llvm.sqrt.v8f32(<8 x float> {{.*}})
+ // CONSTRAINED: call {{.*}}<8 x float> @llvm.experimental.constrained.sqrt.v8f32(<8 x float> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtps %ymm{{.*}},
+ return _mm256_sqrt_ps(x);
+}
+
+__m256d test_mm256_sqrt_pd(__m256d x) {
+ // COMMON-LABEL: test_mm256_sqrt_pd
+ // UNCONSTRAINED: call {{.*}}<4 x double> @llvm.sqrt.v4f64(<4 x double> {{.*}})
+ // CONSTRAINED: call {{.*}}<4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtpd %ymm{{.*}},
+ return _mm256_sqrt_pd(x);
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 737febb..13da429 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0));
__m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_addsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f));
__m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_and_pd
@@ -966,6 +968,8 @@ __m128 test_mm256_cvtpd_ps(__m256d A) {
return _mm256_cvtpd_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps((__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
+
__m256i test_mm256_cvtps_epi32(__m256 A) {
// CHECK-LABEL: test_mm256_cvtps_epi32
// CHECK: call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %{{.*}})
@@ -1398,18 +1402,21 @@ __m128d test_mm_permute_pd(__m128d A) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
return _mm_permute_pd(A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 1.0));
__m256d test_mm256_permute_pd(__m256d A) {
// CHECK-LABEL: test_mm256_permute_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
return _mm256_permute_pd(A, 5);
}
+TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f));
__m128 test_mm_permute_ps(__m128 A) {
// CHECK-LABEL: test_mm_permute_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
return _mm_permute_ps(A, 0x1b);
}
+TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0x1b), 4.0, 3.0, 2.0, 1.0));
// Test case for PR12401
__m128 test2_mm_permute_ps(__m128 a) {
@@ -1417,12 +1424,14 @@ __m128 test2_mm_permute_ps(__m128 a) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
return _mm_permute_ps(a, 0xe6);
}
+TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0xe6), 3.0, 2.0, 3.0, 4.0));
__m256 test_mm256_permute_ps(__m256 A) {
// CHECK-LABEL: test_mm256_permute_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
return _mm256_permute_ps(A, 0x1b);
}
+TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0));
__m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_permute2f128_pd
@@ -1447,24 +1456,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})
return _mm_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_permutevar_pd(
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_permutevar_pd(
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b00, 0b10})
+ ),
+ 1.0, 0.0, 2.0, 3.0
+));
__m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
// CHECK-LABEL: test_mm_permutevar_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}})
return _mm_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_permutevar_ps(
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+ ),
+ 3.0, 2.0, 1.0, 0.0
+));
__m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_permutevar_ps(
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0
+));
__m256 test_mm256_rcp_ps(__m256 A) {
// CHECK-LABEL: test_mm256_rcp_ps
diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
index 728c9f5..1ba6e87 100644
--- a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
@@ -176,20 +176,20 @@ __m512i test_mm512_maskz_dpbuuds_epi32(__mmask16 __U, __m512i __W, __m512i __A,
/* VNNI INT16 */
__m512i test_mm512_dpwsud_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwsud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwsud_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwsud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwsud_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwsud_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwsud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwsud_epi32(__U, __A, __B, __C);
@@ -197,20 +197,20 @@ __m512i test_mm512_maskz_dpwsud_epi32(__mmask16 __U, __m512i __A, __m512i __B, _
__m512i test_mm512_dpwsuds_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwsuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwsuds_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwsuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwsuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwsuds_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwsuds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwsuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwsuds_epi32(__U, __A, __B, __C);
@@ -218,20 +218,20 @@ __m512i test_mm512_maskz_dpwsuds_epi32(__mmask16 __U, __m512i __A, __m512i __B,
__m512i test_mm512_dpwusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwusd_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwusd_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwusd_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwusd_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwusd_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwusd_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwusd_epi32(__U, __A, __B, __C);
@@ -239,20 +239,20 @@ __m512i test_mm512_maskz_dpwusd_epi32(__mmask16 __U, __m512i __A, __m512i __B, _
__m512i test_mm512_dpwusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwusds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwusds_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwusds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwusds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwusds_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwusds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwusds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwusds_epi32(__U, __A, __B, __C);
@@ -260,20 +260,20 @@ __m512i test_mm512_maskz_dpwusds_epi32(__mmask16 __U, __m512i __A, __m512i __B,
__m512i test_mm512_dpwuud_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwuud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwuud_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwuud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwuud_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwuud_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwuud_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwuud_epi32(__U, __A, __B, __C);
@@ -281,20 +281,20 @@ __m512i test_mm512_maskz_dpwuud_epi32(__mmask16 __U, __m512i __A, __m512i __B, _
__m512i test_mm512_dpwuuds_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_dpwuuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwuuds_epi32(__A, __B, __C);
}
__m512i test_mm512_mask_dpwuuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_mm512_mask_dpwuuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwuuds_epi32(__A, __B, __C, __D);
}
__m512i test_mm512_maskz_dpwuuds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: @test_mm512_maskz_dpwuuds_epi32(
-// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwuuds_epi32(__U, __A, __B, __C);
diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
index f8a4c51..fac7ef2 100644
--- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@@ -1,7 +1,11 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m256bh test_mm256_setzero_pbh() {
// CHECK-LABEL: @test_mm256_setzero_pbh
@@ -353,6 +357,7 @@ __m128bh test_mm_move_sbh(__m128bh A, __m128bh B) {
// CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0
return _mm_move_sbh(A, B);
}
+TEST_CONSTEXPR(match_m128bh(_mm_move_sbh((__m128bh)(__v8bf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__m128bh)(__v8bf){9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f}), 9.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
// CHECK-LABEL: @test_mm_mask_move_sbh
@@ -366,6 +371,7 @@ __m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128b
// CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
return _mm_mask_move_sbh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128bh(_mm_mask_move_sbh((__m128bh)(__v8bf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__mmask8)0x01, (__m128bh)(__v8bf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}, (__m128bh)(__v8bf){9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f}), 9.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
// CHECK-LABEL: @test_mm_maskz_move_sbh
diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
index a250d91..be2719c 100644
--- a/clang/test/CodeGen/X86/avx10_2ni-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
@@ -259,168 +259,168 @@ __m256i test_mm256_maskz_dpbuuds_epi32(__mmask8 __U, __m256i __W, __m256i __A, _
// VNNI INT16
__m128i test_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwsud_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwsud_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwsud_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwsud_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwsud_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwsud_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwsud_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwsud_epi32(__U, __A, __B, __C);
}
__m128i test_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwsuds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwsuds_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwsuds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwsuds_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwsuds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwsuds_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwsuds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwsuds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwsuds_epi32(__U, __A, __B, __C);
}
__m128i test_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwusd_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwusd_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwusd_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwusd_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwusd_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwusd_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwusd_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwusd_epi32(__U, __A, __B, __C);
}
__m128i test_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwusds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwusds_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwusds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwusds_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwusds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwusds_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwusds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwusds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwusds_epi32(__U, __A, __B, __C);
}
__m128i test_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwuud_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwuud_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwuud_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwuud_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwuud_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwuud_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwuud_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwuud_epi32(__U, __A, __B, __C);
}
__m128i test_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
// CHECK-LABEL: @test_mm_mask_dpwuuds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwuuds_epi32(__A, __B, __C, __D);
}
__m128i test_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: @test_mm_maskz_dpwuuds_epi32(
-// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwuuds_epi32(__U, __A, __B, __C);
}
__m256i test_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
// CHECK-LABEL: @test_mm256_mask_dpwuuds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwuuds_epi32(__A, __B, __C, __D);
}
__m256i test_mm256_maskz_dpwuuds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: @test_mm256_maskz_dpwuuds_epi32(
-// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwuuds_epi32(__U, __A, __B, __C);
}
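
Not part of the patch itself: the select <4 x i1> / select <8 x i1> lines checked above are the usual merge/zero-masking step applied after the VNNI dot-product call. A rough scalar sketch of just that step, with an invented helper name (the maskz forms simply pass a zero vector as the passthrough):

#include <stdint.h>
/* mask bit i picks the computed lane, otherwise the passthrough lane. */
static void apply_mask_epi32(int32_t *dst, const int32_t *computed,
                             const int32_t *passthrough, unsigned mask, int lanes) {
  for (int i = 0; i < lanes; ++i)
    dst[i] = ((mask >> i) & 1) ? computed[i] : passthrough[i];
}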
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index de4cb2f..c9474e9 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -109,6 +109,9 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
return _mm256_alignr_epi8(a, b, 2);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 1, 2, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 17, 18));
+TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 16), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
+TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test2_mm256_alignr_epi8
@@ -302,22 +305,26 @@ __m256i test_mm256_bslli_epi128(__m256i a) {
// CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
return _mm256_bslli_epi128(a, 3);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_bslli_epi128(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 3), 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29));
+TEST_CONSTEXPR(match_v32qi(_mm256_bslli_epi128(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_bsrli_epi128(__m256i a) {
// CHECK-LABEL: test_mm256_bsrli_epi128
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
return _mm256_bsrli_epi128(a, 3);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_bsrli_epi128(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 3), 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0));
+TEST_CONSTEXPR(match_v32qi(_mm256_bsrli_epi128(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_cmpeq_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi8
// CHECK: icmp eq <32 x i8>
return _mm256_cmpeq_epi8(a, b);
}
-TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8(
- (__m128i)(__v16qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16},
- (__m128i)(__v16qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}),
- 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1));
+TEST_CONSTEXPR(match_v32qi(_mm256_cmpeq_epi8(
+ (__m256i)(__v32qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16,-16,15,-14,13,-12,11,-10,9,-8,7,-6,5,4,-3,2,-1},
+ (__m256i)(__v32qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16,10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}),
+ 0, -1, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
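
Not part of the patch itself: the cmpeq expectations above follow the usual SIMD compare convention of -1 (all bits set) for equal lanes and 0 otherwise. A minimal scalar sketch with an invented helper name:

#include <stdint.h>
static void cmpeq_epi8_model(int8_t *dst, const int8_t *a, const int8_t *b, int lanes) {
  for (int i = 0; i < lanes; ++i)
    dst[i] = (int8_t)((a[i] == b[i]) ? -1 : 0);  /* 0xFF on match, 0x00 otherwise */
}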
__m256i test_mm256_cmpeq_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi16
@@ -1104,24 +1111,50 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
return _mm256_permute4x64_epi64(a, 35);
}
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL));
+// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL));
+// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL));
+// Test with negative values
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, -30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL));
__m256d test_mm256_permute4x64_pd(__m256d a) {
// CHECK-LABEL: test_mm256_permute4x64_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
return _mm256_permute4x64_pd(a, 25);
}
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x00), 4.0, 4.0, 4.0, 4.0));
+// Control value 0x1B: [3,2,1,0] -> reverse order [D,C,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x39), 3.0, 2.0, 1.0, 4.0));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x12), 2.0, 4.0, 3.0, 4.0));
+// Control value 0xE4: [0,1,2,3] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0));
__m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_permutevar8x32_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar8x32_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_permutevar8x32_epi32((__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}), 7, 6, 5, 4, 3, 2, 1, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_permutevar8x32_epi32((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}, (__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 0, 0, 0, 0, 0, 0, 0, 0));
__m256 test_mm256_permutevar8x32_ps(__m256 a, __m256i b) {
// CHECK-LABEL: test_mm256_permutevar8x32_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx2.permps(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar8x32_ps(a, b);
}
+TEST_CONSTEXPR(match_m256(_mm256_permutevar8x32_ps((__m256){7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f}, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}), 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_permutevar8x32_ps((__m256){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, (__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
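
Not part of the patch itself: the permute expectations above can be reproduced with two small scalar models (helper names invented). _mm256_permute4x64_epi64/_pd read four 2-bit selectors from the imm8, lowest bits first; _mm256_permutevar8x32_epi32/_ps use only the low three bits of each index element:

#include <stdint.h>
static void permute4x64_model(int64_t *dst, const int64_t *src, unsigned imm8) {
  for (int i = 0; i < 4; ++i)
    dst[i] = src[(imm8 >> (2 * i)) & 3];   /* e.g. 0x1B -> selectors 3,2,1,0 */
}
static void permutevar8x32_model(int32_t *dst, const int32_t *src, const int32_t *idx) {
  for (int i = 0; i < 8; ++i)
    dst[i] = src[idx[i] & 7];              /* only the low 3 bits of each index are used */
}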
__m256i test_mm256_sad_epu8(__m256i x, __m256i y) {
// CHECK-LABEL: test_mm256_sad_epu8
@@ -1180,6 +1213,37 @@ __m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed, -1,2,-3,4}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, 1,-2,3,-4));
+__m256i test_mm256_sll_epi16(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi16
+ // CHECK: call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ return _mm256_sll_epi16(a, b);
+}
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+
+__m256i test_mm256_sll_epi32(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi32
+ // CHECK: call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ return _mm256_sll_epi32(a, b);
+}
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14));
+
+__m256i test_mm256_sll_epi64(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ return _mm256_sll_epi64(a, b);
+}
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6));
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 1}), 0, 2, 4, 6));
+
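Not part of the patch itself: the new sll_epi16/32/64 expectations rely on how the count operand of the psll family works. The shift amount is the entire low 64 bits of the XMM count (the remaining elements are ignored), and a count of at least the element width clears every lane. A one-line scalar sketch for the 16-bit case (helper name invented):

#include <stdint.h>
static uint16_t sll_epi16_lane_model(uint16_t x, uint64_t count) {
  return (uint16_t)(count >= 16 ? 0 : x << count);   /* same count applied to all lanes */
}
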
__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %{{.*}}, i32 %{{.*}})
@@ -1272,12 +1336,22 @@ __m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm256_sra_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
__m256i test_mm256_sra_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sra_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm256_sra_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){1, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){0, 1, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){1, 0, 1, 1}), -16384, 16383, -2, -1, -1, 0, 0, 1));
__m256i test_mm256_srai_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srai_epi16
@@ -1324,18 +1398,31 @@ __m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm256_srl_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7));
__m256i test_mm256_srl_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi32
// CHECK:call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm256_srl_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 0, 1, 1, 2, 2, 3, 3));
__m256i test_mm256_srl_epi64(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm256_srl_epi64(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 0, 1, 1));
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 1}), 0, 0, 1, 1));
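
Not part of the patch itself: the srl/sra expectations above differ only in what gets shifted in once the count reaches the element width: logical shifts produce 0, arithmetic shifts replicate the sign bit, hence the -1/0 pattern in the sra tests. A sketch for 16-bit lanes (helper names invented; assumes the usual arithmetic >> for signed values):

#include <stdint.h>
static uint16_t srl_epi16_lane_model(uint16_t x, uint64_t count) {
  return (uint16_t)(count >= 16 ? 0 : x >> count);            /* zero fill */
}
static int16_t sra_epi16_lane_model(int16_t x, uint64_t count) {
  if (count >= 16) return (int16_t)(x < 0 ? -1 : 0);          /* sign fill */
  return (int16_t)(x >> count);
}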
__m256i test_mm256_srli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srli_epi16
diff --git a/clang/test/CodeGen/X86/avx512bf16-builtins.c b/clang/test/CodeGen/X86/avx512bf16-builtins.c
index 52c20aa..556c662 100644
--- a/clang/test/CodeGen/X86/avx512bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bf16-builtins.c
@@ -2,8 +2,14 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror | FileCheck %s
+//
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
float test_mm_cvtsbh_ss(__bf16 A) {
// CHECK-LABEL: test_mm_cvtsbh_ss
@@ -11,6 +17,7 @@ float test_mm_cvtsbh_ss(__bf16 A) {
// CHECK: ret float %{{.*}}
return _mm_cvtsbh_ss(A);
}
+TEST_CONSTEXPR(_mm_cvtsbh_ss(-1.0f) == -1.0f);
__m512bh test_mm512_cvtne2ps_pbh(__m512 A, __m512 B) {
// CHECK-LABEL: test_mm512_cvtne2ps_pbh
@@ -74,23 +81,23 @@ __m512 test_mm512_mask_dpbf16_ps(__m512 D, __m512bh A, __m512bh B, __mmask16 U)
__m512 test_mm512_cvtpbh_ps(__m256bh A) {
// CHECK-LABEL: test_mm512_cvtpbh_ps
- // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <16 x bfloat> %{{.*}} to <16 x float>
return _mm512_cvtpbh_ps(A);
}
+TEST_CONSTEXPR(match_m512(_mm512_cvtpbh_ps((__m256bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f, -128.0f, -0.5f, 0.25f, -0.125f, -4.0f, 2.0f, -1.0f, 0.0f}), -0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f, -128.0f, -0.5f, 0.25f, -0.125f, -4.0f, 2.0f, -1.0f, 0.0f));
__m512 test_mm512_maskz_cvtpbh_ps(__mmask16 M, __m256bh A) {
// CHECK-LABEL: test_mm512_maskz_cvtpbh_ps
- // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- // CHECK: call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <16 x bfloat> %{{.*}} to <16 x float>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_cvtpbh_ps(M, A);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_cvtpbh_ps(0xA753, (__m256bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f, -128.0f, -0.5f, 0.25f, -0.125f, -4.0f, 2.0f, -1.0f, 0.0f}), -0.0f, 1.0f, 0.0f, 0.0f, -8.0f, 0.0f, -32.0f, 0.0f, -128.0f, -0.5f, 0.25f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f));
__m512 test_mm512_mask_cvtpbh_ps(__m512 S, __mmask16 M, __m256bh A) {
// CHECK-LABEL: test_mm512_mask_cvtpbh_ps
- // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
- // CHECK: call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %{{.*}}, i32 %{{.*}})
- // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ // CHECK: fpext <16 x bfloat> %{{.*}} to <16 x float>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_cvtpbh_ps(S, M, A);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpbh_ps((__m512){ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f }, 0xA753, (__m256bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f, -128.0f, -0.5f, 0.25f, -0.125f, -4.0f, 2.0f, -1.0f, 0.0f}), -0.0f, 1.0f, 99.0f, 99.0f, -8.0f, 99.0f, -32.0f, 99.0f, -128.0f, -0.5f, 0.25f, 99.0f, 99.0f, 2.0f, 99.0f, 0.0f));
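
Not part of the patch itself: the removed sext+pslli checks and the new fpext check describe the same value-level conversion, because a __bf16 is just the top 16 bits of an IEEE binary32, so the widening is exact. A scalar sketch (helper name invented):

#include <stdint.h>
#include <string.h>
static float bf16_bits_to_float_model(uint16_t bits) {
  uint32_t widened = (uint32_t)bits << 16;   /* the old lowering's 16-bit left shift */
  float f;
  memcpy(&f, &widened, sizeof f);            /* reinterpret as binary32 */
  return f;
}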
diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
index 3ac8674..7d524ab 100644
--- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
@@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.512
return _mm512_bitshuffle_epi64_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask(
+ (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
+ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+ 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL);
+TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL,
+ (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
+ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+ 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL);
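
Not part of the patch itself: the vpshufbitqmb expectations above can be reproduced with a small scalar model (helper name invented). Result bit i is bit (ctrl[i] & 63) of the 64-bit source lane containing byte i; the masked form then ANDs the result with the write mask:

#include <stdint.h>
static uint64_t bitshuffle_model(const uint64_t *src_qwords, const uint8_t *ctrl_bytes,
                                 int nbytes) {
  uint64_t out = 0;
  for (int i = 0; i < nbytes; ++i)           /* nbytes is 64 for the 512-bit form */
    if ((src_qwords[i / 8] >> (ctrl_bytes[i] & 63)) & 1)
      out |= 1ULL << i;
  return out;
}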
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index be2cd48..7cdec9b 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -209,6 +209,10 @@ unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0123'4567, 0xFEDC'BA98) == 0);
+
unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -222,6 +226,10 @@ unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0000'0000, 0x0000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0123'4567, 0xFEDC'BA98) == 1);
+
unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -242,6 +250,30 @@ unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+constexpr unsigned char
+test_kortest_mask32_u8(unsigned int A, unsigned int B) {
+ unsigned char all_ones{};
+ return (_kortest_mask32_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask32_u8() {
+ constexpr unsigned int A1 = 0x0000'0000;
+ constexpr unsigned int B1 = 0x0000'0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask32_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000'0000;
+ constexpr unsigned int B2 = 0x8000'0000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask32_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0x0123'4567;
+ constexpr unsigned int B3 = 0xFEDC'BA98;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask32_u8(A3, B3) == expected_result_3);
+}
+#endif
+
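Not part of the patch itself: the kortest expectations here reduce to two scalar predicates on the OR of the operands (helper names invented; all_ones is 0xFFFFFFFF for 32-bit masks and ~0ULL for 64-bit ones):

#include <stdint.h>
static int kortestz_model(uint64_t a, uint64_t b) { return (a | b) == 0; }               /* ZF */
static int kortestc_model(uint64_t a, uint64_t b, uint64_t all_ones) { return (a | b) == all_ones; }  /* CF */
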
unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -255,6 +287,10 @@ unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0123'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 0);
+
unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -268,6 +304,10 @@ unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0023'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 0);
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0123'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 1);
+
unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -288,6 +328,30 @@ unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+constexpr unsigned char
+test_kortest_mask64_u8(unsigned long long A, unsigned long long B) {
+ unsigned char all_ones{};
+ return (_kortest_mask64_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask64_u8() {
+ constexpr unsigned long long A1 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B1 = 0x0000'0000'0000'0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask64_u8(A1, B1) == expected_result_1);
+ constexpr unsigned long long A2 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B2 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask64_u8(A2, B2) == expected_result_2);
+ constexpr unsigned long long A3 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned long long B3 = 0xFEDC'BA98'7654'3210;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask64_u8(A3, B3) == expected_result_3);
+}
+#endif
+
unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -298,6 +362,11 @@ unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0000'0000, 0x8000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0xF000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0123'4567, 0x0123'4567) == 0);
+
unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -308,6 +377,11 @@ unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0xF000'0000, 0x8000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0123'4567, 0x0123'4567) == 1);
+
unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -322,6 +396,34 @@ unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+constexpr unsigned char
+test_ktest_mask32_u8(unsigned int A, unsigned int B) {
+ unsigned char and_not{};
+ return (_ktest_mask32_u8(A, B, &and_not) << 4) | and_not;
+}
+
+void _ktest_mask32_u8() {
+ constexpr unsigned int A1 = 0x0000'0000;
+ constexpr unsigned int B1 = 0x0000'0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask32_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000'0000;
+ constexpr unsigned int B2 = 0x8000'0000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask32_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0xF000'0000;
+ constexpr unsigned int B3 = 0x8000'0000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask32_u8(A3, B3) == expected_result_3);
+ constexpr unsigned int A4 = 0x0123'4567;
+ constexpr unsigned int B4 = 0x0123'4567;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask32_u8(A4, B4) == expected_result_4);
+}
+#endif
+
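Not part of the patch itself: the ktest expectations here and below are consistent with ZF = ((A & B) == 0) and CF = ((~A & B) == 0), i.e. the carry result is set exactly when B is covered by A (helper names invented):

#include <stdint.h>
static int ktestz_model(uint64_t a, uint64_t b) { return (a & b) == 0; }    /* ZF */
static int ktestc_model(uint64_t a, uint64_t b) { return (~a & b) == 0; }   /* CF */
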
unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -332,6 +434,11 @@ unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0xF000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0123'4567'89AB'CDEF, 0x0123'4567'89AB'CDEF) == 0);
+
unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -342,6 +449,11 @@ unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0xF000'0000'0000'0000, 0x8000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0123'4567'89AB'CDEF, 0x0123'4567'89AB'CDEF) == 1);
+
unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -356,6 +468,34 @@ unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+constexpr unsigned char
+test_ktest_mask64_u8(unsigned long long A, unsigned long long B) {
+ unsigned char and_not{};
+ return (_ktest_mask64_u8(A, B, &and_not) << 4) | and_not;
+}
+
+void _ktest_mask64_u8() {
+ constexpr unsigned long long A1 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B1 = 0x0000'0000'0000'0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask64_u8(A1, B1) == expected_result_1);
+ constexpr unsigned long long A2 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B2 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask64_u8(A2, B2) == expected_result_2);
+ constexpr unsigned long long A3 = 0xF000'0000'0000'0000;
+ constexpr unsigned long long B3 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask64_u8(A3, B3) == expected_result_3);
+ constexpr unsigned long long A4 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned long long B4 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask64_u8(A4, B4) == expected_result_4);
+}
+#endif
+
__mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kadd_mask32
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -394,6 +534,10 @@ __mmask32 test_kshiftli_mask32(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <32 x i1> zeroinitializer, <32 x i1> [[VAL]], <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
return _mm512_mask_cmpneq_epu16_mask(_kshiftli_mask32(_mm512_cmpneq_epu16_mask(A, B), 31), C, D);
}
+TEST_CONSTEXPR(_kshiftli_mask32(0x00000001, 1) == 0x00000002);
+TEST_CONSTEXPR(_kshiftli_mask32(0x00000001, 31) == 0x80000000);
+TEST_CONSTEXPR(_kshiftli_mask32(0x00000001, 32) == 0x00000000);
+TEST_CONSTEXPR(_kshiftli_mask32(0x0000FFFF, 8) == 0x00FFFF00);
__mmask32 test_kshiftri_mask32(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK-LABEL: test_kshiftri_mask32
@@ -401,6 +545,10 @@ __mmask32 test_kshiftri_mask32(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <32 x i1> [[VAL]], <32 x i1> zeroinitializer, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
return _mm512_mask_cmpneq_epu16_mask(_kshiftri_mask32(_mm512_cmpneq_epu16_mask(A, B), 31), C, D);
}
+TEST_CONSTEXPR(_kshiftri_mask32(0x80000000, 1) == 0x40000000);
+TEST_CONSTEXPR(_kshiftri_mask32(0x80000000, 31) == 0x00000001);
+TEST_CONSTEXPR(_kshiftri_mask32(0x80000000, 32) == 0x00000000);
+TEST_CONSTEXPR(_kshiftri_mask32(0xFFFF0000, 8) == 0x00FFFF00);
__mmask64 test_kshiftli_mask64(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK-LABEL: test_kshiftli_mask64
@@ -408,6 +556,10 @@ __mmask64 test_kshiftli_mask64(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <64 x i1> zeroinitializer, <64 x i1> [[VAL]], <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95>
return _mm512_mask_cmpneq_epu8_mask(_kshiftli_mask64(_mm512_cmpneq_epu8_mask(A, B), 32), C, D);
}
+TEST_CONSTEXPR(_kshiftli_mask64(0x0000000000000001ULL, 1) == 0x0000000000000002ULL);
+TEST_CONSTEXPR(_kshiftli_mask64(0x0000000000000001ULL, 63) == 0x8000000000000000ULL);
+TEST_CONSTEXPR(_kshiftli_mask64(0x0000000000000001ULL, 64) == 0x0000000000000000ULL);
+TEST_CONSTEXPR(_kshiftli_mask64(0x00000000FFFFFFFFULL, 16) == 0x0000FFFFFFFF0000ULL);
__mmask64 test_kshiftri_mask64(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK-LABEL: test_kshiftri_mask64
@@ -415,27 +567,41 @@ __mmask64 test_kshiftri_mask64(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <64 x i1> [[VAL]], <64 x i1> zeroinitializer, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95>
return _mm512_mask_cmpneq_epu8_mask(_kshiftri_mask64(_mm512_cmpneq_epu8_mask(A, B), 32), C, D);
}
+TEST_CONSTEXPR(_kshiftri_mask64(0x8000000000000000ULL, 1) == 0x4000000000000000ULL);
+TEST_CONSTEXPR(_kshiftri_mask64(0x8000000000000000ULL, 63) == 0x0000000000000001ULL);
+TEST_CONSTEXPR(_kshiftri_mask64(0x8000000000000000ULL, 64) == 0x0000000000000000ULL);
+TEST_CONSTEXPR(_kshiftri_mask64(0xFFFFFFFF00000000ULL, 16) == 0x0000FFFFFFFF0000ULL);
unsigned int test_cvtmask32_u32(__m512i A, __m512i B) {
// CHECK-LABEL: test_cvtmask32_u32
return _cvtmask32_u32(_mm512_cmpneq_epu16_mask(A, B));
}
+TEST_CONSTEXPR(_cvtmask32_u32((__mmask32)0xDEADBEEF) == 0xDEADBEEF);
+
unsigned long long test_cvtmask64_u64(__m512i A, __m512i B) {
// CHECK-LABEL: test_cvtmask64_u64
return _cvtmask64_u64(_mm512_cmpneq_epu8_mask(A, B));
}
+TEST_CONSTEXPR(_cvtmask64_u64((__mmask64)0x123456789ABCDEF0ULL) == 0x123456789ABCDEF0ULL);
+
__mmask32 test_cvtu32_mask32(__m512i A, __m512i B, unsigned int C) {
// CHECK-LABEL: test_cvtu32_mask32
return _mm512_mask_cmpneq_epu16_mask(_cvtu32_mask32(C), A, B);
}
+TEST_CONSTEXPR(_cvtu32_mask32(0x13579BDF) == (__mmask32)0x13579BDF);
+TEST_CONSTEXPR(_cvtu32_mask32(_cvtmask32_u32((__mmask32)0x2468ACE0)) == (__mmask32)0x2468ACE0);
+
__mmask64 test_cvtu64_mask64(__m512i A, __m512i B, unsigned long long C) {
// CHECK-LABEL: test_cvtu64_mask64
return _mm512_mask_cmpneq_epu8_mask(_cvtu64_mask64(C), A, B);
}
+TEST_CONSTEXPR(_cvtu64_mask64(0x0F0F0F0F0F0F0F0FULL) == (__mmask64)0x0F0F0F0F0F0F0F0FULL);
+TEST_CONSTEXPR(_cvtu64_mask64(_cvtmask64_u64((__mmask64)0xF0F0F0F0F0F0F0F0ULL)) == (__mmask64)0xF0F0F0F0F0F0F0F0ULL);
+
__mmask32 test_load_mask32(__mmask32 *A, __m512i B, __m512i C) {
// CHECK-LABEL: test_load_mask32
// CHECK: [[LOAD:%.*]] = load i32, ptr %{{.*}}
@@ -1065,12 +1231,16 @@ __m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packs_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_packs_epi32((__mmask32)0xAAAAAAAA,(__m512i)(__v16si){40000,-50000,32767,-32768,70000,-70000,42,-42,0,1,-1,30000,32768,-32769,65535,-65536},(__m512i)(__v16si){0,1,-1,65536,-1000000,1000000,32768,-32769,123456,-123456,32767,-32768,22222,-22222,40000,-40000}),0,-32768,0,-32768,0,1,0,32767,0,-32768,0,-42,0,32767,0,-32768,0,1,0,30000,0,-32768,0,-32768,0,-32768,0,-32768,0,-22222,0,-32768));
+
__m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packs_epi32
// CHECK: @llvm.x86.avx512.packssdw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_packs_epi32((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAA,(__m512i)(__v16si){40000,-50000,32767,-32768,70000,-70000,42,-42,0,1,-1,30000,32768,-32769,65535,-65536},(__m512i)(__v16si){0,1,-1,65536,-1000000,1000000,32768,-32769,123456,-123456,32767,-32768,22222,-22222,40000,-40000}),1,-32768,3,-32768,5,1,7,32767,9,-32768,11,-42,13,32767,15,-32768,17,1,19,30000,21,-32768,23,-32768,25,-32768,27,-32768,29,-22222,31,-32768));
+
__m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packs_epi16
// CHECK: @llvm.x86.avx512.packsswb.512
@@ -1083,48 +1253,62 @@ __m512i test_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_packs_epi16((__m512i)(__v64qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v32hi){130,-200,127,-128,300,-1000,42,-42,32767,-32767,127,-128,30000,-30000,90,-90,130,-200,0,-1,126,-127,128,-129,500,-500,7,-7,255,-255,127,-128},(__m512i)(__v32hi){0,1,-1,255,-129,128,20000,-32768,5,-5,100,-100,127,-128,512,-512,1,2,-2,300,-300,127,-128,42,0,1,-1,127,-128,90,-90,-32768}),1,-128,3,-128,5,-128,7,-42,9,1,11,127,13,127,15,-128,17,-128,19,-128,21,-128,23,-90,25,-5,27,-100,29,-128,31,-128,33,-128,35,-1,37,-127,39,-128,41,2,43,127,45,127,47,42,49,-128,51,-7,53,-128,55,-128,57,1,59,127,61,90,63,-128));
+
__m512i test_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_packs_epi16
// CHECK: @llvm.x86.avx512.packsswb.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_packs_epi16((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v32hi){130,-200,127,-128,300,-1000,42,-42,32767,-32767,127,-128,30000,-30000,90,-90,130,-200,0,-1,126,-127,128,-129,500,-500,7,-7,255,-255,127,-128},(__m512i)(__v32hi){0,1,-1,255,-129,128,20000,-32768,5,-5,100,-100,127,-128,512,-512,1,2,-2,300,-300,127,-128,42,0,1,-1,127,-128,90,-90,-32768}),0,-128,0,-128,0,-128,0,-42,0,1,0,127,0,127,0,-128,0,-128,0,-128,0,-128,0,-90,0,-5,0,-100,0,-128,0,-128,0,-128,0,-1,0,-127,0,-128,0,2,0,127,0,127,0,42,0,-128,0,-7,0,-128,0,-128,0,1,0,127,0,90,0,-128));
+
__m512i test_mm512_packus_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packus_epi32
// CHECK: @llvm.x86.avx512.packusdw.512
return _mm512_packus_epi32(__A,__B);
}
TEST_CONSTEXPR(match_v32hi(_mm512_packus_epi32((__m512i)(__v16si){40000, -50000, 32767, -32768, 70000, -70000, 42, -42, 0, 1, -1, 65535, 32768, -32769, 22222, -22222}, (__m512i)(__v16si){0, 1, -1, 65536, -1000000, 1000000, 32768, -32769, 123456, -123456, 32767, -32768, 40000, -40000, 65535, 0}), -25536, 0, 32767, 0, 0, 1, 0, -1, -1, 0, 42, 0, 0, -1, -32768, 0, 0, 1, 0, -1, -1, 0, 32767, 0, -32768, 0, 22222, 0, -25536, 0, -1, 0));
+
__m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_packus_epi32
// CHECK: @llvm.x86.avx512.packusdw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_packus_epi32((__mmask32)0xAAAAAAAA,(__m512i)(__v16si){40000,-50000,32767,-32768,70000,-70000,42,-42,0,1,-1,65535,32768,-32769,22222,-22222},(__m512i)(__v16si){0,1,-1,65536,-1000000,1000000,32768,-32769,123456,-123456,32767,-32768,40000,-40000,65535,0}),0,0,0,0,0,1,0,-1,0,0,0,0,0,-1,0,0,0,1,0,-1,0,0,0,0,0,0,0,0,0,0,0,0));
+
__m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packus_epi32
// CHECK: @llvm.x86.avx512.packusdw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packus_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_packus_epi32((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAA,(__m512i)(__v16si){40000,-50000,32767,-32768,70000,-70000,42,-42,0,1,-1,65535,32768,-32769,22222,-22222},(__m512i)(__v16si){0,1,-1,65536,-1000000,1000000,32768,-32769,123456,-123456,32767,-32768,40000,-40000,65535,0}),1,0,3,0,5,1,7,-1,9,0,11,0,13,-1,15,0,17,1,19,-1,21,0,23,0,25,0,27,0,29,0,31,0));
+
__m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packus_epi16
// CHECK: @llvm.x86.avx512.packuswb.512
return _mm512_packus_epi16(__A,__B);
}
TEST_CONSTEXPR(match_v64qi(_mm512_packus_epi16((__m512i)(__v32hi){-1, 0, 1, 127, 128, 255, 256, -200, 300, 42, -42, 500, 20000, -32768, 129, -129, -1, 0, 1, 127, 128, 255, 256, -200, 300, 42, -42, 500, 20000, -32768, 129, -129}, (__m512i)(__v32hi){0, 1, -1, 255, -129, 128, 20000, -32768, 32767, -32767, 127, -128, 30000, -30000, 90, -90, 0, 1, -1, 255, -129, 128, 20000, -32768, 32767, -32767, 127, -128, 30000, -30000, 90, -90}), 0, 0, 1, 127, -128, -1, -1, 0, 0, 1, 0, -1, 0, -128, -1, 0, -1, 42, 0, -1, -1, 0, -127, 0, -1, 0, 127, 0, -1, 0, 90, 0, 0, 0, 1, 127, -128, -1, -1, 0, 0, 1, 0, -1, 0, -128, -1, 0, -1, 42, 0, -1, -1, 0, -127, 0, -1, 0, 127, 0, -1, 0, 90, 0));
+
__m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packus_epi16
// CHECK: @llvm.x86.avx512.packuswb.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_packus_epi16((__m512i)(__v64qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v32hi){-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129,-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129},(__m512i)(__v32hi){0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90,0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90}),1,0,3,127,5,-1,7,0,9,1,11,-1,13,-128,15,0,17,42,19,-1,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,127,37,-1,39,0,41,1,43,-1,45,-128,47,0,49,42,51,-1,53,0,55,0,57,0,59,0,61,0,63,0));
+
__m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_packus_epi16
// CHECK: @llvm.x86.avx512.packuswb.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_packus_epi16((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v32hi){-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129,-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129},(__m512i)(__v32hi){0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90,0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90}),0,0,0,127,0,-1,0,0,0,1,0,-1,0,-128,0,0,0,42,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,0,-1,0,0,0,1,0,-1,0,-128,0,0,0,42,0,-1,0,0,0,0,0,0,0,0,0,0,0,0));
+
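Not part of the patch itself: the packs/packus expectations above come down to per-element saturation before the two sources are interleaved 128-bit lane by lane. Two representative scalar clamps (helper names invented):

#include <stdint.h>
static int16_t packs_epi32_sat_model(int32_t v) {   /* packssdw: clamp to [-32768, 32767] */
  return (int16_t)(v < -32768 ? -32768 : v > 32767 ? 32767 : v);
}
static uint8_t packus_epi16_sat_model(int16_t v) {  /* packuswb: clamp to [0, 255] */
  return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
}
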
__m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epi8
// CHECK: @llvm.sadd.sat.v64i8
@@ -1138,18 +1322,22 @@ __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_adds_epi8((__m512i)(__v64qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,-28,+29,-30,+31,-32,+33,-34,+35,-36,+37,-38,+39,-40,+41,-42,+43,-44,+45,-46,+47,+100,+50,-100,+20,+80,-50,+120,-20,-100,-50,+100,-20,-80,+50,-120,+20},(__m512i)(__v64qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,-28,+29,-30,+31,-32,+33,-34,+35,-36,+37,-38,+39,-40,+41,-42,+43,-44,+45,-46,+47,+50,+80,-50,+110,+60,-30,+20,-10,+50,+80,-50,+110,+60,-30,+20,-10}),1,+2,3,+6,5,+10,7,+14,9,+18,11,+22,13,+26,15,+30,17,+34,19,+38,21,+42,23,+46,25,+50,27,+54,29,+58,31,+62,33,+66,35,+70,37,+74,39,+78,41,+82,43,+86,45,+90,47,+94,49,+127,51,+127,53,-80,+55,-30,57,+30,59,+90,61,+20,63,+10));
+
__m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v64i8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_adds_epi8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,-28,+29,-30,+31,-32,+33,-34,+35,-36,+37,-38,+39,-40,+41,-42,+43,-44,+45,-46,+47,+100,+50,-100,+20,+80,-50,+120,-20,-100,-50,+100,-20,-80,+50,-120,+20},(__m512i)(__v64qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,-28,+29,-30,+31,-32,+33,-34,+35,-36,+37,-38,+39,-40,+41,-42,+43,-44,+45,-46,+47,+50,+80,-50,+110,+60,-30,+20,-10,+50,+80,-50,+110,+60,-30,+20,-10}),0,+2,0,+6,0,+10,0,+14,0,+18,0,+22,0,+26,0,+30,0,+34,0,+38,0,+42,0,+46,0,+50,0,+54,0,+58,0,+62,0,+66,0,+70,0,+74,0,+78,0,+82,0,+86,0,+90,0,+94,0,+127,0,+127,0,-80,0,-30,0,+30,0,+90,0,+20,0,+10));
+
__m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
return _mm512_adds_epi16(__A,__B);
}
-TEST_CONSTEXPR(match_v32hi(_mm512_adds_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +800, -800, -800, +800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200));
+TEST_CONSTEXPR(match_v32hi(_mm512_adds_epi16((__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+32000,-32000,+32000,-32000},(__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+800,-800,-800,+800}),0,+2,-4,+6,-8,+10,-12,+14,-16,+18,-20,+22,-24,+26,-28,+30,-32,+34,-36,+38,-40,+42,-44,+46,-48,+50,-52,+54,+32767,-32768,+31200,-31200));
__m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_adds_epi16
@@ -1157,12 +1345,16 @@ __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_adds_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_adds_epi16((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAAu,(__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+800,-800,-800,+800}),1,+2,3,+6,5,+10,7,+14,9,+18,11,+22,13,+26,15,+30,17,+34,19,+38,21,+42,23,+46,25,+50,27,+54,29,-32768,31,+32767));
+
__m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_adds_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_adds_epi16((__mmask32)0xAAAAAAAAu,(__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m512i)(__v32hi){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,-16,+17,-18,+19,-20,+21,-22,+23,-24,+25,-26,+27,+800,-800,-800,+800}),0,+2,0,+6,0,+10,0,+14,0,+18,0,+22,0,+26,0,+30,0,+34,0,+38,0,+42,0,+46,0,+50,0,+54,0,-32768,0,+32767));
+
__m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epu8
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
@@ -1178,7 +1370,7 @@ __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epu8(__W,__U,__A,__B);
}
-TEST_CONSTEXPR(match_v32hu(_mm512_adds_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, +32767, +32768, +65535, +16384, +32768, +49151, +49152, +65535, +65535, +32767, +49151, +65534, +65535, +65535, +65535, +32768, +49152, +65535, +65535, +65535, +65535, +49152, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535));
+TEST_CONSTEXPR(match_v64qu(_mm512_mask_adds_epu8((__m512i)(__v64qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qu){0,0,0,0,0,0,0,0,+63,+63,+63,+63,+63,+63,+63,+63,+64,+64,+64,+64,+64,+64,+64,+64,+127,+127,+127,+127,+127,+127,+127,+127,+128,+128,+128,+128,+128,+128,+128,+128,+191,+191,+191,+191,+191,+191,+191,+191,+192,+192,+192,+192,+192,+192,+192,+192,+255,+255,+255,+255,+255,+255,+255,+255},(__m512i)(__v64qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),1,+63,3,+127,5,+191,7,+255,9,+126,11,+190,13,+254,15,+255,17,+127,19,+191,21,+255,23,+255,25,+190,27,+254,29,+255,31,+255,33,+191,35,+255,37,+255,39,+255,41,+254,43,+255,45,+255,47,+255,49,+255,51,+255,53,+255,55,+255,57,+255,59,+255,61,+255,63,+255));
__m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu8
@@ -1187,12 +1379,16 @@ __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qu(_mm512_maskz_adds_epu8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qu){0,0,0,0,0,0,0,0,+63,+63,+63,+63,+63,+63,+63,+63,+64,+64,+64,+64,+64,+64,+64,+64,+127,+127,+127,+127,+127,+127,+127,+127,+128,+128,+128,+128,+128,+128,+128,+128,+191,+191,+191,+191,+191,+191,+191,+191,+192,+192,+192,+192,+192,+192,+192,+192,+255,+255,+255,+255,+255,+255,+255,+255},(__m512i)(__v64qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),0,+63,0,+127,0,+191,0,+255,0,+126,0,+190,0,+254,0,+255,0,+127,0,+191,0,+255,0,+255,0,+190,0,+254,0,+255,0,+255,0,+191,0,+255,0,+255,0,+255,0,+254,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255));
+
__m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
// CHECK: call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_adds_epu16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_adds_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, +32767, +32768, +65535, +16384, +32768, +49151, +49152, +65535, +65535, +32767, +49151, +65534, +65535, +65535, +65535, +32768, +49152, +65535, +65535, +65535, +65535, +49152, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535));
+
__m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
@@ -1200,6 +1396,8 @@ __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_adds_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_mask_adds_epu16((__m512i)(__v32hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAA,(__m512i)(__v32hu){0,0,0,0,+16384,+16384,+16384,+16384,+16384,+16384,+32767,+32767,+32767,+32767,+32767,+32767,+32768,+32768,+32768,+32768,+32768,+32768,+49152,+49152,+49152,+49152,+49152,+49152,+65535,+65535,+65535,+65535},(__m512i)(__v32hu){0,+32767,+32768,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+32767,+32768,+65535}),1,+32767,3,+65535,5,+32768,7,+49152,9,+65535,11,+49151,13,+65535,15,+65535,17,+49152,19,+65535,21,+65535,23,+65535,25,+65535,27,+65535,29,+65535,31,+65535));
+
__m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
@@ -1207,6 +1405,8 @@ __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_maskz_adds_epu16((__mmask32)0xAAAAAAAA,(__m512i)(__v32hu){0,0,0,0,+16384,+16384,+16384,+16384,+16384,+16384,+32767,+32767,+32767,+32767,+32767,+32767,+32768,+32768,+32768,+32768,+32768,+32768,+49152,+49152,+49152,+49152,+49152,+49152,+65535,+65535,+65535,+65535},(__m512i)(__v32hu){0,+32767,+32768,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+16384,+32767,+32768,+49152,+65535,0,+32767,+32768,+65535}),0,+32767,0,+65535,0,+32768,0,+49152,0,+65535,0,+49151,0,+65535,0,+65535,0,+49152,0,+65535,0,+65535,0,+65535,0,+65535,0,+65535,0,+65535,0,+65535));
+
__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_avg_epu8
// CHECK: @llvm.x86.avx512.pavg.b.512
@@ -1500,12 +1700,16 @@ __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_subs_epi8((__m512i)(__v64qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){1,-100,3,4,5,-6,7,100,9,-100,11,12,13,-14,15,100,17,-100,19,20,21,-22,23,100,25,-100,27,28,29,-30,31,100,33,-100,35,36,37,-38,39,100,41,-100,43,44,45,-46,47,100,49,-100,51,52,53,-54,55,100,57,-100,59,60,61,-62,63,100},(__m512i)(__v64qs){1,100,3,4,5,6,7,-100,9,100,11,12,13,14,15,-100,17,100,19,20,21,22,23,-100,25,100,27,28,29,30,31,-100,33,100,35,36,37,38,39,-100,41,100,43,44,45,46,47,-100,49,100,51,52,53,54,55,-100,57,100,59,60,61,62,63,-100}),1,-128,3,0,5,-12,7,127,9,-128,11,0,13,-28,15,127,17,-128,19,0,21,-44,23,127,25,-128,27,0,29,-60,31,127,33,-128,35,0,37,-76,39,127,41,-128,43,0,45,-92,47,127,49,-128,51,0,53,-108,55,127,57,-128,59,0,61,-124,63,127));
+
__m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v64i8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_subs_epi8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){1,-100,3,4,5,-6,7,100,9,-100,11,12,13,-14,15,100,17,-100,19,20,21,-22,23,100,25,-100,27,28,29,-30,31,100,33,-100,35,36,37,-38,39,100,41,-100,43,44,45,-46,47,100,49,-100,51,52,53,-54,55,100,57,-100,59,60,61,-62,63,100},(__m512i)(__v64qs){1,100,3,4,5,6,7,-100,9,100,11,12,13,14,15,-100,17,100,19,20,21,22,23,-100,25,100,27,28,29,30,31,-100,33,100,35,36,37,38,39,-100,41,100,43,44,45,46,47,-100,49,100,51,52,53,54,55,-100,57,100,59,60,61,62,63,-100}),0,-128,0,0,0,-12,0,127,0,-128,0,0,0,-28,0,127,0,-128,0,0,0,-44,0,127,0,-128,0,0,0,-60,0,127,0,-128,0,0,0,-76,0,127,0,-128,0,0,0,-92,0,127,0,-128,0,0,0,-108,0,127,0,-128,0,0,0,-124,0,127));
+
__m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epi16
// CHECK: @llvm.ssub.sat.v32i16
@@ -1518,18 +1722,24 @@ __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_subs_epi16((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAA,(__m512i)(__v32hi){1,-30000,3,30000,5,-6,7,8,9,-30000,11,30000,13,-14,15,16,17,-30000,19,30000,21,-22,23,24,25,-30000,27,30000,29,-30,31,32},(__m512i)(__v32hi){1,30000,3,-30000,5,6,7,-8,9,30000,11,-30000,13,14,15,-16,17,30000,19,-30000,21,22,23,-24,25,30000,27,-30000,29,30,31,-32}),1,-32768,3,32767,5,-12,7,16,9,-32768,11,32767,13,-28,15,32,17,-32768,19,32767,21,-44,23,48,25,-32768,27,32767,29,-60,31,64));
+
__m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v32i16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_subs_epi16((__mmask32)0xAAAAAAAAu,(__m512i)(__v32hi){1,-30000,3,30000,5,-6,7,8,9,-30000,11,30000,13,-14,15,16,17,-30000,19,30000,21,-22,23,24,25,-30000,27,30000,29,-30,31,32},(__m512i)(__v32hi){1,30000,3,-30000,5,6,7,-8,9,30000,11,-30000,13,14,15,-16,17,30000,19,-30000,21,22,23,-24,25,30000,27,-30000,29,30,31,-32}),0,-32768,0,32767,0,-12,0,16,0,-32768,0,32767,0,-28,0,32,0,-32768,0,32767,0,-44,0,48,0,-32768,0,32767,0,-60,0,64));
+
__m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epu8
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
// CHECK: call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
return _mm512_subs_epu8(__A,__B);
}
+TEST_CONSTEXPR(match_v64qu(_mm512_subs_epu8((__m512i)(__v64qu){0,0,0,0,0,0,0,0,+63,+63,+63,+63,+63,+63,+63,+63,+64,+64,+64,+64,+64,+64,+64,+64,+127,+127,+127,+127,+127,+127,+127,+127,+128,+128,+128,+128,+128,+128,+128,+128,+191,+191,+191,+191,+191,+191,+191,+191,+192,+192,+192,+192,+192,+192,+192,+192,+255,+255,+255,+255,+255,+255,+255,+255},(__m512i)(__v64qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),0,0,0,0,0,0,0,0,+63,0,0,0,0,0,0,0,+64,+1,0,0,0,0,0,0,+127,+64,+63,0,0,0,0,0,+128,+65,+64,+1,0,0,0,0,+191,+128,+127,+64,+63,0,0,0,+192,+129,+128,+65,+64,+1,0,0,+255,+192,+191,+128,+127,+64,+63,+0));
+
__m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_subs_epu8
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
@@ -1537,7 +1747,7 @@ __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_subs_epu8(__W,__U,__A,__B);
}
-TEST_CONSTEXPR(match_v64qu(_mm512_subs_epu8((__m512i)(__v64qu){0, 0, 0, 0, 0, 0, 0, 0, +63, +63, +63, +63, +63, +63, +63, +63, +64, +64, +64, +64, +64, +64, +64, +64, +127, +127, +127, +127, +127, +127, +127, +127, +128, +128, +128, +128, +128, +128, +128, +128, +191, +191, +191, +191, +191, +191, +191, +191, +192, +192, +192, +192, +192, +192, +192, +192, +255, +255, +255, +255, +255, +255, +255, +255}, (__m512i)(__v64qu){0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255}), 0, 0, 0, 0, 0, 0, 0, 0, +63, 0, 0, 0, 0, 0, 0, 0, +64, +1, 0, 0, 0, 0, 0, 0, +127, +64, +63, 0, 0, 0, 0, 0, +128, +65, +64, +1, 0, 0, 0, 0, +191, +128, +127, +64, +63, 0, 0, 0, +192, +129, +128, +65, +64, +1, 0, 0, +255, +192, +191, +128, +127, +64, +63, +0));
+TEST_CONSTEXPR(match_v64qu(_mm512_mask_subs_epu8((__m512i)(__v64qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m512i)(__v64qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),1,200,3,0,5,0,7,254,9,0,11,1,13,1,15,0,17,200,19,0,21,0,23,254,25,0,27,1,29,1,31,0,33,200,35,0,37,0,39,254,41,0,43,1,45,1,47,0,49,200,51,0,53,0,55,254,57,0,59,1,61,1,63,0));
__m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epu8
@@ -1546,20 +1756,25 @@ __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v64qu(_mm512_maskz_subs_epu8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m512i)(__v64qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0,0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0,0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0,0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0));
+
__m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
// CHECK: call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_subs_epu16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_subs_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, 0, 0, 0, +16384, 0, 0, 0, 0, 0, +32767, +16383, 0, 0, 0, 0, +32768, +16384, +1, 0, 0, 0, +49152, +32768, +16385, +16384, 0, 0, +65535, +32768, +32767, 0));
+
__m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
// CHECK: call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_subs_epu16(__W,__U,__A,__B);
-TEST_CONSTEXPR(match_v32hu(_mm512_subs_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, 0, 0, 0, +16384, 0, 0, 0, 0, 0, +32767, +16383, 0, 0, 0, 0, +32768, +16384, +1, 0, 0, 0, +49152, +32768, +16385, +16384, 0, 0, +65535, +32768, +32767, 0));
}
+TEST_CONSTEXPR(match_v32hu(_mm512_mask_subs_epu16((__m512i)(__v32hu){101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132},(__mmask32)0xAAAAAAAAu,(__m512i)(__v32hu){0,65000,0,40000,0,100,0,65535,0,0,0,1000,0,1,0,50000,0,65000,0,40000,0,100,0,65535,0,0,0,1000,0,1,0,50000},(__m512i)(__v32hu){0,5000,0,40000,0,200,0,1,0,1,0,65535,0,0,0,25000,0,5000,0,40000,0,200,0,1,0,1,0,65535,0,0,0,25000}),101,60000,103,0,105,0,107,65534,109,0,111,0,113,1,115,25000,117,60000,119,0,121,0,123,65534,125,0,127,0,129,1,131,25000));
+
__m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
@@ -1567,6 +1782,8 @@ __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_subs_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_maskz_subs_epu16((__mmask32)0xAAAAAAAAu,(__m512i)(__v32hu){51,65000,0,40000,0,100,0,65535,42,0,0,1000,0,1,0,50000,69,65000,0,40000,0,100,0,65535,71,0,0,1000,0,1,0,50000},(__m512i)(__v32hu){2652,5000,0,40000,0,200,0,1,398,1,0,65535,0,0,0,25000,29625,5000,0,40000,0,200,0,1,25274,1,0,65535,0,0,0,25000}),0,60000,0,0,0,0,0,65534,0,0,0,0,0,1,0,25000,0,60000,0,0,0,0,0,65534,0,0,0,0,0,1,0,25000));
+
__m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
// CHECK-LABEL: test_mm512_mask2_permutex2var_epi16
// CHECK: @llvm.x86.avx512.vpermi2var.hi.512
@@ -1591,6 +1808,132 @@ __m512i test_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i
return _mm512_maskz_permutex2var_epi16(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 400, 10, 410, 20, 420, 30, 430,
+ 40, 440, 50, 450, 60, 460, 70, 470,
+ 80, 480, 90, 490, 100, 500, 110, 510,
+ 120, 520, 130, 530, 140, 540, 150, 550));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ -1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24,
+ -25, -26, -27, -28, -29, -30, -31, -32},
+ 0xAAAAAAAA,
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ -1, 400, -3, 410, -5, 420, -7, 430,
+ -9, 440, -11, 450, -13, 460, -15, 470,
+ -17, 480, -19, 490, -21, 500, -23, 510,
+ -25, 520, -27, 530, -29, 540, -31, 550));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_maskz_permutex2var_epi16(
+ 0x55555555,
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 0, 10, 0, 20, 0, 30, 0,
+ 40, 0, 50, 0, 60, 0, 70, 0,
+ 80, 0, 90, 0, 100, 0, 110, 0,
+ 120, 0, 130, 0, 140, 0, 150, 0));
+
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_permutex2var_epi8(
+ (__m512i)(__v64qu){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 127, 126, 125,
+ 124, 123, 122, 121, 120, 119, 118, 117,
+ 116, 115, 114, 113, 112, 111, 110, 109,
+ 108, 107, 106, 105, 104, 103, 102, 101,
+ 100, 99, 98, 97, 96, 95, 94, 93,
+ 92, 91, 90, 89, 88, 87, 86, 85,
+ 84, 83, 82, 81, 80, 79, 78, 77},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 210, 220, 230, 240, 250, 254, 253,
+ 252, 251, 250, 249, 248, 247, 246, 245,
+ 244, 243, 242, 241, 240, 239, 238, 237,
+ 236, 235, 234, 233, 232, 231, 230, 229,
+ 228, 227, 226, 225, 224, 223, 222, 221,
+ 220, 219, 218, 217, 216, 215, 214, 213,
+ 212, 211, 210, 209, 208, 207, 206, 205,
+ 204, 203, 202, 201, 200, 199, 198, 197}),
+ 0, 200, 10, 210, 20, 220, 30, 230,
+ 40, 240, 50, 250, 60, 254, 70, 253,
+ 80, 252, 90, 251, 100, 250, 110, 249,
+ 120, 248, 127, 247, 126, 246, 125, 245,
+ 124, 244, 123, 243, 122, 242, 121, 241,
+ 120, 240, 119, 239, 118, 238, 117, 237,
+ 116, 236, 115, 235, 114, 234, 113, 233,
+ 112, 232, 111, 231, 110, 230, 109, 229));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask2_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ 0x55555555,
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 32, 10, 33, 20, 34, 30, 35,
+ 40, 36, 50, 37, 60, 38, 70, 39,
+ 80, 40, 90, 41, 100, 42, 110, 43,
+ 120, 44, 130, 45, 140, 46, 150, 47));
+
__m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mulhrs_epi16
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
@@ -1775,6 +2118,7 @@ __m512i test_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, _
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_unpackhi_epi8((__m512i)(__v64qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xFAAAAAAAAAAAAAAA,(__m512i)(__v64qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89},(__m512i)(__v64qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-46,-47,-48,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64}),1,-9,3,-10,5,-11,7,-12,9,-13,11,-14,13,-15,15,-16,17,-25,19,-26,21,-27,23,-28,25,-29,27,-30,29,-31,31,-32,33,-41,35,-42,37,-43,39,-44,41,-45,43,-46,45,-47,47,-48,49,-57,51,-58,53,-59,55,-60,57,-61,59,-62,-90,-63,-89,-64));
__m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpackhi_epi8
@@ -1782,6 +2126,7 @@ __m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_unpackhi_epi8((__mmask64)0xFAAAAAAAAAAAAAAA,(__m512i)(__v64qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89},(__m512i)(__v64qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-46,-47,-48,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64}),0,-9,0,-10,0,-11,0,-12,0,-13,0,-14,0,-15,0,-16,0,-25,0,-26,0,-27,0,-28,0,-29,0,-30,0,-31,0,-32,0,-41,0,-42,0,-43,0,-44,0,-45,0,-46,0,-47,0,-48,0,-57,0,-58,0,-59,0,-60,0,-61,0,-62,-90,-63,-89,-64));
__m512i test_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpackhi_epi16
@@ -1797,6 +2142,7 @@ __m512i test_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_unpackhi_epi16((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xFAAAAAAAu,(__m512i)(__v32hi){100,101,102,103,104,105,106,107,110,111,112,113,114,115,116,117,120,121,122,123,124,125,126,127,130,131,132,133,134,135,136,137},(__m512i)(__v32hi){200,201,202,203,204,205,206,207,210,211,212,213,214,215,216,217,220,221,222,223,224,225,226,227,230,231,232,233,234,235,236,237}),1,204,3,205,5,206,7,207,9,214,11,215,13,216,15,217,17,224,19,225,21,226,23,227,25,234,27,235,136,236,137,237));
__m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpackhi_epi16
@@ -1804,6 +2150,7 @@ __m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_unpackhi_epi16((__mmask32)0xFAAAAAAAu,(__m512i)(__v32hi){100,101,102,103,104,105,106,107,110,111,112,113,114,115,116,117,120,121,122,123,124,125,126,127,130,131,132,133,134,135,136,137},(__m512i)(__v32hi){200,201,202,203,204,205,206,207,210,211,212,213,214,215,216,217,220,221,222,223,224,225,226,227,230,231,232,233,234,235,236,237}),0,204,0,205,0,206,0,207,0,214,0,215,0,216,0,217,0,224,0,225,0,226,0,227,0,234,0,235,136,236,137,237));
__m512i test_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpacklo_epi8
@@ -1818,6 +2165,7 @@ __m512i test_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, _
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_unpacklo_epi8((__m512i)(__v64qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xFAAAAAAAAAAAAAAA,(__m512i)(__v64qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64,-65},(__m512i)(__v64qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75}),1,-1,3,-2,5,-3,7,-4,9,-5,11,-6,13,-7,15,-8,17,20,19,21,21,22,23,23,25,24,27,25,29,26,31,27,33,40,35,41,37,42,39,43,41,44,43,45,45,46,47,47,49,60,51,61,53,62,55,63,57,64,59,65,-56,66,-57,67));
__m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpacklo_epi8
@@ -1825,6 +2173,7 @@ __m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_unpacklo_epi8((__mmask64)0xFAAAAAAAAAAAAAAA,(__m512i)(__v64qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64,-65},(__m512i)(__v64qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75}),0,-1,0,-2,0,-3,0,-4,0,-5,0,-6,0,-7,0,-8,0,20,0,21,0,22,0,23,0,24,0,25,0,26,0,27,0,40,0,41,0,42,0,43,0,44,0,45,0,46,0,47,0,60,0,61,0,62,0,63,0,64,0,65,-56,66,-57,67));
__m512i test_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpacklo_epi16
@@ -1839,6 +2188,7 @@ __m512i test_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A,
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_unpacklo_epi16((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xFAAAAAAAu,(__m512i)(__v32hi){100,101,102,103,104,105,106,107,110,111,112,113,114,115,116,117,120,121,122,123,124,125,126,127,130,131,132,133,134,135,136,137},(__m512i)(__v32hi){200,201,202,203,204,205,206,207,210,211,212,213,214,215,216,217,220,221,222,223,224,225,226,227,230,231,232,233,234,235,236,237}),1,200,3,201,5,202,7,203,9,210,11,211,13,212,15,213,17,220,19,221,21,222,23,223,25,230,27,231,132,232,133,233));
__m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpacklo_epi16
@@ -1846,6 +2196,7 @@ __m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_unpacklo_epi16((__mmask32)0xFAAAAAAAu,(__m512i)(__v32hi){100,101,102,103,104,105,106,107,110,111,112,113,114,115,116,117,120,121,122,123,124,125,126,127,130,131,132,133,134,135,136,137},(__m512i)(__v32hi){200,201,202,203,204,205,206,207,210,211,212,213,214,215,216,217,220,221,222,223,224,225,226,227,230,231,232,233,234,235,236,237}),0,200,0,201,0,202,0,203,0,210,0,211,0,212,0,213,0,220,0,221,0,222,0,223,0,230,0,231,132,232,133,233));
__m512i test_mm512_cvtepi8_epi16(__m256i __A) {
// CHECK-LABEL: test_mm512_cvtepi8_epi16
@@ -1972,8 +2323,15 @@ TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sllv_epi16(0xB120676B, (__m512i)(__v32hi
__m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi16
// CHECK: @llvm.x86.avx512.psll.w.512
- return _mm512_sll_epi16(__A, __B);
+ return _mm512_sll_epi16(__A, __B);
}
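+// _mm512_sll_epi16 shifts every element by the unsigned 64-bit count held in the low quadword of __B; any count above 15 produces all-zero elements.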
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));
__m512i test_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi16
@@ -1986,8 +2344,10 @@ __m512i test_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi16
// CHECK: @llvm.x86.avx512.psll.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_sll_epi16(__U, __A, __B);
+ return _mm512_maskz_sll_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_sll_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99, 99, 18, 99, 22, 24, 99, 28, 99, 99, 34, 99, 38, 40, 99, 44, 99, 99, 50, 99, 54, 56, 99, 60, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sll_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14, 16, 0, 20, 0, 0, 26, 0, 30, 32, 0, 36, 0, 0, 42, 0, 46, 48, 0, 52, 0, 0, 58, 0, 62));
__m512i test_mm512_slli_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_slli_epi16
@@ -2097,8 +2457,15 @@ TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srav_epi16(0xB120676B, (__m512i)(__v32hi
__m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi16
// CHECK: @llvm.x86.avx512.psra.w.512
- return _mm512_sra_epi16(__A, __B);
+ return _mm512_sra_epi16(__A, __B);
}
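+// _mm512_sra_epi16 shifts by the unsigned 64-bit count in the low quadword of __B; any count above 15 fills each element with copies of its sign bit (0 or -1).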
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31));
__m512i test_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi16
@@ -2111,8 +2478,10 @@ __m512i test_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi16
// CHECK: @llvm.x86.avx512.psra.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_sra_epi16(__U, __A, __B);
+ return _mm512_maskz_sra_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_sra_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99, 99, -9, 99, -11, 12, 99, 14, 99, 99, -17, 99, -19, 20, 99, 22, 99, 99, -25, 99, -27, 28, 99, 30, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sra_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7, 8, 0, 10, 0, 0, -13, 0, -15, 16, 0, 18, 0, 0, -21, 0, -23, 24, 0, 26, 0, 0, -29, 0, -31));
__m512i test_mm512_srai_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_srai_epi16
@@ -2160,8 +2529,15 @@ __m512i test_mm512_maskz_srai_epi16_2(__mmask32 __U, __m512i __A, unsigned int _
__m512i test_mm512_srl_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi16
// CHECK: @llvm.x86.avx512.psrl.w.512
- return _mm512_srl_epi16(__A, __B);
+ return _mm512_srl_epi16(__A, __B);
}
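+// _mm512_srl_epi16 shifts by the unsigned 64-bit count in the low quadword of __B; any count above 15 produces all-zero elements.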
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 32767, 2, 32765, 4, 32763, 6, 32761, 8, 32759, 10, 32757, 12, 32755, 14, 32753, 16, 32751, 18, 32749, 20, 32747, 22, 32745, 24, 32743, 26, 32741, 28, 32739, 30, 32737));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 32767, 2, 32765, 4, 32763, 6, 32761, 8, 32759, 10, 32757, 12, 32755, 14, 32753, 16, 32751, 18, 32749, 20, 32747, 22, 32745, 24, 32743, 26, 32741, 28, 32739, 30, 32737));
__m512i test_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi16
@@ -2174,8 +2550,10 @@ __m512i test_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi16
// CHECK: @llvm.x86.avx512.psrl.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_srl_epi16(__U, __A, __B);
+ return _mm512_maskz_srl_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_srl_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, 32767, 99, 32765, 4, 99, 6, 99, 99, 32759, 99, 32757, 12, 99, 14, 99, 99, 32751, 99, 32749, 20, 99, 22, 99, 99, 32743, 99, 32741, 28, 99, 30, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srl_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 2, 0, 0, 32763, 0, 32761, 8, 0, 10, 0, 0, 32755, 0, 32753, 16, 0, 18, 0, 0, 32747, 0, 32745, 24, 0, 26, 0, 0, 32739, 0, 32737));
__m512i test_mm512_srli_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_srli_epi16
@@ -2233,24 +2611,28 @@ __m512i test_mm512_mask_mov_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mov_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_mov_epi16((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},(__mmask32)0xAAAAAAAA,(__m512i)(__v32hi){-0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31}),0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16,-17,18,-19,20,-21,22,-23,24,-25,26,-27,28,-29,30,-31));
__m512i test_mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_mov_epi16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mov_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mov_epi16((__mmask32)0xAAAAAAAA,(__m512i)(__v32hi){-0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31}),0,-1,0,-3,0,-5,0,-7,0,-9,0,-11,0,-13,0,-15,0,-17,0,-19,0,-21,0,-23,0,-25,0,-27,0,-29,0,-31));
__m512i test_mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_mov_epi8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_mov_epi8(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_mov_epi8((__m512i)(__v64qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){-0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-46,-47,-48,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63}),0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16,-17,18,-19,20,-21,22,-23,24,-25,26,-27,28,-29,30,-31,32,-33,34,-35,36,-37,38,-39,40,-41,42,-43,44,-45,46,-47,48,-49,50,-51,52,-53,54,-55,56,-57,58,-59,60,-61,62,-63));
__m512i test_mm512_maskz_mov_epi8(__mmask64 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_mov_epi8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_mov_epi8(__U, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_mov_epi8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m512i)(__v64qs){-0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-46,-47,-48,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63}),0,-1,0,-3,0,-5,0,-7,0,-9,0,-11,0,-13,0,-15,0,-17,0,-19,0,-21,0,-23,0,-25,0,-27,0,-29,0,-31,0,-33,0,-35,0,-37,0,-39,0,-41,0,-43,0,-45,0,-47,0,-49,0,-51,0,-53,0,-55,0,-57,0,-59,0,-61,0,-63));
__m512i test_mm512_mask_set1_epi8(__m512i __O, __mmask64 __M, char __A) {
// CHECK-LABEL: test_mm512_mask_set1_epi8
@@ -2319,6 +2701,7 @@ __m512i test_mm512_mask_set1_epi8(__m512i __O, __mmask64 __M, char __A) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_set1_epi8((__m512i)(__v64qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask64)0xAAAAAAAAAAAAAAAA,(char)42),1,42,3,42,5,42,7,42,9,42,11,42,13,42,15,42,17,42,19,42,21,42,23,42,25,42,27,42,29,42,31,42,33,42,35,42,37,42,39,42,41,42,43,42,45,42,47,42,49,42,51,42,53,42,55,42,57,42,59,42,61,42,63,42));
__m512i test_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
// CHECK-LABEL: test_mm512_maskz_set1_epi8
@@ -2389,6 +2772,7 @@ __m512i test_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_set1_epi8(__M, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_set1_epi8((__mmask64)0xAAAAAAAAAAAAAAAA,(char)42),0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42));
__mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackd
@@ -2400,6 +2784,12 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D
return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F);
}
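+// _mm512_kunpackd concatenates mask halves: bits [31:0] of __B become bits [31:0] of the result and bits [31:0] of __A become bits [63:32].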
+TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
+
__mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackw
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -2410,6 +2800,12 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D
return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F);
}
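+// _mm512_kunpackw concatenates mask halves: bits [15:0] of __B become bits [15:0] of the result and bits [15:0] of __A become bits [31:16].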
+TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
+TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
+TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
+
__m512i test_mm512_loadu_epi16 (void *__P)
{
// CHECK-LABEL: test_mm512_loadu_epi16
@@ -2537,6 +2933,18 @@ __mmask64 test_mm512_movepi8_mask(__m512i __A) {
return _mm512_movepi8_mask(__A);
}
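+// _mm512_movepi8_mask sets bit i of the result to the sign bit of byte i; here only bytes 2 (129) and 63 (255) are negative, giving 0x8000000000000004.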
+TEST_CONSTEXPR(_mm512_movepi8_mask(
+ ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, char(255)})
+) == (__mmask64)0x8000000000000004);
+
+
__m512i test_mm512_movm_epi8(__mmask64 __A) {
// CHECK-LABEL: test_mm512_movm_epi8
// CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
@@ -2544,6 +2952,18 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) {
return _mm512_movm_epi8(__A);
}
__m512i test_mm512_movm_epi16(__mmask32 __A) {
// CHECK-LABEL: test_mm512_movm_epi16
// CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
@@ -2564,6 +2984,7 @@ __m512i test_mm512_mask_broadcastb_epi8(__m512i __O, __mmask64 __M, __m128i __A)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_broadcastb_epi8((__m512i)(__v64qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63},(__mmask64)0xAAAAAAAAAAAAAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,2,-120,4,-120,6,-120,8,-120,10,-120,12,-120,14,-120,16,-120,18,-120,20,-120,22,-120,24,-120,26,-120,28,-120,30,-120,32,-120,34,-120,36,-120,38,-120,40,-120,42,-120,44,-120,46,-120,48,-120,50,-120,52,-120,54,-120,56,-120,58,-120,60,-120,62,-120));
__m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_maskz_broadcastb_epi8
@@ -2571,6 +2992,7 @@ __m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_broadcastb_epi8((__mmask64)0xAAAAAAAAAAAAAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120));
__m512i test_mm512_broadcastw_epi16(__m128i __A) {
// CHECK-LABEL: test_mm512_broadcastw_epi16
@@ -2578,6 +3000,33 @@ __m512i test_mm512_broadcastw_epi16(__m128i __A) {
return _mm512_broadcastw_epi16(__A);
}
TEST_CONSTEXPR(match_v32hi(_mm512_broadcastw_epi16((__m128i)(__v8hi){42, 3, 10, 8, 0, 256, 256, 128}), 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_permutex2var_epi16((__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ (__m512i)(__v32hi){0, 31, 32, 63, 1, 33, 2, 34,
+ 3, 35, 4, 36, 5, 37, 6, 38,
+ 7, 39, 8, 40, 9, 41, 10, 42,
+ 11, 43, 12, 44, 13, 45, 14, 46},
+ (__m512i)(__v32hi){101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132}),
+ 1, 32, 101, 132, 2, 102, 3, 103,
+ 4, 104, 5, 105, 6, 106, 7, 107,
+ 8, 108, 9, 109, 10, 110, 11, 111,
+ 12, 112, 13, 113, 14, 114, 15, 115));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_permutex2var_epi16((__m512i)(__v32hi){-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32},
+ 0xAAAAAAAA,
+ (__m512i)(__v32hi){0, 31, 32, 63, 1, 33, 2, 34,
+ 3, 35, 4, 36, 5, 37, 6, 38,
+ 7, 39, 8, 40, 9, 41, 10, 42,
+ 11, 43, 12, 44, 13, 45, 14, 46},
+ (__m512i)(__v32hi){101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132}),
+ -1, -32, -3, 132, -5, 102, -7, 103,
+ -9, 104, -11, 105, -13, 106, -15, 107,
+ -17, 108, -19, 109, -21, 110, -23, 111,
+ -25, 112, -27, 113, -29, 114, -31, 115));
__m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_mask_broadcastw_epi16
@@ -2585,6 +3034,7 @@ __m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_broadcastw_epi16((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},(__mmask32)0xAAAAAAAA,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,2,-120,4,-120,6,-120,8,-120,10,-120,12,-120,14,-120,16,-120,18,-120,20,-120,22,-120,24,-120,26,-120,28,-120,30,-120));
__m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_maskz_broadcastw_epi16
@@ -2592,6 +3042,7 @@ __m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_broadcastw_epi16((__mmask32)0xAAAAAAAAu,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120));
__m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) {
// CHECK-LABEL: test_mm512_mask_set1_epi16
@@ -2630,6 +3081,7 @@ __m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_set1_epi16((__m512i)(__v32hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},(__mmask32)0xAAAAAAAA,-1),1,-1,3,-1,5,-1,7,-1,9,-1,11,-1,13,-1,15,-1,17,-1,19,-1,21,-1,23,-1,25,-1,27,-1,29,-1,31,-1));
__m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
// CHECK-LABEL: test_mm512_maskz_set1_epi16
@@ -2668,12 +3120,17 @@ __m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_set1_epi16((__mmask32)0xAAAAAAAA,42),0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42));
+
__m512i test_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.512
- return _mm512_permutexvar_epi16(__A, __B);
+ return _mm512_permutexvar_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_permutexvar_epi16((__m512i)(__v32hi){31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v32hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}), 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v32hi(_mm512_permutexvar_epi16((__m512i)(__v32hi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, (__m512i)(__v32hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131}), 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100));
+
__m512i test_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.512
@@ -2685,13 +3142,22 @@ __m512i test_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __
// CHECK-LABEL: test_mm512_mask_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_mask_permutexvar_epi16(__W, __M, __A, __B);
+ return _mm512_mask_permutexvar_epi16(__W, __M, __A, __B);
}
+
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_permutexvar_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xFFFFFFFF, (__m512i)(__v32hi){31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v32hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}), 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_permutexvar_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xAAAAAAAA, (__m512i)(__v32hi){31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v32hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}), 99, 40, 99, 38, 99, 36, 99, 34, 99, 32, 99, 30, 99, 28, 99, 26, 99, 24, 99, 22, 99, 20, 99, 18, 99, 16, 99, 14, 99, 12, 99, 10));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_permutexvar_epi16(0xFFFFFFFF, (__m512i)(__v32hi){31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v32hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}), 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_permutexvar_epi16(0xAAAAAAAA, (__m512i)(__v32hi){31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v32hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}), 0, 40, 0, 38, 0, 36, 0, 34, 0, 32, 0, 30, 0, 28, 0, 26, 0, 24, 0, 22, 0, 20, 0, 18, 0, 16, 0, 14, 0, 12, 0, 10));
+
__m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){
// CHECK-LABEL: test_mm512_alignr_epi8
// CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
return _mm512_alignr_epi8(__A, __B, 2);
}
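+// _mm512_alignr_epi8 concatenates each 128-bit lane of the two sources and shifts right by the immediate, so a shift of 16 returns the first source and shifts of 32 or more produce zero.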
+TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 1, 2, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 17, 18, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 33, 34, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 49, 50));
+TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 16), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64));
+TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){
// CHECK-LABEL: test_mm512_mask_alignr_epi8
@@ -2699,6 +3165,7 @@ __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m5
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_alignr_epi8(((__m512i)(__v64qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127));
__m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
// CHECK-LABEL: test_mm512_maskz_alignr_epi8
@@ -2706,6 +3173,7 @@ __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_alignr_epi8(__U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_alignr_epi8((__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
@@ -2741,6 +3209,13 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
return _mm512_movepi16_mask(__A);
}
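+// Only elements 2 and 31 have their sign bit set (0x8000), so the result mask is 0x80000004.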
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
// CHECK-LABEL: test_mm512_mask_cvtepi16_storeu_epi8
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 9c4ada3..d8647b5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -117,6 +117,10 @@ unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x01, 0xFE) == 0);
+
unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -130,6 +134,10 @@ unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x00, 0x00) == 0);
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x01, 0xFE) == 1);
+
unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -150,6 +158,30 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
+// Test constexpr handling.
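+// The wrapper below packs the result as (return value << 4) | all_ones: 0x10 means the OR of the masks is zero, 0x01 means it is all ones.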
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask8_u8(unsigned char A, unsigned char B) {
+ unsigned char all_ones{};
+ return (_kortest_mask8_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask8_u8() {
+ constexpr unsigned char A1 = 0x00;
+ constexpr unsigned char B1 = 0x00;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask8_u8(A1, B1) == expected_result_1);
+ constexpr unsigned char A2 = 0x00;
+ constexpr unsigned char B2 = 0x80;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask8_u8(A2, B2) == expected_result_2);
+ constexpr unsigned char A3 = 0x01;
+ constexpr unsigned char B3 = 0xFE;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask8_u8(A3, B3) == expected_result_3);
+}
+#endif
+
unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -160,6 +192,11 @@ unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x00, 0x80) == 1);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0xF0, 0x80) == 0);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x01, 0x01) == 0);
+
unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -170,6 +207,11 @@ unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0xF0, 0x80) == 1);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x01, 0x01) == 1);
+
unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -184,6 +226,34 @@ unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
+// Test constexpr handling.
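+// The expected values below encode that _ktest_mask8_u8 returns 1 iff (A & B) == 0 and sets the out-parameter iff (~A & B) == 0.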
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_ktest_mask8_u8(unsigned char A, unsigned char B) {
+ unsigned char all_ones{};
+ return (_ktest_mask8_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _ktest_mask8_u8() {
+ constexpr unsigned char A1 = 0x00;
+ constexpr unsigned char B1 = 0x00;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask8_u8(A1, B1) == expected_result_1);
+ constexpr unsigned char A2 = 0x00;
+ constexpr unsigned char B2 = 0x80;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask8_u8(A2, B2) == expected_result_2);
+ constexpr unsigned char A3 = 0xF0;
+ constexpr unsigned char B3 = 0x80;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask8_u8(A3, B3) == expected_result_3);
+ constexpr unsigned char A4 = 0x01;
+ constexpr unsigned char B4 = 0x01;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask8_u8(A4, B4) == expected_result_4);
+}
+#endif
+
unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -194,6 +264,11 @@ unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0000, 0x8000) == 1);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0xF000, 0x8000) == 0);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0123, 0x0123) == 0);
+
unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -204,6 +279,11 @@ unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0xF000, 0x8000) == 1);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0123, 0x0123) == 1);
+
unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -218,6 +298,34 @@ unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_ktest_mask16_u8(unsigned int A, unsigned int B) {
+ unsigned char all_ones{};
+ return (_ktest_mask16_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _ktest_mask16_u8() {
+ constexpr unsigned int A1 = 0x0000;
+ constexpr unsigned int B1 = 0x0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask16_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000;
+ constexpr unsigned int B2 = 0x8000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask16_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0xF000;
+ constexpr unsigned int B3 = 0x8000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask16_u8(A3, B3) == expected_result_3);
+ constexpr unsigned int A4 = 0x0123;
+ constexpr unsigned int B4 = 0x0123;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask16_u8(A4, B4) == expected_result_4);
+}
+#endif
+
__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kadd_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -256,6 +364,10 @@ __mmask8 test_kshiftli_mask8(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <8 x i1> zeroinitializer, <8 x i1> [[VAL]], <8 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
return _mm512_mask_cmpneq_epu64_mask(_kshiftli_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D);
}
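+// Shift counts of 8 or more clear the 8-bit mask entirely.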
+TEST_CONSTEXPR(_kshiftli_mask8(0x01, 1) == 0x02);
+TEST_CONSTEXPR(_kshiftli_mask8(0x01, 7) == 0x80);
+TEST_CONSTEXPR(_kshiftli_mask8(0x01, 8) == 0x00);
+TEST_CONSTEXPR(_kshiftli_mask8(0x0F, 2) == 0x3C);
__mmask8 test_kshiftri_mask8(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK-LABEL: test_kshiftri_mask8
@@ -263,6 +375,10 @@ __mmask8 test_kshiftri_mask8(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: [[RES:%.*]] = shufflevector <8 x i1> [[VAL]], <8 x i1> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
return _mm512_mask_cmpneq_epu64_mask(_kshiftri_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D);
}
+TEST_CONSTEXPR(_kshiftri_mask8(0x80, 1) == 0x40);
+TEST_CONSTEXPR(_kshiftri_mask8(0x80, 7) == 0x01);
+TEST_CONSTEXPR(_kshiftri_mask8(0x80, 8) == 0x00);
+TEST_CONSTEXPR(_kshiftri_mask8(0xF0, 2) == 0x3C);
unsigned int test_cvtmask8_u32(__m512i A, __m512i B) {
// CHECK-LABEL: test_cvtmask8_u32
@@ -270,12 +386,17 @@ unsigned int test_cvtmask8_u32(__m512i A, __m512i B) {
return _cvtmask8_u32(_mm512_cmpneq_epu64_mask(A, B));
}
+TEST_CONSTEXPR(_cvtmask8_u32((__mmask8)0x5A) == 0x5A);
+
__mmask8 test_cvtu32_mask8(__m512i A, __m512i B, unsigned int C) {
// CHECK-LABEL: test_cvtu32_mask8
// CHECK: trunc i32 %{{.*}} to i8
return _mm512_mask_cmpneq_epu64_mask(_cvtu32_mask8(C), A, B);
}
+TEST_CONSTEXPR(_cvtu32_mask8(0xB7) == (__mmask8)0xB7);
+TEST_CONSTEXPR(_cvtu32_mask8(_cvtmask8_u32((__mmask8)0xDE)) == (__mmask8)0xDE);
+
__mmask8 test_load_mask8(__mmask8 *A, __m512i B, __m512i C) {
// CHECK-LABEL: test_load_mask8
// CHECK: [[LOAD:%.*]] = load i8, ptr %{{.*}}
@@ -1251,6 +1372,8 @@ __mmask16 test_mm512_movepi32_mask(__m512i __A) {
return _mm512_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi32_mask(((__m512i)(__v16si){0, 1, -1, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, -1})) == (__mmask16)0x8004);
+
__m512i test_mm512_movm_epi32(__mmask16 __A) {
// CHECK-LABEL: test_mm512_movm_epi32
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -1271,6 +1394,8 @@ __mmask8 test_mm512_movepi64_mask(__m512i __A) {
return _mm512_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi64_mask(((__m512i)(__v8di){0, 1, -1, 3, 4, 5, 6, -1})) == (__mmask8)0x84);
+
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: test_mm512_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 6959937..ab047a8 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b)
return _mm512_maskz_alignr_epi64(u, a, b, 2);
}
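+// The valignd/valignq immediate is taken modulo the element count, so 19 behaves as 3 for 16 x i32 and 11 behaves as 3 for 8 x i64.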
+TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
+ 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
+ ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15}), 19),
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
+ 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}),
+ 0xA5A5,
+ ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
+ 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
+ ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15}), 19),
+ 3, 2000, 5, 4000, 5000, 8, 7000, 10,
+ 11, 10000, 13, 12000, 13000, 100, 15000, 300));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F,
+ ((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
+ 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
+ ((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15}), 19),
+ 3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
+ ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
+ 4, 5, 6, 7, 8, 10, 11, 12));
+TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
+ 0xA5,
+ ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
+ ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
+ 4, 2000, 6, 4000, 5000, 10, 7000, 12));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33,
+ ((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
+ ((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
+ 4, 5, 0, 0, 8, 10, 0, 0));
+
__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fmadd_round_pd
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
@@ -2806,6 +2840,37 @@ __mmask8 test_mm512_mask_cmp_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b)
return (__mmask8)_mm512_mask_cmp_epu64_mask(__u, __a, __b, 0);
}
+__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_pd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_blend_pd(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0},
+ (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
+));
+
+__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_ps
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_blend_ps(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_blend_ps(
+ (__mmask16)0x01,
+ (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
__m512i test_mm512_mask_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_mask_and_epi32
// CHECK: and <16 x i32>
@@ -3137,6 +3202,7 @@ __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_mul_epi32(__k,__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_maskz_mul_epi32((__mmask8)0b11110000, (__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0, 0, 0, 0, 250, 360, 490, 640));
__m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) {
//CHECK-LABEL: test_mm512_mask_mul_epi32
@@ -3148,6 +3214,7 @@ __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_mul_epi32(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mask_mul_epi32((__m512i){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}, (__mmask8)0b11110000, (__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 1000, 2000, 3000, 4000, 250, 360, 490, 640));
__m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mul_epu32
@@ -3166,6 +3233,7 @@ __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_mul_epu32(__k,__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_maskz_mul_epu32((__mmask8)0b11110000, (__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0, 0, 0, 0, 250, 360, 490, 640));
__m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) {
//CHECK-LABEL: test_mm512_mask_mul_epu32
@@ -3175,6 +3243,7 @@ __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_mul_epu32(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mask_mul_epu32((__m512i){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}, (__mmask8)0b11110000, (__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 1000, 2000, 3000, 4000, 250, 360, 490, 640));
__m512i test_mm512_maskz_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mullo_epi32
@@ -3237,12 +3306,16 @@ __m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d _
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_add_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_add_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}, (__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}), 1.0, 2.0, 3.0, 4.0, 550.0, 660.0, 770.0, 880.0));
+
__m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_add_pd
// CHECK: fadd <8 x double> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_add_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_add_pd((__mmask8)0b11110000, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}, (__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}), 0.0, 0.0, 0.0, 0.0, 550.0, 660.0, 770.0, 880.0));
+
__m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_add_round_ps
// CHECK: @llvm.x86.avx512.add.ps.512
@@ -3266,12 +3339,16 @@ __m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_add_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_add_ps((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__mmask16)0b1111111100000000, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}, (__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 990.0f, 1100.0f, 1210.0f, 1320.0f, 1430.0f, 1540.0f, 1650.0f, 1760.0f));
+
__m512 test_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_add_ps
// CHECK: fadd <16 x float> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_add_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_add_ps((__mmask16)0b1111111100000000, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}, (__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 990.0f, 1100.0f, 1210.0f, 1320.0f, 1430.0f, 1540.0f, 1650.0f, 1760.0f));
+
__m128 test_mm_add_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_add_round_ss
// CHECK: @llvm.x86.avx512.mask.add.ss.round
@@ -3302,6 +3379,8 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_add_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_add_ss((__m128)(__v4sf){10.0f, 100.0f, 200.0f, 300.0f}, 0x1,(__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f},(__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}),4.0f, 100.0f, 200.0f, 300.0f));
+
__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_add_ss
// CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
@@ -3317,6 +3396,8 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_add_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_add_ss(0x1, (__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}), 4.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_add_round_sd
// CHECK: @llvm.x86.avx512.mask.add.sd.round
@@ -3347,6 +3428,8 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_add_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_add_sd((__m128d)(__v2df){10.0, 999.0}, 0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 999.0));
+
__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_add_sd
// CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
@@ -3362,6 +3445,8 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_add_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_add_sd(0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 0.0));
+
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_sub_round_pd
// CHECK: @llvm.x86.avx512.sub.pd.512
@@ -3385,12 +3470,16 @@ __m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d _
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_sub_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_sub_pd((__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}, (__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 100.0, 200.0, 300.0, 400.0, -45.0, -54.0, -63.0, -72.0));
+
__m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_sub_pd
// CHECK: fsub <8 x double> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_sub_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_sub_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0.0, 0.0, 0.0, 0.0, -45.0, -54.0, -63.0, -72.0));
+
__m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_sub_round_ps
// CHECK: @llvm.x86.avx512.sub.ps.512
@@ -3414,12 +3503,16 @@ __m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_sub_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_sub_ps((__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}, (__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, -81.0f, -90.0f, -99.0f, -108.0f, -117.0f, -126.0f, -135.0f, -144.0f));
+
__m512 test_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_sub_ps
// CHECK: fsub <16 x float> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_sub_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_sub_ps((__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -81.0f, -90.0f, -99.0f, -108.0f, -117.0f, -126.0f, -135.0f, -144.0f));
+
__m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_sub_round_ss
// CHECK: @llvm.x86.avx512.mask.sub.ss.round
@@ -3450,6 +3543,8 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_sub_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_sub_ss((__m128)(__v4sf){-1.0f, 10.0f, 20.0f, 30.0f}, 0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 10.0f, 20.0f, 30.0f));
+
__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_sub_ss
// CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
@@ -3465,6 +3560,8 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_sub_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_sub_ss(0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_sub_round_sd
// CHECK: @llvm.x86.avx512.mask.sub.sd.round
@@ -3495,6 +3592,8 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_sub_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_sub_sd((__m128d)(__v2df){-1.0, 111.0}, 0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 111.0));
+
__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_sub_sd
// CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
@@ -3510,6 +3609,8 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_sub_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_sub_sd(0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 0.0));
+
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_mul_round_pd
// CHECK: @llvm.x86.avx512.mul.pd.512
@@ -3533,12 +3634,16 @@ __m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d _
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_mul_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_mul_pd((__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}, (__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 100.0, 200.0, 300.0, 400.0, 250.0, 360.0, 490.0, 640.0));
+
__m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_mul_pd
// CHECK: fmul <8 x double> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_mul_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_mul_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0.0, 0.0, 0.0, 0.0, 250.0, 360.0, 490.0, 640.0));
+
__m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_mul_round_ps
// CHECK: @llvm.x86.avx512.mul.ps.512
@@ -3562,12 +3667,16 @@ __m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_mul_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_mul_ps((__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}, (__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 810.0f, 1000.0f, 1210.0f, 1440.0f, 1690.0f, 1960.0f, 2250.0f, 2560.0f));
+
__m512 test_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_mul_ps
// CHECK: fmul <16 x float> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_mul_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_mul_ps((__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 810.0f, 1000.0f, 1210.0f, 1440.0f, 1690.0f, 1960.0f, 2250.0f, 2560.0f));
+
__m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mul_round_ss
// CHECK: @llvm.x86.avx512.mask.mul.ss.round
@@ -3598,6 +3707,8 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_mul_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_mul_ss((__m128)(__v4sf){42.0f, -1.0f, -2.0f, -3.0f}, 0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, -1.0f, -2.0f, -3.0f));
+
__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_mul_ss
// CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
@@ -3613,6 +3724,8 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_mul_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_mul_ss(0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mul_round_sd
// CHECK: @llvm.x86.avx512.mask.mul.sd.round
@@ -3643,6 +3756,8 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_mul_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_mul_sd((__m128d)(__v2df){123.0, -9.0}, 0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, -9.0));
+
__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_mul_sd
// CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
@@ -3658,6 +3773,8 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_mul_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_mul_sd(0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, 0.0));
+
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_div_round_pd
// CHECK: @llvm.x86.avx512.div.pd.512
@@ -3687,12 +3804,16 @@ __m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d _
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_div_pd(__w,__u,__a,__b);
}
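+// Each quotient below (e.g. 5.0/50.0) rounds to exactly the literal 0.1, so exact element matching is safe.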
+TEST_CONSTEXPR(match_m512d(_mm512_mask_div_pd((__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}, (__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 100.0, 200.0, 300.0, 400.0, 0.1, 0.1, 0.1, 0.1));
+
__m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_div_pd
// CHECK: fdiv <8 x double> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_div_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_div_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1, 0.1));
+
__m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_div_round_ps
// CHECK: @llvm.x86.avx512.div.ps.512
@@ -3722,12 +3843,16 @@ __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_div_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_div_ps((__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}, (__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f));
+
__m512 test_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_div_ps
// CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_div_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_div_ps((__mmask16)0b1111111100000000, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f, 100.0f, 110.0f, 120.0f, 130.0f, 140.0f, 150.0f, 160.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f));
+
__m128 test_mm_div_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_div_round_ss
// CHECK: @llvm.x86.avx512.mask.div.ss.round
@@ -3757,6 +3882,8 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_div_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_div_ss((__m128)(__v4sf){-7.0f, 5.0f, 6.0f, 7.0f}, 0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 5.0f, 6.0f, 7.0f));
+
__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_div_ss
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -3771,6 +3898,8 @@ __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_div_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_div_ss(0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_div_round_sd
// CHECK: @llvm.x86.avx512.mask.div.sd.round
@@ -3800,6 +3929,8 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_div_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_div_sd((__m128d)(__v2df){-8.0, 44.0}, 0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 44.0));
+
__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_div_sd
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -3814,6 +3945,8 @@ __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_div_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_div_sd(0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 0.0));
+
__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_max_round_ss
// CHECK: @llvm.x86.avx512.mask.max.ss.round
@@ -5448,6 +5581,7 @@ __m512d test_mm512_permute_pd(__m512d __X) {
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm512_permute_pd(__X, 2);
}
+TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0));
__m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
// CHECK-LABEL: test_mm512_mask_permute_pd
@@ -5455,6 +5589,13 @@ __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permute_pd(__W, __U, __X, 2);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd(
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b01010100,
+ ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ 2),
+ 0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0
+));
__m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
// CHECK-LABEL: test_mm512_maskz_permute_pd
@@ -5462,12 +5603,23 @@ __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permute_pd(__U, __X, 2);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd(
+ (__mmask8)0b01010100,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ 2),
+ 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0
+));
__m512 test_mm512_permute_ps(__m512 __X) {
// CHECK-LABEL: test_mm512_permute_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
return _mm512_permute_ps(__X, 2);
}
+TEST_CONSTEXPR(match_m512(_mm512_permute_ps(
+ ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+ 2),
+ 2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12
+));
__m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
// CHECK-LABEL: test_mm512_mask_permute_ps
@@ -5475,6 +5627,13 @@ __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permute_ps(__W, __U, __X, 2);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps(
+ ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+ (__mmask16)0b1010101010101010,
+ ((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}),
+ 2),
+ 0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28
+));
__m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
// CHECK-LABEL: test_mm512_maskz_permute_ps
@@ -5482,12 +5641,25 @@ __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permute_ps(__U, __X, 2);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps(
+ (__mmask16)0b1010101010101010,
+ ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
+ 2),
+ 0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12
+));
__m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_pd
// CHECK: @llvm.x86.avx512.vpermilvar.pd.512
return _mm512_permutevar_pd(__A, __C);
}
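+// vpermilpd picks within each 128-bit lane using bit 1 of the corresponding 64-bit index.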
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutevar_pd(
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+ ),
+ 0.0, 0.0, 3.0, 2.0, 4.0, 5.0, 7.0, 7.0
+));
__m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5495,6 +5667,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_permutevar_pd(
+ ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+ ),
+ 0.0, 9.0, 3.0, 11.0, 4.0, 13.0, 7.0, 15.0
+));
__m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5502,12 +5683,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_maskz_permutevar_pd(
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
+ ),
+ 0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 7.0, 0.0
+));
__m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_ps
// CHECK: @llvm.x86.avx512.vpermilvar.ps.512
return _mm512_permutevar_ps(__A, __C);
}
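+// vpermilps picks within each 128-bit lane using bits [1:0] of the corresponding 32-bit index.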
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutevar_ps(
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0, 10.0, 11.0, 8.0, 9.0, 12.0, 15.0, 13.0, 14.0
+));
__m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5515,6 +5711,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_permutevar_ps(
+ ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+ ),
+ 3.0, 17.0, 1.0, 19.0, 5.0, 21.0, 7.0, 23.0, 10.0, 25.0, 8.0, 27.0, 12.0, 29.0, 13.0, 31.0
+));
__m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5522,6 +5727,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_maskz_permutevar_ps(
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 5.0, 0.0, 7.0, 0.0, 10.0, 0.0, 8.0, 0.0, 12.0, 0.0, 13.0, 0.0
+));
__m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
// CHECK-LABEL: test_mm512_permutex2var_epi32
@@ -5607,6 +5820,56 @@ __m512i test_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_permutex2var_epi64(__U, __A, __I, __B);
}
+
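+// For permutex2var, the low bits of each index select the element and the next bit selects the first (0) or second (1) data operand.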
+TEST_CONSTEXPR(match_v16si(
+ _mm512_permutex2var_epi32(
+ (__m512i)(__v16si){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 150, 200, 350, 10, 210, 20, 220,
+ 30, 230, 40, 240, 50, 250, 60, 260));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_permutex2var_epi32(
+ (__m512i)(__v16si){-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16},
+ 0xAAAA,
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ -1, -16, -3, 350, -5, 210, -7, 220,
+ -9, 230, -11, 240, -13, 250, -15, 260));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_permutex2var_epi32(
+ 0x5555,
+ (__m512i)(__v16si){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 0, 200, 0, 10, 0, 20, 0,
+ 30, 0, 40, 0, 50, 0, 60, 0));
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutex2var_ps(
+ (__m512){1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ 1.f, 16.f, 101.f, 116.f, 2.f, 102.f, 3.f, 103.f,
+ 4.f, 104.f, 5.f, 105.f, 6.f, 106.f, 7.f, 107.f));
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutex2var_pd(
+ (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 1.0, 108.0, 1.0, 8.0, 2.0, 2.0, 3.0, 3.0));
+
__mmask16 test_mm512_testn_epi32_mask(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_testn_epi32_mask
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
@@ -6019,8 +6282,13 @@ __m512i test_mm512_maskz_srai_epi64_2(__mmask8 __U, __m512i __A, unsigned int __
__m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi32
// CHECK: @llvm.x86.avx512.psll.d.512
- return _mm512_sll_epi32(__A, __B);
+ return _mm512_sll_epi32(__A, __B);
}
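+// Only the low 64 bits of the count operand are used; counts of 32 or more zero every element.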
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
__m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi32
@@ -6033,14 +6301,20 @@ __m512i test_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi32
// CHECK: @llvm.x86.avx512.psll.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_sll_epi32(__U, __A, __B);
+ return _mm512_maskz_sll_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_sll_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99, 99, 18, 99, 22, 24, 99, 28, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_sll_epi32(0xA5A5, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14, 16, 0, 20, 0, 0, 26, 0, 30));
__m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi64
// CHECK: @llvm.x86.avx512.psll.q.512
- return _mm512_sll_epi64(__A, __B);
+ return _mm512_sll_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){65, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 1}), 0, 2, 4, 6, 8, 10, 12, 14));
__m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi64
@@ -6053,8 +6327,10 @@ __m512i test_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi64
// CHECK: @llvm.x86.avx512.psll.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_sll_epi64(__U, __A, __B);
+ return _mm512_maskz_sll_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_sll_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 99, 2, 99, 6, 8, 99, 12, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_sll_epi64(0xA5, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 0, 0, 4, 0, 0, 10, 0, 14));
__m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_sllv_epi32
@@ -6105,8 +6381,13 @@ TEST_CONSTEXPR(match_v8di(_mm512_maskz_sllv_epi64(0xE4, (__m512i)(__v8di){ 16, -
__m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi32
// CHECK: @llvm.x86.avx512.psra.d.512
- return _mm512_sra_epi32(__A, __B);
+ return _mm512_sra_epi32(__A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){0, 1, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 1, 1}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15));
__m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi32
@@ -6119,14 +6400,20 @@ __m512i test_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi32
// CHECK: @llvm.x86.avx512.psra.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_sra_epi32(__U, __A, __B);
+ return _mm512_maskz_sra_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_sra_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99, 99, -9, 99, -11, 12, 99, 14, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_sra_epi32(0xA5A5, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7, 8, 0, 10, 0, 0, -13, 0, -15));
__m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.512
- return _mm512_sra_epi64(__A, __B);
+ return _mm512_sra_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, -1, 2, -3, 4, -5, 6, -7));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){65, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 1}), 0, -1, 2, -3, 4, -5, 6, -7));
__m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi64
@@ -6139,8 +6426,10 @@ __m512i test_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_sra_epi64(__U, __A, __B);
+ return _mm512_maskz_sra_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_sra_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 99, -1, 99, -3, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_sra_epi64(0xA5, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 0, 2, 0, 0, -5, 0, -7));
__m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_srav_epi32
@@ -6191,8 +6480,13 @@ TEST_CONSTEXPR(match_v8di(_mm512_maskz_srav_epi64(0xE4, (__m512i)(__v8di){ 16, -
__m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi32
// CHECK: @llvm.x86.avx512.psrl.d.512
- return _mm512_srl_epi32(__A, __B);
+ return _mm512_srl_epi32(__A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2147483647, 2, 2147483645, 4, 2147483643, 6, 2147483641, 8, 2147483639, 10, 2147483637, 12, 2147483635, 14, 2147483633));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2147483647, 2, 2147483645, 4, 2147483643, 6, 2147483641, 8, 2147483639, 10, 2147483637, 12, 2147483635, 14, 2147483633));
__m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi32
@@ -6205,14 +6499,19 @@ __m512i test_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi32
// CHECK: @llvm.x86.avx512.psrl.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_srl_epi32(__U, __A, __B);
+ return _mm512_maskz_srl_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_srl_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2147483647, 99, 2147483645, 4, 99, 6, 99, 99, 2147483639, 99, 2147483637, 12, 99, 14, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_srl_epi32(0xA5A5, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, 2147483643, 0, 2147483641, 8, 0, 10, 0, 0, 2147483635, 0, 2147483633));
__m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi64
// CHECK: @llvm.x86.avx512.psrl.q.512
- return _mm512_srl_epi64(__A, __B);
+ return _mm512_srl_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 9223372036854775807, 2, 9223372036854775805, 4, 9223372036854775803, 6, 9223372036854775801));
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 1}), 0, 9223372036854775807, 2, 9223372036854775805, 4, 9223372036854775803, 6, 9223372036854775801));
__m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi64
@@ -6225,8 +6524,10 @@ __m512i test_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi64
// CHECK: @llvm.x86.avx512.psrl.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_srl_epi64(__U, __A, __B);
+ return _mm512_maskz_srl_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_srl_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 99, 9223372036854775807, 99, 9223372036854775805, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srl_epi64(0xA5, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 0, 2, 0, 0, 9223372036854775803, 0, 9223372036854775801));
__m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_srlv_epi32
@@ -6629,6 +6930,7 @@ __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
return _mm512_shuffle_f32x4(__A, __B, 4);
}
+TEST_CONSTEXPR(match_m512(_mm512_shuffle_f32x4(((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 13.0f, 14.0f, 15.0f, 16.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));
__m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_f32x4
@@ -6636,6 +6938,7 @@ __m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m5
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_shuffle_f32x4(((__m512){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f, 900.0f, 1000.0f, 1100.0f, 1200.0f, 1300.0f, 1400.0f, 1500.0f, 1600.0f}), 0b1111111111111110, ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 100.0f, 14.0f, 15.0f, 16.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));
__m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_f32x4
@@ -6643,12 +6946,14 @@ __m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_shuffle_f32x4(0b1111111111110111, ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0}), 0b11111111), 13.0f, 14.0f, 15.0f, 0.0f, 13.0f, 14.0f, 15.0f, 16.0f, 130.0, 140.0, 150.0, 160.0, 130.0, 140.0, 150.0, 160.0));
__m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_shuffle_f64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_f64x2(__A, __B, 4);
}
+TEST_CONSTEXPR(match_m512d(_mm512_shuffle_f64x2(((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 1.0, 2.0, 7.0, 8.0, 50.0, 60.0, 50.0, 60.0));
__m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_f64x2
@@ -6656,6 +6961,7 @@ __m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_shuffle_f64x2(((__m512d){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}), 0b11110000, ((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 100.0, 200.0, 300.0, 400.0, 50.0, 60.0, 50.0, 60.0));
__m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_f64x2
@@ -6663,12 +6969,15 @@ __m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_shuffle_f64x2(0b11110100, ((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m512d){10.0,20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 0b10101100), 0.0, 0.0, 7.0, 0.0, 50.0, 60.0, 50.0, 60.0));
__m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shuffle_i32x4
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
return _mm512_shuffle_i32x4(__A, __B, 4);
}
+TEST_CONSTEXPR(match_v16si(_mm512_shuffle_i32x4(((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 3, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 30, 40));
+
__m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_i32x4
@@ -6676,6 +6985,7 @@ __m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_i32x4(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}), 0b1111111111111011, ((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 300, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 30, 40));
__m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_i32x4
@@ -6683,12 +6993,14 @@ __m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_i32x4(0b1011111111111111, ((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m512i)(__v16si){10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160}), 0), 1, 2, 3, 4, 1, 2, 3, 4, 10, 20, 30, 40, 10, 20, 0, 40));
__m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shuffle_i64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_i64x2(__A, __B, 4);
}
+TEST_CONSTEXPR(match_m512i(_mm512_shuffle_i64x2(((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 6, 3, 4, 10, 20, 70, 80));
__m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_i64x2
@@ -6696,6 +7008,7 @@ __m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mask_shuffle_i64x2(((__m512i){100, 200, 300, 400, 500, 600, 700, 800}), 0b11111101, ((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 200, 3, 4, 10, 20, 70, 80));
__m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_i64x2
@@ -6703,6 +7016,7 @@ __m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4);
}
+TEST_CONSTEXPR(match_m512i(_mm512_maskz_shuffle_i64x2(0b00111101, ((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m512i){10, 20, 30, 40, 50, 60, 70, 80}), 0b11000110), 5, 0, 3, 4, 10, 20, 0, 0));
__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) {
// CHECK-LABEL: test_mm512_shuffle_pd
@@ -8824,23 +9138,32 @@ __m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) {
__m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
// CHECK-LABEL: test_mm512_permutexvar_pd
// CHECK: @llvm.x86.avx512.permvar.df.512
- return _mm512_permutexvar_pd(__X, __Y);
+ return _mm512_permutexvar_pd(__X, __Y);
}
+TEST_CONSTEXPR(match_m512d(_mm512_permutexvar_pd((__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m512d(_mm512_permutexvar_pd((__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0));
+
__m512d test_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) {
// CHECK-LABEL: test_mm512_mask_permutexvar_pd
// CHECK: @llvm.x86.avx512.permvar.df.512
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask_permutexvar_pd(__W, __U, __X, __Y);
+ return _mm512_mask_permutexvar_pd(__W, __U, __X, __Y);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_permutexvar_pd((__m512d){9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0}, 0xFF, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_permutexvar_pd((__m512d){9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0}, 0xAA, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 9.0, 6.0, 9.0, 4.0, 9.0, 2.0, 9.0, 0.0));
+
__m512d test_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
// CHECK-LABEL: test_mm512_maskz_permutexvar_pd
// CHECK: @llvm.x86.avx512.permvar.df.512
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_maskz_permutexvar_pd(__U, __X, __Y);
+ return _mm512_maskz_permutexvar_pd(__U, __X, __Y);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_permutexvar_pd(0xFF, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_permutexvar_pd(0xAA, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 0.0, 6.0, 0.0, 4.0, 0.0, 2.0, 0.0, 0.0));
+
__m512i test_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_maskz_permutexvar_epi64
// CHECK: @llvm.x86.avx512.permvar.di.512
@@ -8851,36 +9174,54 @@ __m512i test_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __
__m512i test_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_permutexvar_epi64
// CHECK: @llvm.x86.avx512.permvar.di.512
- return _mm512_permutexvar_epi64(__X, __Y);
+ return _mm512_permutexvar_epi64(__X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_permutexvar_epi64((__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8di(_mm512_permutexvar_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m512i)(__v8di){100, 101, 102, 103, 104, 105, 106, 107}), 100, 101, 102, 103, 104, 105, 106, 107));
+
__m512i test_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_permutexvar_epi64
// CHECK: @llvm.x86.avx512.permvar.di.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_mask_permutexvar_epi64(__W, __M, __X, __Y);
+ return _mm512_mask_permutexvar_epi64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_permutexvar_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0xFF, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8di(_mm512_mask_permutexvar_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0xAA, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 99, 16, 99, 14, 99, 12, 99, 10));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_permutexvar_epi64(0xFF, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_permutexvar_epi64(0xAA, (__m512i)(__v8di){7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 0, 16, 0, 14, 0, 12, 0, 10));
+
__m512 test_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
// CHECK-LABEL: test_mm512_permutexvar_ps
// CHECK: @llvm.x86.avx512.permvar.sf.512
- return _mm512_permutexvar_ps(__X, __Y);
+ return _mm512_permutexvar_ps(__X, __Y);
}
+TEST_CONSTEXPR(match_m512(_mm512_permutexvar_ps((__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m512(_mm512_permutexvar_ps((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f));
+
__m512 test_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
// CHECK-LABEL: test_mm512_mask_permutexvar_ps
// CHECK: @llvm.x86.avx512.permvar.sf.512
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask_permutexvar_ps(__W, __U, __X, __Y);
+ return _mm512_mask_permutexvar_ps(__W, __U, __X, __Y);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_permutexvar_ps((__m512){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xFFFF, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m512(_mm512_mask_permutexvar_ps((__m512){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xAAAA, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 99.0f, 14.0f, 99.0f, 12.0f, 99.0f, 10.0f, 99.0f, 8.0f, 99.0f, 6.0f, 99.0f, 4.0f, 99.0f, 2.0f, 99.0f, 0.0f));
+
__m512 test_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
// CHECK-LABEL: test_mm512_maskz_permutexvar_ps
// CHECK: @llvm.x86.avx512.permvar.sf.512
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_maskz_permutexvar_ps(__U, __X, __Y);
+ return _mm512_maskz_permutexvar_ps(__U, __X, __Y);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_permutexvar_ps(0xFFFF, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_permutexvar_ps(0xAAAA, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 0.0f, 14.0f, 0.0f, 12.0f, 0.0f, 10.0f, 0.0f, 8.0f, 0.0f, 6.0f, 0.0f, 4.0f, 0.0f, 2.0f, 0.0f, 0.0f));
+
__m512i test_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_maskz_permutexvar_epi32
// CHECK: @llvm.x86.avx512.permvar.si.512
@@ -8891,16 +9232,25 @@ __m512i test_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i _
__m512i test_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_permutexvar_epi32
// CHECK: @llvm.x86.avx512.permvar.si.512
- return _mm512_permutexvar_epi32(__X, __Y);
+ return _mm512_permutexvar_epi32(__X, __Y);
}
+TEST_CONSTEXPR(match_v16si(_mm512_permutexvar_epi32((__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v16si){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16si(_mm512_permutexvar_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m512i)(__v16si){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}), 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115));
+
__m512i test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_permutexvar_epi32
// CHECK: @llvm.x86.avx512.permvar.si.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y);
+ return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_permutexvar_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xFFFF, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v16si){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_permutexvar_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xAAAA, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v16si){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 99, 24, 99, 22, 99, 20, 99, 18, 99, 16, 99, 14, 99, 12, 99, 10));
+
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_permutexvar_epi32(0xFFFF, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v16si){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_permutexvar_epi32(0xAAAA, (__m512i)(__v16si){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m512i)(__v16si){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 0, 24, 0, 22, 0, 20, 0, 18, 0, 16, 0, 14, 0, 12, 0, 10));
+
__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kand
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -8965,6 +9315,10 @@ int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_mm512_kortestc(0x0000, 0x0000) == 0);
+TEST_CONSTEXPR(_mm512_kortestc(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_mm512_kortestc(0x0123, 0xFEDC) == 1);
+
int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_mm512_kortestz
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -8977,6 +9331,10 @@ int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_mm512_kortestz(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_mm512_kortestz(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_mm512_kortestz(0x0123, 0xFEDC) == 0);
+
unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -8990,6 +9348,10 @@ unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0123, 0xFEDC) == 0);
+
unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9003,6 +9365,10 @@ unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0000, 0x0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0123, 0xFEDC) == 1);
+
unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9023,6 +9389,30 @@ unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask16_u8(unsigned short A, unsigned short B) {
+ unsigned char all_ones{};
+ return (_kortest_mask16_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask16_u8() {
+ constexpr unsigned short A1 = 0x0000;
+ constexpr unsigned short B1 = 0x0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask16_u8(A1, B1) == expected_result_1);
+ constexpr unsigned short A2 = 0x0000;
+ constexpr unsigned short B2 = 0x8000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask16_u8(A2, B2) == expected_result_2);
+ constexpr unsigned short A3 = 0x0123;
+ constexpr unsigned short B3 = 0xFEDC;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask16_u8(A3, B3) == expected_result_3);
+}
+#endif
+
__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackb
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9036,6 +9426,12 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
__E, __F);
}
+TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
+TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
+TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
+TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
+TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
+
__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kxnor
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9176,6 +9572,10 @@ __mmask16 test_kshiftli_mask16(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: bitcast <16 x i1> {{.*}} to i16
return _mm512_mask_cmpneq_epu32_mask(_kshiftli_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D);
}
+TEST_CONSTEXPR(_kshiftli_mask16(0x0001, 1) == 0x0002);
+TEST_CONSTEXPR(_kshiftli_mask16(0x0001, 15) == 0x8000);
+TEST_CONSTEXPR(_kshiftli_mask16(0x0001, 16) == 0x0000);
+TEST_CONSTEXPR(_kshiftli_mask16(0x00FF, 4) == 0x0FF0);
__mmask16 test_kshiftri_mask16(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK-LABEL: test_kshiftri_mask16
@@ -9184,6 +9584,10 @@ __mmask16 test_kshiftri_mask16(__m512i A, __m512i B, __m512i C, __m512i D) {
// CHECK: bitcast <16 x i1> {{.*}} to i16
return _mm512_mask_cmpneq_epu32_mask(_kshiftri_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D);
}
+TEST_CONSTEXPR(_kshiftri_mask16(0x8000, 1) == 0x4000);
+TEST_CONSTEXPR(_kshiftri_mask16(0x8000, 15) == 0x0001);
+TEST_CONSTEXPR(_kshiftri_mask16(0x8000, 16) == 0x0000);
+TEST_CONSTEXPR(_kshiftri_mask16(0xFF00, 4) == 0x0FF0);
unsigned int test_cvtmask16_u32(__m512i A, __m512i B) {
// CHECK-LABEL: test_cvtmask16_u32
@@ -9193,6 +9597,8 @@ unsigned int test_cvtmask16_u32(__m512i A, __m512i B) {
return _cvtmask16_u32(_mm512_cmpneq_epu32_mask(A, B));
}
+TEST_CONSTEXPR(_cvtmask16_u32((__mmask16)0xBEEF) == 0xBEEF);
+
__mmask16 test_cvtu32_mask16(__m512i A, __m512i B, unsigned int C) {
// CHECK-LABEL: test_cvtu32_mask16
// CHECK: trunc i32 %{{.*}} to i16
@@ -9200,6 +9606,9 @@ __mmask16 test_cvtu32_mask16(__m512i A, __m512i B, unsigned int C) {
return _mm512_mask_cmpneq_epu32_mask(_cvtu32_mask16(C), A, B);
}
+TEST_CONSTEXPR(_cvtu32_mask16(0xCAFE) == (__mmask16)0xCAFE);
+TEST_CONSTEXPR(_cvtu32_mask16(_cvtmask16_u32((__mmask16)0x1357)) == (__mmask16)0x1357);
+
__mmask16 test_load_mask16(__mmask16 *A, __m512i B, __m512i C) {
// CHECK-LABEL: test_load_mask16
// CHECK: [[LOAD:%.*]] = load i16, ptr %{{.*}}{{$}}
@@ -10219,6 +10628,8 @@ __m256 test_mm512_cvtpd_ps (__m512d __A)
return _mm512_cvtpd_ps (__A);
}
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f));
+
__m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
// CHECK-LABEL: test_mm512_mask_cvtpd_ps
@@ -10226,6 +10637,8 @@ __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
return _mm512_mask_cvtpd_ps (__W,__U,__A);
}
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps((__m256){ 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f }, 0x05, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f));
+
__m512 test_mm512_cvtpd_pslo(__m512d __A)
{
// CHECK-LABEL: test_mm512_cvtpd_pslo
@@ -10235,6 +10648,8 @@ __m512 test_mm512_cvtpd_pslo(__m512d __A)
return _mm512_cvtpd_pslo(__A);
}
+TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
__m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_cvtpd_pslo
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
@@ -10243,6 +10658,8 @@ __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
return _mm512_mask_cvtpd_pslo(__W, __U, __A);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo((__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }, 0x3, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
__m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
// CHECK-LABEL: test_mm512_maskz_cvtpd_ps
@@ -11464,12 +11881,16 @@ __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
return _mm_mask_cvtsd_ss(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, 0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
__m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_cvtsd_ss
// CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round
return _mm_maskz_cvtsd_ss(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
__m512i test_mm512_setzero_epi32(void)
{
// CHECK-LABEL: test_mm512_setzero_epi32
@@ -11541,6 +11962,7 @@ __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
// CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0
return _mm_mask_move_ss ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_move_ss((__m128)(__v4sf){1.0f,2.0f,3.0f,4.0f}, (__mmask8)0x01, (__m128)(__v4sf){100.0f,200.0f,300.0f,400.0f}, (__m128)(__v4sf){9.0f,10.0f,11.0f,12.0f}), 9.0f,2.0f,3.0f,4.0f));
__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
@@ -11555,6 +11977,7 @@ __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
// CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0
return _mm_maskz_move_ss (__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_move_ss((__mmask8)0x01, (__m128)(__v4sf){0.0f,0.0f,0.0f,0.0f}, (__m128)(__v4sf){9.0f,10.0f,11.0f,12.0f}), 9.0f,0.0f,0.0f,0.0f));
__m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
@@ -11569,6 +11992,7 @@ __m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __
// CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0
return _mm_mask_move_sd ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_move_sd((__m128d)(__v2df){1.0,2.0}, (__mmask8)0x01, (__m128d)(__v2df){100.0,200.0}, (__m128d)(__v2df){9.0,10.0}), 9.0,2.0));
__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
@@ -11583,6 +12007,7 @@ __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0
return _mm_maskz_move_sd (__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_move_sd((__mmask8)0x01, (__m128d)(__v2df){0.0,0.0}, (__m128d)(__v2df){9.0,10.0}), 9.0,0.0));
void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A)
{
@@ -11753,3 +12178,73 @@ void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i _
// CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
_mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
}
+
+
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutex2var_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 1.0, 108.0, 1.0, 8.0, 2.0, 2.0, 3.0, 3.0));
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_permutex2var_pd((__m512d){-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0},
+ 0xAA,
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ -1.0, 108.0, -3.0, -8.0, -5.0, -2.0, -7.0, -3.0));
+TEST_CONSTEXPR(match_m512d(
+ _mm512_maskz_permutex2var_pd(0x55, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0));
+
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutex2var_ps((__m512){1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ 1.f, 16.f, 101.f, 116.f, 2.f, 102.f, 3.f, 103.f,
+ 4.f, 104.f, 5.f, 105.f, 6.f, 106.f, 7.f, 107.f));
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_permutex2var_ps((__m512){-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f,
+ -9.f, -10.f, -11.f, -12.f, -13.f, -14.f, -15.f, -16.f},
+ 0xAAAA,
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ -1.f, -16.f, -3.f, 116.f, -5.f, 102.f, -7.f, 103.f,
+ -9.f, 104.f, -11.f, 105.f, -13.f, 106.f, -15.f, 107.f));
+
+TEST_CONSTEXPR(match_v16si(
+ _mm512_permutex2var_epi32((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 16, 101, 116, 2, 102, 3, 103,
+ 4, 104, 5, 105, 6, 106, 7, 107));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_permutex2var_epi32(0x5555,
+ (__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 0, 101, 0, 2, 0, 3, 0,
+ 4, 0, 5, 0, 6, 0, 7, 0));
+
+TEST_CONSTEXPR(match_v8di(
+ _mm512_permutex2var_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512i)(__v8di){101, 102, 103, 104, 105, 106, 107, 108}),
+ 1, 108, 1, 8, 2, 2, 3, 3));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_permutex2var_epi64((__m512i)(__v8di){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512i)(__v8di){101, 102, 103, 104, 105, 106, 107, 108}),
+ -1, 108, -3, -8, -5, -2, -7, -3));
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained.c b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained.c
new file mode 100644
index 0000000..95403ae
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained.c
@@ -0,0 +1,85 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m128h test_mm_sqrt_sh(__m128h x, __m128h y) {
+ // COMMON-LABEL: test_mm_sqrt_sh
+ // UNCONSTRAINED: call {{.*}}half @llvm.sqrt.f16(half {{.*}})
+ // CONSTRAINED: call {{.*}}half @llvm.experimental.constrained.sqrt.f16(half {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtsh %xmm{{.*}},
+ return _mm_sqrt_sh(x, y);
+}
+
+__m128h test_mm_mask_sqrt_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B){
+ // COMMON-LABEL: test_mm_mask_sqrt_sh
+ // COMMONIR: extractelement <8 x half> %{{.*}}, i64 0
+ // UNCONSTRAINED: call {{.*}}half @llvm.sqrt.f16(half %{{.*}})
+ // CONSTRAINED: call {{.*}}half @llvm.experimental.constrained.sqrt.f16(half %{{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtsh %xmm{{.*}},
+ // COMMONIR-NEXT: extractelement <8 x half> %{{.*}}, i64 0
+ // COMMONIR-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // COMMONIR-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // COMMONIR-NEXT: select i1 {{.*}}, half {{.*}}, half {{.*}}
+ // COMMONIR-NEXT: insertelement <8 x half> %{{.*}}, half {{.*}}, i64 0
+ return _mm_mask_sqrt_sh(__W,__U,__A,__B);
+}
+
+__m128h test_mm_maskz_sqrt_sh(__mmask8 __U, __m128h __A, __m128h __B){
+ // COMMON-LABEL: test_mm_maskz_sqrt_sh
+ // COMMONIR: extractelement <2 x half> %{{.*}}, i64 0
+ // UNCONSTRAINED: call {{.*}}half @llvm.sqrt.f16(half %{{.*}})
+ // CONSTRAINED: call {{.*}}half @llvm.experimental.constrained.sqrt.f16(half %{{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtsh %xmm{{.*}},
+ // COMMONIR-NEXT: extractelement <2 x half> %{{.*}}, i64 0
+ // COMMONIR-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // COMMONIR-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // COMMONIR-NEXT: select i1 {{.*}}, half {{.*}}, half {{.*}}
+ // COMMONIR-NEXT: insertelement <2 x half> %{{.*}}, half {{.*}}, i64 0
+ return _mm_maskz_sqrt_sh(__U,__A,__B);
+}
+
+__m512h test_mm512_sqrt_ph(__m512h x) {
+ // COMMON-LABEL: test_mm512_sqrt_ph
+ // UNCONSTRAINED: call {{.*}}<32 x half> @llvm.sqrt.v32f16(<32 x half> {{.*}})
+ // CONSTRAINED: call {{.*}}<32 x half> @llvm.experimental.constrained.sqrt.v32f16(<32 x half> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtph %zmm{{.*}},
+ return _mm512_sqrt_ph(x);
+}
+
+__m512h test_mm512_mask_sqrt_ph (__m512h __W, __mmask32 __U, __m512h __A)
+{
+ // COMMON-LABEL: test_mm512_mask_sqrt_ph
+ // UNCONSTRAINED: call {{.*}}<32 x half> @llvm.sqrt.v32f16(<32 x half> %{{.*}})
+ // CONSTRAINED: call {{.*}}<32 x half> @llvm.experimental.constrained.sqrt.v32f16(<32 x half> %{{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtph %zmm{{.*}},
+ // COMMONIR: bitcast i32 %{{.*}} to <32 x i1>
+ // COMMONIR: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
+ return _mm512_mask_sqrt_ph (__W,__U,__A);
+}
+
+__m512h test_mm512_maskz_sqrt_ph (__mmask32 __U, __m512h __A)
+{
+ // COMMON-LABEL: test_mm512_maskz_sqrt_ph
+ // UNCONSTRAINED: call {{.*}}<32 x half> @llvm.sqrt.v32f16(<32 x half> %{{.*}})
+ // CONSTRAINED: call {{.*}}<32 x half> @llvm.experimental.constrained.sqrt.v32f16(<32 x half> %{{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtph %zmm{{.*}},
+ // COMMONIR: bitcast i32 %{{.*}} to <32 x i1>
+ // COMMONIR: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> {{.*}}
+ return _mm512_maskz_sqrt_ph (__U,__A);
+}
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index f0a0a3b..1c8ab8c 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -796,6 +796,8 @@ __m128h test_mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_add_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_add_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),110.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_add_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -810,6 +812,7 @@ __m128h test_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_add_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_add_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),110.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_add_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_add_sh
@@ -849,6 +852,8 @@ __m128h test_mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_sub_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_sub_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){20.0f,21.0f,22.0f,23.0f,24.0f,25.0f,26.0f,27.0f},(__m128h)(__v8hf){5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f}),15.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_sub_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -863,6 +868,7 @@ __m128h test_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_sub_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_sub_sh((__mmask8)0x01,(__m128h)(__v8hf){20.0f,21.0f,22.0f,23.0f,24.0f,25.0f,26.0f,27.0f},(__m128h)(__v8hf){5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f}),15.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_sub_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_sub_sh
@@ -902,6 +908,8 @@ __m128h test_mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_mul_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_mul_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f},(__m128h)(__v8hf){4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f}),12.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_mul_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -916,6 +924,7 @@ __m128h test_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_mul_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_mul_sh((__mmask8)0x01,(__m128h)(__v8hf){3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f},(__m128h)(__v8hf){4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f}),12.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_mul_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_mul_sh
@@ -955,6 +964,8 @@ __m128h test_mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_div_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_div_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f},(__m128h)(__v8hf){4.0f,3.0f,2.0f,1.0f,2.0f,3.0f,4.0f,5.0f}),2.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_div_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -969,6 +980,7 @@ __m128h test_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_div_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_div_sh((__mmask8)0x01,(__m128h)(__v8hf){8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f},(__m128h)(__v8hf){4.0f,3.0f,2.0f,1.0f,2.0f,3.0f,4.0f,5.0f}),2.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_div_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_div_sh
@@ -1622,6 +1634,7 @@ __m128h test_mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B
// CHECK-NEXT: insertelement <8 x half> [[VEC]], half [[SEL]], i64 0
return _mm_mask_move_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_move_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__mmask8)0x01, (__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f}, (__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}), 100.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128h test_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_move_sh
@@ -1635,6 +1648,7 @@ __m128h test_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: insertelement <8 x half> [[VEC]], half [[SEL]], i64 0
return _mm_maskz_move_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_move_sh((__mmask8)0x01, (__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f}, (__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}), 100.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
short test_mm_cvtsi128_si16(__m128i A) {
// CHECK-LABEL: test_mm_cvtsi128_si16
diff --git a/clang/test/CodeGen/X86/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
index c3b6298..854cc09 100644
--- a/clang/test/CodeGen/X86/avx512vbmi-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
@@ -3,8 +3,14 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m512i test_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
// CHECK-LABEL: test_mm512_mask2_permutex2var_epi8
@@ -33,6 +39,154 @@ __m512i test_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i _
return _mm512_maskz_permutex2var_epi8(__U, __A, __I, __B);
}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_permutex2var_epi8((__m512i)(__v64qu){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 0, 200, 1, 201, 2, 202, 3, 203,
+ 4, 204, 5, 205, 6, 206, 7, 207,
+ 8, 208, 9, 209, 10, 210, 11, 211,
+ 12, 212, 13, 213, 14, 214, 15, 215,
+ 16, 216, 17, 217, 18, 218, 19, 219,
+ 20, 220, 21, 221, 22, 222, 23, 223,
+ 24, 224, 25, 225, 26, 226, 27, 227,
+ 28, 228, 29, 229, 30, 230, 31, 231));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_permutex2var_epi8((__m512i)(__v64qu){
+ 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73},
+ 0xAAAAAAAAAAAAAAAAULL,
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 10, 200, 12, 201, 14, 202, 16, 203,
+ 18, 204, 20, 205, 22, 206, 24, 207,
+ 26, 208, 28, 209, 30, 210, 32, 211,
+ 34, 212, 36, 213, 38, 214, 40, 215,
+ 42, 216, 44, 217, 46, 218, 48, 219,
+ 50, 220, 52, 221, 54, 222, 56, 223,
+ 58, 224, 60, 225, 62, 226, 64, 227,
+ 66, 228, 68, 229, 70, 230, 72, 231));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_maskz_permutex2var_epi8(0x5555555555555555ULL,
+ (__m512i)(__v64qu){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 0, 0, 1, 0, 2, 0, 3, 0,
+ 4, 0, 5, 0, 6, 0, 7, 0,
+ 8, 0, 9, 0, 10, 0, 11, 0,
+ 12, 0, 13, 0, 14, 0, 15, 0,
+ 16, 0, 17, 0, 18, 0, 19, 0,
+ 20, 0, 21, 0, 22, 0, 23, 0,
+ 24, 0, 25, 0, 26, 0, 27, 0,
+ 28, 0, 29, 0, 30, 0, 31, 0));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask2_permutex2var_epi8((__m512i)(__v64qu){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ 0x5555555555555555ULL,
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95));
+
__m512i test_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_permutexvar_epi8
// CHECK: call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
@@ -53,22 +207,149 @@ __m512i test_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A
return _mm512_mask_permutexvar_epi8(__W, __M, __A, __B);
}
+__m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
+ // CHECK-LABEL: test_mm512_multishift_epi64_epi8
+ // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
+ return _mm512_multishift_epi64_epi8(__X, __Y);
+}
+
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_multishift_epi64_epi8(
+ (__m512i)(__v64qu){
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m512i)(__v64qu){
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78));
+
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_multishift_epi64_epi8(
+ (__m512i)(__v64qu){
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4},
+ (__m512i)(__v64qu){
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}),
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21));
+
__m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_multishift_epi64_epi8
// CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
- return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
+ return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_multishift_epi64_epi8(
+ (__m512i)(__v64qu){
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+ 0xAAAAAAAAAAAAAAAAULL,
+ (__m512i)(__v64qu){
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m512i)(__v64qu){
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+ 0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+ 0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18,
+ 0xFF, 0x22, 0xFF, 0x24, 0xFF, 0x26, 0xFF, 0x28,
+ 0xFF, 0x32, 0xFF, 0x34, 0xFF, 0x36, 0xFF, 0x38,
+ 0xFF, 0x42, 0xFF, 0x44, 0xFF, 0x46, 0xFF, 0x48,
+ 0xFF, 0x52, 0xFF, 0x54, 0xFF, 0x56, 0xFF, 0x58,
+ 0xFF, 0x62, 0xFF, 0x64, 0xFF, 0x66, 0xFF, 0x68,
+ 0xFF, 0x72, 0xFF, 0x74, 0xFF, 0x76, 0xFF, 0x78));
+
__m512i test_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_maskz_multishift_epi64_epi8
// CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
- return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y);
+ return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y);
}
-__m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
- // CHECK-LABEL: test_mm512_multishift_epi64_epi8
- // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
- return _mm512_multishift_epi64_epi8(__X, __Y);
-}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_maskz_multishift_epi64_epi8(
+ 0x5555555555555555ULL,
+ (__m512i)(__v64qu){
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m512i)(__v64qu){
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+ 0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+ 0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0,
+ 0x21, 0, 0x23, 0, 0x25, 0, 0x27, 0,
+ 0x31, 0, 0x33, 0, 0x35, 0, 0x37, 0,
+ 0x41, 0, 0x43, 0, 0x45, 0, 0x47, 0,
+ 0x51, 0, 0x53, 0, 0x55, 0, 0x57, 0,
+ 0x61, 0, 0x63, 0, 0x65, 0, 0x67, 0,
+ 0x71, 0, 0x73, 0, 0x75, 0, 0x77, 0));
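[Editor's note, not part of the patch] The expected vectors above encode the VPMULTISHIFTQB semantics exercised by the new constexpr tests: each destination byte is the unaligned 8-bit field of the corresponding 64-bit data lane that starts at the bit offset given by the matching control byte (taken modulo 64, wrapping around the lane). A minimal scalar reference model for one lane, written here only as an illustrative sketch, is:

#include <stdint.h>

/* Illustrative reference model for one 64-bit lane of VPMULTISHIFTQB
   (_mm512_multishift_epi64_epi8 and the 128/256-bit variants).
   'ctrl' holds the eight control bytes of the lane, 'data' the data
   qword; result byte i is the 8-bit field of 'data' starting at bit
   (control byte i & 63), with wrap-around (a rotate right).          */
uint64_t multishift_qword(uint64_t ctrl, uint64_t data) {
  uint64_t out = 0;
  for (int i = 0; i < 8; ++i) {
    unsigned shift = (ctrl >> (8 * i)) & 63;                /* control byte i */
    uint64_t rot = (data >> shift) | (shift ? data << (64 - shift) : 0);
    out |= (rot & 0xFF) << (8 * i);                         /* keep low 8 bits */
  }
  return out;
}

Applied per control byte, the first test above (controls 0, 8, ..., 56 over the qword 0x0807060504030201) yields bytes 0x01 through 0x08, and the second (control 4 over 0xFEDCBA9876543210) yields 0x21 in every byte, matching the expected vectors.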
diff --git a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
index c4d5fc8..76a11c3 100644
--- a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
+++ b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
@@ -3,8 +3,14 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_permutexvar_epi8
@@ -77,9 +83,29 @@ __m128i test_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
// CHECK-LABEL: test_mm_maskz_permutex2var_epi8
// CHECK: call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
- return _mm_maskz_permutex2var_epi8(__U, __A, __I, __B);
+ return _mm_maskz_permutex2var_epi8(__U, __A, __I, __B);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_permutex2var_epi8((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 101, 2, 102, 3, 103, 4, 104,
+ 5, 105, 6, 106, 7, 107, 8, 108));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_permutex2var_epi8((__m128i)(__v16qu){200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215},
+ 0xAAAA,
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 200, 101, 202, 102, 204, 103, 206, 104,
+ 208, 105, 210, 106, 212, 107, 214, 108));
+
__m256i test_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi8
// CHECK: call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
@@ -97,46 +123,164 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i _
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi8
// CHECK: call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
- return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B);
+ return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B);
+}
+
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_permutex2var_epi8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32},
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132}),
+ 1, 101, 2, 102, 3, 103, 4, 104,
+ 5, 105, 6, 106, 7, 107, 8, 108,
+ 9, 109, 10, 110, 11, 111, 12, 112,
+ 13, 113, 14, 114, 15, 115, 16, 116));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_permutex2var_epi8((__m256i)(__v32qu){200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231},
+ 0xAAAAAAAA,
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132}),
+ 200, 101, 202, 102, 204, 103, 206, 104,
+ 208, 105, 210, 106, 212, 107, 214, 108,
+ 216, 109, 218, 110, 220, 111, 222, 112,
+ 224, 113, 226, 114, 228, 115, 230, 116));
+
+__m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
+ // CHECK-LABEL: test_mm_multishift_epi64_epi8
+ // CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ return _mm_multishift_epi64_epi8(__X, __Y);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_multishift_epi64_epi8(
+ (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18));
+
__m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_multishift_epi64_epi8
// CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
- return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
+ return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_multishift_epi64_epi8(
+ (__m128i)(__v16qu){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+ 0xAAAA,
+ (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+ 0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+ 0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18));
+
__m128i test_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_maskz_multishift_epi64_epi8
// CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
- return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y);
+ return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y);
}
-__m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
- // CHECK-LABEL: test_mm_multishift_epi64_epi8
- // CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
- return _mm_multishift_epi64_epi8(__X, __Y);
+TEST_CONSTEXPR(match_v16qu(
+ _mm_maskz_multishift_epi64_epi8(
+ 0x5555,
+ (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+ 0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+ 0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0));
+
+__m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
+ // CHECK-LABEL: test_mm256_multishift_epi64_epi8
+ // CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
+ return _mm256_multishift_epi64_epi8(__X, __Y);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_multishift_epi64_epi8(
+ (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38));
+
__m256i test_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_mask_multishift_epi64_epi8
// CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
- return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
+ return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_multishift_epi64_epi8(
+ (__m256i)(__v32qu){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+ 0xAAAAAAAA,
+ (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+ 0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+ 0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18,
+ 0xFF, 0x22, 0xFF, 0x24, 0xFF, 0x26, 0xFF, 0x28,
+ 0xFF, 0x32, 0xFF, 0x34, 0xFF, 0x36, 0xFF, 0x38));
+
__m256i test_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_maskz_multishift_epi64_epi8
// CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
- return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y);
+ return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y);
}
-__m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
- // CHECK-LABEL: test_mm256_multishift_epi64_epi8
- // CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
- return _mm256_multishift_epi64_epi8(__X, __Y);
-}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_maskz_multishift_epi64_epi8(
+ 0x55555555,
+ (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 0, 8, 16, 24, 32, 40, 48, 56},
+ (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+ 0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+ 0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0,
+ 0x21, 0, 0x23, 0, 0x25, 0, 0x27, 0,
+ 0x31, 0, 0x33, 0, 0x35, 0, 0x37, 0));
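[Editor's note, not part of the patch] The constexpr expectations for _mm_permutex2var_epi8 and the wider forms above follow the two-table VPERMI2B selection: for each result byte, the low bits of the index byte pick a lane and the next bit picks between the first source and the second. A hedged sketch of the 16-byte case, for orientation only:

#include <stdint.h>

/* Illustrative sketch of the 128-bit VPERMI2B selection used by
   _mm_permutex2var_epi8: for each of the 16 result bytes, bits [3:0]
   of the index byte select a lane and bit 4 selects between table 'a'
   and table 'b' (higher index bits are ignored at this width).       */
void permutex2var_epi8_128(const uint8_t a[16], const uint8_t idx[16],
                           const uint8_t b[16], uint8_t dst[16]) {
  for (int i = 0; i < 16; ++i) {
    unsigned lane = idx[i] & 0x0F;
    dst[i] = (idx[i] & 0x10) ? b[lane] : a[lane];
  }
}

The masked forms then blend this result with a passthrough operand under the write mask; for the mask2 variant the index operand serves as the passthrough, which is what the masked expected vectors above encode.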
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 33c4397..013c19b 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -770,6 +770,7 @@ __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
//CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_mul_epi32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_m256i(_mm256_mask_mul_epi32((__m256i){100,200,300,400}, (__mmask8)0b00001100, (__m256i){1,2,3,4}, (__m256i){10,20,30,40}), 100,200,90,160));
__m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) {
//CHECK-LABEL: test_mm256_maskz_mul_epi32
@@ -781,7 +782,7 @@ __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) {
//CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_mul_epi32(__M, __X, __Y);
}
-
+TEST_CONSTEXPR(match_m256i(_mm256_maskz_mul_epi32((__mmask8)0b00001100, (__m256i){1,2,3,4}, (__m256i){10,20,30,40}), 0,0,90,160));
__m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y) {
@@ -794,6 +795,7 @@ __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
//CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_mul_epi32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_m128i(_mm_mask_mul_epi32((__m128i){100,200}, (__mmask8)0b00000001, (__m128i){1,2}, (__m128i){10,20}), 10,200));
__m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) {
//CHECK-LABEL: test_mm_maskz_mul_epi32
@@ -805,6 +807,7 @@ __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) {
//CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_mul_epi32(__M, __X, __Y);
}
+TEST_CONSTEXPR(match_m128i(_mm_maskz_mul_epi32((__mmask8)0b00000010, (__m128i){1,2}, (__m128i){10,20}), 0,40));
__m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y) {
@@ -815,6 +818,7 @@ __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
//CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_mul_epu32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_m256i(_mm256_mask_mul_epu32((__m256i){100,200,300,400}, (__mmask8)0b00001100, (__m256i){1,2,3,4}, (__m256i){10,20,30,40}), 100,200,90,160));
__m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) {
//CHECK-LABEL: test_mm256_maskz_mul_epu32
@@ -824,6 +828,7 @@ __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) {
//CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_mul_epu32(__M, __X, __Y);
}
+TEST_CONSTEXPR(match_m256i(_mm256_maskz_mul_epu32((__mmask8)0b00001100, (__m256i){1,2,3,4}, (__m256i){10,20,30,40}), 0,0,90,160));
__m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y) {
@@ -834,6 +839,7 @@ __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
//CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_mul_epu32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_m128i(_mm_mask_mul_epu32((__m128i){100,200}, (__mmask8)0b00000001, (__m128i){1,2}, (__m128i){10,20}), 10,200));
__m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) {
//CHECK-LABEL: test_mm_maskz_mul_epu32
@@ -843,6 +849,7 @@ __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) {
//CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_mul_epu32(__M, __X, __Y);
}
+TEST_CONSTEXPR(match_m128i(_mm_maskz_mul_epu32((__mmask8)0b00000010, (__m128i){1,2}, (__m128i){10,20}), 0,40));
__m128i test_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) {
//CHECK-LABEL: test_mm_maskz_mullo_epi32
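[Editor's note, not part of the patch] The masked arithmetic expectations added below all follow the same write-masking model: bit i of the mask chooses between the element-wise result and either the passthrough vector (_mask_ forms) or zero (_maskz_ forms). A compact sketch of that model, using hypothetical helper names and shown only for orientation:

#include <stddef.h>

/* Hedged sketch of AVX-512 write-masking as exercised by the
   TEST_CONSTEXPR cases below: merge_mask models the _mm*_mask_* forms
   (keep the passthrough element where the mask bit is clear) and
   zero_mask models the _mm*_maskz_* forms (zero that element instead).
   'op' stands for the per-element result (add, sub, mul, div, ...).  */
void merge_mask(double *dst, const double *passthru, const double *op,
                unsigned mask, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = (mask >> i) & 1 ? op[i] : passthru[i];
}

void zero_mask(double *dst, const double *op, unsigned mask, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = (mask >> i) & 1 ? op[i] : 0.0;
}

For example, _mm256_mask_add_pd with mask 0b00001100 keeps the passthrough 1.0 and 2.0 in elements 0 and 1 and takes the sums 330.0 and 440.0 in elements 2 and 3, exactly as the expected vector below encodes.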
@@ -3606,48 +3613,64 @@ __m128d test_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_add_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_add_pd((__m128d){1.0, 2.0}, (__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 110.0, 2.0));
+
__m128d test_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_add_pd
// CHECK: fadd <2 x double> %{{.*}}, %{{.*}}
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_add_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_add_pd((__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 110.0, 0.0));
+
__m256d test_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_add_pd
// CHECK: fadd <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_add_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_add_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 1.0, 2.0, 330.0, 440.0));
+
__m256d test_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_add_pd
// CHECK: fadd <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_add_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_add_pd((__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 0.0, 0.0, 330.0, 440.0));
+
__m128 test_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_add_ps
// CHECK: fadd <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_add_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_add_ps((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 1.0f, 220.0f, 3.0f, 440.0f));
+
__m128 test_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_add_ps
// CHECK: fadd <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_add_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_add_ps((__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 0.0f, 220.0f, 0.0f, 440.0f));
+
__m256 test_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_add_ps
// CHECK: fadd <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_add_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_add_ps((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}, (__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 550.0f, 660.0f, 770.0f, 880.0f));
+
__m256 test_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_add_ps
// CHECK: fadd <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_add_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_add_ps((__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 550.0f, 660.0f, 770.0f, 880.0f));
+
__m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi32
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
@@ -3701,21 +3724,6 @@ TEST_CONSTEXPR(match_m256d(
10.0, 2.0, 2.0, 2.0
));
-__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
- // CHECK-LABEL: test_mm512_mask_blend_pd
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask_blend_pd(__U, __A, __W);
-}
-
-TEST_CONSTEXPR(match_m512d(
- _mm512_mask_blend_pd(
- (__mmask8)0x01,
- (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0},
- (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}
- ),
- 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
-));
-
__m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
// CHECK-LABEL: test_mm_mask_blend_ps
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3744,23 +3752,6 @@ TEST_CONSTEXPR(match_m256(
10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
));
-__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) {
- // CHECK-LABEL: test_mm512_mask_blend_ps
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask_blend_ps(__U, __A, __W);
-}
-TEST_CONSTEXPR(match_m512(
- _mm512_mask_blend_ps(
- (__mmask16)0x01,
- (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
- 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
- (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
- 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
- ),
- 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
- 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
-));
-
__m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi64
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
@@ -4008,23 +3999,31 @@ __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps
return _mm_mask_cvtpd_ps(__W,__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0x3, (__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 9.0f, 9.0f));
__m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_maskz_cvtpd_ps
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps
return _mm_maskz_cvtpd_ps(__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, (__m128d){ -1.0, +2.0 }), -1.0f, 0.0f, 0.0f, 0.0f));
__m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_mask_cvtpd_ps
// CHECK: @llvm.x86.avx.cvt.pd2.ps.256
// CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
return _mm256_mask_cvtpd_ps(__W,__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0xF, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
__m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_maskz_cvtpd_ps
// CHECK: @llvm.x86.avx.cvt.pd2.ps.256
// CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
return _mm256_maskz_cvtpd_ps(__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, 0.0f, +2.0f, 0.0f));
__m128i test_mm_cvtpd_epu32(__m128d __A) {
// CHECK-LABEL: test_mm_cvtpd_epu32
// CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
@@ -4352,48 +4351,64 @@ __m128d test_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_div_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_div_pd((__m128d){1.0, 2.0}, (__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 0.1, 2.0));
+
__m128d test_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_div_pd
// CHECK: fdiv <2 x double> %{{.*}}, %{{.*}}
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_div_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_div_pd((__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 0.1, 0.0));
+
__m256d test_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_div_pd
// CHECK: fdiv <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_div_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_div_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 1.0, 2.0, 0.1, 0.1));
+
__m256d test_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_div_pd
// CHECK: fdiv <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_div_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_div_pd((__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 0.0, 0.0, 0.1, 0.1));
+
__m128 test_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_div_ps
// CHECK: fdiv <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_div_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_div_ps((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 1.0f, 0.1f, 3.0f, 0.1f));
+
__m128 test_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_div_ps
// CHECK: fdiv <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_div_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_div_ps((__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 0.0f, 0.1f, 0.0f, 0.1f));
+
__m256 test_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_div_ps
// CHECK: fdiv <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_div_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_div_ps((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}, (__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.1f, 0.1f, 0.1f));
+
__m256 test_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_div_ps
// CHECK: fdiv <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_div_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_div_ps((__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.1f, 0.1f, 0.1f));
+
__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_expand_pd
// CHECK: @llvm.x86.avx512.mask.expand
@@ -4716,48 +4731,64 @@ __m128d test_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_mul_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_mul_pd((__m128d){1.0, 2.0}, (__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 1000.0, 2.0));
+
__m128d test_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_mul_pd
// CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_mul_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_mul_pd((__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), 1000.0, 0.0));
+
__m256d test_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_mul_pd
// CHECK: fmul <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_mul_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_mul_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 1.0, 2.0, 9000.0, 16000.0));
+
__m256d test_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_mul_pd
// CHECK: fmul <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_mul_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_mul_pd((__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 0.0, 0.0, 9000.0, 16000.0));
+
__m128 test_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_mul_ps
// CHECK: fmul <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_mul_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_mul_ps((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 1.0f, 4000.0f, 3.0f, 16000.0f));
+
__m128 test_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_mul_ps
// CHECK: fmul <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_mul_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_mul_ps((__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 0.0f, 4000.0f, 0.0f, 16000.0f));
+
__m256 test_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_mul_ps
// CHECK: fmul <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_mul_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_mul_ps((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}, (__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 25000.0f, 36000.0f, 49000.0f, 64000.0f));
+
__m256 test_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_mul_ps
// CHECK: fmul <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_mul_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_mul_ps((__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 25000.0f, 36000.0f, 49000.0f, 64000.0f));
+
__m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_abs_epi32
// CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false)
@@ -5562,60 +5593,87 @@ __m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_sub_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_sub_pd((__m128d){1.0, 2.0}, (__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), -90.0, 2.0));
+
__m128d test_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_sub_pd
// CHECK: fsub <2 x double> %{{.*}}, %{{.*}}
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_sub_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_sub_pd((__mmask8)0b00000001, (__m128d){10.0, 20.0}, (__m128d){100.0, 200.0}), -90.0, 0.0));
+
__m256d test_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_sub_pd
// CHECK: fsub <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_sub_pd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_sub_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 1.0, 2.0, -270.0, -360.0));
+
__m256d test_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_sub_pd
// CHECK: fsub <4 x double> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_sub_pd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_sub_pd((__mmask8)0b00001100, (__m256d){10.0, 20.0, 30.0, 40.0}, (__m256d){100.0, 200.0, 300.0, 400.0}), 0.0, 0.0, -270.0, -360.0));
+
__m128 test_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_sub_ps
// CHECK: fsub <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_sub_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_sub_ps((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 1.0f, -180.0f, 3.0f, -360.0f));
+
__m128 test_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_sub_ps
// CHECK: fsub <4 x float> %{{.*}}, %{{.*}}
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_sub_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_sub_ps((__mmask8)0b00001010, (__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){100.0f, 200.0f, 300.0f, 400.0f}), 0.0f, -180.0f, 0.0f, -360.0f));
+
__m256 test_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_sub_ps
// CHECK: fsub <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_sub_ps(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_sub_ps((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}, (__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 1.0f, 2.0f, 3.0f, 4.0f, -450.0f, -540.0f, -630.0f, -720.0f));
+
__m256 test_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_sub_ps
// CHECK: fsub <8 x float> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_sub_ps(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_sub_ps((__mmask8)0b11110000, (__m256){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}, (__m256){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}), 0.0f, 0.0f, 0.0f, 0.0f, -450.0f, -540.0f, -630.0f, -720.0f));
+
__m128i test_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) {
// CHECK-LABEL: test_mm_mask2_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask2_permutex2var_epi32(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask2_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6}, 0x05,
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 10, 3, 100, 6));
__m256i test_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
// CHECK-LABEL: test_mm256_mask2_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask2_permutex2var_epi32(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask2_permutex2var_epi32((__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ 0xA5,
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 7, 100, 15, 1, 110, 2, 120));
__m128d test_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
// CHECK-LABEL: test_mm_mask2_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
@@ -5646,149 +5704,255 @@ __m128i test_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask2_permutex2var_epi64(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask2_permutex2var_epi64((__m128i)(__v2di){10, 20},
+ (__m128i)(__v2di){0, 5}, 0x1,
+ (__m128i)(__v2di){100, 200}),
+ 10, 5));
__m256i test_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
// CHECK-LABEL: test_mm256_mask2_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask2_permutex2var_epi64(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask2_permutex2var_epi64((__m256i)(__v4di){0, 10, 20, 30},
+ (__m256i)(__v4di){0, 1, 4, 5}, 0x5,
+ (__m256i)(__v4di){100, 110, 120, 130}),
+ 0, 1, 100, 5));
__m128i test_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
return _mm_permutex2var_epi32(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 10, 40, 100, 300));
__m128i test_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_permutex2var_epi32(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_permutex2var_epi32((__m128i)(__v4si){-1, -2, -3, -4}, 0x0A,
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ -1, -4, -3, 300));
__m128i test_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_permutex2var_epi32(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_permutex2var_epi32(0x0A, (__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 0, 40, 0, 300));
__m256i test_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
return _mm256_permutex2var_epi32(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_permutex2var_epi32((__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 70, 100, 170, 10, 110, 20, 120));
__m256i test_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_permutex2var_epi32(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_permutex2var_epi32((__m256i)(__v8si){-1, -2, -3, -4, -5, -6, -7, -8}, 0xAA,
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ -1, -8, -3, 170, -5, 110, -7, 120));
__m256i test_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_permutex2var_epi32(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_permutex2var_epi32(0xAA, (__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 70, 0, 170, 0, 110, 0, 120));
__m128d test_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
return _mm_permutex2var_pd(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_permutex2var_pd((__m128d){1.0, 2.0}, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ 1.0, 10.0));
__m128d test_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutex2var_pd(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_permutex2var_pd((__m128d){-1.0, -2.0}, 0x2, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ -1.0, 10.0));
__m128d test_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permutex2var_pd(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_maskz_permutex2var_pd(0x2, (__m128d){1.0, 2.0}, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ 0.0, 10.0));
__m256d test_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
return _mm256_permutex2var_pd(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_permutex2var_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ 1.0, 10.0, 2.0, 20.0));
__m256d test_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permutex2var_pd(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_permutex2var_pd((__m256d){-1.0, -2.0, -3.0, -4.0}, 0x2, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ -1.0, 10.0, -3.0, -4.0));
__m256d test_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permutex2var_pd(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_maskz_permutex2var_pd(0x2, (__m256d){1.0, 2.0, 3.0, 4.0}, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ 0.0, 10.0, 0.0, 0.0));
__m128 test_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
return _mm_permutex2var_ps(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_permutex2var_ps((__m128){1.f, 2.f, 3.f, 4.f}, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ 1.f, 4.f, 10.f, 30.f));
__m128 test_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permutex2var_ps(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_permutex2var_ps((__m128){-1.f, -2.f, -3.f, -4.f}, 0x0A, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ -1.f, -4.f, -3.f, 30.f));
__m128 test_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permutex2var_ps(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_maskz_permutex2var_ps(0x0A, (__m128){1.f, 2.f, 3.f, 4.f}, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ 0.f, 4.f, 0.f, 30.f));
__m256 test_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
return _mm256_permutex2var_ps(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_permutex2var_ps((__m256){0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ 0.f, 7.f, 10.f, 17.f, 1.f, 11.f, 2.f, 12.f));
__m256 test_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permutex2var_ps(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_permutex2var_ps((__m256){-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f}, 0xAA, (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10}, (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ -1.f, -8.f, -3.f, 17.f, -5.f, 11.f, -7.f, 12.f));
__m256 test_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permutex2var_ps(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_maskz_permutex2var_ps(0xAA, (__m256){0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}, (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10}, (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ 0.f, 7.f, 0.f, 17.f, 0.f, 11.f, 0.f, 12.f));
__m128i test_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
return _mm_permutex2var_epi64(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_permutex2var_epi64((__m128i)(__v2di){10, 20}, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ 10, 200));
__m128i test_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_permutex2var_epi64(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_permutex2var_epi64((__m128i)(__v2di){-1, -2}, 0x2, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ -1, 200));
__m128i test_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_permutex2var_epi64(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_permutex2var_epi64(0x2, (__m128i)(__v2di){10, 20}, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ 0, 200));
__m256i test_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
return _mm256_permutex2var_epi64(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_permutex2var_epi64((__m256i)(__v4di){0, 10, 20, 30}, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ 0, 10, 100, 110));
__m256i test_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_permutex2var_epi64(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_permutex2var_epi64((__m256i)(__v4di){-1, -2, -3, -4}, 0x5, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ -1, -2, 100, -4));
__m256i test_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_permutex2var_epi64(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_permutex2var_epi64(0x5, (__m256i)(__v4di){0, 10, 20, 30}, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ 0, 0, 100, 0));
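+// Constant-evaluation checks for vpermi2d: indices 0-3 select from the first source, 4-7 from the second; the mask form keeps the first source's element where the mask bit is clear.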
+TEST_CONSTEXPR(match_v4si(
+ _mm_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 10, 40, 100, 300));
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_permutex2var_epi32((__m128i)(__v4si){-1, -2, -3, -4}, 0x0A,
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ -1, -4, -3, 300));
__m128i test_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepi8_epi32
// CHECK: sext <4 x i8> %{{.*}} to <4 x i32>
@@ -6590,8 +6754,12 @@ __m256i test_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_srl_epi32
// CHECK: @llvm.x86.avx2.psrl.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_srl_epi32(__U, __A, __B);
+ return _mm256_maskz_srl_epi32(__U, __A, __B);
}
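+// Logical right shifts zero-fill the vacated bits; mask lanes keep the passthrough element, maskz lanes become zero.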
+TEST_CONSTEXPR(match_v4si(_mm_mask_srl_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 2147483647, 99, 2147483645, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_srl_epi32(0xA, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 0, 4));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_srl_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2147483647, 99, 2147483645, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_srl_epi32(0xA5, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, 2147483643, 0, 2147483641));
__m128i test_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srli_epi32
@@ -6675,8 +6843,12 @@ __m256i test_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_srl_epi64
// CHECK: @llvm.x86.avx2.psrl.q
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_srl_epi64(__U, __A, __B);
+ return _mm256_maskz_srl_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_srl_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 9223372036854775807, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_srl_epi64(0x2, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 0, 2));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_srl_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 99, 2, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_srl_epi64(0xA, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 9223372036854775807, 0, 9223372036854775805));
__m128i test_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srli_epi64
@@ -6761,8 +6933,12 @@ __m256i test_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sll_epi32
// CHECK: @llvm.x86.avx2.psll.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_sll_epi32(__U, __A, __B);
+ return _mm256_maskz_sll_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_sll_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 2, 99, 6, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_sll_epi32(0xA, (__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 4, 0, 8));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_sll_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_sll_epi32(0xA5, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14));
__m128i test_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi32
@@ -6851,8 +7027,12 @@ __m256i test_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sll_epi64
// CHECK: @llvm.x86.avx2.psll.q
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_sll_epi64(__U, __A, __B);
+ return _mm256_maskz_sll_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_sll_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 2, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_sll_epi64(0x2, (__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 0, 4));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_sll_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 99, 4, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_sll_epi64(0xA, (__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 2, 0, 6));
__m128i test_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi64
@@ -7834,6 +8014,13 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permute_pd(__W, __U, __X, 1);
}
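+// vpermilpd/vpermilps immediates select elements within each 128-bit lane; masked-off lanes keep the passthrough value (mask) or are zeroed (maskz).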
+TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd(
+ ((__m128d){0.0, 1.0}),
+ (__mmask8)0b10,
+ ((__m128d){2.0, 3.0}),
+ 1),
+ 0.0, 2.0
+));
__m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
// CHECK-LABEL: test_mm_maskz_permute_pd
@@ -7841,6 +8028,12 @@ __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permute_pd(__U, __X, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd(
+ (__mmask8)0b10,
+ ((__m128d){1.0, 2.0}),
+ 1),
+ 0.0, 1.0
+));
__m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
// CHECK-LABEL: test_mm256_mask_permute_pd
@@ -7848,6 +8041,13 @@ __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permute_pd(__W, __U, __X, 5);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd(
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ (__mmask8)0b1010,
+ ((__m256d){4.0, 5.0, 6.0, 7.0}),
+ 5),
+ 0.0, 4.0, 2.0, 6.0
+));
__m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
// CHECK-LABEL: test_mm256_maskz_permute_pd
@@ -7855,6 +8055,12 @@ __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permute_pd(__U, __X, 5);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd(
+ (__mmask8)0b1010,
+ ((__m256d){4.0, 5.0, 6.0, 7.0}),
+ 5),
+ 0.0, 4.0, 0.0, 6.0
+));
__m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
// CHECK-LABEL: test_mm_mask_permute_ps
@@ -7862,6 +8068,13 @@ __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permute_ps(__W, __U, __X, 0x1b);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps(
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ (__mmask8)0b1010,
+ ((__m128){4.0, 5.0, 6.0, 7.0}),
+ 0x1b),
+ 0.0, 6.0, 2.0, 4.0
+));
__m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
// CHECK-LABEL: test_mm_maskz_permute_ps
@@ -7869,6 +8082,13 @@ __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permute_ps(__U, __X, 0x1b);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps(
+ (__mmask8)0b1010,
+ ((__m128){4.0, 5.0, 6.0, 7.0}),
+ 0x1b),
+ 0.0, 6.0, 0.0, 4.0
+));
+
__m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
// CHECK-LABEL: test_mm256_mask_permute_ps
@@ -7876,6 +8096,13 @@ __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permute_ps(__W, __U, __X, 0x1b);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps(
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b10101010,
+ ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ 0x1b),
+ 0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0
+));
__m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
// CHECK-LABEL: test_mm256_maskz_permute_ps
@@ -7883,6 +8110,12 @@ __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permute_ps(__U, __X, 0x1b);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps(
+ (__mmask8)0b10101010,
+ ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ 0x1b),
+ 0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0
+));
__m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
// CHECK-LABEL: test_mm_mask_permutevar_pd
@@ -7890,6 +8123,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutevar_pd(__W, __U, __A, __C);
}
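+// Variable permutes read bit 1 of each 64-bit control element (pd) or the low 2 bits of each 32-bit control element (ps).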
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_permutevar_pd(
+ ((__m128d){3.0, 4.0}),
+ (__mmask8)0b01,
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 4.0
+));
__m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_permutevar_pd
@@ -7897,6 +8139,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_maskz_permutevar_pd(
+ (__mmask8)0b01,
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_permutevar_pd
@@ -7904,6 +8154,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_permutevar_pd(
+ ((__m256d){4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b0101,
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b00, 0b10})
+ ),
+ 1.0, 5.0, 2.0, 7.0
+));
__m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_permutevar_pd
@@ -7911,6 +8170,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_maskz_permutevar_pd(
+ (__mmask8)0b0101,
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b00, 0b10})
+ ),
+ 1.0, 0.0, 2.0, 0.0
+));
__m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
// CHECK-LABEL: test_mm_mask_permutevar_ps
@@ -7918,6 +8185,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_permutevar_ps(
+ ((__m128){4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b0101,
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+ ),
+ 3.0, 5.0, 1.0, 7.0
+));
__m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_permutevar_ps
@@ -7925,6 +8201,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_maskz_permutevar_ps(
+ (__mmask8)0b0101,
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
+ ),
+ 3.0, 0.0, 1.0, 0.0
+));
__m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_permutevar_ps
@@ -7932,6 +8216,15 @@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m25
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_permutevar_ps(
+ ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
+ ),
+ 3.0, 9.0, 1.0, 11.0, 4.0, 13.0, 5.0, 15.0
+));
__m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_permutevar_ps
@@ -7939,6 +8232,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_maskz_permutevar_ps(
+ (__mmask8)0b01010101,
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 5.0, 0.0
+));
__mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_test_epi32_mask
@@ -8201,8 +8502,12 @@ __m256i test_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sra_epi32
// CHECK: @llvm.x86.avx2.psra.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_sra_epi32(__U, __A, __B);
+ return _mm256_maskz_sra_epi32(__U, __A, __B);
}
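+// Arithmetic right shifts replicate the sign bit into the vacated positions.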
+TEST_CONSTEXPR(match_v4si(_mm_mask_sra_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), -1, 99, -3, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_sra_epi32(0xA, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 0, 4));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_sra_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_sra_epi32(0xA5, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7));
__m128i test_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srai_epi32
@@ -8265,8 +8570,10 @@ __m256i test_mm256_maskz_srai_epi32_2(__mmask8 __U, __m256i __A, unsigned int __
__m128i test_mm_sra_epi64(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.128
- return _mm_sra_epi64(__A, __B);
+ return _mm_sra_epi64(__A, __B);
}
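+// A shift count of 64 or more yields all copies of the sign bit (0 or -1).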
+TEST_CONSTEXPR(match_v2di(_mm_sra_epi64((__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), -1, 2));
+TEST_CONSTEXPR(match_v2di(_mm_sra_epi64((__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){64, 0}), -1, 0));
__m128i test_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_sra_epi64
@@ -8299,8 +8606,13 @@ __m256i test_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_sra_epi64(__U, __A, __B);
+ return _mm256_maskz_sra_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_sra_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), -1, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_sra_epi64(0x2, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 0, 2));
+TEST_CONSTEXPR(match_v4di(_mm256_sra_epi64((__m256i)(__v4di){-2, 4, -6, 8}, (__m128i)(__v2di){1, 0}), -1, 2, -3, 4));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_sra_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 99, 2, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_sra_epi64(0xA, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, -1, 0, -3));
__m128i test_mm_srai_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_srai_epi64
@@ -8801,6 +9113,7 @@ __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
return _mm256_shuffle_f32x4(__A, __B, 3);
}
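+// The immediate selects 128-bit halves: bit 0 picks the half of the first source for the low lane, bit 1 the half of the second source for the high lane.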
+TEST_CONSTEXPR(match_m256(_mm256_shuffle_f32x4(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m256){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 1), 5.0, 6.0, 7.0, 8.0, 10.0, 20.0, 30.0, 40.0));
__m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_shuffle_f32x4
@@ -8808,6 +9121,7 @@ __m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m25
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_shuffle_f32x4(__W, __U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_shuffle_f32x4(((__m256){100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0}), 0b10101010, ((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m256){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 1), 100.0, 6.0, 300.0, 8.0, 500.0, 20.0, 700.0, 40.0));
__m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_shuffle_f32x4
@@ -8815,12 +9129,14 @@ __m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_shuffle_f32x4(__U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_shuffle_f32x4(0b01010101, ((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m256){10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0}), 1), 5.0, 0.0, 7.0, 0.0, 10.0, 0.0, 30.0, 0.0));
__m256d test_mm256_shuffle_f64x2(__m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_shuffle_f64x2
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_shuffle_f64x2(__A, __B, 3);
}
+TEST_CONSTEXPR(match_m256d(_mm256_shuffle_f64x2(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 3), 3.0, 4.0, 30.0, 40.0));
__m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_shuffle_f64x2
@@ -8829,6 +9145,7 @@ __m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_shuffle_f64x2(__W, __U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_shuffle_f64x2(((__m256d){100.0, 200.0, 300.0, 400.0}), 0b00001111, ((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 3), 3.0, 4.0, 30.0, 40.0));
__m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_shuffle_f64x2
@@ -8837,12 +9154,14 @@ __m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_shuffle_f64x2(__U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_shuffle_f64x2(0b00001011, ((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 3), 3.0, 4.0, 0.0, 40.0));
__m256i test_mm256_shuffle_i32x4(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_shuffle_i32x4
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
return _mm256_shuffle_i32x4(__A, __B, 3);
}
+TEST_CONSTEXPR(match_v8si(_mm256_shuffle_i32x4(((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), ((__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80}), 0), 1, 2, 3, 4, 10, 20, 30, 40));
__m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_shuffle_i32x4
@@ -8850,6 +9169,7 @@ __m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, __m256i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_shuffle_i32x4(__W, __U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_i32x4(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}), 0b00000000, ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), ((__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80}), 0), 100, 200, 300, 400, 500, 600, 700, 800));
__m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_shuffle_i32x4
@@ -8857,12 +9177,14 @@ __m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_shuffle_i32x4(__U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_i32x4(0b11111111, ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), ((__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80}), 0), 1, 2, 3, 4, 10, 20, 30, 40));
__m256i test_mm256_shuffle_i64x2(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_shuffle_i64x2
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_shuffle_i64x2(__A, __B, 3);
}
+TEST_CONSTEXPR(match_m256i(_mm256_shuffle_i64x2(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), ((__m256i){10ULL, 20ULL, 30ULL, 40ULL}), 2), 1ULL, 2ULL, 30ULL, 40ULL));
__m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_shuffle_i64x2
@@ -8871,6 +9193,7 @@ __m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_shuffle_i64x2(__W, __U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256i(_mm256_mask_shuffle_i64x2(((__m256i){100ULL, 200ULL, 300ULL, 400ULL}), 0b00001101, ((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), ((__m256i){10ULL, 20ULL, 30ULL, 40ULL}), 2), 1ULL, 200ULL, 30ULL, 40ULL));
__m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_shuffle_i64x2
@@ -8879,6 +9202,7 @@ __m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_shuffle_i64x2(__U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_m256i(_mm256_maskz_shuffle_i64x2(0b00000110, ((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), ((__m256i){10ULL, 20ULL, 30ULL, 40ULL}), 2), 0ULL, 2ULL, 30ULL, 0ULL));
__m128d test_mm_mask_shuffle_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mask_shuffle_pd
@@ -10217,6 +10541,9 @@ __m256d test_mm256_permutexvar_pd(__m256i __X, __m256d __Y) {
return _mm256_permutexvar_pd(__X, __Y);
}
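+// vpermpd gathers Y[X[i] & 3] for each result element.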
+TEST_CONSTEXPR(match_m256d(_mm256_permutexvar_pd((__m256i)(__v4di){3, 2, 1, 0}, (__m256d){0.0, 1.0, 2.0, 3.0}), 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permutexvar_pd((__m256i)(__v4di){0, 0, 0, 0}, (__m256d){1.0, 2.0, 3.0, 4.0}), 1.0, 1.0, 1.0, 1.0));
+
__m256d test_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) {
// CHECK-LABEL: test_mm256_mask_permutexvar_pd
// CHECK: @llvm.x86.avx512.permvar.df.256
@@ -10231,6 +10558,11 @@ __m256d test_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
return _mm256_maskz_permutexvar_pd(__U, __X, __Y);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_permutexvar_pd((__m256d){9.0, 9.0, 9.0, 9.0}, 0xF, (__m256i)(__v4di){3, 2, 1, 0}, (__m256d){0.0, 1.0, 2.0, 3.0}), 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_permutexvar_pd((__m256d){9.0, 9.0, 9.0, 9.0}, 0xA, (__m256i)(__v4di){3, 2, 1, 0}, (__m256d){0.0, 1.0, 2.0, 3.0}), 9.0, 2.0, 9.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_permutexvar_pd(0xF, (__m256i)(__v4di){3, 2, 1, 0}, (__m256d){0.0, 1.0, 2.0, 3.0}), 3.0, 2.0, 1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_permutexvar_pd(0xA, (__m256i)(__v4di){3, 2, 1, 0}, (__m256d){0.0, 1.0, 2.0, 3.0}), 0.0, 2.0, 0.0, 0.0));
+
__m256i test_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_maskz_permutexvar_epi64
// CHECK: @llvm.x86.avx512.permvar.di.256
@@ -10245,6 +10577,11 @@ __m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X
return _mm256_mask_permutexvar_epi64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_permutexvar_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0xF, (__m256i)(__v4di){3, 2, 1, 0}, (__m256i)(__v4di){10, 11, 12, 13}), 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_permutexvar_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0xA, (__m256i)(__v4di){3, 2, 1, 0}, (__m256i)(__v4di){10, 11, 12, 13}), 99, 12, 99, 10));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_permutexvar_epi64(0xF, (__m256i)(__v4di){3, 2, 1, 0}, (__m256i)(__v4di){10, 11, 12, 13}), 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_permutexvar_epi64(0xA, (__m256i)(__v4di){3, 2, 1, 0}, (__m256i)(__v4di){10, 11, 12, 13}), 0, 12, 0, 10));
+
__m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
// CHECK-LABEL: test_mm256_mask_permutexvar_ps
// CHECK: @llvm.x86.avx2.permps
@@ -10264,6 +10601,19 @@ __m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) {
return _mm256_permutexvar_ps( __X, __Y);
}
+TEST_CONSTEXPR(match_m256(_mm256_permutexvar_ps(((__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}),
+ ((__m256){0.0f, 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f, 7.0f})),
+ 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_permutexvar_ps(((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256){1.0f, 2.0f, 3.0f, 4.0f,
+ 5.0f, 6.0f, 7.0f, 8.0f})),
+ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f));
+TEST_CONSTEXPR(match_m256(_mm256_mask_permutexvar_ps((__m256){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xFF, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_mask_permutexvar_ps((__m256){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xAA, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 99.0f, 6.0f, 99.0f, 4.0f, 99.0f, 2.0f, 99.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_permutexvar_ps(0xFF, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_permutexvar_ps(0xAA, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 0.0f, 6.0f, 0.0f, 4.0f, 0.0f, 2.0f, 0.0f, 0.0f));
+
__m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_maskz_permutexvar_epi32
// CHECK: @llvm.x86.avx2.permd
@@ -10284,6 +10634,17 @@ __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X
return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8si(_mm256_permutexvar_epi32(((__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}),
+ ((__m256i)(__v8si){10, 11, 12, 13, 14, 15, 16, 17})),
+ 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8si(_mm256_permutexvar_epi32(((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}),
+ ((__m256i)(__v8si){100, 101, 102, 103, 104, 105, 106, 107})),
+ 100, 101, 102, 103, 104, 105, 106, 107));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_permutexvar_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0xFF, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v8si){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_permutexvar_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0xAA, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v8si){10, 11, 12, 13, 14, 15, 16, 17}), 99, 16, 99, 14, 99, 12, 99, 10));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_permutexvar_epi32(0xFF, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v8si){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_permutexvar_epi32(0xAA, (__m256i)(__v8si){7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v8si){10, 11, 12, 13, 14, 15, 16, 17}), 0, 16, 0, 14, 0, 12, 0, 10));
+
__m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_alignr_epi32
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -10364,6 +10725,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
}
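+// valignd/valignq concatenate the sources (second operand in the low half) and shift right by the immediate, counted in elements; mask/maskz variants then blend with the passthrough or zero.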
+TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}),
+ ((__m128i)(__v4si){10, 20, 30, 40}), 1),
+ 20, 30, 40, 100));
+TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5,
+ ((__m128i)(__v4si){100, 200, 300, 400}),
+ ((__m128i)(__v4si){10, 20, 30, 40}), 1),
+ 20, 2000, 40, 4000));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3,
+ ((__m128i)(__v4si){100, 200, 300, 400}),
+ ((__m128i)(__v4si){10, 20, 30, 40}), 1),
+ 20, 30, 0, 0));
+
+TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+ ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
+ 4, 5, 6, 7, 8, 100, 200, 300));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
+ 0xA5,
+ ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+ ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
+ 4, 2000, 6, 4000, 5000, 100, 7000, 300));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33,
+ ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+ ((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
+ 4, 5, 0, 0, 8, 100, 0, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10));
+TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1,
+ ((__m128i)(__v2di){10, 11}),
+ ((__m128i)(__v2di){1, 2}), 1),
+ 2, 2000));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2,
+ ((__m128i)(__v2di){10, 11}),
+ ((__m128i)(__v2di){1, 2}), 1),
+ 0, 10));
+
+TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}),
+ ((__m256i)(__v4di){1, 2, 3, 4}), 2),
+ 3, 4, 10, 11));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5,
+ ((__m256i)(__v4di){10, 11, 12, 13}),
+ ((__m256i)(__v4di){1, 2, 3, 4}), 2),
+ 3, 2000, 10, 4000));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA,
+ ((__m256i)(__v4di){10, 11, 12, 13}),
+ ((__m256i)(__v4di){1, 2, 3, 4}), 2),
+ 0, 4, 0, 11));
+
__m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: test_mm_mask_movehdup_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -10472,6 +10880,17 @@ __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,0,0));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xAAu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 0,0,0,0, 0,4,0,4));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xFFu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,4,4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_permutex2var_epi32((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){101, 102, 103, 104, 105, 106, 107, 108}),
+ 1, 8, 101, 108, 2, 102, 3, 103));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_permutex2var_epi32((__m256i)(__v8si){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){101, 102, 103, 104, 105, 106, 107, 108}),
+ -1, -8, -3, 108, -5, 102, -7, 103));
__m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_mov_pd
diff --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
index 5e37b4d..a5adae0 100644
--- a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -2,8 +2,14 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+//
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bf16 -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cvtne2ps2bf16
@@ -151,46 +157,49 @@ __bf16 test_mm_cvtness_sbh(float A) {
__m128 test_mm_cvtpbh_ps(__m128bh A) {
// CHECK-LABEL: test_mm_cvtpbh_ps
- // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
- // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
return _mm_cvtpbh_ps(A);
}
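+// bf16 -> float widening is exact, and the inputs below are powers of two, so these constant-evaluation checks compare exactly; the 128-bit form converts only the low four elements.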
+TEST_CONSTEXPR(match_m128(_mm_cvtpbh_ps((__m128bh){-8.0f, 16.0f, -32.0f, 64.0f, -0.0f, 1.0f, -2.0f, 4.0f}), -8.0f, 16.0f, -32.0f, 64.0f));
__m256 test_mm256_cvtpbh_ps(__m128bh A) {
// CHECK-LABEL: test_mm256_cvtpbh_ps
- // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
return _mm256_cvtpbh_ps(A);
}
+TEST_CONSTEXPR(match_m256(_mm256_cvtpbh_ps((__m128bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f}), -0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f));
__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
// CHECK-LABEL: test_mm_maskz_cvtpbh_ps
- // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
- // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
- // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_cvtpbh_ps(M, A);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpbh_ps(0x01, (__m128bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f}), -0.0f, 0.0f, 0.0f, 0.0f));
__m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
// CHECK-LABEL: test_mm256_maskz_cvtpbh_ps
- // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- // CHECK: call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_cvtpbh_ps(M, A);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_cvtpbh_ps(0x73, (__m128bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f}), -0.0f, 1.0f, 0.0f, 0.0f, -8.0f, 16.0f, -32.0f, 0.0f));
__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
// CHECK-LABEL: test_mm_mask_cvtpbh_ps
- // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
- // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
- // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_cvtpbh_ps(S, M, A);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpbh_ps((__m128){ 99.0f, 99.0f, 99.0f, 99.0f }, 0x03, (__m128bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f}), -0.0f, 1.0f, 99.0f, 99.0f));
__m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
// CHECK-LABEL: test_mm256_mask_cvtpbh_ps
- // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
- // CHECK: call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %{{.*}}, i32 %{{.*}})
- // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+ // CHECK: fpext <8 x bfloat> %{{.*}} to <8 x float>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_cvtpbh_ps(S, M, A);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_cvtpbh_ps((__m256){ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f }, 0x37, (__m128bh){-0.0f, 1.0f, -2.0f, 4.0f, -8.0f, 16.0f, -32.0f, 64.0f}), -0.0f, 1.0f, -2.0f, 99.0f, -8.0f, 16.0f, 99.0f, 99.0f));
diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
index e0b55c6..d19d2c2 100644
--- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
@@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.256
return _mm256_bitshuffle_epi64_mask(__A, __B);
}
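+// vpshufbitqmb: result bit i tests the bit of source qword i/8 at the position given by the low 6 bits of control byte i.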
+TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask(
+ (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101);
+
+TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000,
+ (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000);
__mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask
@@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.128
return _mm_bitshuffle_epi64_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask(
+ (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+ (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101);
+
+TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00,
+ (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+ (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index febef46..f6f27d9 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -475,6 +475,7 @@ __mmask8 test_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
// CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
return (__mmask8)_mm_cmplt_epu16_mask(__a, __b);
}
+TEST_CONSTEXPR(_mm_cmplt_epu16_mask(((__m128i)(__v8hu){12351, 47995, 11802, 16970, 16956, 13965, 33529, 18928}), ((__m128i)(__v8hu){48792, 59915, 50576, 62643, 3758, 16415, 7966, 39475})) == (__mmask8)0xAF);
__mmask8 test_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
// CHECK-LABEL: test_mm_mask_cmplt_epu16_mask
@@ -941,56 +942,28 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_blend_epi8(__U,__A,__W);
}
-TEST_CONSTEXPR(match_v16qi(
- _mm_mask_blend_epi8(
- (__mmask16)0x0001,
- (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
- (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 }
- ),
- 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-));
+TEST_CONSTEXPR(match_v16qi(_mm_mask_blend_epi8((__mmask16)0x0001,(__m128i)(__v16qi){2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2},(__m128i)(__v16qi){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}),10,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2));
__m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_blend_epi8(__U,__A,__W);
}
-TEST_CONSTEXPR(match_v32qi(
- _mm256_mask_blend_epi8(
- (__mmask32) 0x00000001,
- (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
- (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
- ),
- 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-));
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_blend_epi8((__mmask32)0x00000001,(__m256i)(__v32qi){2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2},(__m256i)(__v32qi){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}),10,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2));
__m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_blend_epi16(__U,__A,__W);
}
-TEST_CONSTEXPR(match_v8hi(
- _mm_mask_blend_epi16(
- (__mmask8)0x01,
- (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2},
- (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 }
- ),
- 10, 2, 2, 2, 2, 2, 2, 2
-));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_blend_epi16((__mmask8)0x01,(__m128i)(__v8hi){2,2,2,2,2,2,2,2},(__m128i)(__v8hi){10,11,12,13,14,15,16,17}),10,2,2,2,2,2,2,2));
__m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_blend_epi16(__U,__A,__W);
}
-TEST_CONSTEXPR(match_v16hi(
- _mm256_mask_blend_epi16(
- (__mmask16)0x0001,
- (__m256i)(__v16hi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
- (__m256i)(__v16hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 }
- ),
- 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_blend_epi16((__mmask16)0x0001,(__m256i)(__v16hi){2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2},(__m256i)(__v16hi){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}),10,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2));
__m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_abs_epi8
@@ -1078,48 +1051,63 @@ __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_packs_epi32(__M,__A,__B);
}
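+// packssdw/packsswb saturate to the narrower signed range; within each 128-bit lane the first source's elements are packed ahead of the second's.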
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_packs_epi32((__mmask8)0xAA,(__m128i)(__v4si){40000,-50000,65535,-65536},(__m128i)(__v4si){0,50000,40000,-40000}),0,-32768,0,-32768,0,32767,0,-32768));
+
__m128i test_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packs_epi32
// CHECK: @llvm.x86.sse2.packssdw
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_packs_epi32((__m128i)(__v8hi){1,2,3,4,29,30,31,32},(__mmask8)0xAA,(__m128i)(__v4si){40000,-50000,65535,-65536},(__m128i)(__v4si){0,50000,40000,-40000}),1,-32768,3,-32768,29,32767,31,-32768));
+
__m256i test_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packs_epi32
// CHECK: @llvm.x86.avx2.packssdw
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_packs_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_packs_epi32((__mmask16)0xAAAA,(__m256i)(__v8si){40000,-50000,32767,-32768,32768,-32769,65535,-65536},(__m256i)(__v8si){0,1,-1,65536,22222,-22222,40000,-40000}),0,-32768,0,-32768,0,1,0,32767,0,-32768,0,-32768,0,-22222,0,-32768));
+
__m256i test_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packs_epi32
// CHECK: @llvm.x86.avx2.packssdw
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_packs_epi32((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xAAAA,(__m256i)(__v8si){40000,-50000,32767,-32768,32768,-32769,65535,-65536},(__m256i)(__v8si){0,1,-1,65536,22222,-22222,40000,-40000}),1,-32768,3,-32768,5,1,7,32767,25,-32768,27,-32768,29,-22222,31,-32768));
+
__m128i test_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packs_epi16
// CHECK: @llvm.x86.sse2.packsswb
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_packs_epi16((__mmask16)0xAAAA,(__m128i)(__v8hi){130,-200,127,-128,255,-255,127,-128},(__m128i)(__v8hi){0,1,-1,255,-128,90,-90,-32768}),0,-128,0,-128,0,-128,0,-128,0,1,0,127,0,90,0,-128));
+
__m128i test_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packs_epi16
// CHECK: @llvm.x86.sse2.packsswb
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_packs_epi16((__m128i)(__v16qi){1,2,3,4,5,6,7,8,57,58,59,60,61,62,63,64},(__mmask16)0xAAAA,(__m128i)(__v8hi){130,-200,127,-128,255,-255,127,-128},(__m128i)(__v8hi){0,1,-1,255,-128,90,-90,-32768}),1,-128,3,-128,5,-128,7,-128,57,1,59,127,61,90,63,-128));
+
__m256i test_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packs_epi16
// CHECK: @llvm.x86.avx2.packsswb
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_packs_epi16((__mmask32)0xAAAAAAAA,(__m256i)(__v16hi){130,-200,127,-128,300,-1000,42,-42,500,-500,7,-7,255,-255,127,-128},(__m256i)(__v16hi){0,1,-1,255,-129,128,20000,-32768,0,1,-1,127,-128,90,-90,-32768}),0,-128,0,-128,0,-128,0,-42,0,1,0,127,0,127,0,-128,0,-128,0,-7,0,-128,0,-128,0,1,0,127,0,90,0,-128));
+
__m256i test_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packs_epi16
// CHECK: @llvm.x86.avx2.packsswb
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_packs_epi16((__m256i)(__v32qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v16hi){130,-200,127,-128,300,-1000,42,-42,500,-500,7,-7,255,-255,127,-128},(__m256i)(__v16hi){0,1,-1,255,-129,128,20000,-32768,0,1,-1,127,-128,90,-90,-32768}),1,-128,3,-128,5,-128,7,-42,9,1,11,127,13,127,15,-128,49,-128,51,-7,53,-128,55,-128,57,1,59,127,61,90,63,-128));
__m128i test_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packus_epi32
@@ -1127,6 +1115,7 @@ __m128i test_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_packus_epi32(__W,__M,__A,__B);
}
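+// packusdw/packuswb saturate to the unsigned range of the narrower type, so negative inputs clamp to zero.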
+TEST_CONSTEXPR(match_v8hu(_mm_mask_packus_epi32((__m128i)(__v8hu){1,2,3,4,5,6,7,8},(__mmask8)0xAA,(__m128i)(__v4si){40000,-50000,32767,-32768},(__m128i)(__v4si){0,1,-1,65536}),1,0,3,0,5,1,7,65535));
__m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packus_epi32
@@ -1134,6 +1123,7 @@ __m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_maskz_packus_epi32((__mmask8)0xAA,(__m128i)(__v4si){40000,-50000,32767,-32768},(__m128i)(__v4si){0,1,-1,65536}),0,0,0,0,0,1,0,65535));
__m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packus_epi32
@@ -1141,6 +1131,7 @@ __m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_packus_epi32((__mmask16)0xAAAA,(__m256i)(__v8si){40000,-50000,32767,-32768,32768,-32769,22222,-22222},(__m256i)(__v8si){0,1,-1,65536,40000,-40000,65535,0}),0,0,0,0,0,1,0,-1,0,0,0,0,0,0,0,0));
__m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packus_epi32
@@ -1148,6 +1139,7 @@ __m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_packus_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_packus_epi32((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xAAAA,(__m256i)(__v8si){40000,-50000,32767,-32768,32768,-32769,22222,-22222},(__m256i)(__v8si){0,1,-1,65536,40000,-40000,65535,0}),1,0,3,0,5,1,7,-1,25,0,27,0,29,0,31,0));
__m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packus_epi16
@@ -1155,6 +1147,7 @@ __m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_maskz_packus_epi16((__mmask16)0xAAAA,(__m128i)(__v8hi){-1,0,1,127,128,255,256,-200},(__m128i)(__v8hi){0,1,-1,255,-129,128,20000,-32768}),0,0,0,127,0,255,0,0,0,1,0,255,0,128,0,0));
__m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packus_epi16
@@ -1162,6 +1155,7 @@ __m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m12
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_mask_packus_epi16((__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},(__mmask16)0xAAAA,(__m128i)(__v8hi){-1,0,1,127,128,255,256,-200},(__m128i)(__v8hi){0,1,-1,255,-129,128,20000,-32768}),1,0,3,127,5,255,7,0,9,1,11,255,13,128,15,0));
__m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packus_epi16
@@ -1169,6 +1163,7 @@ __m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_packus_epi16((__mmask32)0xAAAAAAAA,(__m256i)(__v16hi){-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129},(__m256i)(__v16hi){0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90}),0,0,0,127,0,-1,0,0,0,1,0,-1,0,-128,0,0,0,42,0,-1,0,0,0,0,0,0,0,0,0,0,0,0));
__m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packus_epi16
@@ -1176,6 +1171,7 @@ __m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_packus_epi16((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v16hi){-1,0,1,127,128,255,256,-200,300,42,-42,500,20000,-32768,129,-129},(__m256i)(__v16hi){0,1,-1,255,-129,128,20000,-32768,32767,-32767,127,-128,30000,-30000,90,-90}),1,0,3,127,5,-1,7,0,9,1,11,-1,13,-128,15,0,49,42,51,-1,53,0,55,0,57,0,59,0,61,0,63,0));
__m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epi8
@@ -1183,48 +1179,64 @@ __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_adds_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,57,58,59,60,61,62,63,64},(__mmask16)0xAAAA,(__m128i)(__v16qs){0,+1,-2,+3,-4,+5,-6,+7,-100,-50,+100,-20,-80,+120,-120,-20},(__m128i)(__v16qs){0,+1,-2,+3,-4,+5,-6,+7,+50,+80,-50,+110,+60,120,+20,-120}),1,+2,3,+6,5,+10,7,+14,57,+30,59,+90,61,+127,63,-128));
+
__m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v16i8
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_adds_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_adds_epi8((__mmask16)0xAAAA,(__m128i)(__v16qs){0,+1,-2,+3,-4,+5,-6,+7,-100,-50,+100,-20,-80,+120,-120,-20},(__m128i)(__v16qs){0,+1,-2,+3,-4,+5,-6,+7,+50,+80,-50,+110,+60,120,+20,-120}),0,+2,0,+6,0,+10,0,+14,0,+30,0,+90,0,+127,0,-128));
+
__m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epi8
// CHECK: @llvm.sadd.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_adds_epi8((__m256i)(__v32qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v32qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,+100,+50,-100,+20,+80,-50,+120,-20,-100,-50,+100,-20,-80,+50,-120,+20},(__m256i)(__v32qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,+50,+80,-50,+110,+60,-30,+20,-10,+50,+80,-50,+110,+60,-30,+20,-10}),1,+2,3,+6,5,+10,7,+14,9,+18,11,+22,13,+26,15,+30,49,+127,51,+127,53,-80,+55,-30,57,+30,59,+90,61,+20,63,+10));
+
__m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_adds_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_adds_epi8((__mmask32)0xAAAAAAAA,(__m256i)(__v32qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,+100,+50,-100,+20,+80,-50,+120,-20,-100,-50,+100,-20,-80,+50,-120,+20},(__m256i)(__v32qs){0,+1,-2,+3,-4,+5,-6,+7,-8,+9,-10,+11,-12,+13,-14,+15,+50,+80,-50,+110,+60,-30,+20,-10,+50,+80,-50,+110,+60,-30,+20,-10}),0,+2,0,+6,0,+10,0,+14,0,+18,0,+22,0,+26,0,+30,0,+127,0,+127,0,-80,0,-30,0,+30,0,+90,0,+20,0,+10));
+
__m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epi16
// CHECK: @llvm.sadd.sat.v8i16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_adds_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_adds_epi16((__m128i)(__v8hi){9,10,11,12,13,14,15,16,},(__mmask8)0xAA,(__m128i)(__v8hi){-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m128i)(__v8hi){-24,+25,-26,+27,+800,-800,-800,+800}),9,+50,11,+54,13,-32768,15,+32767));
+
__m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v8i16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_adds_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_adds_epi16((__mmask8)0xAA,(__m128i)(__v8hi){-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m128i)(__v8hi){-24,+25,-26,+27,+800,-800,-800,+800}),0,+50,0,+54,0,-32768,0,+32767));
+
__m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epi16
// CHECK: @llvm.sadd.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_adds_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_adds_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,},(__mmask16)0xAAAA,(__m256i)(__v16hi){0,+1,-2,+3,-4,+5,-6,+7,-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m256i)(__v16hi){0,+1,-2,+3,-4,+5,-6,+7,-24,+25,-26,+27,+800,-800,-800,+800}),1,+2,3,+6,5,+10,7,+14,9,+50,11,+54,13,-32768,15,+32767));
+
__m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_adds_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_adds_epi16((__mmask16)0xAAAA,(__m256i)(__v16hi){0,+1,-2,+3,-4,+5,-6,+7,-24,+25,-26,+27,+32000,-32000,+32000,+32000},(__m256i)(__v16hi){0,+1,-2,+3,-4,+5,-6,+7,-24,+25,-26,+27,+800,-800,-800,+800}),0,+2,0,+6,0,+10,0,+14,0,+50,0,+54,0,-32768,0,+32767));
+
__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epu8
// CHECK-NOT: @llvm.x86.sse2.paddus.b
@@ -1232,6 +1244,8 @@ __m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_adds_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_mask_adds_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,57,58,59,60,61,62,63,64},(__mmask16)0xAAAA,(__m128i)(__v16qu){0,0,0,0,0,0,0,0,+255,+255,+255,+255,+255,+255,+255,+255},(__m128i)(__v16qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),1,+63,3,+127,5,+191,7,+255,57,+255,59,+255,61,+255,63,+255));
+
__m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epu8
// CHECK-NOT: @llvm.x86.sse2.paddus.b
@@ -1239,6 +1253,8 @@ __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_maskz_adds_epu8((__mmask16)0xAAAA,(__m128i)(__v16qu){0,0,0,0,0,0,0,0,+255,+255,+255,+255,+255,+255,+255,+255},(__m128i)(__v16qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),0,+63,0,+127,0,+191,0,+255,0,+255,0,+255,0,+255,0,+255));
+
__m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epu8
// CHECK-NOT: @llvm.x86.avx2.paddus.b
@@ -1246,6 +1262,8 @@ __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_adds_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_mask_adds_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v32qu){0,0,0,0,0,0,0,0,+63,+63,+63,+63,+63,+63,+63,+63,+192,+192,+192,+192,+192,+192,+192,+192,+255,+255,+255,+255,+255,+255,+255,+255},(__m256i)(__v32qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),1,+63,3,+127,5,+191,7,+255,9,+126,11,+190,13,+254,15,+255,49,+255,51,+255,53,+255,55,+255,57,+255,59,+255,61,+255,63,+255));
+
__m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epu8
// CHECK-NOT: @llvm.x86.avx2.paddus.b
@@ -1253,6 +1271,8 @@ __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_maskz_adds_epu8((__mmask32)0xAAAAAAAA,(__m256i)(__v32qu){0,0,0,0,0,0,0,0,+63,+63,+63,+63,+63,+63,+63,+63,+192,+192,+192,+192,+192,+192,+192,+192,+255,+255,+255,+255,+255,+255,+255,+255},(__m256i)(__v32qu){0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255,0,+63,+64,+127,+128,+191,+192,+255}),0,+63,0,+127,0,+191,0,+255,0,+126,0,+190,0,+254,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255,0,+255));
+
__m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epu16
// CHECK-NOT: @llvm.x86.sse2.paddus.w
@@ -1260,6 +1280,8 @@ __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_adds_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_mask_adds_epu16((__m128i)(__v8hu){25,26,27,28,29,30,31,32},(__mmask8)0xAA,(__m128i)(__v8hu){+16384,+16384,+16384,+16384,+49152,+49152,+49152,+49152},(__m128i)(__v8hu){0,+16384,+32767,+32768,+32767,+32768,+49152,+65535}),25,+32768,27,+49152,29,+65535,31,+65535));
+
__m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.sse2.paddus.w
@@ -1267,6 +1289,8 @@ __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_maskz_adds_epu16((__mmask8)0xAA,(__m128i)(__v8hu){+16384,+16384,+16384,+16384,+49152,+49152,+49152,+49152},(__m128i)(__v8hu){0,+16384,+32767,+32768,+32767,+32768,+49152,+65535}),0,+32768,0,+49152,0,+65535,0,+65535));
+
__m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epu16
// CHECK-NOT: @llvm.x86.avx2.paddus.w
@@ -1274,6 +1298,8 @@ __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m2
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_adds_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hu(_mm256_mask_adds_epu16((__m256i)(__v16hu){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xAAAA,(__m256i)(__v16hu){0,0,0,0,+16384,+16384,+16384,+16384,+49152,+49152,+49152,+49152,+65535,+65535,+65535,+65535},(__m256i)(__v16hu){0,+32767,+32768,+65535,0,+16384,+32767,+32768,+32767,+32768,+49152,+65535,0,+32767,+32768,+65535}),1,+32767,3,+65535,5,+32768,7,+49152,25,+65535,27,+65535,29,+65535,31,+65535));
+
__m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.avx2.paddus.w
@@ -1281,6 +1307,8 @@ __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hu(_mm256_maskz_adds_epu16((__mmask16)0xAAAA,(__m256i)(__v16hu){0,0,0,0,+16384,+16384,+16384,+16384,+49152,+49152,+49152,+49152,+65535,+65535,+65535,+65535},(__m256i)(__v16hu){0,+32767,+32768,+65535,0,+16384,+32767,+32768,+32767,+32768,+49152,+65535,0,+32767,+32768,+65535}),0,+32767,0,+65535,0,+32768,0,+49152,0,+65535,0,+65535,0,+65535,0,+65535));
+
__m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_avg_epu8
// CHECK: @llvm.x86.sse2.pavg.b
@@ -1740,48 +1768,64 @@ __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_subs_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,57,58,59,60,61,62,63,64},(__mmask16)0xAAAA,(__m128i)(__v16qs){1,-100,3,4,5,-6,7,100,57,-100,59,60,61,-62,63,100},(__m128i)(__v16qs){1,100,3,4,5,6,7,-100,57,100,59,60,61,62,63,-100}),1,-128,3,0,5,-12,7,127,57,-128,59,0,61,-124,63,127));
+
__m128i test_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v16i8
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_subs_epi8((__mmask16)0xAAAA,(__m128i)(__v16qs){1,-100,3,4,5,-6,7,100,57,-100,59,60,61,-62,63,100},(__m128i)(__v16qs){1,100,3,4,5,6,7,-100,57,100,59,60,61,62,63,-100}),0,-128,0,0,0,-12,0,127,0,-128,0,0,0,-124,0,127));
+
__m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epi8
// CHECK: @llvm.ssub.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_subs_epi8((__m256i)(__v32qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v32qs){1,-100,3,4,5,-6,7,100,9,-100,11,12,13,-14,15,100,49,-100,51,52,53,-54,55,100,57,-100,59,60,61,-62,63,100},(__m256i)(__v32qs){1,100,3,4,5,6,7,-100,9,100,11,12,13,14,15,-100,49,100,51,52,53,54,55,-100,57,100,59,60,61,62,63,-100}),1,-128,3,0,5,-12,7,127,9,-128,11,0,13,-28,15,127,49,-128,51,0,53,-108,55,127,57,-128,59,0,61,-124,63,127));
+
__m256i test_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_subs_epi8((__mmask32)0xAAAAAAAA,(__m256i)(__v32qs){1,-100,3,4,5,-6,7,100,9,-100,11,12,13,-14,15,100,49,-100,51,52,53,-54,55,100,57,-100,59,60,61,-62,63,100},(__m256i)(__v32qs){1,100,3,4,5,6,7,-100,9,100,11,12,13,14,15,-100,49,100,51,52,53,54,55,-100,57,100,59,60,61,62,63,-100}),0,-128,0,0,0,-12,0,127,0,-128,0,0,0,-28,0,127,0,-128,0,0,0,-108,0,127,0,-128,0,0,0,-124,0,127));
+
__m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v8i16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_subs_epi16((__m128i)(__v8hi){1,2,3,4,29,30,31,32},(__mmask8)0xAA,(__m128i)(__v8hi){1,-30000,3,30000,29,-30,31,32},(__m128i)(__v8hi){1,30000,3,-30000,29,30,31,-32}),1,-32768,3,32767,29,-60,31,64));
+
__m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v8i16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_subs_epi16((__mmask8)0xAA,(__m128i)(__v8hi){1,-30000,3,30000,29,-30,31,32},(__m128i)(__v8hi){1,30000,3,-30000,29,30,31,-32}),0,-32768,0,32767,0,-60,0,64));
+
__m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_subs_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xAAAA,(__m256i)(__v16hi){1,-30000,3,30000,5,-6,7,8,25,-30000,27,30000,29,-30,31,32},(__m256i)(__v16hi){1,30000,3,-30000,5,6,7,-8,25,30000,27,-30000,29,30,31,-32}),1,-32768,3,32767,5,-12,7,16,25,-32768,27,32767,29,-60,31,64));
+
__m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_subs_epi16((__mmask16)0xAAAA,(__m256i)(__v16hi){1,-30000,3,30000,5,-6,7,8,25,-30000,27,30000,29,-30,31,32},(__m256i)(__v16hi){1,30000,3,-30000,5,6,7,-8,25,30000,27,-30000,29,30,31,-32}),0,-32768,0,32767,0,-12,0,16,0,-32768,0,32767,0,-60,0,64));
+
__m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epu8
// CHECK-NOT: @llvm.x86.sse2.psubus.b
@@ -1789,6 +1833,8 @@ __m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_subs_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_mask_subs_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,57,58,59,60,61,62,63,64},(__mmask16)0xAAAA,(__m128i)(__v16qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m128i)(__v16qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),1,200,3,0,5,0,7,254,57,0,59,1,61,1,63,0));
+
__m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epu8
// CHECK-NOT: @llvm.x86.sse2.psubus.b
@@ -1796,6 +1842,8 @@ __m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_maskz_subs_epu8((__mmask16)0xAAAA,(__m128i)(__v16qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m128i)(__v16qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0));
+
__m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epu8
// CHECK-NOT: @llvm.x86.avx2.psubus.b
@@ -1803,6 +1851,8 @@ __m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_subs_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_mask_subs_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(__m256i)(__v32qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m256i)(__v32qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),1,200,3,0,5,0,7,254,9,0,11,1,13,1,15,0,49,200,51,0,53,0,55,254,57,0,59,1,61,1,63,0));
+
__m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epu8
// CHECK-NOT: @llvm.x86.avx2.psubus.b
@@ -1810,6 +1860,8 @@ __m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_maskz_subs_epu8((__mmask32)0xAAAAAAAA,(__m256i)(__v32qu){0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255,0,250,0,128,0,20,0,255,0,0,0,1,0,100,0,255},(__m256i)(__v32qu){0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255,0,50,0,128,0,30,0,1,0,1,0,0,0,99,0,255}),0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0,0,200,0,0,0,0,0,254,0,0,0,1,0,1,0,0));
+
__m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epu16
// CHECK-NOT: @llvm.x86.sse2.psubus.w
@@ -1817,6 +1869,8 @@ __m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_subs_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_mask_subs_epu16((__m128i)(__v8hu){101,102,103,104,129,130,131,132},(__mmask8)0xAAu,(__m128i)(__v8hu){0,65000,0,40000,0,1,0,50000},(__m128i)(__v8hu){0,5000,0,60000,0,0,0,25000}),101,60000,103,0,129,1,131,25000));
+
__m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.sse2.psubus.w
@@ -1824,6 +1878,8 @@ __m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_subs_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_maskz_subs_epu16((__mmask8)0xAAu,(__m128i)(__v8hu){0,65000,0,40000,0,1,0,50000},(__m128i)(__v8hu){0,5000,0,60000,0,0,0,25000}),0,60000,0,0,0,1,0,25000));
+
__m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epu16
// CHECK-NOT: @llvm.x86.avx2.psubus.w
@@ -1831,6 +1887,8 @@ __m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m2
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_subs_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16hu(_mm256_mask_subs_epu16((__m256i)(__v16hu){101,102,103,104,105,106,107,108,125,126,127,128,129,130,131,132},(__mmask16)0xAAAAu,(__m256i)(__v16hu){0,65000,0,40000,0,100,0,65535,0,0,0,1000,0,1,0,50000},(__m256i)(__v16hu){0,5000,0,40000,0,200,0,1,0,1,0,65535,0,0,0,25000}),101,60000,103,0,105,0,107,65534,125,0,127,0,129,1,131,25000));
+
__m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.avx2.psubus.w
@@ -1838,7 +1896,7 @@ __m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_subs_epu16(__U,__A,__B);
}
-
+TEST_CONSTEXPR(match_v16hu(_mm256_maskz_subs_epu16((__mmask16)0xAAAAu,(__m256i)(__v16hu){0,65000,0,40000,0,100,10,65535,0,0,0,1000,0,1,10000,50000},(__m256i)(__v16hu){0,5000,0,40000,0,200,0,1,0,1,0,65535,0,0,0,25000}),0,60000,0,0,0,0,0,65534,0,0,0,0,0,1,0,25000));
__m128i test_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) {
// CHECK-LABEL: test_mm_mask2_permutex2var_epi16
@@ -1887,6 +1945,67 @@ __m256i test_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_permutex2var_epi16(__U,__A,__I,__B);
}
+
+TEST_CONSTEXPR(match_v8hi(
+ _mm_permutex2var_epi16((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150, 160,
+ 170}),
+ 0, 70, 100, 170, 10, 110, 20, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask_permutex2var_epi16((__m128i)(__v8hi){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ -1, -8, -3, 170, -5, 110, -7, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_maskz_permutex2var_epi16(0xAA,
+ (__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ 0, 70, 0, 170, 0, 110, 0, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask2_permutex2var_epi16((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ 0x55,
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ 0, 7, 100, 15, 10, 9, 20, 10));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_permutex2var_epi16(
+ (__m256i)(__v16hi){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 150, 200, 350, 10, 210, 20, 220,
+ 30, 230, 40, 240, 50, 250, 60, 260));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_mask_permutex2var_epi16(
+ (__m256i)(__v16hi){-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16},
+ 0xAAAA,
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ -1, -16, -3, 350, -5, 210, -7, 220,
+ -9, 230, -11, 240, -13, 250, -15, 260));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_maskz_permutex2var_epi16(
+ 0x5555,
+ (__m256i)(__v16hi){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 0, 200, 0, 10, 0, 20, 0,
+ 30, 0, 40, 0, 50, 0, 60, 0));
+
__m128i test_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_maddubs_epi16
// CHECK: @llvm.x86.ssse3.pmadd.ub.sw
@@ -2172,6 +2291,7 @@ __m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m1
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_unpackhi_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},(__mmask16)0xFAAA,(__m128i)(__v16qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},(__m128i)(__v16qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}),1,-9,3,-10,5,-11,7,-12,9,-13,11,-14,114,-15,115,-16));
__m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpackhi_epi8
@@ -2179,6 +2299,7 @@ __m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_unpackhi_epi8((__mmask16)0xFAAA,(__m128i)(__v16qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},(__m128i)(__v16qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}),0,-9,0,-10,0,-11,0,-12,0,-13,0,-14,114,-15,115,-16));
__m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpackhi_epi8
@@ -2186,6 +2307,7 @@ __m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, _
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_unpackhi_epi8((__m256i)(__v32qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xFAAAAAAA,(__m256i)(__v32qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89},(__m256i)(__v32qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64}),1,-9,3,-10,5,-11,7,-12,9,-13,11,-14,13,-15,15,-16,49,-57,51,-58,53,-59,55,-60,57,-61,59,-62,-90,-63,-89,-64));
__m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpackhi_epi8
@@ -2193,6 +2315,7 @@ __m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_unpackhi_epi8((__mmask32)0xFAAAAAAA,(__m256i)(__v32qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89},(__m256i)(__v32qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-49,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64}),0,-9,0,-10,0,-11,0,-12,0,-13,0,-14,0,-15,0,-16,0,-57,0,-58,0,-59,0,-60,0,-61,0,-62,-90,-63,-89,-64));
__m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpackhi_epi16
@@ -2200,6 +2323,7 @@ __m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m1
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_unpackhi_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8},(__mmask8)0xFA,(__m128i)(__v8hi){100,101,102,103,104,105,106,107},(__m128i)(__v8hi){200,201,202,203,204,205,206,207}),1,204,3,205,106,206,107,207));
__m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpackhi_epi16
@@ -2207,6 +2331,7 @@ __m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_unpackhi_epi16((__mmask8)0xFA,(__m128i)(__v8hi){100,101,102,103,104,105,106,107},(__m128i)(__v8hi){200,201,202,203,204,205,206,207}),0,204,0,205,106,206,107,207));
__m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpackhi_epi16
@@ -2214,6 +2339,7 @@ __m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_unpackhi_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xFAAAu,(__m256i)(__v16hi){100,101,102,103,104,105,106,107,130,131,132,133,134,135,136,137},(__m256i)(__v16hi){200,201,202,203,204,205,206,207,230,231,232,233,234,235,236,237}),1,204,3,205,5,206,7,207,25,234,27,235,136,236,137,237));
__m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpackhi_epi16
@@ -2221,6 +2347,7 @@ __m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_unpackhi_epi16((__mmask16)0xFAAAu,(__m256i)(__v16hi){100,101,102,103,104,105,106,107,130,131,132,133,134,135,136,137},(__m256i)(__v16hi){200,201,202,203,204,205,206,207,230,231,232,233,234,235,236,237}),0,204,0,205,0,206,0,207,0,234,0,235,136,236,137,237));
__m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpacklo_epi8
@@ -2228,6 +2355,7 @@ __m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m1
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_unpacklo_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},(__mmask16)0xFAAA,(__m128i)(__v16qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},(__m128i)(__v16qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}),1,-1,3,-2,5,-3,7,-4,9,-5,11,-6,106,-7,107,-8));
__m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpacklo_epi8
@@ -2235,6 +2363,7 @@ __m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_unpacklo_epi8((__mmask16)0xFAAA,(__m128i)(__v16qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115},(__m128i)(__v16qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}),0,-1,0,-2,0,-3,0,-4,0,-5,0,-6,106,-7,107,-8));
__m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpacklo_epi8
@@ -2242,6 +2371,7 @@ __m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, _
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_unpacklo_epi8((__m256i)(__v32qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xFAAAAAAA,(__m256i)(__v32qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64,-65},(__m256i)(__v32qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75}),1,-1,3,-2,5,-3,7,-4,9,-5,11,-6,13,-7,15,-8,49,60,51,61,53,62,55,63,57,64,59,65,-56,66,-57,67));
__m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpacklo_epi8
@@ -2249,6 +2379,7 @@ __m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_unpacklo_epi8((__mmask32)0xFAAAAAAA,(__m256i)(__v32qs){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,-50,-51,-52,-53,-54,-55,-56,-57,-58,-59,-60,-61,-62,-63,-64,-65},(__m256i)(__v32qs){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75}),0,-1,0,-2,0,-3,0,-4,0,-5,0,-6,0,-7,0,-8,0,60,0,61,0,62,0,63,0,64,0,65,-56,66,-57,67));
__m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpacklo_epi16
@@ -2256,6 +2387,7 @@ __m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m1
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_unpacklo_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8},(__mmask8)0xFAu,(__m128i)(__v8hi){100,101,102,103,104,105,106,107},(__m128i)(__v8hi){200,201,202,203,204,205,206,207}),1,200,3,201,102,202,103,203));
__m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpacklo_epi16
@@ -2263,6 +2395,7 @@ __m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_unpacklo_epi16((__mmask8)0xFAu,(__m128i)(__v8hi){100,101,102,103,104,105,106,107},(__m128i)(__v8hi){200,201,202,203,204,205,206,207}),0,200,0,201,102,202,103,203));
__m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpacklo_epi16
@@ -2270,6 +2403,7 @@ __m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_unpacklo_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xFAAAu,(__m256i)(__v16hi){100,101,102,103,104,105,106,107,130,131,132,133,134,135,136,137},(__m256i)(__v16hi){200,201,202,203,204,205,206,207,230,231,232,233,234,235,236,237}),1,200,3,201,5,202,7,203,25,230,27,231,132,232,133,233));
__m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpacklo_epi16
@@ -2277,6 +2411,7 @@ __m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_unpacklo_epi16((__mmask16)0xFAAAu,(__m256i)(__v16hi){100,101,102,103,104,105,106,107,130,131,132,133,134,135,136,137},(__m256i)(__v16hi){200,201,202,203,204,205,206,207,230,231,232,233,234,235,236,237}),0,200,0,201,0,202,0,203,0,230,0,231,132,232,133,233));
__m128i test_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepi8_epi16
@@ -2284,6 +2419,7 @@ __m128i test_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_cvtepi8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_cvtepi8_epi16(_mm_set1_epi16(-777),(__mmask8)0xA5,(__m128i)(__v16qs){1,-2,3,-4,5,-6,7,-8,9,10,11,12,13,14,15,16}),1,-777,3,-777,-777,-6,-777,-8));
__m128i test_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_cvtepi8_epi16
@@ -2291,6 +2427,7 @@ __m128i test_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_cvtepi8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_cvtepi8_epi16((__mmask8)0xA5,(__m128i)(__v16qs){1,-2,3,-4,5,-6,7,-8,9,10,11,12,13,14,15,16}),1,0,3,0,0,-6,0,-8));
__m256i test_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_cvtepi8_epi16
@@ -2298,6 +2435,7 @@ __m256i test_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_cvtepi8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_cvtepi8_epi16(_mm256_set1_epi16(-777),/*1001110010100101=*/0x9ca5,(__m128i)(__v16qs){1,-2,3,-4,5,-6,7,-8,25,-26,27,-28,29,-30,31,-32}),1,-777,3,-777,-777,-6,-777,-8,-777,-777,27,-28,29,-777,-777,-32));
__m256i test_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_cvtepi8_epi16
@@ -2305,6 +2443,7 @@ __m256i test_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_cvtepi8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_cvtepi8_epi16(/*1001110010100101=*/0x9ca5,(__m128i)(__v16qs){1,-2,3,-4,5,-6,7,-8,25,-26,27,-28,29,-30,31,-32}),1,0,3,0,0,-6,0,-8,0,0,27,-28,29,0,0,-32));
__m128i test_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepu8_epi16
@@ -2312,6 +2451,7 @@ __m128i test_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_cvtepu8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_cvtepu8_epi16(_mm_set1_epi16(-777),(__mmask8)0xA5,(__m128i)(__v16qu){25,26,27,28,29,30,31,32,0,0,0,0,0,0,0,0}),25,-777,27,-777,-777,30,-777,32));
__m128i test_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_cvtepu8_epi16
@@ -2319,6 +2459,7 @@ __m128i test_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_cvtepu8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_cvtepu8_epi16((__mmask8)0xA5,(__m128i)(__v16qu){25,26,27,28,29,30,31,32,0,0,0,0,0,0,0,0}),25,0,27,0,0,30,0,32));
__m256i test_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_cvtepu8_epi16
@@ -2326,6 +2467,7 @@ __m256i test_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_cvtepu8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_cvtepu8_epi16(_mm256_set1_epi16(-777),/*1001110010100101=*/0x9ca5,(__m128i)(__v16qu){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32}),1,-777,3,-777,-777,6,-777,8,-777,-777,27,28,29,-777,-777,32));
__m256i test_mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_cvtepu8_epi16
@@ -2333,6 +2475,7 @@ __m256i test_mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_cvtepu8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_cvtepu8_epi16(/*1001110010100101=*/0x9ca5,(__m128i)(__v16qu){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32}),1,0,3,0,0,6,0,8,0,0,27,28,29,0,0,32));
__m256i test_mm256_sllv_epi16(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_sllv_epi16
@@ -2407,6 +2550,7 @@ __m256i test_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_sll_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_slli_epi16((__mmask8)0xAA, (__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 20), 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi16
@@ -2414,6 +2558,7 @@ __m128i test_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_slli_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_slli_epi16((__m128i)(__v8hi){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0xAA, (__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 20), 100, 0, 102, 0, 104, 0, 106, 0));
__m128i test_mm_mask_slli_epi16_2(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
// CHECK-LABEL: test_mm_mask_slli_epi16_2
@@ -3051,12 +3196,22 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
return _mm_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi8_mask(
+ ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})) == (__mmask16)0x0004);
+
__mmask32 test_mm256_movepi8_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi8_mask
// CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer
return _mm256_movepi8_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi8_mask(
+ ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, char(128)})
+) == (__mmask32)0x80000004);
+
__m128i test_mm_movm_epi8(__mmask16 __A) {
// CHECK-LABEL: test_mm_movm_epi8
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
@@ -3091,6 +3246,7 @@ __m128i test_mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_broadcastb_epi8((__m128i)(__v16qs){0,1,2,3,4,5,6,7,56,57,58,59,60,61,62,63},(__mmask16)0xAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,2,-120,4,-120,6,-120,56,-120,58,-120,60,-120,62,-120));
__m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_broadcastb_epi8
@@ -3098,6 +3254,7 @@ __m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_broadcastb_epi8((__mmask16)0xAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120));
__m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_broadcastb_epi8
@@ -3105,6 +3262,7 @@ __m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_broadcastb_epi8((__m256i)(__v32qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63},(__mmask32)0xAAAAAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,2,-120,4,-120,6,-120,8,-120,10,-120,12,-120,14,-120,48,-120,50,-120,52,-120,54,-120,56,-120,58,-120,60,-120,62,-120));
__m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_broadcastb_epi8
@@ -3112,6 +3270,7 @@ __m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_broadcastb_epi8((__mmask32)0xAAAAAAAA,(__m128i)(__v16qs){-120,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120));
__m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: test_mm_mask_broadcastw_epi16
@@ -3119,6 +3278,7 @@ __m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_broadcastw_epi16((__m128i)(__v8hi){0,1,2,3,4,5,6,7},(__mmask8)0xAA,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,2,-120,4,-120,6,-120));
__m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_broadcastw_epi16
@@ -3126,6 +3286,7 @@ __m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_broadcastw_epi16((__mmask8)0xAA,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,0,-120,0,-120,0,-120));
__m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_broadcastw_epi16
@@ -3133,6 +3294,7 @@ __m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_broadcastw_epi16((__m256i)(__v16hi){0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31},(__mmask16)0xAAAA,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,2,-120,4,-120,6,-120,24,-120,26,-120,28,-120,30,-120));
__m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_broadcastw_epi16
@@ -3140,6 +3302,8 @@ __m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_broadcastw_epi16((__mmask16)0xAAAA,(__m128i)(__v8hi){-120,1,2,3,4,5,6,7}),0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120,0,-120));
+
__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
// CHECK-LABEL: test_mm_mask_set1_epi8
// CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
@@ -3161,6 +3325,8 @@ __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_set1_epi8((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},(__mmask16)0xAAAA,(char)42),1,42,3,42,5,42,7,42,9,42,11,42,13,42,15,42));
+
__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
// CHECK-LABEL: test_mm_maskz_set1_epi8
// CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
@@ -3182,6 +3348,7 @@ __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_set1_epi8( __M, __A);
}
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_set1_epi8((__mmask16)0xAAAA,(char)42),0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42));
__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
// CHECK-LABEL: test_mm256_mask_set1_epi8
@@ -3220,6 +3387,7 @@ __m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_set1_epi8((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64},(__mmask32)0xAAAAAAAA,(char)42),1,42,3,42,5,42,7,42,9,42,11,42,13,42,15,42,49,42,51,42,53,42,55,42,57,42,59,42,61,42,63,42));
__m256i test_mm256_maskz_set1_epi8( __mmask32 __M, char __A) {
// CHECK-LABEL: test_mm256_maskz_set1_epi8
@@ -3258,7 +3426,7 @@ __m256i test_mm256_maskz_set1_epi8( __mmask32 __M, char __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_set1_epi8( __M, __A);
}
-
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_set1_epi8((__mmask32)0xAAAAAAAA,(char)42),0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42));
__m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) {
// CHECK-LABEL: test_mm256_mask_set1_epi16
@@ -3281,6 +3449,7 @@ __m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_set1_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,25,26,27,28,29,30,31,32},(__mmask16)0xAAAA,42),1,42,3,42,5,42,7,42,25,42,27,42,29,42,31,42));
__m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) {
// CHECK-LABEL: test_mm256_maskz_set1_epi16
@@ -3303,6 +3472,7 @@ __m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_set1_epi16((__mmask16)0xAAAA,42),0,42,0,42,0,42,0,42,0,42,0,42,0,42,0,42));
__m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) {
// CHECK-LABEL: test_mm_mask_set1_epi16
@@ -3317,6 +3487,7 @@ __m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_set1_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8},(__mmask8)0xAA,42),1,42,3,42,5,42,7,42));
__m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) {
// CHECK-LABEL: test_mm_maskz_set1_epi16
@@ -3331,12 +3502,17 @@ __m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_set1_epi16((__mmask8)0xAA,42),0,42,0,42,0,42,0,42));
+
__m128i test_mm_permutexvar_epi16(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.128
- return _mm_permutexvar_epi16(__A, __B);
+ return _mm_permutexvar_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_permutexvar_epi16((__m128i)(__v8hi){7, 6, 5, 4, 3, 2, 1, 0}, (__m128i)(__v8hi){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8hi(_mm_permutexvar_epi16((__m128i)(__v8hi){0, 0, 0, 0, 0, 0, 0, 0}, (__m128i)(__v8hi){100, 101, 102, 103, 104, 105, 106, 107}), 100, 100, 100, 100, 100, 100, 100, 100));
+
__m128i test_mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.128
@@ -3348,15 +3524,23 @@ __m128i test_mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, _
// CHECK-LABEL: test_mm_mask_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.128
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
- return _mm_mask_permutexvar_epi16(__W, __M, __A, __B);
+ return _mm_mask_permutexvar_epi16(__W, __M, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_permutexvar_epi16((__m128i)(__v8hi){99, 99, 99, 99, 99, 99, 99, 99}, 0xFF, (__m128i)(__v8hi){7, 6, 5, 4, 3, 2, 1, 0}, (__m128i)(__v8hi){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_permutexvar_epi16((__m128i)(__v8hi){99, 99, 99, 99, 99, 99, 99, 99}, 0xAA, (__m128i)(__v8hi){7, 6, 5, 4, 3, 2, 1, 0}, (__m128i)(__v8hi){10, 11, 12, 13, 14, 15, 16, 17}), 99, 16, 99, 14, 99, 12, 99, 10));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_permutexvar_epi16(0xFF, (__m128i)(__v8hi){7, 6, 5, 4, 3, 2, 1, 0}, (__m128i)(__v8hi){10, 11, 12, 13, 14, 15, 16, 17}), 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_permutexvar_epi16(0xAA, (__m128i)(__v8hi){7, 6, 5, 4, 3, 2, 1, 0}, (__m128i)(__v8hi){10, 11, 12, 13, 14, 15, 16, 17}), 0, 16, 0, 14, 0, 12, 0, 10));
+
__m256i test_mm256_permutexvar_epi16(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.256
- return _mm256_permutexvar_epi16(__A, __B);
+ return _mm256_permutexvar_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_permutexvar_epi16((__m256i)(__v16hi){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v16hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16hi(_mm256_permutexvar_epi16((__m256i)(__v16hi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, (__m256i)(__v16hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}), 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100));
+
__m256i test_mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.256
@@ -3368,14 +3552,21 @@ __m256i test_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __
// CHECK-LABEL: test_mm256_mask_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.256
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
- return _mm256_mask_permutexvar_epi16(__W, __M, __A, __B);
+ return _mm256_mask_permutexvar_epi16(__W, __M, __A, __B);
}
+
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_permutexvar_epi16((__m256i)(__v16hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xFFFF, (__m256i)(__v16hi){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v16hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_permutexvar_epi16((__m256i)(__v16hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xAAAA, (__m256i)(__v16hi){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v16hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 99, 24, 99, 22, 99, 20, 99, 18, 99, 16, 99, 14, 99, 12, 99, 10));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_permutexvar_epi16(0xFFFF, (__m256i)(__v16hi){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v16hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_permutexvar_epi16(0xAAAA, (__m256i)(__v16hi){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, (__m256i)(__v16hi){10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}), 0, 24, 0, 22, 0, 20, 0, 18, 0, 16, 0, 14, 0, 12, 0, 10));
+
__m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_alignr_epi8
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v16qi(_mm_mask_alignr_epi8(((__m128i)(__v16qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127));
__m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_alignr_epi8
@@ -3383,6 +3574,7 @@ __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_alignr_epi8(__U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v16qi( _mm_maskz_alignr_epi8((__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}),2), 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_alignr_epi8
@@ -3390,6 +3582,7 @@ __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_alignr_epi8(((__m256i)(__v32qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask32)0xf000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 63, 64, 17, 18));
__m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_alignr_epi8
@@ -3397,6 +3590,7 @@ __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_alignr_epi8((__mmask32)0xf000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 64, 17, 18));
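+// Note: the 256-bit alignr forms shift within each 128-bit lane, so in the two
+// expectations above the upper lane is taken from __B's upper lane shifted by
+// two bytes and then wraps into __A's upper lane (..., 63, 64, 17, 18).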
__m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dbsad_epu8
@@ -3442,6 +3636,7 @@ __mmask8 test_mm_movepi16_mask(__m128i __A) {
// CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer
return _mm_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi16_mask(((__m128i)(__v8hi){0, 1, -1, 3, 4, 5, 6, 7})) == (__mmask8)0x04);
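+// In the constexpr check above, the mask collects the sign bit of each 16-bit
+// element; only element 2 (-1) is negative, giving 0x04.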
__mmask16 test_mm256_movepi16_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi16_mask
@@ -3449,6 +3644,9 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) {
return _mm256_movepi16_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi16_mask(
+ ((__m256i)(__v16hi){0, 1, short(32769), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, short(65535)})) == (__mmask16)0x8004);
+
__m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_shufflehi_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
@@ -3596,3 +3794,22 @@ void test_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i _
// CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.256
_mm256_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A);
}
+
+
+TEST_CONSTEXPR(match_v16qu(
+ _mm_permutex2var_epi8((__m128i)(__v16qu){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 127, 126, 125},
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){100, 110, 120, 130, 140, 150, 160, 170,
+ 180, 190, 200, 210, 220, 230, 240, 250}),
+ 0, 100, 10, 110, 20, 120, 30, 130,
+ 40, 140, 50, 150, 60, 160, 70, 170));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_permutex2var_epi8((__m256i)(__v32qu){0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109},
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231}),
+ 0, 200, 10, 201, 20, 202, 30, 203,
+ 40, 204, 50, 205, 60, 206, 70, 207,
+ 80, 208, 90, 209, 100, 210, 110, 211,
+ 120, 212, 127, 213, 126, 214, 125, 215));
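+// permutex2var_epi8 looks elements up in the concatenation of the two source
+// vectors (arguments 1 and 3): indices below 16 (below 32 for the 256-bit
+// form) select from the first source, larger indices from the second, so the
+// interleaved index patterns above interleave the two sources element by
+// element.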
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 1bfc022..92d8e1a 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -921,12 +921,21 @@ __mmask8 test_mm_movepi32_mask(__m128i __A) {
return _mm_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm512_movepi16_mask(
+ ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, short(32768)})
+) == (__mmask32)0x80000004);
+
__mmask8 test_mm256_movepi32_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi32_mask
// CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
return _mm256_movepi32_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi32_mask(((__m256i)(__v8si){0, 1, -1, 3, 4, 5, 6, -2147483648})) == (__mmask8)0x84);
+
__m128i test_mm_movm_epi32(__mmask8 __A) {
// CHECK-LABEL: test_mm_movm_epi32
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
@@ -965,6 +974,8 @@ __mmask8 test_mm_movepi64_mask(__m128i __A) {
return _mm_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm_movepi64_mask(((__m128i)(__v2di){0, -1})) == (__mmask8)0x02);
+
__mmask8 test_mm256_movepi64_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi64_mask
// CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
@@ -972,6 +983,7 @@ __mmask8 test_mm256_movepi64_mask(__m256i __A) {
return _mm256_movepi64_mask(__A);
}
+TEST_CONSTEXPR(_mm256_movepi64_mask(((__m256i)(__v4di){0, 1, -1, 3})) == (__mmask8)0x04);
__m256 test_mm256_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: test_mm256_broadcast_f32x2
diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins-constrained.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins-constrained.c
new file mode 100644
index 0000000..0fdc899
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins-constrained.c
@@ -0,0 +1,37 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512vl -target-feature +avx512fp16 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m128h test_mm_sqrt_ph(__m128h x) {
+ // COMMON-LABEL: test_mm_sqrt_ph
+ // UNCONSTRAINED: call {{.*}}<8 x half> @llvm.sqrt.v8f16(<8 x half> {{.*}})
+ // CONSTRAINED: call {{.*}}<8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtph %xmm{{.*}},
+ return _mm_sqrt_ph(x);
+}
+
+
+__m256h test_mm256_sqrt_ph(__m256h x) {
+ // COMMON-LABEL: test_mm256_sqrt_ph
+ // UNCONSTRAINED: call {{.*}}<16 x half> @llvm.sqrt.v16f16(<16 x half> {{.*}})
+ // CONSTRAINED: call {{.*}}<16 x half> @llvm.experimental.constrained.sqrt.v16f16(<16 x half> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: vsqrtph %ymm{{.*}},
+ return _mm256_sqrt_ph(x);
+}
+
diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
index f63b5c6..11dbd71 100644
--- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
@@ -47,41 +47,41 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpwssd_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpwssd_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpwssds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpwssds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_epi32(__S, __A, __B);
}
@@ -127,41 +127,41 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
__m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpwssd_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpwssd_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_epi32(__S, __A, __B);
}
__m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpwssds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpwssds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_epi32(__S, __A, __B);
}
diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c
index afe8045..6b84652 100644
--- a/clang/test/CodeGen/X86/avx512vnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c
@@ -47,41 +47,41 @@ __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpwssd_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpwssd_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpwssd_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwssd_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpwssds_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpwssds_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpwssds_epi32
- // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwssds_epi32(__S, __A, __B);
}
diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c
index 7948e0d..6557a26 100644
--- a/clang/test/CodeGen/X86/avxvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnni-builtins.c
@@ -19,13 +19,13 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_epi32(__S, __A, __B);
}
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_epi32(__S, __A, __B);
}
@@ -43,13 +43,13 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_epi32(__S, __A, __B);
}
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_epi32(__S, __A, __B);
}
@@ -67,13 +67,13 @@ __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
__m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_avx_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_avx_epi32(__S, __A, __B);
}
__m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_avx_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_avx_epi32(__S, __A, __B);
}
@@ -91,12 +91,12 @@ __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
__m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_avx_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_avx_epi32(__S, __A, __B);
}
__m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_avx_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_avx_epi32(__S, __A, __B);
}
diff --git a/clang/test/CodeGen/X86/avxvnniint16-builtins.c b/clang/test/CodeGen/X86/avxvnniint16-builtins.c
index 941da9a..f28fff2 100644
--- a/clang/test/CodeGen/X86/avxvnniint16-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnniint16-builtins.c
@@ -11,72 +11,72 @@
__m128i test_mm_dpwsud_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwsud_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwsud_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwsud_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwsud_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwsud_epi32(__A, __B, __C);
}
__m128i test_mm_dpwsuds_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwsuds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwsuds_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwsuds_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwsuds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwsuds_epi32(__A, __B, __C);
}
__m128i test_mm_dpwusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwusd_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwusd_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwusd_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwusd_epi32(__A, __B, __C);
}
__m128i test_mm_dpwusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwusds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwusds_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwusds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwusds_epi32(__A, __B, __C);
}
__m128i test_mm_dpwuud_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwuud_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwuud_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwuud_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwuud_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwuud_epi32(__A, __B, __C);
}
__m128i test_mm_dpwuuds_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_dpwuuds_epi32
- // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwuuds_epi32(__A, __B, __C);
}
__m256i test_mm256_dpwuuds_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_dpwuuds_epi32
- // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwuuds_epi32(__A, __B, __C);
}
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index c08ef76..2ae4bc8 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,6 +46,31 @@ __m128 test_mm_cvtph_ps(__m128i a) {
return _mm_cvtph_ps(a);
}
+// A value exactly halfway between 1.0 and the next representable FP16 value (1.0 + 2^-10).
+// When converted to FP16, the discarded significand bits are exactly 100...0 (a tie),
+// so the result depends on the selected rounding mode.
+#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
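+// FP16 encodings used in the expectations below: -2.5f -> 0xC100 (exact);
+// 1.123f -> 0x3C7E when rounding to nearest or toward +inf, 0x3C7D when
+// rounding toward -inf or toward zero; POS_HALFWAY -> 0x3C00 (nearest-even,
+// toward -inf, toward zero) or 0x3C01 (toward +inf).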
+
+//
+// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts; the upper 4 shorts are zeroed)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ 0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ 0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+
__m256 test_mm256_cvtph_ps(__m128i a) {
// CHECK-LABEL: test_mm256_cvtph_ps
// CHECK: fpext <8 x half> %{{.*}} to <8 x float>
@@ -56,12 +81,44 @@ TEST_CONSTEXPR(match_m256(
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
));
+//
+// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ 0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ 0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+
__m128i test_mm_cvtps_ph(__m128 a) {
// CHECK-LABEL: test_mm_cvtps_ph
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
return _mm_cvtps_ph(a, 0);
}
+//
+// Tests for exact conversions under dynamic rounding
+//
+// Test that constant evaluation still succeeds under dynamic rounding, i.e.
+// with _MM_FROUND_CUR_DIRECTION (value 4), when the inputs convert exactly and
+// therefore do not depend on the (compile-time-unknown) rounding mode.
+// Inputs: -2.5f, 0.125f, -16.0f are all exactly representable in FP16.
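+// Expected exact encodings: -2.5f -> 0xC100, 0.125f -> 0x3000, -16.0f -> 0xCC00.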
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
+ 0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
+));
+
__m128i test_mm256_cvtps_ph(__m256 a) {
// CHECK-LABEL: test_mm256_cvtps_ph
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
diff --git a/clang/test/CodeGen/X86/fma-builtins.c b/clang/test/CodeGen/X86/fma-builtins.c
index 5445e50..ea93bca 100644
--- a/clang/test/CodeGen/X86/fma-builtins.c
+++ b/clang/test/CodeGen/X86/fma-builtins.c
@@ -28,23 +28,25 @@ TEST_CONSTEXPR(match_m128d(_mm_fmadd_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmadd_ss
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
// CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_fmadd_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_fmadd_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -7.0f, 1.0f, -2.0f, -0.0f));
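+// The scalar forms only compute element 0 (here -4.0f * 2.0f + 1.0f == -7.0f);
+// the remaining elements are passed through unchanged from the first operand.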
__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmadd_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
// CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_fmadd_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_fmadd_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -12.0, 1.0));
__m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmsub_ps
@@ -64,25 +66,27 @@ TEST_CONSTEXPR(match_m128d(_mm_fmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmsub_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[NEG]])
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_fmsub_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_fmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -9.0f, 1.0f, -2.0f, -0.0f));
__m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmsub_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[NEG]])
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_fmsub_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_fmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 4.0, 1.0));
__m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmadd_ps
@@ -102,25 +106,27 @@ TEST_CONSTEXPR(match_m128d(_mm_fnmadd_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0
__m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmadd_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: call float @llvm.fma.f32(float %{{.*}}, float [[NEG]], float %{{.*}})
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_fnmadd_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_fnmadd_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 9.0f, 1.0f, -2.0f, -0.0f));
__m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmadd_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: call double @llvm.fma.f64(double %{{.*}}, double [[NEG]], double %{{.*}})
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_fnmadd_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_fnmadd_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -4.0, 1.0));
__m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmsub_ps
@@ -142,27 +148,29 @@ TEST_CONSTEXPR(match_m128d(_mm_fnmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0
__m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fnmsub_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG2:%.+]] = fneg float %{{.+}}
+ // CHECK: call float @llvm.fma.f32(float %{{.*}}, float [[NEG]], float [[NEG2]])
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_fnmsub_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_fnmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 7.0f, 1.0f, -2.0f, -0.0f));
__m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fnmsub_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG2:%.+]] = fneg double %{{.+}}
+ // CHECK: call double @llvm.fma.f64(double %{{.*}}, double [[NEG]], double [[NEG2]])
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_fnmsub_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_fnmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 12.0, 1.0));
__m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmaddsub_ps
diff --git a/clang/test/CodeGen/X86/fma4-builtins.c b/clang/test/CodeGen/X86/fma4-builtins.c
index fb449d5..9495198 100644
--- a/clang/test/CodeGen/X86/fma4-builtins.c
+++ b/clang/test/CodeGen/X86/fma4-builtins.c
@@ -28,23 +28,29 @@ TEST_CONSTEXPR(match_m128d(_mm_macc_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_macc_ss
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
// CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
+ // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
return _mm_macc_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_macc_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -7.0f, 0.0f, 0.0f, 0.0f));
__m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_macc_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
// CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
+ // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
return _mm_macc_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_macc_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -12.0, 0.0));
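+// Unlike the FMA3 scalar forms, the FMA4 scalar forms zero the upper elements
+// of the result, which is why the expectations above end in 0.0 and the IR now
+// inserts explicit zeros into the upper lanes.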
__m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msub_ps
@@ -64,25 +70,31 @@ TEST_CONSTEXPR(match_m128d(_mm_msub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msub_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
- // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
- // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[NEG]])
+ // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
return _mm_msub_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_msub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -9.0f, 0.0f, 0.0f, 0.0f));
__m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msub_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
- // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
- // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[NEG]])
+ // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
return _mm_msub_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_msub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 4.0, 0.0));
__m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmacc_ps
@@ -102,25 +114,31 @@ TEST_CONSTEXPR(match_m128d(_mm_nmacc_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmacc_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
- // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: call float @llvm.fma.f32(float [[NEG]], float %{{.*}}, float %{{.*}})
+ // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
return _mm_nmacc_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_nmacc_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 9.0f, 0.0f, 0.0f, 0.0f));
__m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmacc_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
- // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: call double @llvm.fma.f64(double [[NEG]], double %{{.*}}, double %{{.*}})
+ // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
return _mm_nmacc_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_nmacc_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -4.0, 0.0));
__m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmsub_ps
@@ -142,27 +160,33 @@ TEST_CONSTEXPR(match_m128d(_mm_nmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0,
__m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_nmsub_ss
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
- // CHECK: extractelement <4 x float> %{{.*}}, i64 0
- // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
- // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
- // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg float %{{.+}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: [[NEG2:%.+]] = fneg float %{{.+}}
+ // CHECK: call float @llvm.fma.f32(float [[NEG]], float %{{.*}}, float [[NEG2]])
+ // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
+ // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
return _mm_nmsub_ss(a, b, c);
}
+TEST_CONSTEXPR(match_m128(_mm_nmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 7.0f, 0.0f, 0.0f, 0.0f));
__m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_nmsub_sd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
- // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
- // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
- // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG:%.+]] = fneg double %{{.+}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: [[NEG2:%.+]] = fneg double %{{.+}}
+ // CHECK: call double @llvm.fma.f64(double [[NEG]], double %{{.*}}, double [[NEG2]])
+ // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
return _mm_nmsub_sd(a, b, c);
}
+TEST_CONSTEXPR(match_m128d(_mm_nmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 12.0, 0.0));
__m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_maddsub_ps
diff --git a/clang/test/CodeGen/X86/gfni-builtins.c b/clang/test/CodeGen/X86/gfni-builtins.c
index 7f196e0..586e735 100644
--- a/clang/test/CodeGen/X86/gfni-builtins.c
+++ b/clang/test/CodeGen/X86/gfni-builtins.c
@@ -1,192 +1,524 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512,AVX512BW
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512,AVX512BW
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512,AVX512BW
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -fexperimental-new-constant-interpreter -emit-llvm -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx -fexperimental-new-constant-interpreter -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512f -target-feature +avx512vl -fexperimental-new-constant-interpreter -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -fexperimental-new-constant-interpreter -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512,AVX512BW
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_gf2p8affineinv_epi64_epi8(__m128i A, __m128i B) {
- // SSE-LABEL: @test_mm_gf2p8affineinv_epi64_epi8
+ // SSE-LABEL: test_mm_gf2p8affineinv_epi64_epi8
// SSE: @llvm.x86.vgf2p8affineinvqb.128
return _mm_gf2p8affineinv_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_gf2p8affineinv_epi64_epi8(
+ _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+ _mm_set_epi64x(0x0102040810204080ULL, 0x0102040810204080ULL), 0x63),
+ 164, 134, 130, 211, 163, 74, 44, 139, 178, 24, 49, 168, 149, 238, 98, 99));
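+// With the 8x8 bit matrix 0x0102040810204080 (which maps every byte to itself
+// in this test) and the constant 0x63, affineinv computes GF(2^8) inverse(x)
+// XOR 0x63, so in the check above input 0 yields 0x63 == 99 and input 1 yields
+// 0x62 == 98.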
__m128i test_mm_gf2p8affine_epi64_epi8(__m128i A, __m128i B) {
- // SSE-LABEL: @test_mm_gf2p8affine_epi64_epi8
+ // SSE-LABEL: test_mm_gf2p8affine_epi64_epi8
// SSE: @llvm.x86.vgf2p8affineqb.128
return _mm_gf2p8affine_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_gf2p8affine_epi64_epi8(
+ _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+ _mm_set_epi64x(0x0102040810204080ULL, 0x0102040810204080ULL), 0x63),
+ 108, 109, 110, 111, 104, 105, 106, 107, 100, 101, 102, 103, 96, 97, 98, 99));
__m128i test_mm_gf2p8mul_epi8(__m128i A, __m128i B) {
- // SSE-LABEL: @test_mm_gf2p8mul_epi8
+ // SSE-LABEL: test_mm_gf2p8mul_epi8
// SSE: @llvm.x86.vgf2p8mulb.128
return _mm_gf2p8mul_epi8(A, B);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_gf2p8mul_epi8(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+ _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)),
+ 0, 14, 26, 20, 44, 34, 54, 56, 56, 54, 34, 44, 20, 26, 14, 0));
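+// gf2p8mul multiplies in GF(2^8) modulo the AES polynomial x^8+x^4+x^3+x+1;
+// the products in the check above are small enough to need no reduction,
+// e.g. 13*2 == 26 and 11*4 == 44.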
#ifdef __AVX__
__m256i test_mm256_gf2p8affineinv_epi64_epi8(__m256i A, __m256i B) {
- // AVX-LABEL: @test_mm256_gf2p8affineinv_epi64_epi8
+ // AVX-LABEL: test_mm256_gf2p8affineinv_epi64_epi8
// AVX: @llvm.x86.vgf2p8affineinvqb.256
return _mm256_gf2p8affineinv_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_gf2p8affineinv_epi64_epi8(
+ _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+ _mm256_set_epi64x(0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL),
+ 0x63),
+ 209, 141, 35, 156, 175, 158, 92, 59, 60, 3, 72, 250, 40, 201, 215, 23, 164,
+ 134, 130, 211, 163, 74, 44, 139, 178, 24, 49, 168, 149, 238, 98, 99));
__m256i test_mm256_gf2p8affine_epi64_epi8(__m256i A, __m256i B) {
- // AVX-LABEL: @test_mm256_gf2p8affine_epi64_epi8
+ // AVX-LABEL: test_mm256_gf2p8affine_epi64_epi8
// AVX: @llvm.x86.vgf2p8affineqb.256
return _mm256_gf2p8affine_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_gf2p8affine_epi64_epi8(
+ _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+ _mm256_set_epi64x(0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL),
+ 0x63),
+ 124, 125, 126, 127, 120, 121, 122, 123, 116, 117, 118, 119, 112, 113, 114,
+ 115, 108, 109, 110, 111, 104, 105, 106, 107, 100, 101, 102, 103, 96, 97, 98,
+ 99));
__m256i test_mm256_gf2p8mul_epi8(__m256i A, __m256i B) {
- // AVX-LABEL: @test_mm256_gf2p8mul_epi8
+ // AVX-LABEL: test_mm256_gf2p8mul_epi8
// AVX: @llvm.x86.vgf2p8mulb.256
return _mm256_gf2p8mul_epi8(A, B);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_gf2p8mul_epi8(
+ _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+ _mm256_set_epi8(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)),
+ 0, 30, 58, 36, 108, 114, 86, 72, 184, 166, 130, 156, 212, 202, 238, 240, 240,
+ 238, 202, 212, 156, 130, 166, 184, 72, 86, 114, 108, 36, 58, 30, 0));
#endif // __AVX__
#ifdef __AVX512F__
__m512i test_mm512_gf2p8affineinv_epi64_epi8(__m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_gf2p8affineinv_epi64_epi8
+ // AVX512-LABEL: test_mm512_gf2p8affineinv_epi64_epi8
// AVX512: @llvm.x86.vgf2p8affineinvqb.512
return _mm512_gf2p8affineinv_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_gf2p8affineinv_epi64_epi8(
+ _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63),
+ _mm512_set_epi64(0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL),
+ 0x63),
+ 122, 58, 216, 20, 12, 67, 86, 145, 33, 5, 90, 144, 15, 241, 38, 79, 161,
+ 193, 39, 83, 118, 251, 105, 162, 170, 203, 46, 54, 146, 57, 13, 89, 209, 141,
+ 35, 156, 175, 158, 92, 59, 60, 3, 72, 250, 40, 201, 215, 23, 164, 134, 130,
+ 211, 163, 74, 44, 139, 178, 24, 49, 168, 149, 238, 98, 99));
__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8
+ // AVX512-LABEL: test_mm512_gf2p8affine_epi64_epi8
// AVX512: @llvm.x86.vgf2p8affineqb.512
return _mm512_gf2p8affine_epi64_epi8(A, B, 1);
}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_gf2p8affine_epi64_epi8(
+ _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63),
+ _mm512_set_epi64(0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL,
+ 0x0102040810204080ULL, 0x0102040810204080ULL),
+ 0x63),
+ 92, 93, 94, 95, 88, 89, 90, 91, 84, 85, 86, 87, 80, 81, 82, 83, 76, 77, 78,
+ 79, 72, 73, 74, 75, 68, 69, 70, 71, 64, 65, 66, 67, 124, 125, 126, 127, 120,
+ 121, 122, 123, 116, 117, 118, 119, 112, 113, 114, 115, 108, 109, 110, 111,
+ 104, 105, 106, 107, 100, 101, 102, 103, 96, 97, 98, 99));
__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_gf2p8mul_epi8
+ // AVX512-LABEL: test_mm512_gf2p8mul_epi8
// AVX512: @llvm.x86.vgf2p8mulb.512
return _mm512_gf2p8mul_epi8(A, B);
}
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_gf2p8mul_epi8(
+ _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63),
+ _mm512_set_epi8(63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+ 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19,
+ 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
+ 1, 0)),
+ 0, 62, 122, 68, 236, 210, 150, 168, 163, 157, 217, 231, 79, 113, 53, 11, 198,
+ 248, 188, 130, 42, 20, 80, 110, 101, 91, 31, 33, 137, 183, 243, 205, 205,
+ 243, 183, 137, 33, 31, 91, 101, 110, 80, 20, 42, 130, 188, 248, 198, 11, 53,
+ 113, 79, 231, 217, 157, 163, 168, 150, 210, 236, 68, 122, 62, 0));
#endif // __AVX512F__
#ifdef __AVX512BW__
__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm512_mask_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
}
__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm512_maskz_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
__m128i test_mm_mask_gf2p8affineinv_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm_mask_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
}
__m128i test_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm_maskz_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm256_mask_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
}
__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW-LABEL: test_mm256_maskz_gf2p8affineinv_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineinvqb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
__m512i test_mm512_mask_gf2p8affine_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm512_mask_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
}
__m512i test_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm512_maskz_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
__m128i test_mm_mask_gf2p8affine_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm_mask_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
}
__m128i test_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm_maskz_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm256_mask_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
}
__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW-LABEL: test_mm256_maskz_gf2p8affine_epi64_epi8
// AVX512BW: @llvm.x86.vgf2p8affineqb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
__m512i test_mm512_mask_gf2p8mul_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_mask_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm512_mask_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_gf2p8mul_epi8(S, U, A, B);
}
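+// Mask bit set -> GF(2^8) product (0x12 * 0x34 == 0x05 mod 0x11B);
+// mask bit clear -> the corresponding byte of the passthrough operand S.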
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_gf2p8mul_epi8((__m512i)(__v64qi){-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1},
+ 0xAAAAAAAAAAAAAAAAULL,
+ (__m512i)(__v64qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_gf2p8mul_epi8((__m512i)(__v64qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFFFFFFFFFFULL,
+ (__m512i)(__v64qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_gf2p8mul_epi8((__m512i)(__v64qi){42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42,
+ 42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42,
+ 42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42,
+ 42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42},
+ 0x0ULL,
+ (__m512i)(__v64qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_gf2p8mul_epi8((__m512i)(__v64qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFFFFFFFFFFULL,
+ (__m512i)(__v64qi){0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_mask_gf2p8mul_epi8((__m512i)(__v64qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFFFFFFFFFFULL,
+ (__m512i)(__v64qi){0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1}),
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42));
__m512i test_mm512_maskz_gf2p8mul_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512BW-LABEL: @test_mm512_maskz_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm512_maskz_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.512
// AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_gf2p8mul_epi8(U, A, B);
}
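+// maskz variant: mask bit set -> GF(2^8) product, mask bit clear -> zero.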
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_maskz_gf2p8mul_epi8(0x5555555555555555ULL,
+ (__m512i)(__v64qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05,
+ 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0,
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0,
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0));
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_maskz_gf2p8mul_epi8(0x0ULL,
+ (__m512i)(__v64qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m512i)(__v64qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_mask_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm_mask_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_mask_gf2p8mul_epi8(S, U, A, B);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_gf2p8mul_epi8((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1},
+ 0xAAAA,
+ (__m128i)(__v16qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF,
+ 0x05, 0xFF, 0x05));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_gf2p8mul_epi8((__m128i)(__v16qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFF,
+ (__m128i)(__v16qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_gf2p8mul_epi8((__m128i)(__v16qi){42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42},
+ 0x0,
+ (__m128i)(__v16qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_gf2p8mul_epi8((__m128i)(__v16qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFF,
+ (__m128i)(__v16qi){0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_mask_gf2p8mul_epi8((__m128i)(__v16qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFF,
+ (__m128i)(__v16qi){0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1}),
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42));
__m128i test_mm_maskz_gf2p8mul_epi8(__mmask16 U, __m128i A, __m128i B) {
- // AVX512BW-LABEL: @test_mm_maskz_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm_maskz_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.128
// AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_maskz_gf2p8mul_epi8(U, A, B);
}
+TEST_CONSTEXPR(match_v16qu(
+ _mm_maskz_gf2p8mul_epi8(0x5555,
+ (__m128i)(__v16qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0));
+TEST_CONSTEXPR(match_v16qu(
+ _mm_maskz_gf2p8mul_epi8(0x0,
+ (__m128i)(__v16qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m128i)(__v16qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_mask_gf2p8mul_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_mask_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm256_mask_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_mask_gf2p8mul_epi8(S, U, A, B);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_gf2p8mul_epi8((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1},
+ 0xAAAAAAAA,
+ (__m256i)(__v32qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05,
+ 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05, 0xFF, 0x05));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_gf2p8mul_epi8((__m256i)(__v32qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFF,
+ (__m256i)(__v32qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_gf2p8mul_epi8((__m256i)(__v32qi){42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42,
+ 42,42,42,42,42,42,42,42, 42,42,42,42,42,42,42,42},
+ 0x0,
+ (__m256i)(__v32qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_gf2p8mul_epi8((__m256i)(__v32qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFF,
+ (__m256i)(__v32qi){0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_mask_gf2p8mul_epi8((__m256i)(__v32qi){99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,
+ 99,99,99,99,99,99,99,99, 99,99,99,99,99,99,99,99},
+ 0xFFFFFFFF,
+ (__m256i)(__v32qi){0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42, 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1}),
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42));
__m256i test_mm256_maskz_gf2p8mul_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX512BW-LABEL: @test_mm256_maskz_gf2p8mul_epi8
+ // AVX512BW-LABEL: test_mm256_maskz_gf2p8mul_epi8
// AVX512BW: @llvm.x86.vgf2p8mulb.256
// AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_maskz_gf2p8mul_epi8(U, A, B);
}
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_maskz_gf2p8mul_epi8(0x55555555,
+ (__m256i)(__v32qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0,
+ 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0, 0x05, 0));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_maskz_gf2p8mul_epi8(0x0,
+ (__m256i)(__v32qi){0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12},
+ (__m256i)(__v32qi){0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,
+ 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34, 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34}),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
#endif // __AVX512BW__
diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c
index a56f8ba..c7cd9ff 100644
--- a/clang/test/CodeGen/X86/math-builtins.c
+++ b/clang/test/CodeGen/X86/math-builtins.c
@@ -118,36 +118,36 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
__builtin_copysign(f,f); __builtin_copysignf(f,f); __builtin_copysignl(f,f); __builtin_copysignf128(f,f);
-// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
-// HAS_ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC2:#[0-9]+]]
+// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC2:#[0-9]+]]
+// HAS_ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
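+// Note: [[READNONE_INTRINSIC2:#[0-9]+]] captures the attribute-group number on
+// its first use; later [[READNONE_INTRINSIC2]] references must match the same
+// group, whose body is verified by the attributes checks at the end of the file.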
__builtin_fabs(f); __builtin_fabsf(f); __builtin_fabsl(f); __builtin_fabsf128(f);
-// NO__ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.fabs.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.fabs.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.fabs.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.fabs.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_frexp(f,i); __builtin_frexpf(f,i); __builtin_frexpl(f,i); __builtin_frexpf128(f,i);
-// NO__ERRNO: declare { double, i32 } @llvm.frexp.f64.i32(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare { float, i32 } @llvm.frexp.f32.i32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare { fp128, i32 } @llvm.frexp.f128.i32(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare { double, i32 } @llvm.frexp.f64.i32(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare { float, i32 } @llvm.frexp.f32.i32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare { fp128, i32 } @llvm.frexp.f128.i32(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare { double, i32 } @llvm.frexp.f64.i32(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare { float, i32 } @llvm.frexp.f32.i32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare { fp128, i32 } @llvm.frexp.f128.i32(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare { double, i32 } @llvm.frexp.f64.i32(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare { float, i32 } @llvm.frexp.f32.i32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare { fp128, i32 } @llvm.frexp.f128.i32(fp128) [[READNONE_INTRINSIC2]]
__builtin_huge_val(); __builtin_huge_valf(); __builtin_huge_vall(); __builtin_huge_valf128();
@@ -165,10 +165,10 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
__builtin_ldexp(f,f); __builtin_ldexpf(f,f); __builtin_ldexpl(f,f); __builtin_ldexpf128(f,f);
-// NO__ERRNO: declare double @llvm.ldexp.f64.i32(double, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.ldexp.f32.i32(float, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.ldexp.f128.i32(fp128, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.ldexp.f64.i32(double, i32) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.ldexp.f32.i32(float, i32) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.ldexp.f128.i32(fp128, i32) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @ldexp(double noundef, i32 noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @ldexpf(float noundef, i32 noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @ldexpl(x86_fp80 noundef, i32 noundef) [[NOT_READNONE]]
@@ -180,7 +180,7 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// NO__ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare fp128 @modff128(fp128 noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]]
-// HAS_ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC:#[0-9]+]]
// HAS_ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]]
// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]]
// HAS_ERRNO: declare fp128 @modff128(fp128 noundef, ptr noundef) [[NOT_READNONE]]
@@ -209,10 +209,10 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
__builtin_pow(f,f); __builtin_powf(f,f); __builtin_powl(f,f); __builtin_powf128(f,f);
-// NO__ERRNO: declare double @llvm.pow.f64(double, double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.pow.f32(float, float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.pow.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.pow.f64(double, double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.pow.f32(float, float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.pow.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @pow(double noundef, double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @powf(float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @powl(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
@@ -220,12 +220,12 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
__builtin_powi(f,f); __builtin_powif(f,f); __builtin_powil(f,f);
-// NO__ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC2]]
/* math */
__builtin_acos(f); __builtin_acosf(f); __builtin_acosl(f); __builtin_acosf128(f);
@@ -307,21 +307,21 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
__builtin_ceil(f); __builtin_ceilf(f); __builtin_ceill(f); __builtin_ceilf128(f);
-// NO__ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.ceil.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.ceil.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.ceil.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.ceil.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.ceil.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.ceil.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.ceil.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_cos(f); __builtin_cosf(f); __builtin_cosl(f); __builtin_cosf128(f);
-// NO__ERRNO: declare double @llvm.cos.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.cos.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.cos.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.cos.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.cos.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.cos.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.cos.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.cos.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @cos(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @cosf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @cosl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -362,10 +362,10 @@ __builtin_erfc(f); __builtin_erfcf(f); __builtin_erfcl(f); __builtin_
__builtin_exp(f); __builtin_expf(f); __builtin_expl(f); __builtin_expf128(f);
-// NO__ERRNO: declare double @llvm.exp.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.exp.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.exp.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.exp.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.exp.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.exp.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.exp.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @exp(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @expf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @expl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -373,10 +373,10 @@ __builtin_exp(f); __builtin_expf(f); __builtin_expl(f); __builtin_e
__builtin_exp2(f); __builtin_exp2f(f); __builtin_exp2l(f); __builtin_exp2f128(f);
-// NO__ERRNO: declare double @llvm.exp2.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.exp2.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.exp2.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.exp2.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.exp2.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.exp2.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp2.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.exp2.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @exp2(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @exp2f(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @exp2l(x86_fp80 noundef) [[NOT_READNONE]]
@@ -384,10 +384,10 @@ __builtin_exp2(f); __builtin_exp2f(f); __builtin_exp2l(f); __builtin_
__builtin_exp10(f); __builtin_exp10f(f); __builtin_exp10l(f); __builtin_exp10f128(f);
-// NO__ERRNO: declare double @llvm.exp10.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.exp10.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.exp10.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.exp10.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.exp10.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.exp10.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.exp10.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.exp10.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @exp10(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @exp10f(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @exp10l(x86_fp80 noundef) [[NOT_READNONE]]
@@ -417,22 +417,22 @@ __builtin_fdim(f,f); __builtin_fdimf(f,f); __builtin_fdiml(f,f); __bu
__builtin_floor(f); __builtin_floorf(f); __builtin_floorl(f); __builtin_floorf128(f);
-// NO__ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.floor.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.floor.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.floor.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.floor.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.floor.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.floor.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.floor.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_fma(f,f,f); __builtin_fmaf(f,f,f); __builtin_fmal(f,f,f); __builtin_fmaf128(f,f,f); __builtin_fmaf16(f,f,f);
-// NO__ERRNO: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.fma.f128(fp128, fp128, fp128) [[READNONE_INTRINSIC]]
-// NO__ERRONO: declare half @llvm.fma.f16(half, half, half) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.fma.f64(double, double, double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.fma.f32(float, float, float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.fma.f128(fp128, fp128, fp128) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare half @llvm.fma.f16(half, half, half) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @fma(double noundef, double noundef, double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @fmaf(float noundef, float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @fmal(x86_fp80 noundef, x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
@@ -454,25 +454,25 @@ __builtin_fma(f,f,f); __builtin_fmaf(f,f,f); __builtin_fmal(f,f,f);
__builtin_fmax(f,f); __builtin_fmaxf(f,f); __builtin_fmaxl(f,f); __builtin_fmaxf128(f,f);
-// NO__ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.maxnum.f128(fp128, fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.maxnum.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.maxnum.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.maxnum.f64(double, double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.maxnum.f32(float, float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.maxnum.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
__builtin_fmin(f,f); __builtin_fminf(f,f); __builtin_fminl(f,f); __builtin_fminf128(f,f);
-// NO__ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.minnum.f128(fp128, fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.minnum.f128(fp128, fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.minnum.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.minnum.f64(double, double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.minnum.f32(float, float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.minnum.f128(fp128, fp128) [[READNONE_INTRINSIC2]]
__builtin_hypot(f,f); __builtin_hypotf(f,f); __builtin_hypotl(f,f); __builtin_hypotf128(f,f);
@@ -509,10 +509,10 @@ __builtin_lgamma(f); __builtin_lgammaf(f); __builtin_lgammal(f); __builti
__builtin_llrint(f); __builtin_llrintf(f); __builtin_llrintl(f); __builtin_llrintf128(f);
-// NO__ERRNO: declare i64 @llvm.llrint.i64.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llrint.i64.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llrint.i64.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare i64 @llrint(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @llrintf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @llrintl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -520,10 +520,10 @@ __builtin_llrint(f); __builtin_llrintf(f); __builtin_llrintl(f); __builti
__builtin_llround(f); __builtin_llroundf(f); __builtin_llroundl(f); __builtin_llroundf128(f);
-// NO__ERRNO: declare i64 @llvm.llround.i64.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llround.i64.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llround.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.llround.i64.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llround.i64.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llround.i64.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llround.i64.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.llround.i64.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare i64 @llround(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @llroundf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @llroundl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -531,10 +531,10 @@ __builtin_llround(f); __builtin_llroundf(f); __builtin_llroundl(f); __built
__builtin_log(f); __builtin_logf(f); __builtin_logl(f); __builtin_logf128(f);
-// NO__ERRNO: declare double @llvm.log.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.log.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.log.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.log.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.log.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.log.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.log.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.log.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @log(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @logf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @logl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -542,10 +542,10 @@ __builtin_log(f); __builtin_logf(f); __builtin_logl(f); __builtin_l
__builtin_log10(f); __builtin_log10f(f); __builtin_log10l(f); __builtin_log10f128(f);
-// NO__ERRNO: declare double @llvm.log10.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.log10.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.log10.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.log10.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.log10.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.log10.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.log10.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.log10.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @log10(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @log10f(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @log10l(x86_fp80 noundef) [[NOT_READNONE]]
@@ -564,10 +564,10 @@ __builtin_log1p(f); __builtin_log1pf(f); __builtin_log1pl(f); __builtin
__builtin_log2(f); __builtin_log2f(f); __builtin_log2l(f); __builtin_log2f128(f);
-// NO__ERRNO: declare double @llvm.log2.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.log2.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.log2.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.log2.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.log2.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.log2.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.log2.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.log2.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @log2(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @log2f(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @log2l(x86_fp80 noundef) [[NOT_READNONE]]
@@ -586,10 +586,10 @@ __builtin_logb(f); __builtin_logbf(f); __builtin_logbl(f); __builtin_
__builtin_lrint(f); __builtin_lrintf(f); __builtin_lrintl(f); __builtin_lrintf128(f);
-// NO__ERRNO: declare i64 @llvm.lrint.i64.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lrint.i64.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lrint.i64.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare i64 @lrint(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @lrintf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @lrintl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -597,10 +597,10 @@ __builtin_lrint(f); __builtin_lrintf(f); __builtin_lrintl(f); __builtin
__builtin_lround(f); __builtin_lroundf(f); __builtin_lroundl(f); __builtin_lroundf128(f);
-// NO__ERRNO: declare i64 @llvm.lround.i64.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lround.i64.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lround.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare i64 @llvm.lround.i64.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lround.i64.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lround.i64.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lround.i64.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare i64 @llvm.lround.i64.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare i64 @lround(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @lroundf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare i64 @lroundl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -608,14 +608,14 @@ __builtin_lround(f); __builtin_lroundf(f); __builtin_lroundl(f); __built
__builtin_nearbyint(f); __builtin_nearbyintf(f); __builtin_nearbyintl(f); __builtin_nearbyintf128(f);
-// NO__ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.nearbyint.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.nearbyint.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.nearbyint.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.nearbyint.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.nearbyint.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.nearbyint.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.nearbyint.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_nextafter(f,f); __builtin_nextafterf(f,f); __builtin_nextafterl(f,f); __builtin_nextafterf128(f,f);
@@ -663,25 +663,25 @@ __builtin_remquo(f,f,i); __builtin_remquof(f,f,i); __builtin_remquol(f,f,i); __
__builtin_rint(f); __builtin_rintf(f); __builtin_rintl(f); __builtin_rintf128(f);
-// NO__ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.rint.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.rint.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.rint.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.rint.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.rint.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.rint.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.rint.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_round(f); __builtin_roundf(f); __builtin_roundl(f); __builtin_roundf128(f);
-// NO__ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.round.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.round.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.round.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.round.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.round.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.round.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.round.f128(fp128) [[READNONE_INTRINSIC2]]
__builtin_scalbln(f,f); __builtin_scalblnf(f,f); __builtin_scalblnl(f,f); __builtin_scalblnf128(f,f);
@@ -707,10 +707,10 @@ __builtin_scalbn(f,f); __builtin_scalbnf(f,f); __builtin_scalbnl(f,f); __
__builtin_sin(f); __builtin_sinf(f); __builtin_sinl(f); __builtin_sinf128(f);
-// NO__ERRNO: declare double @llvm.sin.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.sin.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.sin.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.sin.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.sin.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.sin.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.sin.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.sin.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @sin(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @sinf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @sinl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -747,10 +747,10 @@ __builtin_sincospi(f,d,d); __builtin_sincospif(f,fp,fp); __builtin_sincospil(f,l
__builtin_sqrt(f); __builtin_sqrtf(f); __builtin_sqrtl(f); __builtin_sqrtf128(f);
-// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.sqrt.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.sqrt.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.sqrt.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.sqrt.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.sqrt.f128(fp128) [[READNONE_INTRINSIC2]]
// HAS_ERRNO: declare double @sqrt(double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @sqrtf(float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @sqrtl(x86_fp80 noundef) [[NOT_READNONE]]
@@ -791,22 +791,24 @@ __builtin_tgamma(f); __builtin_tgammaf(f); __builtin_tgammal(f); __builti
__builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin_truncf128(f);
-// NO__ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// NO__ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare double @llvm.trunc.f64(double) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare float @llvm.trunc.f32(float) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC2]]
+// HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC2]]
};
// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} }
+// NO__ERRNO: attributes [[READNONE_INTRINSIC2]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
// NO__ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} }
// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }
// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
+// HAS_ERRNO: attributes [[READNONE_INTRINSIC2]] = { {{.*}}memory(none){{.*}} }
// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} }
// HAS_ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} }
// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 2731380..37d6306 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -102,6 +102,8 @@ __m64 test_mm_alignr_pi8(__m64 a, __m64 b) {
// CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
return _mm_alignr_pi8(a, b, 2);
}
+TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 2), 11, 12, 13, 14, 15, 16, 1, 2));
+TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 16), 0, 0, 0, 0, 0, 0, 0, 0));
__m64 test_mm_and_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_and_si64
@@ -630,18 +632,24 @@ __m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(
return _mm_sll_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sll_pi16((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){1, 0, 0, 0}), 2, 4, 6, 8));
+TEST_CONSTEXPR(match_v4hi(_mm_sll_pi16((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){16, 0, 0, 0}), 0, 0, 0, 0)); // shift count >= 16 clears every element
__m64 test_mm_sll_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(
return _mm_sll_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sll_pi32((__m64)(__v2si){1, 2}, (__m64)(__v2si){1, 0}), 2, 4));
+TEST_CONSTEXPR(match_v2si(_mm_sll_pi32((__m64)(__v2si){1, 2}, (__m64)(__v2si){32, 0}), 0, 0));
__m64 test_mm_sll_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_si64
// CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(
return _mm_sll_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_sll_si64((__m64)(__v1di){1}, (__m64)(__v1di){1}), 2));
+TEST_CONSTEXPR(match_v1di(_mm_sll_si64((__m64)(__v1di){1}, (__m64)(__v1di){64}), 0));
__m64 test_mm_slli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_slli_pi16
@@ -683,12 +691,16 @@ __m64 test_mm_sra_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(
return _mm_sra_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sra_pi16((__m64)(__v4hi){-16, 16, -8, 8}, (__m64)(__v4hi){1, 0, 0, 0}), -8, 8, -4, 4));
+TEST_CONSTEXPR(match_v4hi(_mm_sra_pi16((__m64)(__v4hi){-16, 16, -8, 8}, (__m64)(__v4hi){16, 0, 0, 0}), -1, 0, -1, 0)); // count >= 16: arithmetic shift fills each lane with its sign bit
__m64 test_mm_sra_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sra_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(
return _mm_sra_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sra_pi32((__m64)(__v2si){-16, 16}, (__m64)(__v2si){1, 0}), -8, 8));
+TEST_CONSTEXPR(match_v2si(_mm_sra_pi32((__m64)(__v2si){-16, 16}, (__m64)(__v2si){32, 0}), -1, 0));
__m64 test_mm_srai_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srai_pi16
@@ -720,18 +732,24 @@ __m64 test_mm_srl_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(
return _mm_srl_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hu(_mm_srl_pi16((__m64)(__v4hu){0x8000, 16, 8, 4}, (__m64)(__v4hi){1, 0, 0, 0}), 0x4000, 8, 4, 2));
+TEST_CONSTEXPR(match_v4hi(_mm_srl_pi16((__m64)(__v4hi){-1, 16, 8, 4}, (__m64)(__v4hi){16, 0, 0, 0}), 0, 0, 0, 0));
__m64 test_mm_srl_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(
return _mm_srl_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2su(_mm_srl_pi32((__m64)(__v2su){0x80000000, 16}, (__m64)(__v2si){1, 0}), 0x40000000, 8));
+TEST_CONSTEXPR(match_v2si(_mm_srl_pi32((__m64)(__v2si){-1, 16}, (__m64)(__v2si){32, 0}), 0, 0));
__m64 test_mm_srl_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_si64
// CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(
return _mm_srl_si64(a, b);
}
+TEST_CONSTEXPR(match_v1du(_mm_srl_si64((__m64)(__v1du){0x8000000000000000ULL}, (__m64)(__v1di){1}), 0x4000000000000000ULL));
+TEST_CONSTEXPR(match_v1di(_mm_srl_si64((__m64)(__v1di){-1}, (__m64)(__v1di){64}), 0));
__m64 test_mm_srli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srli_pi16
diff --git a/clang/test/CodeGen/X86/mmx-inline-asm-error.c b/clang/test/CodeGen/X86/mmx-inline-asm-error.c
index 1e22461..8a2f991 100644
--- a/clang/test/CodeGen/X86/mmx-inline-asm-error.c
+++ b/clang/test/CodeGen/X86/mmx-inline-asm-error.c
@@ -1,13 +1,15 @@
// RUN: %clang_cc1 -verify -triple x86_64-unknown-unknown -emit-llvm-only %s
+// RUN: %clang_cc1 -verify=omp -triple x86_64-unknown-unknown -emit-llvm-only -fopenmp %s
typedef int vec256 __attribute__((ext_vector_type(8)));
vec256 foo(vec256 in) {
vec256 out;
asm("something %0" : : "y"(in)); // expected-error {{invalid input size for constraint 'y'}}
+ // omp-error@+1 {{invalid type 'vec256' (vector of 8 'int' values) in asm input for constraint 'y'}}
asm("something %0" : "=y"(out)); // expected-error {{invalid output size for constraint '=y'}}
+ // omp-error@+1 {{invalid type 'vec256' (vector of 8 'int' values) in asm input for constraint 'y'}}
asm("something %0, %0" : "+y"(out)); // expected-error {{invalid output size for constraint '+y'}}
return out;
}
-
diff --git a/clang/test/CodeGen/X86/sse-builtins-constrained.c b/clang/test/CodeGen/X86/sse-builtins-constrained.c
index 92240bb..f3b8d20 100644
--- a/clang/test/CodeGen/X86/sse-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/sse-builtins-constrained.c
@@ -28,11 +28,10 @@ __m128 test_mm_sqrt_ps(__m128 x) {
__m128 test_sqrt_ss(__m128 x) {
// COMMON-LABEL: test_sqrt_ss
- // COMMONIR: extractelement <4 x float> {{.*}}, i64 0
+ // COMMONIR: extractelement <4 x float> {{.*}}, i32 0
// UNCONSTRAINED: call float @llvm.sqrt.f32(float {{.*}})
// CONSTRAINED: call float @llvm.experimental.constrained.sqrt.f32(float {{.*}}, metadata !{{.*}})
// CHECK-ASM: sqrtss
- // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
+ // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i32 0
return _mm_sqrt_ss(x);
}
-
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 6c5297e..fd47757 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -751,9 +751,9 @@ __m128 test_mm_sqrt_ps(__m128 x) {
__m128 test_mm_sqrt_ss(__m128 x) {
// CHECK-LABEL: test_mm_sqrt_ss
- // CHECK: extractelement <4 x float> {{.*}}, i64 0
+ // CHECK: extractelement <4 x float> {{.*}}, i32 0
// CHECK: call float @llvm.sqrt.f32(float {{.*}})
- // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
+ // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 0
return _mm_sqrt_ss(x);
}
diff --git a/clang/test/CodeGen/X86/sse2-builtins-constrained.c b/clang/test/CodeGen/X86/sse2-builtins-constrained.c
new file mode 100644
index 0000000..a4a08297
--- /dev/null
+++ b/clang/test/CodeGen/X86/sse2-builtins-constrained.c
@@ -0,0 +1,37 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m128d test_mm_sqrt_pd(__m128d x) {
+ // COMMON-LABEL: test_mm_sqrt_pd
+ // UNCONSTRAINED: call {{.*}}<2 x double> @llvm.sqrt.v2f64(<2 x double> {{.*}})
+ // CONSTRAINED: call {{.*}}<2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: sqrtpd
+ return _mm_sqrt_pd(x);
+}
+
+__m128d test_sqrt_sd(__m128d x, __m128d y) {
+ // COMMON-LABEL: test_sqrt_sd
+ // COMMONIR: extractelement <2 x double> {{.*}}, i32 0
+ // UNCONSTRAINED: call double @llvm.sqrt.f64(double {{.*}})
+ // CONSTRAINED: call double @llvm.experimental.constrained.sqrt.f64(double {{.*}}, metadata !{{.*}})
+ // CHECK-ASM: sqrtsd
+ // COMMONIR: insertelement <2 x double> {{.*}}, double {{.*}}, i32 0
+ return _mm_sqrt_sd(x, y);
+}
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 379ae48..c4975b4 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -573,6 +573,8 @@ __m128 test_mm_cvtpd_ps(__m128d A) {
return _mm_cvtpd_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps((__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 0.0f, 0.0f));
+
__m128i test_mm_cvtps_epi32(__m128 A) {
// CHECK-LABEL: test_mm_cvtps_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
@@ -614,6 +616,8 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
return _mm_cvtsd_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 5.0f, 6.0f, 7.0f));
+
int test_mm_cvtsi128_si32(__m128i A) {
// CHECK-LABEL: test_mm_cvtsi128_si32
// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
@@ -1336,18 +1340,31 @@ __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sll_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 2, 4, 6, 8, 10, 12, 14, 16));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0)); // count is the full low 64 bits of the second operand (0x10000 here), so the result is zero
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 2, 4, 6, 8, 10, 12, 14, 16));
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sll_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 2, 4, 6, 8));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 1, 1}), 2, 4, 6, 8));
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_sll_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 2, 4));
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){64, 0}), 0, 0));
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 1}), 2, 4));
__m128i test_mm_slli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_slli_epi16
@@ -1440,9 +1457,10 @@ __m128d test_mm_sqrt_pd(__m128d A) {
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_sqrt_sd
- // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
// CHECK: call double @llvm.sqrt.f64(double {{.*}})
- // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
+ // CHECK: %[[sqrt_vec:.*]] = insertelement <2 x double> poison, double %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %[[sqrt_vec]], double %{{.*}}, i32 1
return _mm_sqrt_sd(A, B);
}
@@ -1451,12 +1469,22 @@ __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sra_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -8, 8, -4, 4, -2, 2, -1, 1));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), -8, 8, -4, 4, -2, 2, -1, 1));
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sra_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sra_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){1, 0, 0, 0}), -8, 8, -4, 4));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){0, 1, 0, 0}), -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){1, 0, 1, 1}), -8, 8, -4, 4));
__m128i test_mm_srai_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srai_epi16
@@ -1502,18 +1530,31 @@ __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_srl_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_srl_epi16((__m128i)(__v8hu){0x8000, 16, 8, 4, 2, 1, 0, 0xFFFF}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0x4000, 8, 4, 2, 1, 0, 0, 0x7FFF));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hu(_mm_srl_epi16((__m128i)(__v8hu){0x8000, 16, 8, 4, 2, 1, 0, 0xFFFF}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0x4000, 8, 4, 2, 1, 0, 0, 0x7FFF));
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_srl_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4su(_mm_srl_epi32((__m128i)(__v4su){0x80000000, 16, 8, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 0x40000000, 8, 4, 2));
+TEST_CONSTEXPR(match_v4si(_mm_srl_epi32((__m128i)(__v4si){-1, 16, 8, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_srl_epi32((__m128i)(__v4si){-1, 16, 8, 4}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4su(_mm_srl_epi32((__m128i)(__v4su){0x80000000, 16, 8, 4}, (__m128i)(__v4si){1, 0, 1, 1}), 0x40000000, 8, 4, 2));
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_srl_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2du(_mm_srl_epi64((__m128i)(__v2du){0x8000000000000000ULL, 16}, (__m128i)(__v2di){1, 0}), 0x4000000000000000ULL, 8));
+TEST_CONSTEXPR(match_v2di(_mm_srl_epi64((__m128i)(__v2di){-1, 16}, (__m128i)(__v2di){64, 0}), 0, 0));
+TEST_CONSTEXPR(match_v2du(_mm_srl_epi64((__m128i)(__v2du){0x8000000000000000ULL, 16}, (__m128i)(__v2di){1, 1}), 0x4000000000000000ULL, 8));
__m128i test_mm_srli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srli_epi16
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index a82dd40..44389fb 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_addsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0));
__m128 test_mm_addsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_addsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_addsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f));
__m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hadd_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 62cd392..35fa65a 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) {
return _mm_insert_ps(x, y, 4);
}
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all
+
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epi8
// CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index b7a4a2f..193fa37 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -48,6 +48,8 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
return _mm_alignr_epi8(a, b, 2);
}
+TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2));
+TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test2_mm_alignr_epi8