aboutsummaryrefslogtreecommitdiff
path: root/clang/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'clang/test/CodeGen/X86')
-rwxr-xr-xclang/test/CodeGen/X86/amx_movrs_tranpose.c53
-rwxr-xr-xclang/test/CodeGen/X86/amx_movrs_tranpose_api.c81
-rwxr-xr-xclang/test/CodeGen/X86/amx_movrs_transpose_errors.c22
-rw-r--r--clang/test/CodeGen/X86/amx_tf32.c5
-rw-r--r--clang/test/CodeGen/X86/amx_tf32_api.c7
-rw-r--r--clang/test/CodeGen/X86/amx_tf32_errors.c8
-rw-r--r--clang/test/CodeGen/X86/amx_transpose.c75
-rw-r--r--clang/test/CodeGen/X86/amx_transpose_api.c114
-rw-r--r--clang/test/CodeGen/X86/amx_transpose_errors.c75
-rw-r--r--clang/test/CodeGen/X86/sse41-builtins.c10
10 files changed, 10 insertions, 440 deletions
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose.c b/clang/test/CodeGen/X86/amx_movrs_tranpose.c
deleted file mode 100755
index 192c153..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz0rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz0rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz1rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
- _tile_2rpntlvwz1rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c b/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
deleted file mode 100755
index b174cc5..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0rs
- // CHECK: call void @llvm.x86.t2rpntlvwz0rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0rst1
- // CHECK: call void @llvm.x86.t2rpntlvwz0rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0rst1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1rs
- // CHECK: call void @llvm.x86.t2rpntlvwz1rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1rst1
- // CHECK: call void @llvm.x86.t2rpntlvwz1rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1rst1(1, A, B);
-}
-
-void test__tile_2rpntlvwz0rs(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz0rs
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz0rst1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz0rst1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0rst1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rs(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz1rs
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rst1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test__tile_2rpntlvwz1rst1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1rst1(&dst0, &dst1, buf, STRIDE);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c b/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
deleted file mode 100755
index 840b52b..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-transpose -target-feature +amx-movrs \
-// RUN: -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
- _tile_2rpntlvwz0rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
- _tile_2rpntlvwz0rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
- _tile_2rpntlvwz1rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
- _tile_2rpntlvwz1rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c
index 661a9df..54ad6bb 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -10,8 +10,3 @@ void test_tile_mmultf32ps(void) {
_tile_mmultf32ps(1, 2, 3);
}
-void test_tile_tmmultf32ps(void) {
- // CHECK-LABEL: @test_tile_tmmultf32ps(
- // CHECK: call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3)
- _tile_tmmultf32ps(1, 2, 3);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c
index 2ac8489..8f574b7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -18,10 +18,3 @@ void test_tile_mmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
__tile_mmultf32ps(&c, a, b);
}
-void test_tile_tmmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tmmultf32ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttmmultf32ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tmmultf32ps(&c, a, b);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c
index 4502130..f0fdd06 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -13,11 +13,3 @@ void test_tile_mmultf32ps() {
_tile_mmultf32ps(1, 3, 3); // expected-error {{tile arguments must refer to different tiles}}
}
-void test_tile_tmmultf32ps() {
- _tile_tmmultf32ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tmmultf32ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tmmultf32ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tmmultf32ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose.c b/clang/test/CodeGen/X86/amx_transpose.c
deleted file mode 100644
index 7e88fd8..0000000
--- a/clang/test/CodeGen/X86/amx_transpose.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-transpose \
-// RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -target-feature +avx512f -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0
- // CHECK: call void @llvm.x86.t2rpntlvwz0(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0(1, A, B);
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz0t1
- // CHECK: call void @llvm.x86.t2rpntlvwz0t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz0t1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1
- // CHECK: call void @llvm.x86.t2rpntlvwz1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
- // CHECK-LABEL: @test_tile_2rpntlvwz1t1
- // CHECK: call void @llvm.x86.t2rpntlvwz1t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
- _tile_2rpntlvwz1t1(1, A, B);
-}
-
-void test_tile_transposed(void)
-{
- // CHECK-LABEL: @test_tile_transposed
- // CHECK: call void @llvm.x86.ttransposed(i8 1, i8 2)
- _tile_transposed(1, 2);
-}
-
-void test_tile_tdpbf16ps(void)
-{
- // CHECK-LABEL: @test_tile_tdpbf16ps
- // CHECK: call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3)
- _tile_tdpbf16ps(1, 2, 3);
-}
-
-void test_tile_tdpfp16ps(void)
-{
- // CHECK-LABEL: @test_tile_tdpfp16ps
- // CHECK: call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6)
- _tile_tdpfp16ps(4, 5, 6);
-}
-
-void test_tile_tcmmimfp16ps(void) {
- // CHECK-LABEL: @test_tile_tcmmimfp16ps
- // CHECK: call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3)
- _tile_tcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_tcmmrlfp16ps(void) {
- // CHECK-LABEL: @test_tile_tcmmrlfp16ps
- // CHECK: call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3)
- _tile_tcmmrlfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtcmmimfp16ps(void) {
- // CHECK-LABEL: @test_tile_conjtcmmimfp16ps
- // CHECK: call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3)
- _tile_conjtcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtfp16(void) {
- // CHECK-LABEL: @test_tile_conjtfp16
- // CHECK: call void @llvm.x86.tconjtfp16(i8 1, i8 2)
- _tile_conjtfp16(1, 2);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_api.c b/clang/test/CodeGen/X86/amx_transpose_api.c
deleted file mode 100644
index dc3ef51..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_api.c
+++ /dev/null
@@ -1,114 +0,0 @@
-// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f \
-// RUN: -target-feature +amx-transpose -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
-
-#include <immintrin.h>
-
-char buf[2048];
-#define STRIDE 32
-
-char buf2[2048];
-
-void test_tile_2rpntlvwz0(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz0
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz0t1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz0t1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz0t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1t1(__tile1024i dst0, __tile1024i dst1) {
- //CHECK-LABEL: @test_tile_2rpntlvwz1t1
- //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
- //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
- __tile_2rpntlvwz1t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_transposed(__tile1024i dst, __tile1024i src) {
- //CHECK-LABEL: @test_tile_transposed
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttransposed.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_transposed(&dst, src);
-}
-
-void test_tile_tdpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tdpbf16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttdpbf16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tdpbf16ps(&c, a, b);
-}
-
-void test_tile_tdpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tdpfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttdpfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tdpfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tcmmimfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttcmmimfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmrlfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_tcmmrlfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.ttcmmrlfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_tcmmrlfp16ps(&c, a, b);
-}
-
-void test_tile_conjtcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
- //CHECK-LABEL: @test_tile_conjtcmmimfp16ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_conjtcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_conjtfp16(__tile1024i dst, __tile1024i src) {
- //CHECK-LABEL: @test_tile_conjtfp16
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call x86_amx @llvm.x86.tconjtfp16.internal
- //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
- __tile_conjtfp16(&dst, src);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_errors.c b/clang/test/CodeGen/X86/amx_transpose_errors.c
deleted file mode 100644
index 80368c5..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_errors.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-transpose \
-// RUN: -target-feature +avx512f -target-feature +amx-fp16 -target-feature +amx-complex -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-// Transpose
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
- _tile_2rpntlvwz0(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
- _tile_2rpntlvwz0t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
- _tile_2rpntlvwz1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
- _tile_2rpntlvwz1t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tdpbf16ps()
-{
- _tile_tdpbf16ps(8, 2, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 8, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpbf16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpbf16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpbf16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tdpfp16ps()
-{
- _tile_tdpfp16ps(8, 5, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 8, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 5, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_tdpfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
- _tile_tdpfp16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_transposed()
-{
- _tile_transposed(8, 2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
- _tile_transposed(1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tcmmimfp16ps() {
- _tile_tcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tcmmimfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tcmmrlfp16ps() {
- _tile_tcmmrlfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_tcmmrlfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtcmmimfp16ps() {
- _tile_conjtcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
- _tile_conjtcmmimfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtfp16() {
- _tile_conjtfp16(16, 2); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
- _tile_conjtfp16(1, 26); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 62cd392..35fa65a 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) {
return _mm_insert_ps(x, y, 4);
}
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all
+
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epi8
// CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})