diff options
Diffstat (limited to 'clang/test/CodeGen/X86')
| -rwxr-xr-x | clang/test/CodeGen/X86/amx_movrs_tranpose.c | 53 | ||||
| -rwxr-xr-x | clang/test/CodeGen/X86/amx_movrs_tranpose_api.c | 81 | ||||
| -rwxr-xr-x | clang/test/CodeGen/X86/amx_movrs_transpose_errors.c | 22 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_tf32.c | 5 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_tf32_api.c | 7 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_tf32_errors.c | 8 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_transpose.c | 75 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_transpose_api.c | 114 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/amx_transpose_errors.c | 75 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/avx512vlbw-builtins.c | 38 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/sse41-builtins.c | 10 |
11 files changed, 48 insertions, 440 deletions
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose.c b/clang/test/CodeGen/X86/amx_movrs_tranpose.c deleted file mode 100755 index 192c153..0000000 --- a/clang/test/CodeGen/X86/amx_movrs_tranpose.c +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \ -// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s - -#include <immintrin.h> -#include <stddef.h> - -char buf[2048]; -#define STRIDE 32 - -// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rs_internal( -// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}}) -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024 -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -void test_tile_2rpntlvwz0rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) { - _tile_2rpntlvwz0rs_internal(row, col0, col1, D0, D1, B, 1); -} - -// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz0rst1_internal( -// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}}) -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024 -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -void test_tile_2rpntlvwz0rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) { - _tile_2rpntlvwz0rst1_internal(row, col0, col1, D0, D1, B, 1); -} - -// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rs_internal( -// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}}) -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024 -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -void test_tile_2rpntlvwz1rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) { - _tile_2rpntlvwz1rs_internal(row, col0, col1, D0, D1, B, 1); -} - -// CHECK-LABEL: define dso_local void @test_tile_2rpntlvwz1rst1_internal( -// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}}) -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024 -// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1 -// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}}) -void test_tile_2rpntlvwz1rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) { - _tile_2rpntlvwz1rst1_internal(row, col0, col1, D0, D1, B, 1); -} diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c b/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c deleted file mode 100755 index b174cc5..0000000 --- a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-movrs -emit-llvm -o - -Wall -Werror -pedantic \ -// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s - -#include <immintrin.h> -#include <stddef.h> - -char buf[2048]; -#define STRIDE 32 - -void test_tile_2rpntlvwz0rs(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz0rs - // CHECK: call void @llvm.x86.t2rpntlvwz0rs(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz0rs(1, A, B); -} - -void test_tile_2rpntlvwz0rst1(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz0rst1 - // CHECK: call void @llvm.x86.t2rpntlvwz0rst1(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz0rst1(1, A, B); -} - -void test_tile_2rpntlvwz1rs(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz1rs - // CHECK: call void @llvm.x86.t2rpntlvwz1rs(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz1rs(1, A, B); -} - -void test_tile_2rpntlvwz1rst1(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz1rst1 - // CHECK: call void @llvm.x86.t2rpntlvwz1rst1(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz1rst1(1, A, B); -} - -void test__tile_2rpntlvwz0rs(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test__tile_2rpntlvwz0rs - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz0rs(&dst0, &dst1, buf, STRIDE); -} - -void test__tile_2rpntlvwz0rst1(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test__tile_2rpntlvwz0rst1 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz0rst1(&dst0, &dst1, buf, STRIDE); -} - -void test__tile_2rpntlvwz1rs(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test__tile_2rpntlvwz1rs - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz1rs(&dst0, &dst1, buf, STRIDE); -} - -void test__tile_2rpntlvwz1rst1(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test__tile_2rpntlvwz1rst1 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz1rst1(&dst0, &dst1, buf, STRIDE); -} diff --git a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c b/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c deleted file mode 100755 index 840b52b..0000000 --- a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-int8 -target-feature +amx-transpose -target-feature +amx-movrs \ -// RUN: -verify - -#include <immintrin.h> -#include <stddef.h> - -void test_tile_2rpntlvwz0rs(const void *A, size_t B) { - _tile_2rpntlvwz0rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz0rst1(const void *A, size_t B) { - _tile_2rpntlvwz0rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz1rs(const void *A, size_t B) { - _tile_2rpntlvwz1rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz1rst1(const void *A, size_t B) { - _tile_2rpntlvwz1rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c index 661a9df..54ad6bb 100644 --- a/clang/test/CodeGen/X86/amx_tf32.c +++ b/clang/test/CodeGen/X86/amx_tf32.c @@ -10,8 +10,3 @@ void test_tile_mmultf32ps(void) { _tile_mmultf32ps(1, 2, 3); } -void test_tile_tmmultf32ps(void) { - // CHECK-LABEL: @test_tile_tmmultf32ps( - // CHECK: call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3) - _tile_tmmultf32ps(1, 2, 3); -} diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c index 2ac8489..8f574b7 100644 --- a/clang/test/CodeGen/X86/amx_tf32_api.c +++ b/clang/test/CodeGen/X86/amx_tf32_api.c @@ -18,10 +18,3 @@ void test_tile_mmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) { __tile_mmultf32ps(&c, a, b); } -void test_tile_tmmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_tmmultf32ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttmmultf32ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_tmmultf32ps(&c, a, b); -} diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c index 4502130..f0fdd06 100644 --- a/clang/test/CodeGen/X86/amx_tf32_errors.c +++ b/clang/test/CodeGen/X86/amx_tf32_errors.c @@ -13,11 +13,3 @@ void test_tile_mmultf32ps() { _tile_mmultf32ps(1, 3, 3); // expected-error {{tile arguments must refer to different tiles}} } -void test_tile_tmmultf32ps() { - _tile_tmmultf32ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}} - _tile_tmmultf32ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}} - _tile_tmmultf32ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}} - _tile_tmmultf32ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} - _tile_tmmultf32ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}} - _tile_tmmultf32ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}} -} diff --git a/clang/test/CodeGen/X86/amx_transpose.c b/clang/test/CodeGen/X86/amx_transpose.c deleted file mode 100644 index 7e88fd8..0000000 --- a/clang/test/CodeGen/X86/amx_transpose.c +++ /dev/null @@ -1,75 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-transpose \ -// RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \ -// RUN: -target-feature +avx512f -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression| FileCheck %s - -#include <immintrin.h> -#include <stddef.h> - -void test_tile_2rpntlvwz0(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz0 - // CHECK: call void @llvm.x86.t2rpntlvwz0(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz0(1, A, B); -} - -void test_tile_2rpntlvwz0t1(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz0t1 - // CHECK: call void @llvm.x86.t2rpntlvwz0t1(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz0t1(1, A, B); -} - -void test_tile_2rpntlvwz1(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz1 - // CHECK: call void @llvm.x86.t2rpntlvwz1(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz1(1, A, B); -} - -void test_tile_2rpntlvwz1t1(const void *A, size_t B) { - // CHECK-LABEL: @test_tile_2rpntlvwz1t1 - // CHECK: call void @llvm.x86.t2rpntlvwz1t1(i8 1, ptr %{{.*}}, i64 %{{.*}}) - _tile_2rpntlvwz1t1(1, A, B); -} - -void test_tile_transposed(void) -{ - // CHECK-LABEL: @test_tile_transposed - // CHECK: call void @llvm.x86.ttransposed(i8 1, i8 2) - _tile_transposed(1, 2); -} - -void test_tile_tdpbf16ps(void) -{ - // CHECK-LABEL: @test_tile_tdpbf16ps - // CHECK: call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3) - _tile_tdpbf16ps(1, 2, 3); -} - -void test_tile_tdpfp16ps(void) -{ - // CHECK-LABEL: @test_tile_tdpfp16ps - // CHECK: call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6) - _tile_tdpfp16ps(4, 5, 6); -} - -void test_tile_tcmmimfp16ps(void) { - // CHECK-LABEL: @test_tile_tcmmimfp16ps - // CHECK: call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3) - _tile_tcmmimfp16ps(1, 2, 3); -} - -void test_tile_tcmmrlfp16ps(void) { - // CHECK-LABEL: @test_tile_tcmmrlfp16ps - // CHECK: call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3) - _tile_tcmmrlfp16ps(1, 2, 3); -} - -void test_tile_conjtcmmimfp16ps(void) { - // CHECK-LABEL: @test_tile_conjtcmmimfp16ps - // CHECK: call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3) - _tile_conjtcmmimfp16ps(1, 2, 3); -} - -void test_tile_conjtfp16(void) { - // CHECK-LABEL: @test_tile_conjtfp16 - // CHECK: call void @llvm.x86.tconjtfp16(i8 1, i8 2) - _tile_conjtfp16(1, 2); -} diff --git a/clang/test/CodeGen/X86/amx_transpose_api.c b/clang/test/CodeGen/X86/amx_transpose_api.c deleted file mode 100644 index dc3ef51..0000000 --- a/clang/test/CodeGen/X86/amx_transpose_api.c +++ /dev/null @@ -1,114 +0,0 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f \ -// RUN: -target-feature +amx-transpose -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \ -// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK - -#include <immintrin.h> - -char buf[2048]; -#define STRIDE 32 - -char buf2[2048]; - -void test_tile_2rpntlvwz0(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test_tile_2rpntlvwz0 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz0(&dst0, &dst1, buf, STRIDE); -} - -void test_tile_2rpntlvwz0t1(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test_tile_2rpntlvwz0t1 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz0t1(&dst0, &dst1, buf, STRIDE); -} - -void test_tile_2rpntlvwz1(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test_tile_2rpntlvwz1 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz1(&dst0, &dst1, buf, STRIDE); -} - -void test_tile_2rpntlvwz1t1(__tile1024i dst0, __tile1024i dst1) { - //CHECK-LABEL: @test_tile_2rpntlvwz1t1 - //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1 - //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}} - __tile_2rpntlvwz1t1(&dst0, &dst1, buf, STRIDE); -} - -void test_tile_transposed(__tile1024i dst, __tile1024i src) { - //CHECK-LABEL: @test_tile_transposed - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttransposed.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_transposed(&dst, src); -} - -void test_tile_tdpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_tdpbf16ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttdpbf16ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_tdpbf16ps(&c, a, b); -} - -void test_tile_tdpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_tdpfp16ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttdpfp16ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_tdpfp16ps(&c, a, b); -} - -void test_tile_tcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_tcmmimfp16ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttcmmimfp16ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_tcmmimfp16ps(&c, a, b); -} - -void test_tile_tcmmrlfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_tcmmrlfp16ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.ttcmmrlfp16ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_tcmmrlfp16ps(&c, a, b); -} - -void test_tile_conjtcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) { - //CHECK-LABEL: @test_tile_conjtcmmimfp16ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_conjtcmmimfp16ps(&c, a, b); -} - -void test_tile_conjtfp16(__tile1024i dst, __tile1024i src) { - //CHECK-LABEL: @test_tile_conjtfp16 - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call x86_amx @llvm.x86.tconjtfp16.internal - //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) - __tile_conjtfp16(&dst, src); -} diff --git a/clang/test/CodeGen/X86/amx_transpose_errors.c b/clang/test/CodeGen/X86/amx_transpose_errors.c deleted file mode 100644 index 80368c5..0000000 --- a/clang/test/CodeGen/X86/amx_transpose_errors.c +++ /dev/null @@ -1,75 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-transpose \ -// RUN: -target-feature +avx512f -target-feature +amx-fp16 -target-feature +amx-complex -verify - -#include <immintrin.h> -#include <stddef.h> - -// Transpose -void test_tile_2rpntlvwz0(const void *A, size_t B) { - _tile_2rpntlvwz0(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz0t1(const void *A, size_t B) { - _tile_2rpntlvwz0t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz1(const void *A, size_t B) { - _tile_2rpntlvwz1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_2rpntlvwz1t1(const void *A, size_t B) { - _tile_2rpntlvwz1t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_tdpbf16ps() -{ - _tile_tdpbf16ps(8, 2, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpbf16ps(1, 8, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpbf16ps(1, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpbf16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} - _tile_tdpbf16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}} - _tile_tdpbf16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}} -} - -void test_tile_tdpfp16ps() -{ - _tile_tdpfp16ps(8, 5, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpfp16ps(1, 8, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpfp16ps(1, 5, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_tdpfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} - _tile_tdpfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}} - _tile_tdpfp16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}} -} - -void test_tile_transposed() -{ - _tile_transposed(8, 2); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - _tile_transposed(1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -} - -void test_tile_tcmmimfp16ps() { - _tile_tcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}} - _tile_tcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}} - _tile_tcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}} - _tile_tcmmimfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} -} - -void test_tile_tcmmrlfp16ps() { - _tile_tcmmrlfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}} - _tile_tcmmrlfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}} - _tile_tcmmrlfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}} - _tile_tcmmrlfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} -} - -void test_tile_conjtcmmimfp16ps() { - _tile_conjtcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}} - _tile_conjtcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}} - _tile_conjtcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}} - _tile_conjtcmmimfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}} -} - -void test_tile_conjtfp16() { - _tile_conjtfp16(16, 2); // expected-error {{argument value 16 is outside the valid range [0, 7]}} - _tile_conjtfp16(1, 26); // expected-error {{argument value 26 is outside the valid range [0, 7]}} -} diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 116d86f..febef46 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -645,6 +645,21 @@ __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0); } +TEST_CONSTEXPR(_mm_cmpeq_epi8_mask( + ((__m128i)(__v16qi){5, 3, 7, 2, 9, 3, 7, 1, 5, 4, 8, 2, 9, 6, 7, 5}), + ((__m128i)(__v16qi){5, 2, 7, 3, 9, 4, 6, 1, 5, 3, 8, 1, 9, 5, 7, 5}) +) == (__mmask16)0xd595); + +TEST_CONSTEXPR(_mm_cmplt_epi8_mask( + ((__m128i)(__v16qi){1, 5, 3, 7, 2, 8, 4, 6, 9, 5, 3, 11, 2, 6, 15, 8}), + ((__m128i)(__v16qi){2, 4, 6, 8, 3, 5, 7, 9, 4, 6, 8, 10, 5, 7, 9, 11}) +) == (__mmask16)0xb6dd); + +TEST_CONSTEXPR(_mm_cmple_epi8_mask( + ((__m128i)(__v16qi){1, 3, 5, 7, 2, 6, 6, 8, 1, 3, 9, 7, 2, 4, 6, 10}), + ((__m128i)(__v16qi){2, 3, 4, 7, 3, 4, 5, 8, 2, 3, 4, 7, 3, 4, 5, 8}) +) == (__mmask16)0x3b9b); + __mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { // CHECK-LABEL: test_mm_mask_cmp_epi8_mask // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} @@ -2894,6 +2909,12 @@ __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) { return _mm_test_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm_test_epi8_mask( + (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask16)0xfffb); + __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_test_epi8_mask // CHECK: and <2 x i64> %{{.*}}, %{{.*}} @@ -2901,6 +2922,12 @@ __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return _mm_mask_test_epi8_mask(__U, __A, __B); } +TEST_CONSTEXPR(_mm_mask_test_epi8_mask( + 0xFFFF, + (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask16)0xfffb); __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_test_epi8_mask @@ -2908,6 +2935,11 @@ __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) { // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} return _mm256_test_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm256_test_epi8_mask( + (__m256i)(__v32qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m256i)(__v32qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask32)0xfffbfffb); __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_test_epi8_mask @@ -2954,6 +2986,12 @@ __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) { return _mm_testn_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm_testn_epi8_mask( + (__m128i)(__v16qi){1, 2, 77, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 16, 16}, + (__m128i)(__v16qi){2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15} +) +== (__mmask16)0xe001); + __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_testn_epi8_mask // CHECK: and <2 x i64> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392..35fa65a 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) { return _mm_insert_ps(x, y, 4); } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all + __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) |
