diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/bf16.ll | 193 |
2 files changed, 163 insertions, 38 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 8e575ab..fa63854 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4660,6 +4660,10 @@ def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; +// bf16 pre-index store +def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), + (STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; + // truncstore i64 def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, @@ -4685,6 +4689,8 @@ def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; @@ -4700,6 +4706,8 @@ def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //--- // (immediate post-indexed) diff --git a/llvm/test/CodeGen/AArch64/bf16.ll b/llvm/test/CodeGen/AArch64/bf16.ll index d3911ae4..a36b367 100644 --- a/llvm/test/CodeGen/AArch64/bf16.ll +++ b/llvm/test/CodeGen/AArch64/bf16.ll @@ -1,20 +1,23 @@ -; RUN: llc < %s -asm-verbose=0 -mtriple=arm64-eabi -mattr=+bf16 | FileCheck %s -; RUN: llc < %s -asm-verbose=0 -mtriple=aarch64 -mattr=+bf16 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-eabi -mattr=+bf16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s ; test argument passing and simple load/store define bfloat @test_load(ptr %p) nounwind { ; CHECK-LABEL: test_load: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ret %tmp1 = load bfloat, ptr %p, align 16 ret bfloat %tmp1 } define bfloat @test_load_offset1(ptr %p) nounwind { ; CHECK-LABEL: test_load_offset1: -; CHECK-NEXT: ldur h0, [x0, #1] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldur h0, [x0, #1] +; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 %tmp1 = load bfloat, ptr %g, align 2 ret bfloat %tmp1 @@ -22,8 +25,9 @@ define bfloat @test_load_offset1(ptr %p) nounwind { define bfloat @test_load_offset2(ptr %p) nounwind { ; CHECK-LABEL: test_load_offset2: -; CHECK-NEXT: ldr h0, [x0, #2] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, #2] +; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 2 %tmp1 = load bfloat, ptr %g, align 2 ret bfloat %tmp1 @@ -31,24 +35,27 @@ define bfloat @test_load_offset2(ptr %p) nounwind { define <4 x bfloat> @test_vec_load(ptr %p) nounwind { ; CHECK-LABEL: test_vec_load: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret %tmp1 = load <4 x bfloat>, ptr %p, align 16 ret <4 x bfloat> %tmp1 } define void @test_store(ptr %a, bfloat %b) nounwind { ; CHECK-LABEL: test_store: -; CHECK-NEXT: str h0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret store bfloat %b, ptr %a, align 16 ret void } define void @test_store_negative_offset(ptr %a, bfloat %b) nounwind { ; CHECK-LABEL: test_store_negative_offset: -; CHECK-NEXT: stur h0, [x0, #-4] -; CHECK-NEXT: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stur h0, [x0, #-4] +; CHECK-NEXT: ret entry: %0 = getelementptr inbounds bfloat, ptr %a, i64 -2 store bfloat %b, ptr %0, align 2 @@ -58,8 +65,9 @@ entry: ; Simple store of v4bf16 define void @test_vec_store(ptr %a, <4 x bfloat> %b) nounwind { ; CHECK-LABEL: test_vec_store: -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: store <4 x bfloat> %b, ptr %a, align 16 ret void @@ -67,60 +75,169 @@ entry: define <8 x bfloat> @test_build_vector_const() { ; CHECK-LABEL: test_build_vector_const: -; CHECK: mov [[TMP:w[0-9]+]], #16256 -; CHECK: dup v0.8h, [[TMP]] +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #16256 // =0x3f80 +; CHECK-NEXT: dup v0.8h, w8 +; CHECK-NEXT: ret ret <8 x bfloat> <bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80, bfloat 0xR3F80> } -define { bfloat, ptr } @test_store_post(bfloat %val, ptr %ptr) { +define ptr @test_store_post(bfloat %val, ptr %ptr) { ; CHECK-LABEL: test_store_post: -; CHECK: str h0, [x0], #2 - +; CHECK: // %bb.0: +; CHECK-NEXT: str h0, [x0], #2 +; CHECK-NEXT: ret store bfloat %val, ptr %ptr - %res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0 + %next = getelementptr bfloat, ptr %ptr, i32 1 + ret ptr %next +} + +define ptr @test_store_post_v4bf16(<4 x bfloat> %val, ptr %ptr) { +; CHECK-LABEL: test_store_post_v4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str d0, [x0], #8 +; CHECK-NEXT: ret + store <4 x bfloat> %val, ptr %ptr + %next = getelementptr <4 x bfloat>, ptr %ptr, i32 1 + ret ptr %next +} + +define ptr @test_store_post_v8bf16(<8 x bfloat> %val, ptr %ptr) { +; CHECK-LABEL: test_store_post_v8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [x0], #16 +; CHECK-NEXT: ret + store <8 x bfloat> %val, ptr %ptr + %next = getelementptr <8 x bfloat>, ptr %ptr, i32 1 + ret ptr %next +} +define { bfloat, ptr } @test_load_post(ptr %ptr) { +; CHECK-LABEL: test_load_post: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0], #2 +; CHECK-NEXT: ret + %val = load bfloat, ptr %ptr + %res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0 %next = getelementptr bfloat, ptr %ptr, i32 1 %res = insertvalue { bfloat, ptr } %res.tmp, ptr %next, 1 - ret { bfloat, ptr } %res } -define { <4 x bfloat>, ptr } @test_store_post_v4bf16(<4 x bfloat> %val, ptr %ptr) { -; CHECK-LABEL: test_store_post_v4bf16: -; CHECK: str d0, [x0], #8 - - store <4 x bfloat> %val, ptr %ptr +define { <4 x bfloat>, ptr } @test_load_post_v4bf16(ptr %ptr) { +; CHECK-LABEL: test_load_post_v4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0], #8 +; CHECK-NEXT: ret + %val = load <4 x bfloat>, ptr %ptr %res.tmp = insertvalue { <4 x bfloat>, ptr } undef, <4 x bfloat> %val, 0 - %next = getelementptr <4 x bfloat>, ptr %ptr, i32 1 %res = insertvalue { <4 x bfloat>, ptr } %res.tmp, ptr %next, 1 - ret { <4 x bfloat>, ptr } %res } -define { <8 x bfloat>, ptr } @test_store_post_v8bf16(<8 x bfloat> %val, ptr %ptr) { -; CHECK-LABEL: test_store_post_v8bf16: -; CHECK: str q0, [x0], #16 - - store <8 x bfloat> %val, ptr %ptr +define { <8 x bfloat>, ptr } @test_load_post_v8bf16(ptr %ptr) { +; CHECK-LABEL: test_load_post_v8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0], #16 +; CHECK-NEXT: ret + %val = load <8 x bfloat>, ptr %ptr %res.tmp = insertvalue { <8 x bfloat>, ptr } undef, <8 x bfloat> %val, 0 - %next = getelementptr <8 x bfloat>, ptr %ptr, i32 1 %res = insertvalue { <8 x bfloat>, ptr } %res.tmp, ptr %next, 1 + ret { <8 x bfloat>, ptr } %res +} +define ptr @test_store_pre(bfloat %val, ptr %ptr) { +; CHECK-LABEL: test_store_pre: +; CHECK: // %bb.0: +; CHECK-NEXT: str h0, [x0, #2]! +; CHECK-NEXT: ret + %next = getelementptr bfloat, ptr %ptr, i32 1 + store bfloat %val, ptr %next + ret ptr %next +} + +define ptr @test_store_pre_v4bf16(<4 x bfloat> %val, ptr %ptr) { +; CHECK-LABEL: test_store_pre_v4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str d0, [x0, #8]! +; CHECK-NEXT: ret + %next = getelementptr <4 x bfloat>, ptr %ptr, i32 1 + store <4 x bfloat> %val, ptr %next + ret ptr %next +} + +define ptr @test_store_pre_v8bf16(<8 x bfloat> %val, ptr %ptr) { +; CHECK-LABEL: test_store_pre_v8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [x0, #16]! +; CHECK-NEXT: ret + %next = getelementptr <8 x bfloat>, ptr %ptr, i32 1 + store <8 x bfloat> %val, ptr %next + ret ptr %next +} + +define ptr @test_store_pre_v8bf16_trunc(ptr %ptr) { +; CHECK-LABEL: test_store_pre_v8bf16_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: str q0, [x0, #16]! +; CHECK-NEXT: ret + %t = load <8 x bfloat>, ptr %ptr + %next = getelementptr <8 x bfloat>, ptr %ptr, i32 1 + store <8 x bfloat> %t, ptr %next + ret ptr %next +} + +define { bfloat, ptr } @test_load_pre(ptr %ptr) { +; CHECK-LABEL: test_load_pre: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, #2]! +; CHECK-NEXT: ret + %next = getelementptr bfloat, ptr %ptr, i32 1 + %val = load bfloat, ptr %next + %res.tmp = insertvalue { bfloat, ptr } undef, bfloat %val, 0 + %res = insertvalue { bfloat, ptr } %res.tmp, ptr %next, 1 + ret { bfloat, ptr } %res +} + +define { <4 x bfloat>, ptr } @test_load_pre_v4bf16(ptr %ptr) { +; CHECK-LABEL: test_load_pre_v4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0, #8]! +; CHECK-NEXT: ret + %next = getelementptr <4 x bfloat>, ptr %ptr, i32 1 + %val = load <4 x bfloat>, ptr %next + %res.tmp = insertvalue { <4 x bfloat>, ptr } undef, <4 x bfloat> %val, 0 + %res = insertvalue { <4 x bfloat>, ptr } %res.tmp, ptr %next, 1 + ret { <4 x bfloat>, ptr } %res +} + +define { <8 x bfloat>, ptr } @test_load_pre_v8bf16(ptr %ptr) { +; CHECK-LABEL: test_load_pre_v8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0, #16]! +; CHECK-NEXT: ret + %next = getelementptr <8 x bfloat>, ptr %ptr, i32 1 + %val = load <8 x bfloat>, ptr %next + %res.tmp = insertvalue { <8 x bfloat>, ptr } undef, <8 x bfloat> %val, 0 + %res = insertvalue { <8 x bfloat>, ptr } %res.tmp, ptr %next, 1 ret { <8 x bfloat>, ptr } %res } define bfloat @test_bitcast_halftobfloat(half %a) nounwind { ; CHECK-LABEL: test_bitcast_halftobfloat: -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ret %r = bitcast half %a to bfloat ret bfloat %r } define half @test_bitcast_bfloattohalf(bfloat %a) nounwind { ; CHECK-LABEL: test_bitcast_bfloattohalf: -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ret %r = bitcast bfloat %a to half ret half %r } |
