diff options
author | Farzon Lotfi <1802579+farzonl@users.noreply.github.com> | 2024-06-14 17:18:20 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-14 17:18:20 -0400 |
commit | 6355fb45a5433d90a3f1a342920ff56a7fee7e16 (patch) | |
tree | 6aea0825e101e9fb32bae33b43d21dd7dcc15cb8 | |
parent | 40a72f8cc414726a8be234a260650fd62354da21 (diff) | |
download | llvm-6355fb45a5433d90a3f1a342920ff56a7fee7e16.zip llvm-6355fb45a5433d90a3f1a342920ff56a7fee7e16.tar.gz llvm-6355fb45a5433d90a3f1a342920ff56a7fee7e16.tar.bz2 |
[CodeGen] Support vectors across all backends (#95518)
Add a default f16 type promotion
-rw-r--r-- | llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/half-intrinsics.ll | 120 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-unsupported.ll | 16 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/intrinsic.ll | 54 |
4 files changed, 192 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 8240a1f..de53499 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN}, VT, Expand); // Constrained floating-point operations default to expand. @@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() { ISD::FTAN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); + setOperationAction(ISD::FTAN, MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index c493a9b..bfc26b0 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) { %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan ret i1 %1 } + +declare half @llvm.tan.f16(half) + +define half @tan_f16(half %a) nounwind { +; RV32IZFH-LABEL: tan_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call tanf +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: tan_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call tanf +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, 
sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: tan_f16: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZHINX-NEXT: call tanf +; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: tan_f16: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: addi sp, sp, -16 +; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: call tanf +; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINX-NEXT: addi sp, sp, 16 +; RV64IZHINX-NEXT: ret +; +; RV32I-LABEL: tan_f16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: call __extendhfsf2 +; RV32I-NEXT: call tanf +; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: call __extendhfsf2 +; RV64I-NEXT: call tanf +; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: tan_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call tanf +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: tan_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: 
addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call tanf +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: tan_f16: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: call tanf +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: tan_f16: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: addi sp, sp, -16 +; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: call tanf +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINXMIN-NEXT: addi sp, sp, 16 +; RV64IZHINXMIN-NEXT: ret + %1 = call half @llvm.tan.f16(half %a) + ret half %1 +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll index d214a3a..1d6e073 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) { ret <4 x float> %v } +; CHECK-LABEL: tan_v4f32: +; CHECK: call $push[[L:[0-9]+]]=, tanf +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +define <4 x float> @tan_v4f32(<4 x float> %x) { + %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) + ret <4 x float> %v +} + ; CHECK-LABEL: powi_v4f32: ; CHECK: call $push[[L:[0-9]+]]=, __powisf2 declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) @@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) { ret <2 x double> %v } +; CHECK-LABEL: tan_v2f64: 
+; CHECK: call $push[[L:[0-9]+]]=, tan +declare <2 x double> @llvm.tan.v2f64(<2 x double>) +define <2 x double> @tan_v2f64(<2 x double> %x) { + %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x) + ret <2 x double> %v +} + ; CHECK-LABEL: powi_v2f64: ; CHECK: call $push[[L:[0-9]+]]=, __powidf2 declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 0f07034..9c910d7 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry declare double @llvm.cos.f64(double) +define void @tan_f32(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @tan_f32( +; CHECK: llvm.tan.v4f32 +; CHECK: ret void +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call float @llvm.tan.f32(float %0) + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv + store float %call, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.tan.f32(float) + +define void @tan_f64(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @tan_f64( +; CHECK: llvm.tan.v4f64 +; CHECK: ret void +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv + %0 = load double, ptr %arrayidx, 
align 8 + %call = tail call double @llvm.tan.f64(double %0) + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv + store double %call, ptr %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.tan.f64(double) + define void @exp_f32(i32 %n, ptr %y, ptr %x) { ; CHECK-LABEL: @exp_f32( ; CHECK: llvm.exp.v4f32 |