aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2023-06-20 06:19:08 -0400
committerMatt Arsenault <Matthew.Arsenault@amd.com>2023-07-25 07:54:11 -0400
commit47b3ada432f8afee9723a4b3d27b3efbef34dedf (patch)
tree151f331eecc6640c66d5ea70030d494cd0067c61 /llvm
parent71be91eba96d80d15689e4f516141c533c3c086d (diff)
downloadllvm-47b3ada432f8afee9723a4b3d27b3efbef34dedf.zip
llvm-47b3ada432f8afee9723a4b3d27b3efbef34dedf.tar.gz
llvm-47b3ada432f8afee9723a4b3d27b3efbef34dedf.tar.bz2
AMDGPU: Add more sqrt f64 lowering tests
Almost all permutations of the flags are potentially relevant.
Diffstat (limited to 'llvm')
-rw-r--r--llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll218
-rw-r--r--llvm/test/CodeGen/AMDGPU/rsq.f64.ll2812
2 files changed, 2727 insertions, 303 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index dbf3871..6239393 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
define double @v_sqrt_f64(double %x) {
; GCN-LABEL: v_sqrt_f64:
@@ -115,9 +115,219 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
ret <2 x i32> %insert.1
}
+define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64_afn:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; GCN-NEXT: ; return to shader part epilog
+ %result = call afn double @llvm.sqrt.f64(double %x)
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64_afn_nnan_ninf:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; GCN-NEXT: ; return to shader part epilog
+ %result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
+define double @v_sqrt_f64_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_nsz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_nnan_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_nnan_ninf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan ninf double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_nnan_ninf_nsz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan ninf nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_afn(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_afn_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nsz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64_afn:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ ret <2 x double> %result
+}
+
+define double @v_sqrt_f64_afn_nnan(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn nnan double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_fabs_afn_ninf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %result = call afn ninf double @llvm.sqrt.f64(double %fabs)
+ ret double %result
+}
+
+define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ ret <2 x double> %result
+}
+
+define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
+; GCN-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call afn nnan ninf nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
+; GCN-LABEL: v_sqrt_f64__approx_func_fp_math:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
+; GCN-LABEL: v_sqrt_f64__enough_unsafe_attrs:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
+; GCN-LABEL: v_sqrt_f64__unsafe_attr:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call nsz double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
+; GCN-LABEL: v_sqrt_v2f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ ret <2 x double> %result
+}
+
+define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
+; GCN-LABEL: v_sqrt_v3f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; GCN-NEXT: v_sqrt_f64_e32 v[4:5], v[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %result = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %x)
+ ret <3 x double> %result
+}
+
declare double @llvm.fabs.f64(double) #0
declare double @llvm.sqrt.f64(double) #0
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
+declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0
declare i32 @llvm.amdgcn.readfirstlane(i32) #1
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nounwind willreturn memory(none) }
+attributes #2 = { "approx-func-fp-math"="true" }
+attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
+attributes #4 = { "unsafe-fp-math"="true" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 93a8869..a20aaac 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -1,302 +1,2516 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,GCN-UNSAFE,SI,SI-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
-
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=hawaii -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,GCN-UNSAFE,CI,CI-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GCN-SAFE,CI,CI-SAFE %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-
-define amdgpu_kernel void @rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: rsq_f64:
-; GCN-UNSAFE: ; %bb.0:
-; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT: s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT: s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT: s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT: s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT: s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT: s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT: s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; GCN-UNSAFE-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT: s_endpgm
-;
-; SI-SAFE-LABEL: rsq_f64:
-; SI-SAFE: ; %bb.0:
-; SI-SAFE-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT: s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT: s_mov_b32 s2, -1
-; SI-SAFE-NEXT: s_mov_b32 s10, s2
-; SI-SAFE-NEXT: s_mov_b32 s11, s3
-; SI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT: s_mov_b32 s8, s6
-; SI-SAFE-NEXT: s_mov_b32 s9, s7
-; SI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; SI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
-; SI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: s_mov_b32 s0, 0x3ff00000
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT: s_mov_b32 s0, s4
-; SI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT: s_mov_b32 s1, s5
-; SI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; SI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT: s_endpgm
-;
-; CI-SAFE-LABEL: rsq_f64:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT: s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT: s_mov_b32 s6, -1
-; CI-SAFE-NEXT: s_mov_b32 s10, s6
-; CI-SAFE-NEXT: s_mov_b32 s11, s7
-; CI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT: s_mov_b32 s8, s2
-; CI-SAFE-NEXT: s_mov_b32 s9, s3
-; CI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT: s_mov_b32 s4, s0
-; CI-SAFE-NEXT: s_mov_b32 s5, s1
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; CI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], 1.0
-; CI-SAFE-NEXT: v_div_scale_f64 v[8:9], vcc, 1.0, v[0:1], 1.0
-; CI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
-; CI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT: s_endpgm
- %val = load double, ptr addrspace(1) %in, align 4
- %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
- %div = fdiv double 1.0, %sqrt
- store double %div, ptr addrspace(1) %out, align 4
- ret void
-}
-
-define amdgpu_kernel void @neg_rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: neg_rsq_f64:
-; GCN-UNSAFE: ; %bb.0:
-; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT: s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT: s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT: s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT: s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT: s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT: s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT: s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; GCN-UNSAFE-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GCN-UNSAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT: s_endpgm
-;
-; SI-SAFE-LABEL: neg_rsq_f64:
-; SI-SAFE: ; %bb.0:
-; SI-SAFE-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT: s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT: s_mov_b32 s2, -1
-; SI-SAFE-NEXT: s_mov_b32 s10, s2
-; SI-SAFE-NEXT: s_mov_b32 s11, s3
-; SI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT: s_mov_b32 s8, s6
-; SI-SAFE-NEXT: s_mov_b32 s9, s7
-; SI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; SI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SAFE-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: s_mov_b32 s0, 0xbff00000
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT: s_mov_b32 s0, s4
-; SI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT: s_mov_b32 s1, s5
-; SI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT: s_endpgm
-;
-; CI-SAFE-LABEL: neg_rsq_f64:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT: s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT: s_mov_b32 s6, -1
-; CI-SAFE-NEXT: s_mov_b32 s10, s6
-; CI-SAFE-NEXT: s_mov_b32 s11, s7
-; CI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT: s_mov_b32 s8, s2
-; CI-SAFE-NEXT: s_mov_b32 s9, s3
-; CI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT: s_mov_b32 s4, s0
-; CI-SAFE-NEXT: s_mov_b32 s5, s1
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
-; CI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], -1.0
-; CI-SAFE-NEXT: v_div_scale_f64 v[8:9], vcc, -1.0, v[0:1], -1.0
-; CI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; CI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT: s_endpgm
- %val = load double, ptr addrspace(1) %in, align 4
- %sqrt = call double @llvm.sqrt.f64(double %val)
- %div = fdiv double -1.0, %sqrt
- store double %div, ptr addrspace(1) %out, align 4
- ret void
-}
-
-define amdgpu_kernel void @neg_rsq_neg_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; GCN-UNSAFE-LABEL: neg_rsq_neg_f64:
-; GCN-UNSAFE: ; %bb.0:
-; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000
-; GCN-UNSAFE-NEXT: s_mov_b32 s6, -1
-; GCN-UNSAFE-NEXT: s_mov_b32 s10, s6
-; GCN-UNSAFE-NEXT: s_mov_b32 s11, s7
-; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-UNSAFE-NEXT: s_mov_b32 s8, s2
-; GCN-UNSAFE-NEXT: s_mov_b32 s9, s3
-; GCN-UNSAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; GCN-UNSAFE-NEXT: s_mov_b32 s4, s0
-; GCN-UNSAFE-NEXT: s_mov_b32 s5, s1
-; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0)
-; GCN-UNSAFE-NEXT: v_sqrt_f64_e64 v[0:1], -v[0:1]
-; GCN-UNSAFE-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-UNSAFE-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; GCN-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GCN-UNSAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GCN-UNSAFE-NEXT: s_endpgm
-;
-; SI-SAFE-LABEL: neg_rsq_neg_f64:
-; SI-SAFE: ; %bb.0:
-; SI-SAFE-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-SAFE-NEXT: s_mov_b32 s3, 0xf000
-; SI-SAFE-NEXT: s_mov_b32 s2, -1
-; SI-SAFE-NEXT: s_mov_b32 s10, s2
-; SI-SAFE-NEXT: s_mov_b32 s11, s3
-; SI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT: s_mov_b32 s8, s6
-; SI-SAFE-NEXT: s_mov_b32 s9, s7
-; SI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; SI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; SI-SAFE-NEXT: v_sqrt_f64_e64 v[0:1], -v[0:1]
-; SI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
-; SI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; SI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-SAFE-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
-; SI-SAFE-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; SI-SAFE-NEXT: s_mov_b32 s0, 0xbff00000
-; SI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SAFE-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, v7
-; SI-SAFE-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; SI-SAFE-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; SI-SAFE-NEXT: s_mov_b32 s0, s4
-; SI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; SI-SAFE-NEXT: s_mov_b32 s1, s5
-; SI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; SI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-SAFE-NEXT: s_endpgm
-;
-; CI-SAFE-LABEL: neg_rsq_neg_f64:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-SAFE-NEXT: s_mov_b32 s7, 0xf000
-; CI-SAFE-NEXT: s_mov_b32 s6, -1
-; CI-SAFE-NEXT: s_mov_b32 s10, s6
-; CI-SAFE-NEXT: s_mov_b32 s11, s7
-; CI-SAFE-NEXT: s_waitcnt lgkmcnt(0)
-; CI-SAFE-NEXT: s_mov_b32 s8, s2
-; CI-SAFE-NEXT: s_mov_b32 s9, s3
-; CI-SAFE-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
-; CI-SAFE-NEXT: s_mov_b32 s4, s0
-; CI-SAFE-NEXT: s_mov_b32 s5, s1
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0)
-; CI-SAFE-NEXT: v_sqrt_f64_e64 v[0:1], -v[0:1]
-; CI-SAFE-NEXT: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], -1.0
-; CI-SAFE-NEXT: v_div_scale_f64 v[8:9], vcc, -1.0, v[0:1], -1.0
-; CI-SAFE-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; CI-SAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; CI-SAFE-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; CI-SAFE-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
-; CI-SAFE-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
-; CI-SAFE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; CI-SAFE-NEXT: s_endpgm
- %val = load double, ptr addrspace(1) %in, align 4
- %val.fneg = fneg double %val
- %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
- %div = fdiv double -1.0, %sqrt
- store double %div, ptr addrspace(1) %out, align 4
- ret void
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,SI-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,SI-GISEL %s
+
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG,VI-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL,VI-GISEL %s
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare double @llvm.amdgcn.sqrt.f64(double)
+declare double @llvm.fabs.f64(double)
+
+define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_rsq_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT: ; return to shader part epilog
+ %rsq = call contract double @llvm.sqrt.f64(double %x)
+ %result = fdiv contract double 1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
+; SI-SDAG-LABEL: s_rsq_f64_fabs:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; SI-SDAG-NEXT: s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_f64_fabs:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_f64_fabs:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_f64_fabs:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], |s[0:1]|
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT: ; return to shader part epilog
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %rsq = call contract double @llvm.sqrt.f64(double %fabs.x)
+ %result = fdiv contract double 1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_neg_rsq_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s2, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_neg_rsq_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_neg_rsq_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_neg_rsq_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT: ; return to shader part epilog
+; Negated rsq with uniform (inreg) input: contract sqrt followed by contract fdiv -1.0 / sqrt(x).
+  %rsq = call contract double @llvm.sqrt.f64(double %x)
+  %result = fdiv contract double -1.0, %rsq
+; Return the f64 bits as two uniform i32s: bitcast, split halves, readfirstlane each half.
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_neg_rsq_neg_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], -s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s2, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[0:1], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_neg_rsq_neg_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], -s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT: ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_neg_rsq_neg_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], -s[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT: ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_neg_rsq_neg_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], -s[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT: ; return to shader part epilog
+; Variant with a source-level fneg on the operand: -1.0 / sqrt(-x), uniform (inreg) input.
+; The fneg folds into the sqrt as a source modifier (v_sqrt_f64_e64 ... -s[0:1]).
+  %x.neg = fneg double %x
+  %rsq = call contract double @llvm.sqrt.f64(double %x.neg)
+  %result = fdiv contract double -1.0, %rsq
+; Return the f64 bits as two uniform i32s: bitcast, split halves, readfirstlane each half.
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+define double @v_rsq_f64(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Baseline case: 1.0 / sqrt(x) with contract on both operations, divergent (VGPR) input.
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_fabs(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_fabs:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_fabs:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_fabs:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_fabs:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Variant with fabs on the operand: 1.0 / sqrt(|x|).
+; The fabs folds into the sqrt as a source modifier (v_sqrt_f64_e64 ... |v[0:1]|).
+  %fabs.x = call double @llvm.fabs.f64(double %x)
+  %sqrt = call contract double @llvm.sqrt.f64(double %fabs.x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract0(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract0:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract0:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Flag-permutation case: the sqrt call has NO contract flag; only the fdiv does.
+  %sqrt = call double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_rsq_f64_missing_contract1(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_missing_contract1:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_missing_contract1:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Flag-permutation case: the sqrt call has contract but the fdiv does NOT.
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv double 1.0, %sqrt
+  ret double %rsq
+}
+
+define double @v_neg_rsq_f64(double %x) {
+; SI-SDAG-LABEL: v_neg_rsq_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Negated rsq with divergent (VGPR) input: -1.0 / sqrt(x), contract on both operations.
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double -1.0, %sqrt
+  ret double %rsq
+}
+
+define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_rsq_v2f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT: v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_v2f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_v2f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_v2f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double 1.0, double 1.0>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT: v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_v2f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], -1.0, v[2:3], -1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], -1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_v2f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_v2f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], -1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double -1.0>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v16, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], s[4:5]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v16, v13
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v19
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], s[4:5]
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_v2f64_poisonelt:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x7ff80000
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], s[4:5]
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double poison>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
+; SI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v13
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[16:17], v[12:13]
+; SI-SDAG-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3ff00000
+; SI-SDAG-NEXT: v_mul_f64 v[12:13], v[18:19], v[10:11]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[16:17]
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[8:9], v[12:13], v[18:19]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, v19
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; SI-SDAG-NEXT: s_nop 0
+; SI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[12:13]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v16, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_div_scale_f64 v[12:13], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v16, v13
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[6:7], 1.0
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[16:17], v[12:13], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[16:17], v[12:13]
+; SI-GISEL-NEXT: v_fma_f64 v[14:15], -v[8:9], v[10:11], 1.0
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[14:15], v[10:11]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[12:13], v[6:7], v[16:17]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[4:5]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
+; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
+; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], -1.0
+; SI-GISEL-NEXT: s_nop 1
+; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[4:5], v[10:11]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_pos_rsq_v2f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-SDAG-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-SDAG-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-SDAG-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-SDAG-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-SDAG-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-SDAG-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
+; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_div_scale_f64 v[12:13], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
+; VI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9]
+; VI-GISEL-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
+; VI-GISEL-NEXT: s_mov_b64 vcc, s[4:5]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
+; VI-GISEL-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv <2 x double> <double -1.0, double 1.0>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define double @v_rsq_f64_fneg_fabs(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64_fneg_fabs:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %fneg.fabs = fneg double %fabs
+ %sqrt = call contract double @llvm.sqrt.f64(double %fneg.fabs)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_sqrt(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64__afn_sqrt:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; rsq where only the sqrt carries 'afn': the fdiv has no 'afn', so the
+; divide must still use the correctly-rounded v_div_scale/v_div_fixup
+; expansion rather than a bare rcp. Check lines above are autogenerated
+; by update_llc_test_checks.py; do not hand-edit them.
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_fdiv(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_fdiv:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_fdiv:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; Inverse case of the previous test: only the fdiv is 'afn', so the
+; divide may use the fast rcp/rsq Newton-iteration expansion while the
+; sqrt itself is still the plain contract sqrt. Autogenerated checks.
+ %sqrt = call contract double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; Both the sqrt and the fdiv carry 'afn': the divide lowers to the fast
+; rcp (SDAG) / rsq (GISel) Newton-iteration sequence, no v_div_fixup.
+; Autogenerated checks.
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn(double %x) {
+; SDAG-LABEL: v_neg_rsq_f64__afn:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_neg_rsq_f64__afn:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], -1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; Same as v_rsq_f64__afn but with a -1.0 numerator, so the fast-divide
+; expansion folds the negation into the final multiply/fma constants.
+; Autogenerated checks.
+ %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn double -1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_ninf(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_ninf:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_ninf:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; afn + ninf flag permutation; output currently matches the plain 'afn'
+; case (ninf adds no further simplification here). Autogenerated checks.
+ %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_nnan:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_nnan:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; afn + nnan flag permutation; output currently matches the plain 'afn'
+; case (nnan adds no further simplification here). Autogenerated checks.
+ %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__afn_nnan_ninf(double %x) {
+; SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64__afn_nnan_ninf:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; Full afn+nnan+ninf flag set on both ops — the most relaxed variant;
+; a candidate for folding into a single rsq. Autogenerated checks.
+ %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
+; SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], -1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; Negated variant of the fully-relaxed case: -1.0 numerator with
+; afn+nnan+ninf on both sqrt and fdiv. Autogenerated checks.
+ %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_f64__nnan_ninf(double %x) {
+; SI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_f64__nnan_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_f64__nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; nnan+ninf WITHOUT afn: the approximation flags are absent, so the
+; fdiv still requires the correctly-rounded v_div_scale/v_div_fixup
+; expansion on both selectors. Autogenerated checks.
+ %sqrt = call contract nnan ninf double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv contract nnan ninf double 1.0, %sqrt
+ ret double %rsq
+}
+
+define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
+; SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_sqrt_f64_e32 v[2:3], v[2:3]
+; SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SDAG-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
+; SDAG-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], 1.0, v[0:1]
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[10:11], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[10:11]
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[4:5], v[0:1]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[2:3], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[8:9], v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[10:11], v[2:3], v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], 1.0, v[0:1]
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[6:7], v[10:11], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[8:9]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[10:11]
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Vector (<2 x double>) version of the fully-relaxed rsq: the lowering
+; is scalarized into two independent per-element expansions; SDAG output
+; is identical for SI and VI so it shares the SDAG prefix. Autogenerated
+; checks.
+ %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
+ %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
+ ret <2 x double> %rsq
+}
+
+define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
+; SDAG-LABEL: s_rsq_f64_unsafe:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_rsq_f64_unsafe:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_sqrt_f64_e32 v[0:1], s[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[2:3], s[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-NEXT: ; return to shader part epilog
+; Scalar (SGPR inreg) rsq under attribute set #0 — presumably the
+; unsafe-fp-math attribute group, defined elsewhere in this file;
+; verify against the attributes section. The double result is split and
+; readfirstlane'd back to SGPRs for the shader return. Note the IR name
+; %rsq actually holds the sqrt here and %result the reciprocal.
+; Autogenerated checks.
+ %rsq = call contract double @llvm.sqrt.f64(double %x)
+ %result = fdiv contract double 1.0, %rsq
+ %cast = bitcast double %result to <2 x i32>
+ %cast.0 = extractelement <2 x i32> %cast, i32 0
+ %cast.1 = extractelement <2 x i32> %cast, i32 1
+ %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+ %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+ %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+ %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+ ret <2 x i32> %insert.1
+}
+
+define double @v_rsq_f64_unsafe(double %x) #0 {
+; SDAG-LABEL: v_rsq_f64_unsafe:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SDAG-NEXT: v_rcp_f64_e32 v[2:3], v[0:1]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
+; SDAG-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: v_rsq_f64_unsafe:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_sqrt_f64_e32 v[2:3], v[0:1]
+; GISEL-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_fma_f64 v[4:5], -v[2:3], v[0:1], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[4:5], v[0:1], v[0:1]
+; GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[0:1]
+; GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[4:5], 1.0
+; GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; VGPR variant of the "#0"-attributed rsq: no fast-math flags on the
+; instructions themselves; the fast expansion is enabled solely by the
+; function attribute group (defined outside this chunk — confirm).
+; Autogenerated checks.
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ %rsq = fdiv double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_rsq_amdgcn_sqrt_f64(double %x) {
+; SI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_rsq_amdgcn_sqrt_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; rsq built from the target intrinsic llvm.amdgcn.sqrt.f64 (maps
+; directly to v_sqrt_f64) instead of llvm.sqrt; the contract fdiv still
+; gets the correctly-rounded expansion. Autogenerated checks.
+ %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+ %rsq = fdiv contract double 1.0, %sqrt
+ ret double %rsq
+}
+
+define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
+; SI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], -1.0, v[0:1], -1.0
+; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7
+; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc
+; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
+; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-SDAG-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_neg_rsq_amdgcn_sqrt_f64:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sqrt_f64_e32 v[0:1], v[0:1]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, -1.0, v[0:1], -1.0
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], -1.0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; Negated (-1.0 numerator) version of the amdgcn.sqrt-based rsq: the
+; div_scale/div_fixup operands and the 0xbff00000 (-1.0 high word)
+; constant carry the sign through the expansion. Autogenerated checks.
+ %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+ %rsq = fdiv contract double -1.0, %sqrt
+ ret double %rsq
+}
+
+; Uniform (inreg) input: llvm.amdgcn.sqrt of an SGPR pair feeds the full
+; contract-fdiv expansion; the f64 result is returned through SGPRs via
+; readfirstlane on each half.
+define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
+; SI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
+; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[0:1], 1.0, v[0:1], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, v7
+; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; SI-SDAG-NEXT:    s_xor_b64 vcc, s[0:1], vcc
+; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT:    ; return to shader part epilog
+;
+; SI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
+; SI-GISEL:       ; %bb.0:
+; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
+; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
+; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
+; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; SI-GISEL-NEXT:    ; return to shader part epilog
+;
+; VI-SDAG-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI-SDAG:       ; %bb.0:
+; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-SDAG-NEXT:    ; return to shader part epilog
+;
+; VI-GISEL-LABEL: s_rsq_amdgcn_sqrt_f64:
+; VI-GISEL:       ; %bb.0:
+; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
+; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
+; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
+; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
+; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; VI-GISEL-NEXT:    ; return to shader part epilog
+  ; Renamed for consistency with sibling tests: the sqrt result is %sqrt and
+  ; the reciprocal is %rsq (was %rsq/%result). SSA names do not appear in the
+  ; autogenerated assembly checks, so this does not affect FileCheck matching.
+  %sqrt = call contract double @llvm.amdgcn.sqrt.f64(double %x)
+  %rsq = fdiv contract double 1.0, %sqrt
+  %cast = bitcast double %rsq to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
+}
+
+; x / sqrt(y) with contract on both the sqrt call and the fdiv (no arcp/afn):
+; all four configs still emit the full v_div_scale/v_div_fmas/v_div_fixup
+; division expansion after v_sqrt_f64.
+define double @v_div_contract_sqrt_f64(double %x, double %y) {
; SI-SDAG-LABEL: v_div_contract_sqrt_f64:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_div_contract_sqrt_f64:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_div_contract_sqrt_f64:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_div_contract_sqrt_f64:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv contract double %x, %sqrt
+  ret double %rsq
+}
+
+; x / sqrt(y) where only the fdiv carries arcp and the sqrt call has no
+; fast-math flags: the checked output is still the full division expansion.
+define double @v_div_arcp_sqrt_f64(double %x, double %y) {
; SI-SDAG-LABEL: v_div_arcp_sqrt_f64:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_div_arcp_sqrt_f64:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_div_arcp_sqrt_f64:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_div_arcp_sqrt_f64:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv arcp double %x, %sqrt
+  ret double %rsq
+}
+
+; x / sqrt(y) with contract on the sqrt call and contract+arcp on the fdiv:
+; output checked here is still the full v_div_scale/v_div_fmas expansion.
+define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
; SI-SDAG-LABEL: v_div_contract_arcp_sqrt_f64:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-SDAG-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v9
; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; SI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; SI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_div_contract_arcp_sqrt_f64:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-SDAG-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[2:3], v[2:3]
; VI-GISEL-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
; VI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %y)
+  %rsq = fdiv contract arcp double %x, %sqrt
+  ret double %rsq
+}
+
+; 256.0 / sqrt(x), both operations contract: the constant numerator is
+; materialized as the double 0x4070000000000000 (high word 0x40700000 in the
+; checks) and fed through the full division expansion.
+define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-SDAG-LABEL: v_div_const_contract_sqrt_f64:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    s_mov_b32 s7, 0x40700000
; SI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], s[6:7]
; SI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-SDAG-NEXT:    v_div_scale_f64 v[6:7], s[4:5], s[6:7], v[0:1], s[6:7]
; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v7
; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
; SI-SDAG-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; SI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; SI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; SI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[6:7]
; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_div_const_contract_sqrt_f64:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0x40700000
; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0x40700000
; SI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], s[6:7]
; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
; SI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[6:7]
; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_div_const_contract_sqrt_f64:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
; VI-SDAG-NEXT:    s_mov_b32 s4, 0
; VI-SDAG-NEXT:    s_mov_b32 s5, 0x40700000
; VI-SDAG-NEXT:    v_div_scale_f64 v[2:3], s[6:7], v[0:1], v[0:1], s[4:5]
; VI-SDAG-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
; VI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-SDAG-NEXT:    v_div_scale_f64 v[6:7], vcc, s[4:5], v[0:1], s[4:5]
; VI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
; VI-SDAG-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-SDAG-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-SDAG-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[4:5]
; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_div_const_contract_sqrt_f64:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
; VI-GISEL-NEXT:    s_mov_b32 s4, 0
; VI-GISEL-NEXT:    s_mov_b32 s5, 0x40700000
; VI-GISEL-NEXT:    v_div_scale_f64 v[2:3], s[6:7], v[0:1], v[0:1], s[4:5]
; VI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
; VI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; VI-GISEL-NEXT:    v_div_scale_f64 v[6:7], vcc, s[4:5], v[0:1], s[4:5]
; VI-GISEL-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
; VI-GISEL-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
; VI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[4:5]
; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sqrt = call contract double @llvm.sqrt.f64(double %x)
+  %rsq = fdiv contract double 256.0, %sqrt
+  ret double %rsq
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CI: {{.*}}
-; CI-UNSAFE: {{.*}}
; GCN: {{.*}}
-; GCN-SAFE: {{.*}}
-; SI: {{.*}}
-; SI-UNSAFE: {{.*}}