; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s

; FIXME: Ensure llvm.powi.* intrinsics are vectorized.
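; As an illustrative sketch only (not part of the CHECK lines below): once this
; FIXME is addressed, SLP would be expected to fold the per-lane scalar calls
; into a single call to the type-mangled vector overload of the intrinsic, e.g.
; for the <2 x double> case something like:
;   %r = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 6)
;   ret <2 x double> %r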

define <2 x double> @PR53887_v2f64(<2 x double> noundef %x) {
; CHECK-LABEL: @PR53887_v2f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x double> zeroinitializer, double [[TMP2]], i64 0
; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x double> [[X]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> [[VECINIT]], double [[TMP1]], i64 1
; CHECK-NEXT:    ret <2 x double> [[TMP0]]
;
entry:
  %vecext = extractelement <2 x double> %x, i64 0
  %0 = tail call fast double @llvm.powi.f64.i32(double %vecext, i32 6)
  %vecinit = insertelement <2 x double> zeroinitializer, double %0, i64 0
  %vecext1 = extractelement <2 x double> %x, i64 1
  %1 = tail call fast double @llvm.powi.f64.i32(double %vecext1, i32 6)
  %vecinit3 = insertelement <2 x double> %vecinit, double %1, i64 1
  ret <2 x double> %vecinit3
}

define <4 x double> @PR53887_v4f64(<4 x double> noundef %x) {
; CHECK-LABEL: @PR53887_v4f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x double> zeroinitializer, double [[TMP4]], i64 0
; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x double> [[X]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT]], double [[TMP1]], i64 1
; CHECK-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x double> [[X]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT4]], i32 6)
; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x double> [[VECINIT3]], double [[TMP2]], i64 2
; CHECK-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x double> [[X]], i64 3
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT7]], i32 6)
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> [[VECINIT6]], double [[TMP3]], i64 3
; CHECK-NEXT:    ret <4 x double> [[TMP0]]
;
entry:
  %vecext = extractelement <4 x double> %x, i64 0
  %0 = tail call fast double @llvm.powi.f64.i32(double %vecext, i32 6)
  %vecinit = insertelement <4 x double> zeroinitializer, double %0, i64 0
  %vecext1 = extractelement <4 x double> %x, i64 1
  %1 = tail call fast double @llvm.powi.f64.i32(double %vecext1, i32 6)
  %vecinit3 = insertelement <4 x double> %vecinit, double %1, i64 1
  %vecext4 = extractelement <4 x double> %x, i64 2
  %2 = tail call fast double @llvm.powi.f64.i32(double %vecext4, i32 6)
  %vecinit6 = insertelement <4 x double> %vecinit3, double %2, i64 2
  %vecext7 = extractelement <4 x double> %x, i64 3
  %3 = tail call fast double @llvm.powi.f64.i32(double %vecext7, i32 6)
  %vecinit9 = insertelement <4 x double> %vecinit6, double %3, i64 3
  ret <4 x double> %vecinit9
}

declare double @llvm.powi.f64.i32(double, i32)
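
; The CHECK lines above were autogenerated; once SLP gains powi vectorization
; they can be refreshed with utils/update_test_checks.py. A minimal example
; invocation, where the build directory path is only a placeholder:
;   llvm/utils/update_test_checks.py --opt-binary=<build-dir>/bin/opt \
;     llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll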