; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s
; FIXME: Ensure llvm.powi.* intrinsics are vectorized.
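; A hand-written reference (not covered by the autogenerated assertions in this
; file) sketching the output the FIXME asks for: once the SLP vectorizer handles
; llvm.powi.*, the per-lane scalar calls below should collapse into a single
; call to the vector overload of the intrinsic. The function name is
; illustrative only.
define <2 x double> @PR53887_v2f64_expected(<2 x double> noundef %x) {
entry:
  %r = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 6)
  ret <2 x double> %r
}
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)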
define <2 x double> @PR53887_v2f64(<2 x double> noundef %x) {
; CHECK-LABEL: @PR53887_v2f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x double> zeroinitializer, double [[TMP2]], i64 0
; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x double> [[X]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> [[VECINIT]], double [[TMP1]], i64 1
; CHECK-NEXT:    ret <2 x double> [[TMP0]]
;
entry:
  %vecext = extractelement <2 x double> %x, i64 0
  %0 = tail call fast double @llvm.powi.f64.i32(double %vecext, i32 6)
  %vecinit = insertelement <2 x double> zeroinitializer, double %0, i64 0
  %vecext1 = extractelement <2 x double> %x, i64 1
  %1 = tail call fast double @llvm.powi.f64.i32(double %vecext1, i32 6)
  %vecinit3 = insertelement <2 x double> %vecinit, double %1, i64 1
  ret <2 x double> %vecinit3
}
define <4 x double> @PR53887_v4f64(<4 x double> noundef %x) {
; CHECK-LABEL: @PR53887_v4f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x double> zeroinitializer, double [[TMP4]], i64 0
; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x double> [[X]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT]], double [[TMP1]], i64 1
; CHECK-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x double> [[X]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT4]], i32 6)
; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x double> [[VECINIT3]], double [[TMP2]], i64 2
; CHECK-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x double> [[X]], i64 3
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT7]], i32 6)
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> [[VECINIT6]], double [[TMP3]], i64 3
; CHECK-NEXT:    ret <4 x double> [[TMP0]]
;
entry:
  %vecext = extractelement <4 x double> %x, i64 0
  %0 = tail call fast double @llvm.powi.f64.i32(double %vecext, i32 6)
  %vecinit = insertelement <4 x double> zeroinitializer, double %0, i64 0
  %vecext1 = extractelement <4 x double> %x, i64 1
  %1 = tail call fast double @llvm.powi.f64.i32(double %vecext1, i32 6)
  %vecinit3 = insertelement <4 x double> %vecinit, double %1, i64 1
  %vecext4 = extractelement <4 x double> %x, i64 2
  %2 = tail call fast double @llvm.powi.f64.i32(double %vecext4, i32 6)
  %vecinit6 = insertelement <4 x double> %vecinit3, double %2, i64 2
  %vecext7 = extractelement <4 x double> %x, i64 3
  %3 = tail call fast double @llvm.powi.f64.i32(double %vecext7, i32 6)
  %vecinit9 = insertelement <4 x double> %vecinit6, double %3, i64 3
  ret <4 x double> %vecinit9
}
declare double @llvm.powi.f64.i32(double, i32)