diff options
Diffstat (limited to 'llvm/test')
26 files changed, 1245 insertions, 861 deletions
diff --git a/llvm/test/CodeGen/AVR/llvm.sincos.ll b/llvm/test/CodeGen/AVR/llvm.sincos.ll index 897101d..b70b8d3 100644 --- a/llvm/test/CodeGen/AVR/llvm.sincos.ll +++ b/llvm/test/CodeGen/AVR/llvm.sincos.ll @@ -3,630 +3,266 @@ ; RUN: llc -mtriple=avr-unknown-linux-gnu < %s | FileCheck -check-prefixes=CHECK,GNU %s define { half, half } @test_sincos_f16(half %a) #0 { -; NONGNU-LABEL: test_sincos_f16: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: push r12 -; NONGNU-NEXT: push r13 -; NONGNU-NEXT: push r14 -; NONGNU-NEXT: push r15 -; NONGNU-NEXT: push r16 -; NONGNU-NEXT: push r17 -; NONGNU-NEXT: mov r24, r22 -; NONGNU-NEXT: mov r25, r23 -; NONGNU-NEXT: rcall __extendhfsf2 -; NONGNU-NEXT: mov r16, r22 -; NONGNU-NEXT: mov r17, r23 -; NONGNU-NEXT: mov r14, r24 -; NONGNU-NEXT: mov r15, r25 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r12, r24 -; NONGNU-NEXT: mov r13, r25 -; NONGNU-NEXT: mov r22, r16 -; NONGNU-NEXT: mov r23, r17 -; NONGNU-NEXT: mov r24, r14 -; NONGNU-NEXT: mov r25, r15 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r22, r24 -; NONGNU-NEXT: mov r23, r25 -; NONGNU-NEXT: mov r18, r12 -; NONGNU-NEXT: mov r19, r13 -; NONGNU-NEXT: pop r17 -; NONGNU-NEXT: pop r16 -; NONGNU-NEXT: pop r15 -; NONGNU-NEXT: pop r14 -; NONGNU-NEXT: pop r13 -; NONGNU-NEXT: pop r12 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_f16: -; GNU: ; %bb.0: -; GNU-NEXT: push r16 -; GNU-NEXT: push r17 -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r24, r22 -; GNU-NEXT: mov r25, r23 -; GNU-NEXT: rcall __extendhfsf2 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: rcall 
sincosf -; GNU-NEXT: ldd r22, Y+5 -; GNU-NEXT: ldd r23, Y+6 -; GNU-NEXT: ldd r24, Y+7 -; GNU-NEXT: ldd r25, Y+8 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r16, r24 -; GNU-NEXT: mov r17, r25 -; GNU-NEXT: ldd r22, Y+1 -; GNU-NEXT: ldd r23, Y+2 -; GNU-NEXT: ldd r24, Y+3 -; GNU-NEXT: ldd r25, Y+4 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r22, r24 -; GNU-NEXT: mov r23, r25 -; GNU-NEXT: mov r18, r16 -; GNU-NEXT: mov r19, r17 -; GNU-NEXT: adiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: pop r17 -; GNU-NEXT: pop r16 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_f16: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r12 +; CHECK-NEXT: push r13 +; CHECK-NEXT: push r14 +; CHECK-NEXT: push r15 +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r17 +; CHECK-NEXT: mov r24, r22 +; CHECK-NEXT: mov r25, r23 +; CHECK-NEXT: rcall __extendhfsf2 +; CHECK-NEXT: mov r16, r22 +; CHECK-NEXT: mov r17, r23 +; CHECK-NEXT: mov r14, r24 +; CHECK-NEXT: mov r15, r25 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r12, r24 +; CHECK-NEXT: mov r13, r25 +; CHECK-NEXT: mov r22, r16 +; CHECK-NEXT: mov r23, r17 +; CHECK-NEXT: mov r24, r14 +; CHECK-NEXT: mov r25, r15 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r22, r24 +; CHECK-NEXT: mov r23, r25 +; CHECK-NEXT: mov r18, r12 +; CHECK-NEXT: mov r19, r13 +; CHECK-NEXT: pop r17 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: pop r15 +; CHECK-NEXT: pop r14 +; CHECK-NEXT: pop r13 +; CHECK-NEXT: pop r12 +; CHECK-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) ret { half, half } %result } define half @test_sincos_f16_only_use_sin(half %a) #0 { -; NONGNU-LABEL: test_sincos_f16_only_use_sin: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: mov r24, r22 -; NONGNU-NEXT: mov r25, r23 -; NONGNU-NEXT: rcall __extendhfsf2 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: rcall __truncsfhf2 -; 
NONGNU-NEXT: mov r22, r24 -; NONGNU-NEXT: mov r23, r25 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_f16_only_use_sin: -; GNU: ; %bb.0: -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r24, r22 -; GNU-NEXT: mov r25, r23 -; GNU-NEXT: rcall __extendhfsf2 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: ldd r22, Y+5 -; GNU-NEXT: ldd r23, Y+6 -; GNU-NEXT: ldd r24, Y+7 -; GNU-NEXT: ldd r25, Y+8 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r22, r24 -; GNU-NEXT: mov r23, r25 -; GNU-NEXT: adiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_f16_only_use_sin: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov r24, r22 +; CHECK-NEXT: mov r25, r23 +; CHECK-NEXT: rcall __extendhfsf2 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r22, r24 +; CHECK-NEXT: mov r23, r25 +; CHECK-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) %result.0 = extractvalue { half, half } %result, 0 ret half %result.0 } define half @test_sincos_f16_only_use_cos(half %a) #0 { -; NONGNU-LABEL: test_sincos_f16_only_use_cos: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: mov r24, r22 -; NONGNU-NEXT: mov r25, r23 -; NONGNU-NEXT: rcall __extendhfsf2 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r22, r24 -; NONGNU-NEXT: mov r23, r25 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_f16_only_use_cos: -; GNU: ; %bb.0: -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: 
in r29, 62 -; GNU-NEXT: sbiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r24, r22 -; GNU-NEXT: mov r25, r23 -; GNU-NEXT: rcall __extendhfsf2 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: ldd r22, Y+1 -; GNU-NEXT: ldd r23, Y+2 -; GNU-NEXT: ldd r24, Y+3 -; GNU-NEXT: ldd r25, Y+4 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r22, r24 -; GNU-NEXT: mov r23, r25 -; GNU-NEXT: adiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_f16_only_use_cos: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov r24, r22 +; CHECK-NEXT: mov r25, r23 +; CHECK-NEXT: rcall __extendhfsf2 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r22, r24 +; CHECK-NEXT: mov r23, r25 +; CHECK-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) %result.1 = extractvalue { half, half } %result, 1 ret half %result.1 } define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 { -; NONGNU-LABEL: test_sincos_v2f16: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: push r6 -; NONGNU-NEXT: push r7 -; NONGNU-NEXT: push r8 -; NONGNU-NEXT: push r9 -; NONGNU-NEXT: push r10 -; NONGNU-NEXT: push r11 -; NONGNU-NEXT: push r12 -; NONGNU-NEXT: push r13 -; NONGNU-NEXT: push r14 -; NONGNU-NEXT: push r15 -; NONGNU-NEXT: push r16 -; NONGNU-NEXT: push r17 -; NONGNU-NEXT: mov r10, r22 -; NONGNU-NEXT: mov r11, r23 -; NONGNU-NEXT: rcall __extendhfsf2 -; NONGNU-NEXT: mov r16, r22 -; NONGNU-NEXT: mov r17, r23 -; NONGNU-NEXT: mov r14, r24 -; NONGNU-NEXT: mov r15, r25 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r12, 
r24 -; NONGNU-NEXT: mov r13, r25 -; NONGNU-NEXT: mov r24, r10 -; NONGNU-NEXT: mov r25, r11 -; NONGNU-NEXT: rcall __extendhfsf2 -; NONGNU-NEXT: mov r10, r22 -; NONGNU-NEXT: mov r11, r23 -; NONGNU-NEXT: mov r8, r24 -; NONGNU-NEXT: mov r9, r25 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r6, r24 -; NONGNU-NEXT: mov r7, r25 -; NONGNU-NEXT: mov r22, r10 -; NONGNU-NEXT: mov r23, r11 -; NONGNU-NEXT: mov r24, r8 -; NONGNU-NEXT: mov r25, r9 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r10, r24 -; NONGNU-NEXT: mov r11, r25 -; NONGNU-NEXT: mov r22, r16 -; NONGNU-NEXT: mov r23, r17 -; NONGNU-NEXT: mov r24, r14 -; NONGNU-NEXT: mov r25, r15 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: rcall __truncsfhf2 -; NONGNU-NEXT: mov r18, r10 -; NONGNU-NEXT: mov r19, r11 -; NONGNU-NEXT: mov r20, r12 -; NONGNU-NEXT: mov r21, r13 -; NONGNU-NEXT: mov r22, r6 -; NONGNU-NEXT: mov r23, r7 -; NONGNU-NEXT: pop r17 -; NONGNU-NEXT: pop r16 -; NONGNU-NEXT: pop r15 -; NONGNU-NEXT: pop r14 -; NONGNU-NEXT: pop r13 -; NONGNU-NEXT: pop r12 -; NONGNU-NEXT: pop r11 -; NONGNU-NEXT: pop r10 -; NONGNU-NEXT: pop r9 -; NONGNU-NEXT: pop r8 -; NONGNU-NEXT: pop r7 -; NONGNU-NEXT: pop r6 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_v2f16: -; GNU: ; %bb.0: -; GNU-NEXT: push r12 -; GNU-NEXT: push r13 -; GNU-NEXT: push r14 -; GNU-NEXT: push r15 -; GNU-NEXT: push r16 -; GNU-NEXT: push r17 -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 16 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r16, r24 -; GNU-NEXT: mov r17, r25 -; GNU-NEXT: mov r24, r22 -; GNU-NEXT: mov r25, r23 -; GNU-NEXT: rcall __extendhfsf2 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 243 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 247 -; GNU-NEXT: 
sbci r19, 255 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: mov r24, r16 -; GNU-NEXT: mov r25, r17 -; GNU-NEXT: rcall __extendhfsf2 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: ldd r22, Y+13 -; GNU-NEXT: ldd r23, Y+14 -; GNU-NEXT: ldd r24, Y+15 -; GNU-NEXT: ldd r25, Y+16 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r16, r24 -; GNU-NEXT: mov r17, r25 -; GNU-NEXT: ldd r22, Y+5 -; GNU-NEXT: ldd r23, Y+6 -; GNU-NEXT: ldd r24, Y+7 -; GNU-NEXT: ldd r25, Y+8 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r14, r24 -; GNU-NEXT: mov r15, r25 -; GNU-NEXT: ldd r22, Y+9 -; GNU-NEXT: ldd r23, Y+10 -; GNU-NEXT: ldd r24, Y+11 -; GNU-NEXT: ldd r25, Y+12 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r12, r24 -; GNU-NEXT: mov r13, r25 -; GNU-NEXT: ldd r22, Y+1 -; GNU-NEXT: ldd r23, Y+2 -; GNU-NEXT: ldd r24, Y+3 -; GNU-NEXT: ldd r25, Y+4 -; GNU-NEXT: rcall __truncsfhf2 -; GNU-NEXT: mov r18, r16 -; GNU-NEXT: mov r19, r17 -; GNU-NEXT: mov r20, r14 -; GNU-NEXT: mov r21, r15 -; GNU-NEXT: mov r22, r12 -; GNU-NEXT: mov r23, r13 -; GNU-NEXT: adiw r28, 16 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: pop r17 -; GNU-NEXT: pop r16 -; GNU-NEXT: pop r15 -; GNU-NEXT: pop r14 -; GNU-NEXT: pop r13 -; GNU-NEXT: pop r12 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_v2f16: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r6 +; CHECK-NEXT: push r7 +; CHECK-NEXT: push r8 +; CHECK-NEXT: push r9 +; CHECK-NEXT: push r10 +; CHECK-NEXT: push r11 +; CHECK-NEXT: push r12 +; CHECK-NEXT: push r13 +; CHECK-NEXT: push r14 +; CHECK-NEXT: push r15 +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r17 +; CHECK-NEXT: mov r10, r22 +; CHECK-NEXT: mov r11, r23 +; CHECK-NEXT: rcall __extendhfsf2 +; CHECK-NEXT: 
mov r16, r22 +; CHECK-NEXT: mov r17, r23 +; CHECK-NEXT: mov r14, r24 +; CHECK-NEXT: mov r15, r25 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r12, r24 +; CHECK-NEXT: mov r13, r25 +; CHECK-NEXT: mov r24, r10 +; CHECK-NEXT: mov r25, r11 +; CHECK-NEXT: rcall __extendhfsf2 +; CHECK-NEXT: mov r10, r22 +; CHECK-NEXT: mov r11, r23 +; CHECK-NEXT: mov r8, r24 +; CHECK-NEXT: mov r9, r25 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r6, r24 +; CHECK-NEXT: mov r7, r25 +; CHECK-NEXT: mov r22, r10 +; CHECK-NEXT: mov r23, r11 +; CHECK-NEXT: mov r24, r8 +; CHECK-NEXT: mov r25, r9 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r10, r24 +; CHECK-NEXT: mov r11, r25 +; CHECK-NEXT: mov r22, r16 +; CHECK-NEXT: mov r23, r17 +; CHECK-NEXT: mov r24, r14 +; CHECK-NEXT: mov r25, r15 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: rcall __truncsfhf2 +; CHECK-NEXT: mov r18, r10 +; CHECK-NEXT: mov r19, r11 +; CHECK-NEXT: mov r20, r12 +; CHECK-NEXT: mov r21, r13 +; CHECK-NEXT: mov r22, r6 +; CHECK-NEXT: mov r23, r7 +; CHECK-NEXT: pop r17 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: pop r15 +; CHECK-NEXT: pop r14 +; CHECK-NEXT: pop r13 +; CHECK-NEXT: pop r12 +; CHECK-NEXT: pop r11 +; CHECK-NEXT: pop r10 +; CHECK-NEXT: pop r9 +; CHECK-NEXT: pop r8 +; CHECK-NEXT: pop r7 +; CHECK-NEXT: pop r6 +; CHECK-NEXT: ret %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) ret { <2 x half>, <2 x half> } %result } define { float, float } @test_sincos_f32(float %a) #0 { -; NONGNU-LABEL: test_sincos_f32: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: push r10 -; NONGNU-NEXT: push r11 -; NONGNU-NEXT: push r12 -; NONGNU-NEXT: push r13 -; NONGNU-NEXT: push r14 -; NONGNU-NEXT: push r15 -; NONGNU-NEXT: push r16 -; NONGNU-NEXT: push r17 -; NONGNU-NEXT: mov r16, r24 -; NONGNU-NEXT: mov r17, r25 -; NONGNU-NEXT: mov r14, r22 -; NONGNU-NEXT: mov r15, r23 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: mov r12, r22 -; NONGNU-NEXT: mov 
r13, r23 -; NONGNU-NEXT: mov r10, r24 -; NONGNU-NEXT: mov r11, r25 -; NONGNU-NEXT: mov r22, r14 -; NONGNU-NEXT: mov r23, r15 -; NONGNU-NEXT: mov r24, r16 -; NONGNU-NEXT: mov r25, r17 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: mov r18, r12 -; NONGNU-NEXT: mov r19, r13 -; NONGNU-NEXT: mov r20, r10 -; NONGNU-NEXT: mov r21, r11 -; NONGNU-NEXT: pop r17 -; NONGNU-NEXT: pop r16 -; NONGNU-NEXT: pop r15 -; NONGNU-NEXT: pop r14 -; NONGNU-NEXT: pop r13 -; NONGNU-NEXT: pop r12 -; NONGNU-NEXT: pop r11 -; NONGNU-NEXT: pop r10 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_f32: -; GNU: ; %bb.0: -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: ldd r18, Y+5 -; GNU-NEXT: ldd r19, Y+6 -; GNU-NEXT: ldd r20, Y+7 -; GNU-NEXT: ldd r21, Y+8 -; GNU-NEXT: ldd r22, Y+1 -; GNU-NEXT: ldd r23, Y+2 -; GNU-NEXT: ldd r24, Y+3 -; GNU-NEXT: ldd r25, Y+4 -; GNU-NEXT: adiw r28, 8 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r10 +; CHECK-NEXT: push r11 +; CHECK-NEXT: push r12 +; CHECK-NEXT: push r13 +; CHECK-NEXT: push r14 +; CHECK-NEXT: push r15 +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r17 +; CHECK-NEXT: mov r16, r24 +; CHECK-NEXT: mov r17, r25 +; CHECK-NEXT: mov r14, r22 +; CHECK-NEXT: mov r15, r23 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: mov r12, r22 +; CHECK-NEXT: mov r13, r23 +; CHECK-NEXT: mov r10, r24 +; CHECK-NEXT: mov r11, r25 +; CHECK-NEXT: mov r22, r14 +; CHECK-NEXT: 
mov r23, r15 +; CHECK-NEXT: mov r24, r16 +; CHECK-NEXT: mov r25, r17 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: mov r18, r12 +; CHECK-NEXT: mov r19, r13 +; CHECK-NEXT: mov r20, r10 +; CHECK-NEXT: mov r21, r11 +; CHECK-NEXT: pop r17 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: pop r15 +; CHECK-NEXT: pop r14 +; CHECK-NEXT: pop r13 +; CHECK-NEXT: pop r12 +; CHECK-NEXT: pop r11 +; CHECK-NEXT: pop r10 +; CHECK-NEXT: ret %result = call { float, float } @llvm.sincos.f32(float %a) ret { float, float } %result } define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) #0 { -; NONGNU-LABEL: test_sincos_v2f32: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: push r8 -; NONGNU-NEXT: push r9 -; NONGNU-NEXT: push r10 -; NONGNU-NEXT: push r11 -; NONGNU-NEXT: push r12 -; NONGNU-NEXT: push r13 -; NONGNU-NEXT: push r14 -; NONGNU-NEXT: push r15 -; NONGNU-NEXT: mov r14, r22 -; NONGNU-NEXT: mov r15, r23 -; NONGNU-NEXT: mov r12, r20 -; NONGNU-NEXT: mov r13, r21 -; NONGNU-NEXT: mov r10, r18 -; NONGNU-NEXT: mov r11, r19 -; NONGNU-NEXT: mov r8, r24 -; NONGNU-NEXT: mov r9, r25 -; NONGNU-NEXT: mov r22, r12 -; NONGNU-NEXT: mov r23, r13 -; NONGNU-NEXT: mov r24, r14 -; NONGNU-NEXT: mov r25, r15 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: mov r30, r8 -; NONGNU-NEXT: mov r31, r9 -; NONGNU-NEXT: std Z+15, r25 -; NONGNU-NEXT: std Z+14, r24 -; NONGNU-NEXT: std Z+13, r23 -; NONGNU-NEXT: std Z+12, r22 -; NONGNU-NEXT: mov r22, r16 -; NONGNU-NEXT: mov r23, r17 -; NONGNU-NEXT: mov r24, r10 -; NONGNU-NEXT: mov r25, r11 -; NONGNU-NEXT: rcall cos -; NONGNU-NEXT: mov r30, r8 -; NONGNU-NEXT: mov r31, r9 -; NONGNU-NEXT: std Z+11, r25 -; NONGNU-NEXT: std Z+10, r24 -; NONGNU-NEXT: std Z+9, r23 -; NONGNU-NEXT: std Z+8, r22 -; NONGNU-NEXT: mov r22, r12 -; NONGNU-NEXT: mov r23, r13 -; NONGNU-NEXT: mov r24, r14 -; NONGNU-NEXT: mov r25, r15 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: mov r30, r8 -; NONGNU-NEXT: mov r31, r9 -; NONGNU-NEXT: std Z+7, r25 -; NONGNU-NEXT: std Z+6, r24 -; NONGNU-NEXT: std Z+5, r23 -; NONGNU-NEXT: std 
Z+4, r22 -; NONGNU-NEXT: mov r22, r16 -; NONGNU-NEXT: mov r23, r17 -; NONGNU-NEXT: mov r24, r10 -; NONGNU-NEXT: mov r25, r11 -; NONGNU-NEXT: rcall sin -; NONGNU-NEXT: mov r30, r8 -; NONGNU-NEXT: mov r31, r9 -; NONGNU-NEXT: std Z+3, r25 -; NONGNU-NEXT: std Z+2, r24 -; NONGNU-NEXT: std Z+1, r23 -; NONGNU-NEXT: st Z, r22 -; NONGNU-NEXT: pop r15 -; NONGNU-NEXT: pop r14 -; NONGNU-NEXT: pop r13 -; NONGNU-NEXT: pop r12 -; NONGNU-NEXT: pop r11 -; NONGNU-NEXT: pop r10 -; NONGNU-NEXT: pop r9 -; NONGNU-NEXT: pop r8 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_v2f32: -; GNU: ; %bb.0: -; GNU-NEXT: push r12 -; GNU-NEXT: push r13 -; GNU-NEXT: push r14 -; GNU-NEXT: push r15 -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 16 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r30, r22 -; GNU-NEXT: mov r31, r23 -; GNU-NEXT: mov r14, r18 -; GNU-NEXT: mov r15, r19 -; GNU-NEXT: mov r12, r24 -; GNU-NEXT: mov r13, r25 -; GNU-NEXT: mov r26, r28 -; GNU-NEXT: mov r27, r29 -; GNU-NEXT: adiw r26, 13 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 247 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: mov r22, r20 -; GNU-NEXT: mov r23, r21 -; GNU-NEXT: mov r24, r30 -; GNU-NEXT: mov r25, r31 -; GNU-NEXT: mov r20, r26 -; GNU-NEXT: mov r21, r27 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: mov r20, r28 -; GNU-NEXT: mov r21, r29 -; GNU-NEXT: subi r20, 251 -; GNU-NEXT: sbci r21, 255 -; GNU-NEXT: mov r18, r28 -; GNU-NEXT: mov r19, r29 -; GNU-NEXT: subi r18, 255 -; GNU-NEXT: sbci r19, 255 -; GNU-NEXT: mov r22, r16 -; GNU-NEXT: mov r23, r17 -; GNU-NEXT: mov r24, r14 -; GNU-NEXT: mov r25, r15 -; GNU-NEXT: rcall sincosf -; GNU-NEXT: ldd r24, Y+11 -; GNU-NEXT: ldd r25, Y+12 -; GNU-NEXT: mov r30, r12 -; GNU-NEXT: mov r31, r13 -; GNU-NEXT: std Z+15, r25 -; GNU-NEXT: std Z+14, r24 -; GNU-NEXT: ldd r24, Y+9 -; GNU-NEXT: ldd r25, Y+10 -; GNU-NEXT: std 
Z+13, r25 -; GNU-NEXT: std Z+12, r24 -; GNU-NEXT: ldd r24, Y+3 -; GNU-NEXT: ldd r25, Y+4 -; GNU-NEXT: std Z+11, r25 -; GNU-NEXT: std Z+10, r24 -; GNU-NEXT: ldd r24, Y+1 -; GNU-NEXT: ldd r25, Y+2 -; GNU-NEXT: std Z+9, r25 -; GNU-NEXT: std Z+8, r24 -; GNU-NEXT: ldd r24, Y+15 -; GNU-NEXT: ldd r25, Y+16 -; GNU-NEXT: std Z+7, r25 -; GNU-NEXT: std Z+6, r24 -; GNU-NEXT: ldd r24, Y+13 -; GNU-NEXT: ldd r25, Y+14 -; GNU-NEXT: std Z+5, r25 -; GNU-NEXT: std Z+4, r24 -; GNU-NEXT: ldd r24, Y+7 -; GNU-NEXT: ldd r25, Y+8 -; GNU-NEXT: std Z+3, r25 -; GNU-NEXT: std Z+2, r24 -; GNU-NEXT: ldd r24, Y+5 -; GNU-NEXT: ldd r25, Y+6 -; GNU-NEXT: std Z+1, r25 -; GNU-NEXT: st Z, r24 -; GNU-NEXT: adiw r28, 16 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: pop r15 -; GNU-NEXT: pop r14 -; GNU-NEXT: pop r13 -; GNU-NEXT: pop r12 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_v2f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r8 +; CHECK-NEXT: push r9 +; CHECK-NEXT: push r10 +; CHECK-NEXT: push r11 +; CHECK-NEXT: push r12 +; CHECK-NEXT: push r13 +; CHECK-NEXT: push r14 +; CHECK-NEXT: push r15 +; CHECK-NEXT: mov r14, r22 +; CHECK-NEXT: mov r15, r23 +; CHECK-NEXT: mov r12, r20 +; CHECK-NEXT: mov r13, r21 +; CHECK-NEXT: mov r10, r18 +; CHECK-NEXT: mov r11, r19 +; CHECK-NEXT: mov r8, r24 +; CHECK-NEXT: mov r9, r25 +; CHECK-NEXT: mov r22, r12 +; CHECK-NEXT: mov r23, r13 +; CHECK-NEXT: mov r24, r14 +; CHECK-NEXT: mov r25, r15 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: mov r30, r8 +; CHECK-NEXT: mov r31, r9 +; CHECK-NEXT: std Z+15, r25 +; CHECK-NEXT: std Z+14, r24 +; CHECK-NEXT: std Z+13, r23 +; CHECK-NEXT: std Z+12, r22 +; CHECK-NEXT: mov r22, r16 +; CHECK-NEXT: mov r23, r17 +; CHECK-NEXT: mov r24, r10 +; CHECK-NEXT: mov r25, r11 +; CHECK-NEXT: rcall cos +; CHECK-NEXT: mov r30, r8 +; CHECK-NEXT: mov r31, r9 +; CHECK-NEXT: std Z+11, r25 +; CHECK-NEXT: std Z+10, r24 +; CHECK-NEXT: std Z+9, r23 +; 
CHECK-NEXT: std Z+8, r22 +; CHECK-NEXT: mov r22, r12 +; CHECK-NEXT: mov r23, r13 +; CHECK-NEXT: mov r24, r14 +; CHECK-NEXT: mov r25, r15 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: mov r30, r8 +; CHECK-NEXT: mov r31, r9 +; CHECK-NEXT: std Z+7, r25 +; CHECK-NEXT: std Z+6, r24 +; CHECK-NEXT: std Z+5, r23 +; CHECK-NEXT: std Z+4, r22 +; CHECK-NEXT: mov r22, r16 +; CHECK-NEXT: mov r23, r17 +; CHECK-NEXT: mov r24, r10 +; CHECK-NEXT: mov r25, r11 +; CHECK-NEXT: rcall sin +; CHECK-NEXT: mov r30, r8 +; CHECK-NEXT: mov r31, r9 +; CHECK-NEXT: std Z+3, r25 +; CHECK-NEXT: std Z+2, r24 +; CHECK-NEXT: std Z+1, r23 +; CHECK-NEXT: st Z, r22 +; CHECK-NEXT: pop r15 +; CHECK-NEXT: pop r14 +; CHECK-NEXT: pop r13 +; CHECK-NEXT: pop r12 +; CHECK-NEXT: pop r11 +; CHECK-NEXT: pop r10 +; CHECK-NEXT: pop r9 +; CHECK-NEXT: pop r8 +; CHECK-NEXT: ret %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) ret { <2 x float>, <2 x float> } %result } @@ -644,235 +280,127 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) #0 { ; } define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 { -; NONGNU-LABEL: test_sincos_f128: -; NONGNU: ; %bb.0: -; NONGNU-NEXT: push r2 -; NONGNU-NEXT: push r3 -; NONGNU-NEXT: push r4 -; NONGNU-NEXT: push r5 -; NONGNU-NEXT: push r6 -; NONGNU-NEXT: push r7 -; NONGNU-NEXT: push r28 -; NONGNU-NEXT: push r29 -; NONGNU-NEXT: in r28, 61 -; NONGNU-NEXT: in r29, 62 -; NONGNU-NEXT: sbiw r28, 34 -; NONGNU-NEXT: in r0, 63 -; NONGNU-NEXT: cli -; NONGNU-NEXT: out 62, r29 -; NONGNU-NEXT: out 63, r0 -; NONGNU-NEXT: out 61, r28 -; NONGNU-NEXT: std Y+2, r23 ; 2-byte Folded Spill -; NONGNU-NEXT: std Y+1, r22 ; 2-byte Folded Spill -; NONGNU-NEXT: mov r2, r20 -; NONGNU-NEXT: mov r3, r21 -; NONGNU-NEXT: mov r4, r18 -; NONGNU-NEXT: mov r5, r19 -; NONGNU-NEXT: mov r6, r24 -; NONGNU-NEXT: mov r7, r25 -; NONGNU-NEXT: mov r24, r28 -; NONGNU-NEXT: mov r25, r29 -; NONGNU-NEXT: adiw r24, 3 -; NONGNU-NEXT: rcall cosl -; NONGNU-NEXT: mov r24, r28 -; 
NONGNU-NEXT: mov r25, r29 -; NONGNU-NEXT: adiw r24, 19 -; NONGNU-NEXT: mov r18, r4 -; NONGNU-NEXT: mov r19, r5 -; NONGNU-NEXT: mov r20, r2 -; NONGNU-NEXT: mov r21, r3 -; NONGNU-NEXT: ldd r22, Y+1 ; 2-byte Folded Reload -; NONGNU-NEXT: ldd r23, Y+2 ; 2-byte Folded Reload -; NONGNU-NEXT: rcall sinl -; NONGNU-NEXT: ldd r24, Y+17 -; NONGNU-NEXT: ldd r25, Y+18 -; NONGNU-NEXT: mov r30, r6 -; NONGNU-NEXT: mov r31, r7 -; NONGNU-NEXT: std Z+31, r25 -; NONGNU-NEXT: std Z+30, r24 -; NONGNU-NEXT: ldd r24, Y+15 -; NONGNU-NEXT: ldd r25, Y+16 -; NONGNU-NEXT: std Z+29, r25 -; NONGNU-NEXT: std Z+28, r24 -; NONGNU-NEXT: ldd r24, Y+13 -; NONGNU-NEXT: ldd r25, Y+14 -; NONGNU-NEXT: std Z+27, r25 -; NONGNU-NEXT: std Z+26, r24 -; NONGNU-NEXT: ldd r24, Y+11 -; NONGNU-NEXT: ldd r25, Y+12 -; NONGNU-NEXT: std Z+25, r25 -; NONGNU-NEXT: std Z+24, r24 -; NONGNU-NEXT: ldd r24, Y+9 -; NONGNU-NEXT: ldd r25, Y+10 -; NONGNU-NEXT: std Z+23, r25 -; NONGNU-NEXT: std Z+22, r24 -; NONGNU-NEXT: ldd r24, Y+7 -; NONGNU-NEXT: ldd r25, Y+8 -; NONGNU-NEXT: std Z+21, r25 -; NONGNU-NEXT: std Z+20, r24 -; NONGNU-NEXT: ldd r24, Y+5 -; NONGNU-NEXT: ldd r25, Y+6 -; NONGNU-NEXT: std Z+19, r25 -; NONGNU-NEXT: std Z+18, r24 -; NONGNU-NEXT: ldd r24, Y+3 -; NONGNU-NEXT: ldd r25, Y+4 -; NONGNU-NEXT: std Z+17, r25 -; NONGNU-NEXT: std Z+16, r24 -; NONGNU-NEXT: ldd r24, Y+33 -; NONGNU-NEXT: ldd r25, Y+34 -; NONGNU-NEXT: std Z+15, r25 -; NONGNU-NEXT: std Z+14, r24 -; NONGNU-NEXT: ldd r24, Y+31 -; NONGNU-NEXT: ldd r25, Y+32 -; NONGNU-NEXT: std Z+13, r25 -; NONGNU-NEXT: std Z+12, r24 -; NONGNU-NEXT: ldd r24, Y+29 -; NONGNU-NEXT: ldd r25, Y+30 -; NONGNU-NEXT: std Z+11, r25 -; NONGNU-NEXT: std Z+10, r24 -; NONGNU-NEXT: ldd r24, Y+27 -; NONGNU-NEXT: ldd r25, Y+28 -; NONGNU-NEXT: std Z+9, r25 -; NONGNU-NEXT: std Z+8, r24 -; NONGNU-NEXT: ldd r24, Y+25 -; NONGNU-NEXT: ldd r25, Y+26 -; NONGNU-NEXT: std Z+7, r25 -; NONGNU-NEXT: std Z+6, r24 -; NONGNU-NEXT: ldd r24, Y+23 -; NONGNU-NEXT: ldd r25, Y+24 -; NONGNU-NEXT: std Z+5, r25 -; 
NONGNU-NEXT: std Z+4, r24 -; NONGNU-NEXT: ldd r24, Y+21 -; NONGNU-NEXT: ldd r25, Y+22 -; NONGNU-NEXT: std Z+3, r25 -; NONGNU-NEXT: std Z+2, r24 -; NONGNU-NEXT: ldd r24, Y+19 -; NONGNU-NEXT: ldd r25, Y+20 -; NONGNU-NEXT: std Z+1, r25 -; NONGNU-NEXT: st Z, r24 -; NONGNU-NEXT: adiw r28, 34 -; NONGNU-NEXT: in r0, 63 -; NONGNU-NEXT: cli -; NONGNU-NEXT: out 62, r29 -; NONGNU-NEXT: out 63, r0 -; NONGNU-NEXT: out 61, r28 -; NONGNU-NEXT: pop r29 -; NONGNU-NEXT: pop r28 -; NONGNU-NEXT: pop r7 -; NONGNU-NEXT: pop r6 -; NONGNU-NEXT: pop r5 -; NONGNU-NEXT: pop r4 -; NONGNU-NEXT: pop r3 -; NONGNU-NEXT: pop r2 -; NONGNU-NEXT: ret -; -; GNU-LABEL: test_sincos_f128: -; GNU: ; %bb.0: -; GNU-NEXT: push r6 -; GNU-NEXT: push r7 -; GNU-NEXT: push r28 -; GNU-NEXT: push r29 -; GNU-NEXT: in r28, 61 -; GNU-NEXT: in r29, 62 -; GNU-NEXT: sbiw r28, 52 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: mov r6, r24 -; GNU-NEXT: mov r7, r25 -; GNU-NEXT: mov r24, r28 -; GNU-NEXT: mov r25, r29 -; GNU-NEXT: adiw r24, 21 -; GNU-NEXT: std Y+4, r25 -; GNU-NEXT: std Y+3, r24 -; GNU-NEXT: mov r24, r28 -; GNU-NEXT: mov r25, r29 -; GNU-NEXT: adiw r24, 37 -; GNU-NEXT: std Y+2, r25 -; GNU-NEXT: std Y+1, r24 -; GNU-NEXT: mov r24, r28 -; GNU-NEXT: mov r25, r29 -; GNU-NEXT: adiw r24, 5 -; GNU-NEXT: rcall sincosl -; GNU-NEXT: ldd r24, Y+35 -; GNU-NEXT: ldd r25, Y+36 -; GNU-NEXT: mov r30, r6 -; GNU-NEXT: mov r31, r7 -; GNU-NEXT: std Z+31, r25 -; GNU-NEXT: std Z+30, r24 -; GNU-NEXT: ldd r24, Y+33 -; GNU-NEXT: ldd r25, Y+34 -; GNU-NEXT: std Z+29, r25 -; GNU-NEXT: std Z+28, r24 -; GNU-NEXT: ldd r24, Y+31 -; GNU-NEXT: ldd r25, Y+32 -; GNU-NEXT: std Z+27, r25 -; GNU-NEXT: std Z+26, r24 -; GNU-NEXT: ldd r24, Y+29 -; GNU-NEXT: ldd r25, Y+30 -; GNU-NEXT: std Z+25, r25 -; GNU-NEXT: std Z+24, r24 -; GNU-NEXT: ldd r24, Y+27 -; GNU-NEXT: ldd r25, Y+28 -; GNU-NEXT: std Z+23, r25 -; GNU-NEXT: std Z+22, r24 -; GNU-NEXT: ldd r24, Y+25 -; GNU-NEXT: ldd 
r25, Y+26 -; GNU-NEXT: std Z+21, r25 -; GNU-NEXT: std Z+20, r24 -; GNU-NEXT: ldd r24, Y+23 -; GNU-NEXT: ldd r25, Y+24 -; GNU-NEXT: std Z+19, r25 -; GNU-NEXT: std Z+18, r24 -; GNU-NEXT: ldd r24, Y+21 -; GNU-NEXT: ldd r25, Y+22 -; GNU-NEXT: std Z+17, r25 -; GNU-NEXT: std Z+16, r24 -; GNU-NEXT: ldd r24, Y+51 -; GNU-NEXT: ldd r25, Y+52 -; GNU-NEXT: std Z+15, r25 -; GNU-NEXT: std Z+14, r24 -; GNU-NEXT: ldd r24, Y+49 -; GNU-NEXT: ldd r25, Y+50 -; GNU-NEXT: std Z+13, r25 -; GNU-NEXT: std Z+12, r24 -; GNU-NEXT: ldd r24, Y+47 -; GNU-NEXT: ldd r25, Y+48 -; GNU-NEXT: std Z+11, r25 -; GNU-NEXT: std Z+10, r24 -; GNU-NEXT: ldd r24, Y+45 -; GNU-NEXT: ldd r25, Y+46 -; GNU-NEXT: std Z+9, r25 -; GNU-NEXT: std Z+8, r24 -; GNU-NEXT: ldd r24, Y+43 -; GNU-NEXT: ldd r25, Y+44 -; GNU-NEXT: std Z+7, r25 -; GNU-NEXT: std Z+6, r24 -; GNU-NEXT: ldd r24, Y+41 -; GNU-NEXT: ldd r25, Y+42 -; GNU-NEXT: std Z+5, r25 -; GNU-NEXT: std Z+4, r24 -; GNU-NEXT: ldd r24, Y+39 -; GNU-NEXT: ldd r25, Y+40 -; GNU-NEXT: std Z+3, r25 -; GNU-NEXT: std Z+2, r24 -; GNU-NEXT: ldd r24, Y+37 -; GNU-NEXT: ldd r25, Y+38 -; GNU-NEXT: std Z+1, r25 -; GNU-NEXT: st Z, r24 -; GNU-NEXT: adiw r28, 52 -; GNU-NEXT: in r0, 63 -; GNU-NEXT: cli -; GNU-NEXT: out 62, r29 -; GNU-NEXT: out 63, r0 -; GNU-NEXT: out 61, r28 -; GNU-NEXT: pop r29 -; GNU-NEXT: pop r28 -; GNU-NEXT: pop r7 -; GNU-NEXT: pop r6 -; GNU-NEXT: ret +; CHECK-LABEL: test_sincos_f128: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r2 +; CHECK-NEXT: push r3 +; CHECK-NEXT: push r4 +; CHECK-NEXT: push r5 +; CHECK-NEXT: push r6 +; CHECK-NEXT: push r7 +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: sbiw r28, 34 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: std Y+2, r23 ; 2-byte Folded Spill +; CHECK-NEXT: std Y+1, r22 ; 2-byte Folded Spill +; CHECK-NEXT: mov r2, r20 +; CHECK-NEXT: mov r3, r21 +; CHECK-NEXT: mov r4, r18 +; 
CHECK-NEXT: mov r5, r19 +; CHECK-NEXT: mov r6, r24 +; CHECK-NEXT: mov r7, r25 +; CHECK-NEXT: mov r24, r28 +; CHECK-NEXT: mov r25, r29 +; CHECK-NEXT: adiw r24, 3 +; CHECK-NEXT: rcall cosl +; CHECK-NEXT: mov r24, r28 +; CHECK-NEXT: mov r25, r29 +; CHECK-NEXT: adiw r24, 19 +; CHECK-NEXT: mov r18, r4 +; CHECK-NEXT: mov r19, r5 +; CHECK-NEXT: mov r20, r2 +; CHECK-NEXT: mov r21, r3 +; CHECK-NEXT: ldd r22, Y+1 ; 2-byte Folded Reload +; CHECK-NEXT: ldd r23, Y+2 ; 2-byte Folded Reload +; CHECK-NEXT: rcall sinl +; CHECK-NEXT: ldd r24, Y+17 +; CHECK-NEXT: ldd r25, Y+18 +; CHECK-NEXT: mov r30, r6 +; CHECK-NEXT: mov r31, r7 +; CHECK-NEXT: std Z+31, r25 +; CHECK-NEXT: std Z+30, r24 +; CHECK-NEXT: ldd r24, Y+15 +; CHECK-NEXT: ldd r25, Y+16 +; CHECK-NEXT: std Z+29, r25 +; CHECK-NEXT: std Z+28, r24 +; CHECK-NEXT: ldd r24, Y+13 +; CHECK-NEXT: ldd r25, Y+14 +; CHECK-NEXT: std Z+27, r25 +; CHECK-NEXT: std Z+26, r24 +; CHECK-NEXT: ldd r24, Y+11 +; CHECK-NEXT: ldd r25, Y+12 +; CHECK-NEXT: std Z+25, r25 +; CHECK-NEXT: std Z+24, r24 +; CHECK-NEXT: ldd r24, Y+9 +; CHECK-NEXT: ldd r25, Y+10 +; CHECK-NEXT: std Z+23, r25 +; CHECK-NEXT: std Z+22, r24 +; CHECK-NEXT: ldd r24, Y+7 +; CHECK-NEXT: ldd r25, Y+8 +; CHECK-NEXT: std Z+21, r25 +; CHECK-NEXT: std Z+20, r24 +; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: ldd r25, Y+6 +; CHECK-NEXT: std Z+19, r25 +; CHECK-NEXT: std Z+18, r24 +; CHECK-NEXT: ldd r24, Y+3 +; CHECK-NEXT: ldd r25, Y+4 +; CHECK-NEXT: std Z+17, r25 +; CHECK-NEXT: std Z+16, r24 +; CHECK-NEXT: ldd r24, Y+33 +; CHECK-NEXT: ldd r25, Y+34 +; CHECK-NEXT: std Z+15, r25 +; CHECK-NEXT: std Z+14, r24 +; CHECK-NEXT: ldd r24, Y+31 +; CHECK-NEXT: ldd r25, Y+32 +; CHECK-NEXT: std Z+13, r25 +; CHECK-NEXT: std Z+12, r24 +; CHECK-NEXT: ldd r24, Y+29 +; CHECK-NEXT: ldd r25, Y+30 +; CHECK-NEXT: std Z+11, r25 +; CHECK-NEXT: std Z+10, r24 +; CHECK-NEXT: ldd r24, Y+27 +; CHECK-NEXT: ldd r25, Y+28 +; CHECK-NEXT: std Z+9, r25 +; CHECK-NEXT: std Z+8, r24 +; CHECK-NEXT: ldd r24, Y+25 +; CHECK-NEXT: ldd r25, 
Y+26 +; CHECK-NEXT: std Z+7, r25 +; CHECK-NEXT: std Z+6, r24 +; CHECK-NEXT: ldd r24, Y+23 +; CHECK-NEXT: ldd r25, Y+24 +; CHECK-NEXT: std Z+5, r25 +; CHECK-NEXT: std Z+4, r24 +; CHECK-NEXT: ldd r24, Y+21 +; CHECK-NEXT: ldd r25, Y+22 +; CHECK-NEXT: std Z+3, r25 +; CHECK-NEXT: std Z+2, r24 +; CHECK-NEXT: ldd r24, Y+19 +; CHECK-NEXT: ldd r25, Y+20 +; CHECK-NEXT: std Z+1, r25 +; CHECK-NEXT: st Z, r24 +; CHECK-NEXT: adiw r28, 34 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: pop r7 +; CHECK-NEXT: pop r6 +; CHECK-NEXT: pop r5 +; CHECK-NEXT: pop r4 +; CHECK-NEXT: pop r3 +; CHECK-NEXT: pop r2 +; CHECK-NEXT: ret %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a) ret { fp128, fp128 } %result } @@ -880,4 +408,5 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 { attributes #0 = { nounwind } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} +; GNU: {{.*}} +; NONGNU: {{.*}} diff --git a/llvm/test/CodeGen/SPIRV/GlobalVarAddrspace.ll b/llvm/test/CodeGen/SPIRV/GlobalVarAddrspace.ll new file mode 100644 index 0000000..2bccfde --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/GlobalVarAddrspace.ll @@ -0,0 +1,23 @@ +; This test case checks that LLVM -> SPIR-V translation produces a valid +; SPIR-V module, where a global variable, defined with a non-default +; address space, has a correct non-function storage class. +; +; No additional checks are needed in addition to simple translation +; to SPIR-V. In case of an error, validation of the newly produced SPIR-V +; module would fail: spirv-val detects the problematic SPIR-V code from the +; translator and reports it as the following error: +; +; "Variables can not have a function[7] storage class outside of a function".
+; +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#Ptr:]] = OpTypePointer CrossWorkgroup %[[#]] +; CHECK: %[[#]] = OpVariable %[[#Ptr]] CrossWorkgroup %[[#]] + +@G = addrspace(1) global i1 true + +define spir_func i1 @f(i1 %0) { + store i1 %0, ptr addrspace(1) @G, align 1 + ret i1 %0 +} diff --git a/llvm/test/CodeGen/SPIRV/SamplerArgNonKernel.ll b/llvm/test/CodeGen/SPIRV/SamplerArgNonKernel.ll new file mode 100644 index 0000000..5b3a5d8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/SamplerArgNonKernel.ll @@ -0,0 +1,37 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +;CHECK: OpEntryPoint Kernel %[[#KernelId:]] +;CHECK: %[[#image2d_t:]] = OpTypeImage +;CHECK: %[[#sampler_t:]] = OpTypeSampler +;CHECK: %[[#sampled_image_t:]] = OpTypeSampledImage + +define spir_func float @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %Img, target("spirv.Sampler") %Smp) { +;CHECK-NOT: %[[#KernelId]] = OpFunction %[[#]] +;CHECK: OpFunction +;CHECK: %[[#image:]] = OpFunctionParameter %[[#image2d_t]] +;CHECK: %[[#sampler:]] = OpFunctionParameter %[[#sampler_t]] +entry: + %call = call spir_func <4 x i32> @_Z11read_imagef11ocl_image2d11ocl_samplerDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %Img, target("spirv.Sampler") %Smp, <2 x i32> zeroinitializer) +;CHECK: %[[#sampled_image:]] = OpSampledImage %[[#sampled_image_t]] %[[#image]] %[[#sampler]] +;CHECK: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#sampled_image]] %[[#]] Lod %[[#]] + + %0 = extractelement <4 x 
i32> %call, i32 0 + %conv = sitofp i32 %0 to float + ret float %conv +} + +declare spir_func <4 x i32> @_Z11read_imagef11ocl_image2d11ocl_samplerDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), i32, <2 x i32>) + +define spir_kernel void @test2(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %Img, target("spirv.Sampler") %Smp, ptr addrspace(1) %result) { +;CHECK: %[[#KernelId]] = OpFunction %[[#]] +entry: + %call = call spir_func float @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %Img, target("spirv.Sampler") %Smp) + %0 = load float, ptr addrspace(1) %result, align 4 + %add = fadd float %0, %call + store float %add, ptr addrspace(1) %result, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/SpecConstants/spec-constant-length-array.ll b/llvm/test/CodeGen/SPIRV/SpecConstants/spec-constant-length-array.ll new file mode 100644 index 0000000..fccddd7 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/SpecConstants/spec-constant-length-array.ll @@ -0,0 +1,56 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_variable_length_array %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_variable_length_array %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability VariableLengthArrayINTEL +; CHECK: OpExtension "SPV_INTEL_variable_length_array" + +; CHECK-DAG: OpDecorate %[[#]] SpecId 0 +; CHECK-DAG: OpDecorate %[[#]] SpecId 1 +; CHECK-DAG: OpDecorate %[[#]] SpecId 2 +; CHECK-DAG: OpDecorate %[[#A0:]] Alignment 4 +; CHECK-DAG: OpDecorate %[[#A1:]] Alignment 2 +; CHECK-DAG: OpDecorate %[[#A2:]] Alignment 16 + +; CHECK: %[[#VOID_TY:]] = OpTypeVoid +; CHECK: %[[#FUNC_TY:]] = OpTypeFunction %[[#VOID_TY]] +; CHECK-DAG: %[[#I64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#I32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#I8:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#F64:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#STRUCT_TY:]] = OpTypeStruct %[[#F64]] %[[#F64]] +; CHECK-DAG: 
%[[#PTR_STRUCT:]] = OpTypePointer Function %[[#STRUCT_TY]] +; CHECK-DAG: %[[#PTR_I8:]] = OpTypePointer Function %[[#I8]] +; CHECK-DAG: %[[#F32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#PTR_F32:]] = OpTypePointer Function %[[#F32]] + +; CHECK-DAG: %[[#SC0:]] = OpSpecConstant %[[#I64]] 1 +; CHECK-DAG: %[[#SC1:]] = OpSpecConstant %[[#I32]] 2 +; CHECK-DAG: %[[#SC2:]] = OpSpecConstant %[[#I8]] 4 + +; CHECK: %[[#]] = OpFunction %[[#VOID_TY]] None %[[#FUNC_TY]] +; CHECK: %[[#LABEL:]] = OpLabel + +; CHECK: %[[#A0]] = OpVariableLengthArrayINTEL %[[#PTR_F32]] %[[#SC0]] +; CHECK: %[[#A1]] = OpVariableLengthArrayINTEL %[[#PTR_I8]] %[[#SC1]] +; CHECK: %[[#A2]] = OpVariableLengthArrayINTEL %[[#PTR_STRUCT]] %[[#SC2]] + +%struct_type = type { double, double } + +define spir_kernel void @test() { + entry: + %length0 = call i64 @_Z20__spirv_SpecConstantix(i32 0, i64 1), !SYCL_SPEC_CONST_SYM_ID !0 + %length1 = call i32 @_Z20__spirv_SpecConstantii(i32 1, i32 2), !SYCL_SPEC_CONST_SYM_ID !1 + %length2 = call i8 @_Z20__spirv_SpecConstantic(i32 2, i8 4), !SYCL_SPEC_CONST_SYM_ID !2 + %scla0 = alloca float, i64 %length0, align 4 + %scla1 = alloca i8, i32 %length1, align 2 + %scla2 = alloca %struct_type, i8 %length2, align 16 + ret void +} + +declare i8 @_Z20__spirv_SpecConstantic(i32, i8) +declare i32 @_Z20__spirv_SpecConstantii(i32, i32) +declare i64 @_Z20__spirv_SpecConstantix(i32, i64) + +!0 = !{!"i64_spec_const", i32 0} +!1 = !{!"i32_spec_const", i32 1} +!2 = !{!"i8_spec_const", i32 2} diff --git a/llvm/test/CodeGen/SPIRV/align-duplicate.ll b/llvm/test/CodeGen/SPIRV/align-duplicate.ll new file mode 100644 index 0000000..8a8d8ae --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/align-duplicate.ll @@ -0,0 +1,16 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Test that duplicate align information does not result in SPIR-V validation +; errors 
due to duplicate Alignment Decorations. + +;CHECK: OpDecorate %[[#Var:]] Alignment +;CHECK: %[[#Var]] = OpVariable %[[#]] + +define spir_func void @f() { + %res = alloca i16, align 2, !spirv.Decorations !1 + ret void +} + +!1 = !{!2} +!2 = !{i32 44, i32 2} diff --git a/llvm/test/CodeGen/SPIRV/duplicate-types.ll b/llvm/test/CodeGen/SPIRV/duplicate-types.ll new file mode 100644 index 0000000..df1ae04 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/duplicate-types.ll @@ -0,0 +1,16 @@ +; Check that we don't end up with duplicated array types in TypeMap. +; The output is not piped to FileCheck, so the CHECK lines below are informational only; we only want to check the absence of errors. +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#]] = OpTypeArray %[[#]] %[[#]] +; CHECK-NOT: OpTypeArray + +%duplicate = type { [2 x ptr addrspace(4)] } + +define spir_kernel void @foo() { +entry: + alloca [2 x ptr addrspace(4)], align 8 + alloca %duplicate, align 8 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/entry-point-interfaces.ll b/llvm/test/CodeGen/SPIRV/entry-point-interfaces.ll new file mode 100644 index 0000000..f1e0927 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/entry-point-interfaces.ll @@ -0,0 +1,31 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpEntryPoint Kernel %[[#Func:]] "test" %[[#Interface1:]] %[[#Interface2:]] %[[#Interface3:]] %[[#Interface4:]] +; CHECK-DAG: OpName %[[#Func]] "test" +; CHECK-DAG: OpName %[[#Interface1]] "var" +; CHECK-DAG: OpName %[[#Interface3]] "var2" +; CHECK-DAG: OpName %[[#Interface2]] "var.const" +; CHECK-DAG: OpName %[[#Interface4]] "var2.const" +; CHECK-DAG: %[[#TypeInt:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#TypeInt]] 1 +; CHECK-DAG: %[[#Const2:]] = OpConstant
%[[#TypeInt]] 3 + +; CHECK: %[[#Interface1]] = OpVariable %[[#]] UniformConstant %[[#Const1]] +; CHECK: %[[#Interface3]] = OpVariable %[[#]] UniformConstant %[[#Const2]] +; CHECK: %[[#Interface2]] = OpVariable %[[#]] UniformConstant %[[#Const1]] +; CHECK: %[[#Interface4]] = OpVariable %[[#]] UniformConstant %[[#Const2]] + +@var = dso_local addrspace(2) constant i32 1, align 4 +@var2 = dso_local addrspace(2) constant i32 3, align 4 +@var.const = private unnamed_addr addrspace(2) constant i32 1, align 4 +@var2.const = private unnamed_addr addrspace(2) constant i32 3, align 4 + +define dso_local spir_kernel void @test() { +entry: + %0 = load i32, ptr addrspace(2) @var.const, align 4 + %1 = load i32, ptr addrspace(2) @var2.const, align 4 + %mul = mul nsw i32 %0, %1 + %mul1 = mul nsw i32 %mul, 2 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/get_global_size.ll b/llvm/test/CodeGen/SPIRV/get_global_size.ll new file mode 100644 index 0000000..959371a7 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/get_global_size.ll @@ -0,0 +1,50 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#int32:]] = OpTypeInt 32 0 +; CHECK: %[[#int64:]] = OpTypeInt 64 0 +; CHECK: %[[#vec3:]] = OpTypeVector %[[#int64]] 3 +; CHECK: %[[#ptr_input_vec3:]] = OpTypePointer Input %[[#vec3]] +; CHECK: %[[#global_size_var:]] = OpVariable %[[#ptr_input_vec3]] Input + +; CHECK: %[[#load_gs1:]] = OpLoad %[[#vec3]] %[[#global_size_var]] Aligned 1 +; CHECK: %[[#extract3:]] = OpCompositeExtract %[[#int64]] %[[#load_gs1]] 0 + +; CHECK: %[[#bitcast1:]] = OpBitcast %[[#]] %[[#]] +; CHECK: %[[#load_out1:]] = OpLoad %[[#]] %[[#bitcast1]] Aligned 8 +; CHECK: %[[#gep1:]] = OpInBoundsPtrAccessChain %[[#]] %[[#load_out1]] %[[#]] +; CHECK: OpStore %[[#gep1]] %[[#extract3]] Aligned 8 + +; CHECK: %[[#load_param_x:]] = OpLoad %[[#int32]] %[[#]] +; CHECK: 
%[[#load_gs2:]] = OpLoad %[[#vec3]] %[[#global_size_var]] Aligned 1 +; CHECK: %[[#dyn_extract:]] = OpVectorExtractDynamic %[[#int64]] %[[#load_gs2]] %[[#load_param_x]] +; CHECK: %[[#cmp:]] = OpULessThan %[[#]] %[[#load_param_x]] %[[#]] +; CHECK: %[[#select2:]] = OpSelect %[[#int64]] %[[#cmp]] %[[#dyn_extract]] %[[#]] +; CHECK: %[[#bitcast2:]] = OpBitcast %[[#]] %[[#]] +; CHECK: %[[#load_out2:]] = OpLoad %[[#]] %[[#bitcast2]] Aligned 8 +; CHECK: %[[#gep2:]] = OpInBoundsPtrAccessChain %[[#]] %[[#load_out2]] %[[#]] +; CHECK: OpStore %[[#gep2]] %[[#select2]] Aligned 8 + +define dso_local spir_kernel void @ggs(ptr noundef align 8 %out, i32 noundef %x) { +entry: + %out.addr = alloca ptr, align 8 + %x.addr = alloca i32, align 4 + store ptr %out, ptr %out.addr, align 8 + store i32 %x, ptr %x.addr, align 4 + %call = call i64 @_Z15get_global_sizej(i32 noundef 0) + %0 = load ptr, ptr %out.addr, align 8 + %arrayidx = getelementptr inbounds i64, ptr %0, i64 0 + store i64 %call, ptr %arrayidx, align 8 + %call1 = call i64 @_Z15get_global_sizej(i32 noundef 3) + %1 = load ptr, ptr %out.addr, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %1, i64 1 + store i64 %call1, ptr %arrayidx2, align 8 + %2 = load i32, ptr %x.addr, align 4 + %call3 = call i64 @_Z15get_global_sizej(i32 noundef %2) + %3 = load ptr, ptr %out.addr, align 8 + %arrayidx4 = getelementptr inbounds i64, ptr %3, i64 2 + store i64 %call3, ptr %arrayidx4, align 8 + ret void +} + +declare i64 @_Z15get_global_sizej(i32 noundef) diff --git a/llvm/test/CodeGen/SPIRV/layout.ll b/llvm/test/CodeGen/SPIRV/layout.ll new file mode 100644 index 0000000..94fa432 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/layout.ll @@ -0,0 +1,84 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Kernel +; CHECK: OpCapability Addresses +; CHECK: OpCapability 
GenericPointer +; CHECK: OpCapability Int64 +; CHECK: OpCapability Int8 +; CHECK: OpCapability Linkage + +; CHECK: OpExtInstImport "OpenCL.std" +; CHECK: OpMemoryModel Physical64 OpenCL +; CHECK: OpEntryPoint Kernel %[[#]] "foo" %[[#]] +; CHECK: OpSource OpenCL_C 200000 + +; CHECK-DAG: OpName %[[#]] +; CHECK-DAG: OpDecorate %[[#]] + + +; CHECK: %[[#I8:]] = OpTypeInt 8 0 +; CHECK: %[[#PTR_CW_I8:]] = OpTypePointer CrossWorkgroup %[[#I8]] +; CHECK: %[[#I32:]] = OpTypeInt 32 0 +; CHECK: %[[#VEC4:]] = OpTypeVector %[[#I32]] 4 +; CHECK: %[[#VOID:]] = OpTypeVoid +; CHECK: %[[#FUNC_TYPE0:]] = OpTypeFunction %[[#VOID]] %[[#PTR_CW_I8]] %[[#VEC4]] +; CHECK: %[[#FUNC_TYPE1:]] = OpTypeFunction %[[#VOID]] %[[#PTR_CW_I8]] +; CHECK: %[[#VEC3:]] = OpTypeVector %[[#I32]] 3 +; CHECK: %[[#FUNC_TYPE2:]] = OpTypeFunction %[[#VOID]] %[[#PTR_CW_I8]] %[[#VEC3]] +; CHECK: %[[#PTR_GEN_I8:]] = OpTypePointer Generic %[[#I8]] +; CHECK: %[[#STRUCT_B:]] = OpTypeStruct %[[#I32]] %[[#PTR_GEN_I8]] +; CHECK: %[[#STRUCT_C:]] = OpTypeStruct %[[#I32]] %[[#STRUCT_B]] +; CHECK: %[[#STRUCT_A:]] = OpTypeStruct %[[#I32]] %[[#STRUCT_C]] +; CHECK: %[[#F32:]] = OpTypeFloat 32 +; CHECK: %[[#CONST_2:]] = OpConstant %[[#I32]] 2 +; CHECK: %[[#ARRAY_F:]] = OpTypeArray %[[#F32]] %[[#CONST_2]] +; CHECK: %[[#ARRAY_I:]] = OpTypeArray %[[#I32]] %[[#CONST_2]] +; CHECK: %[[#PTR_CW_STRUCT_A:]] = OpTypePointer CrossWorkgroup %[[#STRUCT_A]] +; CHECK: %[[#PTR_UC_VEC4:]] = OpTypePointer UniformConstant %[[#VEC4]] +; CHECK: %[[#PTR_UC_ARRAY_F:]] = OpTypePointer UniformConstant %[[#ARRAY_F]] +; CHECK: %[[#PTR_CW_PTR_CW_I8:]] = OpTypePointer CrossWorkgroup %[[#PTR_CW_I8]] +; CHECK: %[[#I64:]] = OpTypeInt 64 0 +; CHECK: %[[#PTR_CW_ARRAY_I:]] = OpTypePointer CrossWorkgroup %[[#ARRAY_I]] + +; CHECK: %[[#NULL_I32:]] = OpConstantNull %[[#I32]] +; CHECK: %[[#CONST_I64_4:]] = OpConstant %[[#I64]] 4 +; CHECK: %[[#CONST_I32_1:]] = OpConstant %[[#I32]] 1 +; CHECK: %[[#COMP_I32:]] = OpConstantComposite %[[#ARRAY_I]] %[[#CONST_I32_1]] 
%[[#CONST_2]] + +; CHECK: %[[#VAR_V:]] = OpVariable %[[#PTR_CW_ARRAY_I]] CrossWorkgroup %[[#COMP_I32]] +; CHECK: %[[#SPECCONSTOP:]] = OpSpecConstantOp %[[#PTR_CW_I8]] InBoundsPtrAccessChain %[[#VAR_V]] %[[#NULL_I32]] %[[#CONST_I64_4]] +; CHECK: %[[#VAR_S:]] = OpVariable %[[#PTR_CW_PTR_CW_I8]] CrossWorkgroup %[[#SPECCONSTOP]] +; CHECK: %[[#NULL_ARRAY_F:]] = OpConstantNull %[[#ARRAY_F]] +; CHECK: %[[#VAR_F:]] = OpVariable %[[#PTR_UC_ARRAY_F]] UniformConstant %[[#NULL_ARRAY_F]] +; CHECK: %[[#NULL_STRUCT_A:]] = OpConstantNull %[[#STRUCT_A]] +; CHECK: %[[#VAR_A:]] = OpVariable %[[#PTR_CW_STRUCT_A]] CrossWorkgroup %[[#NULL_STRUCT_A]] + +; CHECK: %[[#FN_BAR1:]] = OpFunction %[[#VOID]] None %[[#FUNC_TYPE1]] +; CHECK: %[[#P_BAR1:]] = OpFunctionParameter %[[#PTR_CW_I8]] +; CHECK: OpFunctionEnd + +@v = addrspace(1) global [2 x i32] [i32 1, i32 2], align 4 +@s = addrspace(1) global ptr addrspace(1) getelementptr inbounds ([2 x i32], ptr addrspace(1) @v, i32 0, i32 1), align 4 + +%struct.A = type { i32, %struct.C } +%struct.C = type { i32, %struct.B } +%struct.B = type { i32, ptr addrspace(4) } + +@f = addrspace(2) constant [2 x float] zeroinitializer, align 4 +@b = external addrspace(2) constant <4 x i32> +@a = common addrspace(1) global %struct.A zeroinitializer, align 4 + +define spir_kernel void @foo(ptr addrspace(1) %a, <4 x i32> %vec_in) { +entry: + call spir_func void @bar1(ptr addrspace(1) %a) + %extractVec = shufflevector <4 x i32> %vec_in, <4 x i32> %vec_in, <3 x i32> <i32 0, i32 1, i32 2> + call spir_func void @bar2(ptr addrspace(1) %a, <3 x i32> %extractVec) + ret void +} + +declare spir_func void @bar1(ptr addrspace(1)) +declare spir_func void @bar2(ptr addrspace(1), <3 x i32>) + +!opencl.ocl.version = !{!7} +!7 = !{i32 2, i32 0} diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll new file mode 100644 index 0000000..77b8c51 --- /dev/null +++ 
b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll @@ -0,0 +1,92 @@ +;; Check that llvm.bitreverse.* intrinsics are lowered for +;; 2/4-bit scalar and vector types. + +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability ArbitraryPrecisionIntegersINTEL +; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" + +; CHECK: %[[#I4:]] = OpTypeInt 4 0 +; CHECK: %[[#I2:]] = OpTypeInt 2 0 +; CHECK: %[[#Z4:]] = OpConstantNull %[[#I4]] +; CHECK: %[[#Z2:]] = OpConstantNull %[[#I2]] +; CHECK: %[[#V2I2:]] = OpTypeVector %[[#I2]] 2 +; CHECK: %[[#V2I4:]] = OpTypeVector %[[#I4]] 2 +; CHECK: %[[#V3I2:]] = OpTypeVector %[[#I2]] 3 +; CHECK: %[[#V3I4:]] = OpTypeVector %[[#I4]] 3 +; CHECK: %[[#V4I2:]] = OpTypeVector %[[#I2]] 4 +; CHECK: %[[#V4I4:]] = OpTypeVector %[[#I4]] 4 +; CHECK: %[[#V8I2:]] = OpTypeVector %[[#I2]] 8 +; CHECK: %[[#V8I4:]] = OpTypeVector %[[#I4]] 8 +; CHECK: %[[#V16I2:]] = OpTypeVector %[[#I2]] 16 +; CHECK: %[[#V16I4:]] = OpTypeVector %[[#I4]] 16 + + +; CHECK: %[[#]] = OpBitReverse %[[#I2]] %[[#Z2]] +; CHECK: %[[#]] = OpBitReverse %[[#I4]] %[[#Z4]] +; CHECK: %[[#]] = OpBitReverse %[[#V2I2]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V2I4]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V3I2]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V3I4]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V4I2]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V4I4]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V8I2]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V8I4]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V16I2]] %[[#]] +; CHECK: %[[#]] = OpBitReverse %[[#V16I4]] %[[#]] + +define spir_kernel void @testBitRev() { +entry: + %call2 = call i2 @llvm.bitreverse.i2(i2 0) + %call4 = call i4 
@llvm.bitreverse.i4(i4 0) + ret void +} + +define spir_kernel void @testBitRevV2(<2 x i2> %a, <2 x i4> %b) { +entry: + %call2 = call <2 x i2> @llvm.bitreverse.v2i2(<2 x i2> %a) + %call4 = call <2 x i4> @llvm.bitreverse.v2i4(<2 x i4> %b) + ret void +} + +define spir_kernel void @testBitRevV3(<3 x i2> %a, <3 x i4> %b) { +entry: + %call2 = call <3 x i2> @llvm.bitreverse.v3i2(<3 x i2> %a) + %call4 = call <3 x i4> @llvm.bitreverse.v3i4(<3 x i4> %b) + ret void +} + +define spir_kernel void @testBitRevV4(<4 x i2> %a, <4 x i4> %b) { +entry: + %call2 = call <4 x i2> @llvm.bitreverse.v4i2(<4 x i2> %a) + %call4 = call <4 x i4> @llvm.bitreverse.v4i4(<4 x i4> %b) + ret void +} + +define spir_kernel void @testBitRevV8(<8 x i2> %a, <8 x i4> %b) { +entry: + %call2 = call <8 x i2> @llvm.bitreverse.v8i2(<8 x i2> %a) + %call4 = call <8 x i4> @llvm.bitreverse.v8i4(<8 x i4> %b) + ret void +} + +define spir_kernel void @testBitRevV16(<16 x i2> %a, <16 x i4> %b) { +entry: + %call2 = call <16 x i2> @llvm.bitreverse.v16i2(<16 x i2> %a) + %call4 = call <16 x i4> @llvm.bitreverse.v16i4(<16 x i4> %b) + ret void +} + +declare i2 @llvm.bitreverse.i2(i2) +declare i4 @llvm.bitreverse.i4(i4) +declare <2 x i2> @llvm.bitreverse.v2i2(<2 x i2>) +declare <2 x i4> @llvm.bitreverse.v2i4(<2 x i4>) +declare <3 x i2> @llvm.bitreverse.v3i2(<3 x i2>) +declare <3 x i4> @llvm.bitreverse.v3i4(<3 x i4>) +declare <4 x i2> @llvm.bitreverse.v4i2(<4 x i2>) +declare <4 x i4> @llvm.bitreverse.v4i4(<4 x i4>) +declare <8 x i2> @llvm.bitreverse.v8i2(<8 x i2>) +declare <8 x i4> @llvm.bitreverse.v8i4(<8 x i4>) +declare <16 x i2> @llvm.bitreverse.v16i2(<16 x i2>) +declare <16 x i4> @llvm.bitreverse.v16i4(<16 x i4>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-to-int-intrinsics.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-to-int-intrinsics.ll new file mode 100644 index 0000000..66c744f --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-to-int-intrinsics.ll @@ -0,0 +1,196 @@ +; RUN: llc -O0 
-verify-machineinstrs -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpDecorate %[[#SAT1:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT2:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT3:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT4:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT5:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT6:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT7:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT8:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT9:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT10:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT11:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT12:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT13:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT14:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT15:]] SaturatedConversion +; CHECK: OpDecorate %[[#SAT16:]] SaturatedConversion + + +; CHECK: %[[#SAT1]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_signed_i8(float %input) { +entry: + %ptr = alloca i8 + %signed_int = call i8 @llvm.fptosi.sat.i8.f32(float %input) + store i8 %signed_int, i8* %ptr + ret void + +} +declare i8 @llvm.fptosi.sat.i8.f32(float) + + +; CHECK: %[[#SAT2]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_signed_i16(float %input) { +entry: + %ptr = alloca i16 + %signed_int = call i16 @llvm.fptosi.sat.i16.f32(float %input) + store i16 %signed_int, i16* %ptr + ret void + +} +declare i16 @llvm.fptosi.sat.i16.f32(float) + +; CHECK: %[[#SAT3]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_signed_i32(float %input) { +entry: + %ptr = alloca i32 + %signed_int = call i32 @llvm.fptosi.sat.i32.f32(float %input) + store i32 %signed_int, i32* %ptr + ret void + +} +declare i32 @llvm.fptosi.sat.i32.f32(float) + + +; CHECK:
%[[#SAT4]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_signed_i64(float %input) { +entry: + %ptr = alloca i64 + %signed_int = call i64 @llvm.fptosi.sat.i64.f32(float %input) + store i64 %signed_int, i64* %ptr + ret void +} +declare i64 @llvm.fptosi.sat.i64.f32(float) + + +; CHECK: %[[#SAT5]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_signed_i8(double %input) { +entry: + %ptr = alloca i8 + %signed_int = call i8 @llvm.fptosi.sat.i8.f64(double %input) + store i8 %signed_int, i8* %ptr + ret void +} +declare i8 @llvm.fptosi.sat.i8.f64(double) + + +; CHECK: %[[#SAT6]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_signed_i16(double %input) { +entry: + %ptr = alloca i16 + %signed_int = call i16 @llvm.fptosi.sat.i16.f64(double %input) + store i16 %signed_int, i16* %ptr + ret void +} +declare i16 @llvm.fptosi.sat.i16.f64(double) + + +; CHECK: %[[#SAT7]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_signed_i32(double %input) { +entry: + %ptr = alloca i32 + %signed_int = call i32 @llvm.fptosi.sat.i32.f64(double %input) + store i32 %signed_int, i32* %ptr + ret void +} +declare i32 @llvm.fptosi.sat.i32.f64(double) + + +; CHECK: %[[#SAT8]] = OpConvertFToS %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_signed_i64(double %input) { +entry: + %ptr = alloca i64 + %signed_int = call i64 @llvm.fptosi.sat.i64.f64(double %input) + store i64 %signed_int, i64* %ptr + ret void +} +declare i64 @llvm.fptosi.sat.i64.f64(double) + +; CHECK: %[[#SAT9]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_unsigned_i8(float %input) { +entry: + %ptr = alloca i8 + %unsigned_int = call i8 @llvm.fptoui.sat.i8.f32(float %input) + store i8 %unsigned_int, i8* %ptr + ret void +} +declare i8 @llvm.fptoui.sat.i8.f32(float) + + +; CHECK: %[[#SAT10]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void 
@testfunction_float_to_unsigned_i16(float %input) { +entry: + %ptr = alloca i16 + %unsigned_int = call i16 @llvm.fptoui.sat.i16.f32(float %input) + store i16 %unsigned_int, i16* %ptr + ret void +} +declare i16 @llvm.fptoui.sat.i16.f32(float) + + +; CHECK: %[[#SAT11]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_unsigned_i32(float %input) { +entry: + %ptr = alloca i32 + %unsigned_int = call i32 @llvm.fptoui.sat.i32.f32(float %input) + store i32 %unsigned_int, i32* %ptr + ret void +} +declare i32 @llvm.fptoui.sat.i32.f32(float) + + +; CHECK: %[[#SAT12]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_float_to_unsigned_i64(float %input) { +entry: + %ptr = alloca i64 + %unsigned_int = call i64 @llvm.fptoui.sat.i64.f32(float %input) + store i64 %unsigned_int, i64* %ptr + ret void +} +declare i64 @llvm.fptoui.sat.i64.f32(float) + + +; CHECK: %[[#SAT13]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_unsigned_i8(double %input) { +entry: + %ptr = alloca i8 + %unsigned_int = call i8 @llvm.fptoui.sat.i8.f64(double %input) + store i8 %unsigned_int, i8* %ptr + ret void +} +declare i8 @llvm.fptoui.sat.i8.f64(double) + + +; CHECK: %[[#SAT14]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_unsigned_i16(double %input) { +entry: + %ptr = alloca i16 + %unsigned_int = call i16 @llvm.fptoui.sat.i16.f64(double %input) + store i16 %unsigned_int, i16* %ptr + ret void +} +declare i16 @llvm.fptoui.sat.i16.f64(double) + + +; CHECK: %[[#SAT15]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_unsigned_i32(double %input) { +entry: + %ptr = alloca i32 + %unsigned_int = call i32 @llvm.fptoui.sat.i32.f64(double %input) + store i32 %unsigned_int, i32* %ptr + ret void +} +declare i32 @llvm.fptoui.sat.i32.f64(double) + + +; CHECK: %[[#SAT16]] = OpConvertFToU %[[#]] %[[#]] +define spir_kernel void @testfunction_double_to_unsigned_i64(double %input) { 
+entry: + %ptr = alloca i64 + %unsigned_int = call i64 @llvm.fptoui.sat.i64.f64(double %input) + store i64 %unsigned_int, i64* %ptr + ret void +} +declare i64 @llvm.fptoui.sat.i64.f64(double) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/memcpy.align.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/memcpy.align.ll new file mode 100644 index 0000000..66a12b1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/memcpy.align.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +%struct.B = type { [2 x i32] } +%struct.A = type { i64, %struct.B } + +@__const.foo.b = private unnamed_addr addrspace(2) constant %struct.B { [2 x i32] [i32 1, i32 2] }, align 4 +@__const.bar.a = private unnamed_addr addrspace(2) constant %struct.A { i64 0, %struct.B { [2 x i32] [i32 1, i32 2] } }, align 8 + +define spir_func void @foo(%struct.A* noalias sret(%struct.A) %agg.result) { +entry: + %b = alloca %struct.B, align 4 + %0 = bitcast %struct.B* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) + %1 = bitcast %struct.B* %b to i8* + call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %1, i8 addrspace(2)* align 4 bitcast (%struct.B addrspace(2)* @__const.foo.b to i8 addrspace(2)*), i32 8, i1 false) +; CHECK: OpCopyMemorySized %[[#]] %[[#]] %[[#]] Aligned 4 + %b1 = getelementptr inbounds %struct.A, %struct.A* %agg.result, i32 0, i32 1 + %2 = bitcast %struct.B* %b1 to i8* + %3 = bitcast %struct.B* %b to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %2, i8* align 4 %3, i32 8, i1 false) +; CHECK: %[[#PTR1:]] = OpInBoundsPtrAccessChain %[[#]] %[[#]] %[[#]] %[[#]] +; CHECK: OpCopyMemorySized %[[#PTR1]] %[[#]] %[[#]] Aligned 8 + %4 = bitcast %struct.B* %b to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %4) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* captures(none)) + +declare 
void @llvm.memcpy.p0i8.p2i8.i32(i8* captures(none) writeonly, i8 addrspace(2)* captures(none) readonly, i32, i1) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* captures(none) writeonly, i8* captures(none) readonly, i32, i1) + +declare void @llvm.lifetime.end.p0i8(i64, i8* captures(none)) + +define spir_func void @bar(%struct.B* noalias sret(%struct.B) %agg.result) { +entry: + %a = alloca %struct.A, align 8 + %0 = bitcast %struct.A* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* %0) + %1 = bitcast %struct.A* %a to i8* + call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 8 %1, i8 addrspace(2)* align 8 bitcast (%struct.A addrspace(2)* @__const.bar.a to i8 addrspace(2)*), i32 16, i1 false) +; CHECK: OpCopyMemorySized %[[#]] %[[#]] %[[#]] Aligned 8 + %b = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 1 + %2 = bitcast %struct.B* %agg.result to i8* + %3 = bitcast %struct.B* %b to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 8 %3, i32 8, i1 false) +; CHECK: %[[#PTR2:]] = OpInBoundsPtrAccessChain %[[#]] %[[#]] %[[#]] %[[#]] +; CHECK: OpCopyMemorySized %[[#]] %[[#PTR2]] %[[#]] Aligned 4 + %4 = bitcast %struct.A* %a to i8* + call void @llvm.lifetime.end.p0i8(i64 16, i8* %4) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/tan.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/tan.ll new file mode 100644 index 0000000..dfb185da --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/tan.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#ext:]] = OpExtInstImport "OpenCL.std" +; CHECK-DAG: %[[#type_f32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#type_f64:]] = OpTypeFloat 64 +; CHECK: %[[#extinst_f32:]] = OpExtInst %[[#type_f32]] %[[#ext]] tan %[[#]] +; CHECK: %[[#extinst_f64:]] = OpExtInst %[[#type_f64]] %[[#ext]] tan %[[#]] + +define 
float @test_tan_f32(float %x) { + %res = call float @llvm.tan.f32(float %x) + ret float %res +} + +define double @test_tan_f64(double %x) { + %res = call double @llvm.tan.f64(double %x) + ret double %res +} + +declare float @llvm.tan.f32(float) +declare double @llvm.tan.f64(double) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/convert_functions.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/convert_functions.ll new file mode 100644 index 0000000..13a61b0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/convert_functions.ll @@ -0,0 +1,56 @@ +; This test checks that functions with `convert_` prefix are translated as +; OpenCL builtins only in case they match the specification. Otherwise, we +; expect such functions to be translated to SPIR-V FunctionCall. + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV: OpName %[[#Func:]] "_Z18convert_float_func" +; CHECK-SPIRV: OpName %[[#Func1:]] "_Z20convert_uint_satfunc" +; CHECK-SPIRV: OpName %[[#Func2:]] "_Z21convert_float_rtzfunc" +; CHECK-SPIRV-DAG: %[[#VoidTy:]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[#CharTy:]] = OpTypeInt 8 +; CHECK-SPIRV-DAG: %[[#FloatTy:]] = OpTypeFloat 32 + +; CHECK-SPIRV: %[[#Func]] = OpFunction %[[#VoidTy]] None %[[#]] +; CHECK-SPIRV: %[[#ConvertId1:]] = OpUConvert %[[#CharTy]] %[[#]] +; CHECK-SPIRV: %[[#ConvertId2:]] = OpConvertSToF %[[#FloatTy]] %[[#]] +; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#VoidTy]] %[[#Func]] %[[#ConvertId2]] +; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#VoidTy]] %[[#Func1]] %[[#]] +; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#VoidTy]] %[[#Func2]] %[[#ConvertId2]] +; CHECK-SPIRV-NOT: OpFConvert +; CHECK-SPIRV-NOT: OpConvertUToF + +define dso_local spir_func void @_Z18convert_float_func(float noundef %x) { +entry: + %x.addr = alloca float, align 4 + store float %x, ptr %x.addr, align 4 + 
ret void +} + +define dso_local spir_func void @_Z20convert_uint_satfunc(i32 noundef %x) { +entry: + ret void +} + +define dso_local spir_func void @_Z21convert_float_rtzfunc(float noundef %x) { +entry: + ret void +} + +define dso_local spir_func void @convert_int_bf16(i32 noundef %x) { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, ptr %x.addr, align 4 + %0 = load i32, ptr %x.addr, align 4 + call spir_func signext i8 @_Z16convert_char_rtei(i32 noundef %0) + %call = call spir_func float @_Z13convert_floati(i32 noundef %0) + call spir_func void @_Z18convert_float_func(float noundef %call) + call spir_func void @_Z20convert_uint_satfunc(i32 noundef %0) + call spir_func void @_Z21convert_float_rtzfunc(float noundef %call) + ret void +} + +declare spir_func signext i8 @_Z16convert_char_rtei(i32 noundef) + +declare spir_func float @_Z13convert_floati(i32 noundef) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/nan.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/nan.ll new file mode 100644 index 0000000..1072f07 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/nan.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Check OpenCL built-in nan translation. 
+ +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] nan %[[#]] + +define dso_local spir_kernel void @test(ptr addrspace(1) align 4 %a, i32 %b) { +entry: + %call = tail call spir_func float @_Z3nanj(i32 %b) + store float %call, ptr addrspace(1) %a, align 4 + ret void +} + +declare spir_func float @_Z3nanj(i32) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/shuffle.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/shuffle.ll new file mode 100644 index 0000000..aeca431 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpenCL/shuffle.ll @@ -0,0 +1,23 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Check OpenCL built-in shuffle and shuffle2 translation. + +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] shuffle %[[#]] %[[#]] +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] shuffle2 %[[#]] %[[#]] %[[#]] + +define spir_kernel void @test() { +entry: + %call = call spir_func <2 x float> @_Z7shuffleDv2_fDv2_j(<2 x float> zeroinitializer, <2 x i32> zeroinitializer) + ret void +} + +declare spir_func <2 x float> @_Z7shuffleDv2_fDv2_j(<2 x float>, <2 x i32>) + +define spir_kernel void @test2() { +entry: + %call = call spir_func <4 x float> @_Z8shuffle2Dv2_fS_Dv4_j(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <4 x i32> zeroinitializer) + ret void +} + +declare spir_func <4 x float> @_Z8shuffle2Dv2_fS_Dv4_j(<2 x float>, <2 x float>, <4 x i32>) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/printf.ll b/llvm/test/CodeGen/SPIRV/transcoding/printf.ll new file mode 100644 index 0000000..338f0a5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/printf.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +@.str = private 
unnamed_addr addrspace(2) constant [12 x i8] c"Hello World\00", align 1 + +; CHECK-SPIRV: %[[#]] = OpExtInst %[[#]] %[[#]] printf %[[#]] + +define dso_local spir_kernel void @BuiltinPrintf() { +entry: + %call = tail call i32 (ptr addrspace(2), ...) @printf(ptr addrspace(2) noundef @.str) + ret void +} + +declare noundef i32 @printf(ptr addrspace(2) nocapture noundef readonly, ...) diff --git a/llvm/test/CodeGen/SPIRV/zero-length-array.ll b/llvm/test/CodeGen/SPIRV/zero-length-array.ll new file mode 100644 index 0000000..668bf20 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/zero-length-array.ll @@ -0,0 +1,12 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#type:]] = OpTypeInt 32 0 +; CHECK: %[[#ext:]] = OpTypeRuntimeArray %[[#type]] +; CHECK: %[[#]] = OpTypePointer Function %[[#ext]] + +define spir_func void @_Z3foov() { +entry: + %i = alloca [0 x i32], align 4 + ret void +} diff --git a/llvm/test/CodeGen/XCore/llvm.sincos.ll b/llvm/test/CodeGen/XCore/llvm.sincos.ll index 690c038..e01f208 100644 --- a/llvm/test/CodeGen/XCore/llvm.sincos.ll +++ b/llvm/test/CodeGen/XCore/llvm.sincos.ll @@ -26,9 +26,8 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind { } ; CHECK-LABEL: test_sincos_f32: -; OTHER: bl sinf -; OTHER: bl cosf -; GNU: bl sincosf +; CHECK: bl sinf +; CHECK: bl cosf define { float, float } @test_sincos_f32(float %a) nounwind { %result = call { float, float } @llvm.sincos.f32(float %a) ret { float, float } %result diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td index 783a861..642f8b8 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -104,18 +104,6 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi // CHECK-NEXT: #endif // 
CHECK: #ifdef GET_INIT_RUNTIME_LIBCALL_NAMES -// CHECK-NEXT: const RTLIB::LibcallImpl llvm::RTLIB::RuntimeLibcallsInfo::DefaultLibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { -// CHECK-NEXT: RTLIB::Unsupported, // RTLIB::BZERO -// CHECK-NEXT: RTLIB::Unsupported, // RTLIB::CALLOC -// CHECK-NEXT: RTLIB::Unsupported, // RTLIB::MEMCPY -// CHECK-NEXT: RTLIB::Unsupported, // RTLIB::MEMSET -// CHECK-NEXT: RTLIB::__ashlsi3, // RTLIB::SHL_I32 -// CHECK-NEXT: RTLIB::sqrtl_f80, // RTLIB::SQRT_F80 -// CHECK-NEXT: RTLIB::sqrtl_f128, // RTLIB::SQRT_F128 -// CHECK-NEXT: RTLIB::__lshrdi3, // RTLIB::SRL_I64 -// CHECK-NEXT: RTLIB::Unsupported -// CHECK-NEXT: }; -// CHECK-EMPTY: // CHECK-EMPTY: // CHECK-NEXT: #ifdef __GNUC__ // CHECK-NEXT: #pragma GCC diagnostic push @@ -257,7 +245,7 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi // CHECK-EMPTY: // CHECK-NEXT: return; // CHECK-NEXT: } -// CHECK-NEXT: initDefaultLibCallImpls(); +// CHECK-NEXT: LLVM_DEBUG(dbgs() << "no system runtime library applied to target \'" << TT.str() << "\'\n"); // CHECK-NEXT: } // CHECK-EMPTY: // CHECK: #endif diff --git a/llvm/test/Transforms/PGOProfile/prof-inject-existing.ll b/llvm/test/Transforms/PGOProfile/prof-inject-existing.ll new file mode 100644 index 0000000..f51ec17 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/prof-inject-existing.ll @@ -0,0 +1,22 @@ +; Test that prof-inject does not modify existing metadata (incl. 
"unknown") + +; RUN: opt -passes=prof-inject %s -S -o - | FileCheck %s + +define void @foo(i32 %i) { + %c = icmp eq i32 %i, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + br i1 %c, label %yes2, label %no, !prof !1 +yes2: + ret void +no: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 2} +!1 = !{!"unknown"} +; CHECK: define void @foo(i32 %i) !prof !0 +; CHECK: br i1 %c, label %yes, label %no, !prof !1 +; CHECK: !0 = !{!"function_entry_count", i64 1000} +; CHECK: !1 = !{!"branch_weights", i32 1, i32 2} +; CHECK: !2 = !{!"unknown"} diff --git a/llvm/test/Transforms/PGOProfile/prof-verify-as-needed.ll b/llvm/test/Transforms/PGOProfile/prof-verify-as-needed.ll index 07e1f2d..63342da 100644 --- a/llvm/test/Transforms/PGOProfile/prof-verify-as-needed.ll +++ b/llvm/test/Transforms/PGOProfile/prof-verify-as-needed.ll @@ -1,6 +1,6 @@ ; Test that prof-inject only injects missing metadata -; RUN: opt -passes=prof-inject %s -S -o - | FileCheck %s +; RUN: opt -passes=prof-inject -profcheck-default-function-entry-count=10 %s -S -o - | FileCheck %s define void @foo(i32 %i) { %c = icmp eq i32 %i, 0 @@ -13,8 +13,26 @@ no: ret void } +define void @cold(i32 %i) !prof !1 { + %c = icmp eq i32 %i, 0 + br i1 %c, label %yes, label %no +yes: + br i1 %c, label %yes2, label %no +yes2: + ret void +no: + ret void +} !0 = !{!"branch_weights", i32 1, i32 2} -; CHECK: br i1 %c, label %yes, label %no, !prof !0 -; CHECK: br i1 %c, label %yes2, label %no, !prof !1 -; CHECK: !0 = !{!"branch_weights", i32 1, i32 2} -; CHECK: !1 = !{!"branch_weights", i32 3, i32 5} +!1 = !{!"function_entry_count", i32 0} + +; CHECK-LABEL: @foo +; CHECK: br i1 %c, label %yes, label %no, !prof !1 +; CHECK: br i1 %c, label %yes2, label %no, !prof !2 +; CHECK-LABEL: @cold +; CHECK: br i1 %c, label %yes, label %no{{$}} +; CHECK: br i1 %c, label %yes2, label %no{{$}} +; CHECK: !0 = !{!"function_entry_count", i64 10} +; CHECK: !1 = !{!"branch_weights", i32 1, i32 2} +; CHECK: !2 = !{!"branch_weights", i32 3, i32 
5} +; CHECK: !3 = !{!"function_entry_count", i32 0} diff --git a/llvm/test/Transforms/PGOProfile/prof-verify-existing.ll b/llvm/test/Transforms/PGOProfile/prof-verify-existing.ll index ea4f0f9..793b221 100644 --- a/llvm/test/Transforms/PGOProfile/prof-verify-existing.ll +++ b/llvm/test/Transforms/PGOProfile/prof-verify-existing.ll @@ -1,21 +1,23 @@ ; Test that prof-inject does not modify existing metadata (incl. "unknown") -; RUN: opt -passes=prof-inject %s -S -o - | FileCheck %s ; RUN: opt -passes=prof-verify %s -S --disable-output -define void @foo(i32 %i) { +define void @foo(i32 %i) !prof !0 { %c = icmp eq i32 %i, 0 - br i1 %c, label %yes, label %no, !prof !0 + br i1 %c, label %yes, label %no, !prof !1 yes: - br i1 %c, label %yes2, label %no, !prof !1 + br i1 %c, label %yes2, label %no, !prof !2 yes2: ret void no: ret void } -!0 = !{!"branch_weights", i32 1, i32 2} -!1 = !{!"unknown"} -; CHECK: br i1 %c, label %yes, label %no, !prof !0 -; CHECK: !0 = !{!"branch_weights", i32 1, i32 2} -; CHECK: !1 = !{!"unknown"} +!0 = !{!"function_entry_count", i32 1} +!1 = !{!"branch_weights", i32 1, i32 2} +!2 = !{!"unknown"} +; CHECK: define void @foo(i32 %i) !prof !0 +; CHECK: br i1 %c, label %yes, label %no, !prof !1 +; CHECK: !0 = !{!"function_entry_count", i64 1} +; CHECK: !1 = !{!"branch_weights", i32 1, i32 2} +; CHECK: !2 = !{!"unknown"} diff --git a/llvm/test/Transforms/PGOProfile/prof-verify-known-cold.ll b/llvm/test/Transforms/PGOProfile/prof-verify-known-cold.ll new file mode 100644 index 0000000..7875300 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/prof-verify-known-cold.ll @@ -0,0 +1,15 @@ +; Test prof-verify for functions explicitly marked as cold + +; RUN: opt -passes=prof-inject,prof-verify %s -o - 2>&1 | FileCheck %s + +define void @foo(i32 %i) !prof !0 { + %c = icmp eq i32 %i, 0 + br i1 %c, label %yes, label %no +yes: + ret void +no: + ret void +} +!0 = !{!"function_entry_count", i32 0} + +; CHECK-NOT: Profile verification failed diff --git 
a/llvm/test/Transforms/PGOProfile/prof-verify-no-entrycount.ll b/llvm/test/Transforms/PGOProfile/prof-verify-no-entrycount.ll new file mode 100644 index 0000000..3b059fd --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/prof-verify-no-entrycount.ll @@ -0,0 +1,14 @@ +; Test prof-verify for functions without entry count + +; RUN: not opt -passes=prof-verify %s -o - 2>&1 | FileCheck %s + +define void @foo(i32 %i) { + %c = icmp eq i32 %i, 0 + br i1 %c, label %yes, label %no +yes: + ret void +no: + ret void +} + +; CHECK: Profile verification failed: function entry count missing (set to 0 if cold) diff --git a/llvm/test/Transforms/PGOProfile/prof-verify.ll b/llvm/test/Transforms/PGOProfile/prof-verify.ll index 3d984d8..5015950 100644 --- a/llvm/test/Transforms/PGOProfile/prof-verify.ll +++ b/llvm/test/Transforms/PGOProfile/prof-verify.ll @@ -5,7 +5,7 @@ ; RUN: opt -passes=prof-inject,prof-verify %s --disable-output ; RUN: opt -enable-profcheck %s -S -o - | FileCheck %s --check-prefix=INJECT -define void @foo(i32 %i) { +define void @foo(i32 %i) !prof !0 { %c = icmp eq i32 %i, 0 br i1 %c, label %yes, label %no yes: @@ -13,8 +13,9 @@ yes: no: ret void } +!0 = !{!"function_entry_count", i32 1} -; INJECT: br i1 %c, label %yes, label %no, !prof !0 -; INJECT: !0 = !{!"branch_weights", i32 3, i32 5} +; INJECT: br i1 %c, label %yes, label %no, !prof !1 +; INJECT: !1 = !{!"branch_weights", i32 3, i32 5} -; VERIFY: Profile verification failed
\ No newline at end of file +; VERIFY: Profile verification failed: branch annotation missing
\ No newline at end of file |