Diffstat (limited to 'llvm/test/CodeGen/X86/trunc-srl-load.ll')
-rw-r--r--   llvm/test/CodeGen/X86/trunc-srl-load.ll   236
1 file changed, 236 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
new file mode 100644
index 0000000..d9c21d3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64
+
+; Tests for the analysis of non-constant shift amounts to improve load address math
+
+; Alignment of shift amounts should allow sub-integer loads.
+
+define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub64_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    andl $48, %ecx
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    movzwl (%eax,%ecx), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub64_16:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    andl $48, %esi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    movzwl (%rdi,%rsi), %eax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 63
+  %idx_align = and i32 %idx_bounds, -16
+  %sh = zext nneg i32 %idx_align to i64
+  %ld = load i64, ptr %word, align 8
+  %sub = lshr i64 %ld, %sh
+  %res = trunc i64 %sub to i16
+  ret i16 %res
+}
+
+define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    andl $112, %ecx
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    movzwl (%eax,%ecx), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub128_16:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    andl $112, %esi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    movzwl (%rdi,%rsi), %eax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -16
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i16
+  ret i16 %res
+}
+
+define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    andl $96, %ecx
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    movl (%eax,%ecx), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub128_32:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    andl $96, %esi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    movl (%rdi,%rsi), %eax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -32
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i32
+  ret i32 %res
+}
+
+define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl $64, %edx
+; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    movl (%ecx,%edx), %eax
+; X86-NEXT:    movl 4(%ecx,%edx), %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub128_64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    andl $64, %esi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    movq (%rdi,%rsi), %rax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i64
+  ret i64 %res
+}
+
+define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    andl $63, %ecx
+; X86-NEXT:    movzbl (%eax,%ecx), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub512_8:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    andl $63, %esi
+; X64-NEXT:    movzbl (%rdi,%rsi), %eax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -8
+  %ld = load i512, ptr %word, align 8
+  %sh = zext nneg i32 %idx_align to i512
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i8
+  ret i8 %res
+}
+
+define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    andl $56, %edx
+; X86-NEXT:    movl (%ecx,%edx), %eax
+; X86-NEXT:    movl 4(%ecx,%edx), %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub512_64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    andl $56, %esi
+; X64-NEXT:    movq (%rdi,%rsi), %rax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i512
+  %ld = load i512, ptr %word, align 8
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i64
+  ret i64 %res
+}
+
+define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    andl $48, %edx
+; X86-NEXT:    movl (%ecx,%edx), %esi
+; X86-NEXT:    movl 4(%ecx,%edx), %edi
+; X86-NEXT:    movl 8(%ecx,%edx), %ebx
+; X86-NEXT:    movl 12(%ecx,%edx), %ecx
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %edi, 4(%eax)
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: extractSub512_128:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    andl $48, %esi
+; X64-NEXT:    movq (%rdi,%rsi), %rax
+; X64-NEXT:    movq 8(%rdi,%rsi), %rdx
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -128
+  %sh = zext nneg i32 %idx_align to i512
+  %ld = load i512, ptr %word, align 8
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i128
+  ret i128 %res
+}
+
+define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub4096_64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $4032, %edx # imm = 0xFC0
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    movl (%ecx,%edx), %eax
+; X86-NEXT:    movl 4(%ecx,%edx), %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub4096_64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    andl $4032, %esi # imm = 0xFC0
+; X64-NEXT:    shrl $3, %esi
+; X64-NEXT:    movq (%rdi,%rsi), %rax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 4095
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i4096
+  %ld = load i4096, ptr %word, align 8
+  %sub = lshr i4096 %ld, %sh
+  %res = trunc i4096 %sub to i64
+  ret i64 %res
+}
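For reference, the equivalence these tests exercise: because the shift amount is masked to a multiple of the extract width, the lshr/trunc pair only ever selects a whole, naturally aligned field, so on a little-endian target the wide load can become a narrow load at base + (bit_offset / 8). Worked through extractSub64_16 with %idx = 37: 37 & 63 = 37, then 37 & -16 = 32, so the truncate keeps bits [32,48), i.e. the i16 at byte offset 32 / 8 = 4. Below is a minimal hand-written LLVM IR sketch of that fold; the function names and the GEP-based narrow form are illustrative assumptions about the intended lowering (matching the CHECK lines above), not autogenerated output from the test.

; Wide form, as written in the tests above.
define i16 @extract_wide(ptr %word, i32 %idx) nounwind {
  %idx_bounds = and i32 %idx, 63         ; keep the bit index inside the i64
  %idx_align = and i32 %idx_bounds, -16  ; snap it to an i16 boundary
  %sh = zext nneg i32 %idx_align to i64
  %ld = load i64, ptr %word, align 8     ; full-width load
  %sub = lshr i64 %ld, %sh
  %res = trunc i64 %sub to i16
  ret i16 %res
}

; Hypothetical narrow form matching the CHECK lines: the two masks fold to
; idx & 48, the >> 3 converts a bit offset to a byte offset, and a single
; i16 load replaces the i64 load plus variable shift (little-endian only).
define i16 @extract_narrow(ptr %word, i32 %idx) nounwind {
  %idx_bits = and i32 %idx, 48           ; (idx & 63) & -16 == idx & 48
  %idx_byte = lshr i32 %idx_bits, 3      ; andl $48 ; shrl $3
  %gep = getelementptr i8, ptr %word, i32 %idx_byte
  %res = load i16, ptr %gep, align 2     ; movzwl (%eax,%ecx), %eax
  ret i16 %res
}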
