llvm-project/llvm/test/CodeGen/X86/select.ll

404 lines
9.3 KiB
LLVM
Raw Normal View History

; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s
; PR5757
%0 = type { i64, i32 }
define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
%t0 = load %0, %0* %p
%t1 = load %0, %0* %q
%t4 = select i1 %r, %0 %t0, %0 %t1
%t5 = extractvalue %0 %t4, 1
ret i32 %t5
; CHECK-LABEL: test1:
; CHECK: cmovneq %rdi, %rsi
; CHECK: movl (%rsi), %eax
; ATOM-LABEL: test1:
; ATOM: cmovneq %rdi, %rsi
; ATOM: movl (%rsi), %eax
}
; PR2139
define i32 @test2() nounwind {
entry:
%tmp73 = tail call i1 @return_false() ; <i8> [#uses=1]
%g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2]
%tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1]
%tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2]
%tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1]
br i1 %tmp87, label %bb90, label %bb91
bb90: ; preds = %bb84, %bb72
unreachable
bb91: ; preds = %bb84
ret i32 0
; CHECK-LABEL: test2:
; CHECK: cmovnew
; CHECK: cwtl
; ATOM-LABEL: test2:
; ATOM: cmovnew
; ATOM: cwtl
}
declare i1 @return_false()
;; Select between two floating point constants.
define float @test3(i32 %x) nounwind readnone {
entry:
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
ret float %iftmp.0.0
; CHECK-LABEL: test3:
; CHECK: movss {{.*}},4), %xmm0
; ATOM-LABEL: test3:
; ATOM: movss {{.*}},4), %xmm0
}
define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
entry:
%0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction One of several parallel first steps to remove the target type of pointers, replacing them with a single opaque pointer type. This adds an explicit type parameter to the gep instruction so that when the first parameter becomes an opaque pointer type, the type to gep through is still available to the instructions. * This doesn't modify gep operators, only instructions (operators will be handled separately) * Textual IR changes only. Bitcode (including upgrade) and changing the in-memory representation will be in separate changes. * geps of vectors are transformed as: getelementptr <4 x float*> %x, ... ->getelementptr float, <4 x float*> %x, ... Then, once the opaque pointer type is introduced, this will ultimately look like: getelementptr float, <4 x ptr> %x with the unambiguous interpretation that it is a vector of pointers to float. * address spaces remain on the pointer, not the type: getelementptr float addrspace(1)* %x ->getelementptr float, float addrspace(1)* %x Then, eventually: getelementptr float, ptr addrspace(1) %x Importantly, the massive amount of test case churn has been automated by same crappy python code. I had to manually update a few test cases that wouldn't fit the script's model (r228970,r229196,r229197,r229198). The python script just massages stdin and writes the result to stdout, I then wrapped that in a shell script to handle replacing files, then using the usual find+xargs to migrate all the files. update.py: import fileinput import sys import re ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))") normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))") def conv(match, line): if not match: return line line = match.groups()[0] if len(match.groups()[5]) == 0: line += match.groups()[2] line += match.groups()[3] line += ", " line += match.groups()[1] line += "\n" return line for line in sys.stdin: if line.find("getelementptr ") == line.find("getelementptr inbounds"): if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("): line = conv(re.match(ibrep, line), line) elif line.find("getelementptr ") != line.find("getelementptr ("): line = conv(re.match(normrep, line), line) sys.stdout.write(line) apply.sh: for name in "$@" do python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name" rm -f "$name.tmp" done The actual commands: From llvm/src: find test/ -name *.ll | xargs ./apply.sh From llvm/src/tools/clang: find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}" From llvm/src/tools/polly: find test/ -name *.ll | xargs ./apply.sh After that, check-all (with llvm, clang, clang-tools-extra, lld, compiler-rt, and polly all checked out). The extra 'rm' in the apply.sh script is due to a few files in clang's test suite using interesting unicode stuff that my python script was throwing exceptions on. None of those files needed to be migrated, so it seemed sufficient to ignore those cases. Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7636 llvm-svn: 230786
2015-02-28 03:29:02 +08:00
%1 = getelementptr i8, i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
%2 = load i8, i8* %1, align 1 ; <i8> [#uses=1]
ret i8 %2
; CHECK-LABEL: test4:
; CHECK: movsbl ({{.*}},4), %eax
; ATOM-LABEL: test4:
; ATOM: movsbl ({{.*}},4), %eax
}
define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
%x = select i1 %c, <2 x i16> %a, <2 x i16> %b
store <2 x i16> %x, <2 x i16>* %p
ret void
; CHECK-LABEL: test5:
; ATOM-LABEL: test5:
}
define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
%tmp = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=1]
%tmp3 = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=2]
%tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
%iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %iftmp.38.0, <4 x float>* %A
ret void
; Verify that the fmul gets sunk into the one part of the diamond where it is
; needed.
; CHECK-LABEL: test6:
; CHECK: je
; CHECK: ret
; CHECK: mulps
; CHECK: ret
; ATOM-LABEL: test6:
; ATOM: je
; ATOM: ret
; ATOM: mulps
; ATOM: ret
}
; Select with fp80's
define x86_fp80 @test7(i32 %tmp8) nounwind {
%tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1]
%retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
ret x86_fp80 %retval
; CHECK-LABEL: test7:
; CHECK: leaq
; CHECK: fldt (%r{{.}}x,%r{{.}}x)
; ATOM-LABEL: test7:
; ATOM: leaq
; ATOM: fldt (%r{{.}}x,%r{{.}}x)
}
; widening select v6i32 and then a sub
define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
store <6 x i32> %val, <6 x i32>* %dst.addr
ret void
; CHECK-LABEL: test8:
; ATOM-LABEL: test8:
}
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp ne i64 %x, 0
%cond = select i1 %cmp, i64 %y, i64 -1
ret i64 %cond
; CHECK-LABEL: test9:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
; ATOM-LABEL: test9:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: orq %rsi, %rax
; ATOM: ret
}
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp eq i64 %x, 0
%cond = select i1 %cmp, i64 -1, i64 %y
ret i64 %cond
; CHECK-LABEL: test9a:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
; ATOM-LABEL: test9a:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: orq %rsi, %rax
; ATOM: ret
}
define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp eq i64 %x, 0
%A = sext i1 %cmp to i64
%cond = or i64 %y, %A
ret i64 %cond
; CHECK-LABEL: test9b:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
; ATOM-LABEL: test9b:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: orq %rsi, %rax
; ATOM: ret
}
;; Select between -1 and 1.
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp eq i64 %x, 0
%cond = select i1 %cmp, i64 -1, i64 1
ret i64 %cond
; CHECK-LABEL: test10:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: orq $1, %rax
; CHECK: ret
; ATOM-LABEL: test10:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: orq $1, %rax
; ATOM: ret
}
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp eq i64 %x, 0
%cond = select i1 %cmp, i64 %y, i64 -1
ret i64 %cond
; CHECK-LABEL: test11:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: notq %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
; ATOM-LABEL: test11:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: notq %rax
; ATOM: orq %rsi, %rax
; ATOM: ret
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
%cmp = icmp ne i64 %x, 0
%cond = select i1 %cmp, i64 -1, i64 %y
ret i64 %cond
; CHECK-LABEL: test11a:
; CHECK: cmpq $1, %rdi
; CHECK: sbbq %rax, %rax
; CHECK: notq %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
; ATOM-LABEL: test11a:
; ATOM: cmpq $1, %rdi
; ATOM: sbbq %rax, %rax
; ATOM: notq %rax
; ATOM: orq %rsi, %rax
; ATOM: ret
}
declare noalias i8* @_Znam(i64) noredzone
define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
entry:
%A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
%B = extractvalue { i64, i1 } %A, 1
%C = extractvalue { i64, i1 } %A, 0
%D = select i1 %B, i64 -1, i64 %C
%call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
ret i8* %call
; CHECK-LABEL: test12:
; CHECK: mulq
; CHECK: movq $-1, %[[R:r..]]
Allocate local registers in order for optimal coloring. Also avoid locals evicting locals just because they want a cheaper register. Problem: MI Sched knows exactly how many registers we have and assumes they can be colored. In cases where we have large blocks, usually from unrolled loops, greedy coloring fails. This is a source of "regressions" from the MI Scheduler on x86. I noticed this issue on x86 where we have long chains of two-address defs in the same live range. It's easy to see this in matrix multiplication benchmarks like IRSmk and even the unit test misched-matmul.ll. A fundamental difference between the LLVM register allocator and conventional graph coloring is that in our model a live range can't discover its neighbors, it can only verify its neighbors. That's why we initially went for greedy coloring and added eviction to deal with the hard cases. However, for singly defined and two-address live ranges, we can optimally color without visiting neighbors simply by processing the live ranges in instruction order. Other beneficial side effects: It is much easier to understand and debug regalloc for large blocks when the live ranges are allocated in order. Yes, global allocation is still very confusing, but it's nice to be able to comprehend what happened locally. Heuristics could be added to bias register assignment based on instruction locality (think late register pairing, banks...). Intuituvely this will make some test cases that are on the threshold of register pressure more stable. llvm-svn: 187139
2013-07-26 02:35:14 +08:00
; CHECK: cmovnoq %rax, %[[R]]
; CHECK: jmp __Znam
; ATOM-LABEL: test12:
; ATOM: mulq
; ATOM: movq $-1, %rdi
; ATOM: cmovnoq %rax, %rdi
; ATOM: jmp __Znam
}
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
define i32 @test13(i32 %a, i32 %b) nounwind {
%c = icmp ult i32 %a, %b
%d = sext i1 %c to i32
ret i32 %d
; CHECK-LABEL: test13:
; CHECK: cmpl
; CHECK-NEXT: sbbl
; CHECK-NEXT: ret
; ATOM-LABEL: test13:
; ATOM: cmpl
; ATOM-NEXT: sbbl
; ATOM: ret
}
define i32 @test14(i32 %a, i32 %b) nounwind {
%c = icmp uge i32 %a, %b
%d = sext i1 %c to i32
ret i32 %d
; CHECK-LABEL: test14:
; CHECK: cmpl
; CHECK-NEXT: sbbl
; CHECK-NEXT: notl
; CHECK-NEXT: ret
; ATOM-LABEL: test14:
; ATOM: cmpl
; ATOM-NEXT: sbbl
; ATOM-NEXT: notl
; ATOM: ret
}
; rdar://10961709
define i32 @test15(i32 %x) nounwind {
entry:
%cmp = icmp ne i32 %x, 0
%sub = sext i1 %cmp to i32
ret i32 %sub
; CHECK-LABEL: test15:
; CHECK: negl
; CHECK: sbbl
; ATOM-LABEL: test15:
; ATOM: negl
; ATOM: sbbl
}
define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
entry:
%cmp = icmp ne i64 %x, 0
%conv1 = sext i1 %cmp to i64
ret i64 %conv1
; CHECK-LABEL: test16:
; CHECK: negq
; CHECK: sbbq
; ATOM-LABEL: test16:
; ATOM: negq
; ATOM: sbbq
}
define i16 @test17(i16 %x) nounwind {
entry:
%cmp = icmp ne i16 %x, 0
%sub = sext i1 %cmp to i16
ret i16 %sub
; CHECK-LABEL: test17:
; CHECK: negw
; CHECK: sbbw
; ATOM-LABEL: test17:
; ATOM: negw
; ATOM: sbbw
}
define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
%cmp = icmp slt i32 %x, 15
%sel = select i1 %cmp, i8 %a, i8 %b
ret i8 %sel
; CHECK-LABEL: test18:
; CHECK: cmpl $15, %edi
; CHECK: cmovgel %edx
; ATOM-LABEL: test18:
; ATOM: cmpl $15, %edi
; ATOM: cmovgel %edx
}
; CHECK-LABEL: @trunc_select_miscompile
; CHECK-NOT: sarb
define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
%tmp1 = select i1 %cc, i32 3, i32 2
%tmp2 = shl i32 %a, %tmp1
ret i32 %tmp2
}
define void @test19() {
; This is a massive reduction of an llvm-stress test case that generates
; interesting chains feeding setcc and eventually a f32 select operation. This
; is intended to exercise the SELECT formation in the DAG combine simplifying
; a simplified select_cc node. If it it regresses and is no longer triggering
; that code path, it can be deleted.
;
; CHECK-LABEL: @test19
; CHECK: testb
; CHECK: cmpl
; CHECK: ucomiss
BB:
br label %CF
CF:
%Cmp10 = icmp ule i8 undef, undef
br i1 %Cmp10, label %CF, label %CF250
CF250:
%E12 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
%Cmp32 = icmp ugt i1 %Cmp10, false
br i1 %Cmp32, label %CF, label %CF242
CF242:
%Cmp38 = icmp uge i32 %E12, undef
%FC = uitofp i1 %Cmp38 to float
%Sl59 = select i1 %Cmp32, float %FC, float undef
%Cmp60 = fcmp ugt float undef, undef
br i1 %Cmp60, label %CF242, label %CF244
CF244:
%B122 = fadd float %Sl59, undef
ret void
}