forked from OSchip/llvm-project
549 lines
18 KiB
LLVM
549 lines
18 KiB
LLVM
; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; This test checks that x86-cmov-converter optimization transform CMOV
|
|
;; instruction into branches when it is profitable.
|
|
;; There are 5 cases below:
|
|
;; 1. CmovInCriticalPath:
|
|
;; CMOV depends on the condition and it is in the hot path.
|
|
;; Thus, it worths transforming.
|
|
;;
|
|
;; 2. CmovNotInCriticalPath:
|
|
;; Similar test like in (1), just that CMOV is not in the hot path.
|
|
;; Thus, it does not worth transforming.
|
|
;;
|
|
;; 3. MaxIndex:
|
|
;; Maximum calculation algorithm that is looking for the max index,
|
|
;; calculating CMOV value is cheaper than calculating CMOV condition.
|
|
;; Thus, it worths transforming.
|
|
;;
|
|
;; 4. MaxValue:
|
|
;; Maximum calculation algorithm that is looking for the max value,
|
|
;; calculating CMOV value is not cheaper than calculating CMOV condition.
|
|
;; Thus, it does not worth transforming.
|
|
;;
|
|
;; 5. BinarySearch:
|
|
;; Usually, binary search CMOV is not predicted.
|
|
;; Thus, it does not worth transforming.
|
|
;;
|
|
;; Test was created using the following command line:
|
|
;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
|
|
;; Where foo.c is:
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
|
|
;; for (int i = 0; i < n; i++) {
|
|
;; int t = c[i] + 1;
|
|
;; if (c[i] * a > b)
|
|
;; t = 10;
|
|
;; c[i] = (c[i] + 1) * t;
|
|
;; }
|
|
;;}
|
|
;;
|
|
;;
|
|
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
|
|
;; for (int i = 0; i < n; i++) {
|
|
;; int t = c[i];
|
|
;; if (c[i] * a > b)
|
|
;; t = 10;
|
|
;; c[i] = t;
|
|
;; d[i] /= b;
|
|
;; }
|
|
;;}
|
|
;;
|
|
;;
|
|
;;int MaxIndex(int n, int *a) {
|
|
;; int t = 0;
|
|
;; for (int i = 1; i < n; i++) {
|
|
;; if (a[i] > a[t])
|
|
;; t = i;
|
|
;; }
|
|
;; return t;
|
|
;;}
|
|
;;
|
|
;;
|
|
;;int MaxValue(int n, int *a) {
|
|
;; int t = a[0];
|
|
;; for (int i = 1; i < n; i++) {
|
|
;; if (a[i] > t)
|
|
;; t = a[i];
|
|
;; }
|
|
;; return t;
|
|
;;}
|
|
;;
|
|
;;typedef struct Node Node;
|
|
;;struct Node {
|
|
;; unsigned Val;
|
|
;; Node *Right;
|
|
;; Node *Left;
|
|
;;};
|
|
;;
|
|
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
|
|
;; while (Curr->Val > Next->Val) {
|
|
;; Curr = Next;
|
|
;; if (Mask & (0x1 << Curr->Val))
|
|
;; Next = Curr->Right;
|
|
;; else
|
|
;; Next = Curr->Left;
|
|
;; }
|
|
;; return Curr->Val;
|
|
;;}
|
|
;;
|
|
;;
|
|
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
|
|
;; for (int i = 0; i < n; i++) {
|
|
;; int t = c[i];
|
|
;; if (c[i] * a > b)
|
|
;; t = 10;
|
|
;; c[i] = t;
|
|
;; }
|
|
;;}
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%struct.Node = type { i32, %struct.Node*, %struct.Node* }
|
|
|
|
; CHECK-LABEL: CmovInHotPath
|
|
; CHECK-NOT: cmov
|
|
; CHECK: jg
|
|
|
|
define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
|
|
entry:
|
|
%cmp14 = icmp sgt i32 %n, 0
|
|
br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%add = add nsw i32 %0, 1
|
|
%mul = mul nsw i32 %0, %a
|
|
%cmp3 = icmp sgt i32 %mul, %b
|
|
%. = select i1 %cmp3, i32 10, i32 %add
|
|
%mul7 = mul nsw i32 %., %add
|
|
store i32 %mul7, i32* %arrayidx, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: CmovNotInHotPath
|
|
; CHECK: cmovg
|
|
|
|
define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
|
|
entry:
|
|
%cmp18 = icmp sgt i32 %n, 0
|
|
br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%mul = mul nsw i32 %0, %a
|
|
%cmp3 = icmp sgt i32 %mul, %b
|
|
%. = select i1 %cmp3, i32 10, i32 %0
|
|
store i32 %., i32* %arrayidx, align 4
|
|
%arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
|
|
%1 = load i32, i32* %arrayidx7, align 4
|
|
%div = sdiv i32 %1, %b
|
|
store i32 %div, i32* %arrayidx7, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: MaxIndex
|
|
; CHECK-NOT: cmov
|
|
; CHECK: jg
|
|
|
|
define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
|
|
entry:
|
|
%cmp14 = icmp sgt i32 %n, 1
|
|
br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
%t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
|
|
ret i32 %t.0.lcssa
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
|
|
%t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%idxprom1 = sext i32 %t.015 to i64
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
|
|
%1 = load i32, i32* %arrayidx2, align 4
|
|
%cmp3 = icmp sgt i32 %0, %1
|
|
%2 = trunc i64 %indvars.iv to i32
|
|
%i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: MaxValue
|
|
; CHECK-NOT: jg
|
|
; CHECK: cmovg
|
|
|
|
define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
|
|
entry:
|
|
%0 = load i32, i32* %a, align 4
|
|
%cmp13 = icmp sgt i32 %n, 1
|
|
br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
%t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
|
|
ret i32 %t.0.lcssa
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
|
|
%t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
|
|
%arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
%1 = load i32, i32* %arrayidx1, align 4
|
|
%cmp2 = icmp sgt i32 %1, %t.014
|
|
%.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: BinarySearch
|
|
; CHECK: set
|
|
|
|
define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
|
|
entry:
|
|
%Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
|
|
%0 = load i32, i32* %Val8, align 8
|
|
%Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
|
|
%1 = load i32, i32* %Val19, align 8
|
|
%cmp10 = icmp ugt i32 %0, %1
|
|
br i1 %cmp10, label %while.body, label %while.end
|
|
|
|
while.body: ; preds = %entry, %while.body
|
|
%2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
|
|
%Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
|
|
%shl = shl i32 1, %2
|
|
%and = and i32 %shl, %Mask
|
|
%tobool = icmp eq i32 %and, 0
|
|
%Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
|
|
%Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
|
|
%Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
|
|
%3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
|
|
%Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
|
|
%4 = load i32, i32* %Val1, align 8
|
|
%cmp = icmp ugt i32 %2, %4
|
|
br i1 %cmp, label %while.body, label %while.end
|
|
|
|
while.end: ; preds = %while.body, %entry
|
|
%.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
|
|
ret i32 %.lcssa
|
|
}
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; The following test checks that x86-cmov-converter optimization transforms
|
|
;; CMOV instructions into branch correctly.
|
|
;;
|
|
;; MBB:
|
|
;; cond = cmp ...
|
|
;; v1 = CMOVgt t1, f1, cond
|
|
;; v2 = CMOVle s1, f2, cond
|
|
;;
|
|
;; Where: t1 = 11, f1 = 22, f2 = a
|
|
;;
|
|
;; After CMOV transformation
|
|
;; -------------------------
|
|
;; MBB:
|
|
;; cond = cmp ...
|
|
;; ja %SinkMBB
|
|
;;
|
|
;; FalseMBB:
|
|
;; jmp %SinkMBB
|
|
;;
|
|
;; SinkMBB:
|
|
;; %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
|
|
;; %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
|
|
;; ; true-value with false-value
|
|
;; ; Phi instruction cannot use
|
|
;; ; previous Phi instruction result
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
; CHECK-LABEL: Transform
|
|
; CHECK-NOT: cmov
|
|
; CHECK: divl [[a:%[0-9a-z]*]]
|
|
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
|
|
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
|
|
; CHECK: cmpl [[a]], %edx
|
|
; CHECK: ja [[SinkBB:.*]]
|
|
; CHECK: [[FalseBB:.*]]:
|
|
; CHECK: movl $22, [[s1]]
|
|
; CHECK: movl $22, [[s2]]
|
|
; CHECK: [[SinkBB]]:
|
|
; CHECK: ja
|
|
|
|
define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
|
|
entry:
|
|
%cmp10 = icmp ugt i32 0, %n
|
|
br i1 %cmp10, label %while.body, label %while.end
|
|
|
|
while.body: ; preds = %entry, %while.body
|
|
%i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
|
|
%arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
|
|
%x = load i32, i32* %arr_i, align 4
|
|
%div = udiv i32 %x, %a
|
|
%cond = icmp ugt i32 %div, %a
|
|
%condOpp = icmp ule i32 %div, %a
|
|
%s1 = select i1 %cond, i32 11, i32 22
|
|
%s2 = select i1 %condOpp, i32 %s1, i32 %a
|
|
%sum = urem i32 %s1, %s2
|
|
store i32 %sum, i32* %arr_i, align 4
|
|
%i_inc = add i32 %i, 1
|
|
%cmp = icmp ugt i32 %i_inc, %n
|
|
br i1 %cmp, label %while.body, label %while.end
|
|
|
|
while.end: ; preds = %while.body, %entry
|
|
ret void
|
|
}
|
|
|
|
; Test that we always will convert a cmov with a memory operand into a branch,
|
|
; even outside of a loop.
|
|
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edx, %eax
|
|
; CHECK: cmpl
|
|
%load = load i32, i32* %y
|
|
%z = select i1 %cond, i32 %x, i32 %load
|
|
; CHECK-NOT: cmov
|
|
; CHECK: ja [[FALSE_BB:.*]]
|
|
; CHECK: movl (%rcx), %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
ret i32 %z
|
|
}
|
|
|
|
; Test that we can convert a group of cmovs where only one has a memory
|
|
; operand.
|
|
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_in_group:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edx, %eax
|
|
; CHECK: cmpl
|
|
%y = load i32, i32* %y.ptr
|
|
%z1 = select i1 %cond, i32 %x, i32 %a
|
|
%z2 = select i1 %cond, i32 %x, i32 %y
|
|
%z3 = select i1 %cond, i32 %x, i32 %b
|
|
; CHECK-NOT: cmov
|
|
; CHECK: ja [[FALSE_BB:.*]]
|
|
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
|
|
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
|
|
; CHECK-DAG: movl %{{.*}} %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
; CHECK: addl
|
|
; CHECK-DAG: %[[R1]]
|
|
; CHECK-DAG: ,
|
|
; CHECK-DAG: %eax
|
|
; CHECK-DAG: addl
|
|
; CHECK-DAG: %[[R2]]
|
|
; CHECK-DAG: ,
|
|
; CHECK-DAG: %eax
|
|
; CHECK: retq
|
|
%s1 = add i32 %z1, %z2
|
|
%s2 = add i32 %s1, %z3
|
|
ret i32 %s2
|
|
}
|
|
|
|
; Same as before but with operands reversed in the select with a load.
|
|
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_in_group2:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edx, %eax
|
|
; CHECK: cmpl
|
|
%y = load i32, i32* %y.ptr
|
|
%z2 = select i1 %cond, i32 %a, i32 %x
|
|
%z1 = select i1 %cond, i32 %y, i32 %x
|
|
%z3 = select i1 %cond, i32 %b, i32 %x
|
|
; CHECK-NOT: cmov
|
|
; CHECK: jbe [[FALSE_BB:.*]]
|
|
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
|
|
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
|
|
; CHECK-DAG: movl %{{.*}} %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
; CHECK: addl
|
|
; CHECK-DAG: %[[R1]]
|
|
; CHECK-DAG: ,
|
|
; CHECK-DAG: %eax
|
|
; CHECK-DAG: addl
|
|
; CHECK-DAG: %[[R2]]
|
|
; CHECK-DAG: ,
|
|
; CHECK-DAG: %eax
|
|
; CHECK: retq
|
|
%s1 = add i32 %z1, %z2
|
|
%s2 = add i32 %s1, %z3
|
|
ret i32 %s2
|
|
}
|
|
|
|
; Test that we don't convert a group of cmovs with conflicting directions of
|
|
; loads.
|
|
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: cmpl
|
|
%y1 = load i32, i32* %y1.ptr
|
|
%y2 = load i32, i32* %y2.ptr
|
|
%z1 = select i1 %cond, i32 %x, i32 %y1
|
|
%z2 = select i1 %cond, i32 %y2, i32 %x
|
|
; CHECK: cmoval
|
|
; CHECK: cmoval
|
|
%s1 = add i32 %z1, %z2
|
|
ret i32 %s1
|
|
}
|
|
|
|
; Test that we can convert a group of cmovs where only one has a memory
|
|
; operand and where that memory operand's registers come from a prior cmov in
|
|
; the group.
|
|
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edi, %eax
|
|
; CHECK: cmpl
|
|
%p = select i1 %cond, i32* %x, i32* %y
|
|
%load = load i32, i32* %p
|
|
%z = select i1 %cond, i32 %a, i32 %load
|
|
; CHECK-NOT: cmov
|
|
; CHECK: ja [[FALSE_BB:.*]]
|
|
; CHECK: movl (%r{{..}}), %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
; CHECK: retq
|
|
ret i32 %z
|
|
}
|
|
|
|
; Test that we can convert a group of two cmovs with memory operands where one
|
|
; uses the result of the other as part of the address.
|
|
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edi, %eax
|
|
; CHECK: cmpl
|
|
%load1 = load i32*, i32** %y
|
|
%p = select i1 %cond, i32* %x, i32* %load1
|
|
%load2 = load i32, i32* %p
|
|
%z = select i1 %cond, i32 %a, i32 %load2
|
|
; CHECK-NOT: cmov
|
|
; CHECK: ja [[FALSE_BB:.*]]
|
|
; CHECK: movq (%r{{..}}), %[[R1:.*]]
|
|
; CHECK: movl (%[[R1]]), %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
; CHECK: retq
|
|
ret i32 %z
|
|
}
|
|
|
|
; Test that we can convert a group of cmovs where only one has a memory
|
|
; operand and where that memory operand's registers come from a prior cmov and
|
|
; where that cmov gets *its* input from a prior cmov in the group.
|
|
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
|
|
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
|
|
entry:
|
|
%cond = icmp ugt i32 %a, %b
|
|
; CHECK: movl %edi, %eax
|
|
; CHECK: cmpl
|
|
%p = select i1 %cond, i32* %x, i32* %y
|
|
%p2 = select i1 %cond, i32* %z, i32* %p
|
|
%load = load i32, i32* %p2
|
|
%r = select i1 %cond, i32 %a, i32 %load
|
|
; CHECK-NOT: cmov
|
|
; CHECK: ja [[FALSE_BB:.*]]
|
|
; CHECK: movl (%r{{..}}), %eax
|
|
; CHECK: [[FALSE_BB]]:
|
|
; CHECK: retq
|
|
ret i32 %r
|
|
}
|
|
|
|
@begin = external global i32*
|
|
@end = external global i32*
|
|
|
|
define void @test_memoperand_loop(i32 %data) #0 {
|
|
; CHECK-LABEL: test_memoperand_loop:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movq begin@GOTPCREL(%rip), %r8
|
|
; CHECK-NEXT: movq (%r8), %rax
|
|
; CHECK-NEXT: movq end@GOTPCREL(%rip), %rcx
|
|
; CHECK-NEXT: movq (%rcx), %rdx
|
|
; CHECK-NEXT: xorl %esi, %esi
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
entry:
|
|
%begin = load i32*, i32** @begin, align 8
|
|
%end = load i32*, i32** @end, align 8
|
|
br label %loop.body
|
|
|
|
; CHECK-NEXT: .LBB13_1: # %loop.body
|
|
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: addq $8, %rcx
|
|
; CHECK-NEXT: cmpq %rdx, %rcx
|
|
; CHECK-NEXT: ja .LBB13_3
|
|
; CHECK-NEXT: # %bb.2: # %loop.body
|
|
; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1
|
|
; CHECK-NEXT: movq (%r8), %rcx
|
|
; CHECK-NEXT: .LBB13_3: # %loop.body
|
|
; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1
|
|
; CHECK-NEXT: movl %edi, (%rcx)
|
|
; CHECK-NEXT: addq $8, %rcx
|
|
; CHECK-NEXT: cmpq %rdx, %rcx
|
|
; CHECK-NEXT: ja .LBB13_5
|
|
; CHECK-NEXT: # %bb.4: # %loop.body
|
|
; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1
|
|
; CHECK-NEXT: movq %rax, %rcx
|
|
; CHECK-NEXT: .LBB13_5: # %loop.body
|
|
; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1
|
|
; CHECK-NEXT: movl %edi, (%rcx)
|
|
; CHECK-NEXT: addl $1, %esi
|
|
; CHECK-NEXT: cmpl $1024, %esi # imm = 0x400
|
|
; CHECK-NEXT: jl .LBB13_1
|
|
loop.body:
|
|
%phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
|
|
%phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
|
|
%gep1 = getelementptr inbounds i32, i32 *%phi.ptr, i64 2
|
|
%cmp1 = icmp ugt i32* %gep1, %end
|
|
%begin_dup = load i32*, i32** @begin, align 8
|
|
%dst1 = select i1 %cmp1, i32* %gep1, i32* %begin_dup
|
|
store i32 %data, i32 *%dst1, align 4
|
|
%gep2 = getelementptr inbounds i32, i32 *%dst1, i64 2
|
|
%cmp2 = icmp ugt i32* %gep2, %end
|
|
%dst2 = select i1 %cmp2, i32* %gep2, i32* %begin
|
|
store i32 %data, i32 *%dst2, align 4
|
|
%iv.next = add i32 %phi.iv, 1
|
|
%cond = icmp slt i32 %iv.next, 1024
|
|
br i1 %cond, label %loop.body, label %exit
|
|
|
|
; CHECK-NEXT: # %bb.6: # %exit
|
|
; CHECK-NEXT: retq
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = {"target-cpu"="x86-64"}
|