forked from OSchip/llvm-project
637 lines
18 KiB
LLVM
637 lines
18 KiB
LLVM
; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
|
|
; RUN: --check-prefix=CHECK --check-prefix=CHECK-O2 %s
|
|
; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
|
|
; RUN: --check-prefix=CHECK --check-prefix=CHECK-O3 %s
|
|
target datalayout = "e-m:e-i64:64-n32:64"
|
|
target triple = "powerpc64le-grtev4-linux-gnu"
|
|
|
|
; Intended layout:
|
|
; The chain-based outlining produces the layout
|
|
; test1
|
|
; test2
|
|
; test3
|
|
; test4
|
|
; optional1
|
|
; optional2
|
|
; optional3
|
|
; optional4
|
|
; exit
|
|
; Tail duplication puts test n+1 at the end of optional n
|
|
; so optional1 includes a copy of test2 at the end, and branches
|
|
; to test3 (at the top) or falls through to optional 2.
|
|
; The CHECK statements check for the whole string of tests
|
|
; and then check that the correct test has been duplicated into the end of
|
|
; the optional blocks and that the optional blocks are in the correct order.
|
|
;CHECK-LABEL: straight_test:
|
|
; test1 may have been merged with entry
|
|
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
|
;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1
|
|
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: # %test2
|
|
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
|
|
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
|
|
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
|
|
;CHECK: blr
|
|
;CHECK-NEXT: .[[OPT1LABEL]]:
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
|
|
;CHECK-NEXT: .[[OPT2LABEL]]:
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
|
|
;CHECK-NEXT: .[[OPT3LABEL]]:
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
|
|
;CHECK-NEXT: .[[OPT4LABEL]]:
|
|
;CHECK: b .[[EXITLABEL]]
|
|
|
|
define void @straight_test(i32 %tag) {
|
|
entry:
|
|
br label %test1
|
|
test1:
|
|
%tagbit1 = and i32 %tag, 1
|
|
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
|
br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
|
|
optional1:
|
|
call void @a()
|
|
call void @a()
|
|
call void @a()
|
|
call void @a()
|
|
br label %test2
|
|
test2:
|
|
%tagbit2 = and i32 %tag, 2
|
|
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
|
br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
|
|
optional2:
|
|
call void @b()
|
|
call void @b()
|
|
call void @b()
|
|
call void @b()
|
|
br label %test3
|
|
test3:
|
|
%tagbit3 = and i32 %tag, 4
|
|
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
|
br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
|
|
optional3:
|
|
call void @c()
|
|
call void @c()
|
|
call void @c()
|
|
call void @c()
|
|
br label %test4
|
|
test4:
|
|
%tagbit4 = and i32 %tag, 8
|
|
%tagbit4eq0 = icmp eq i32 %tagbit4, 0
|
|
br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
|
|
optional4:
|
|
call void @d()
|
|
call void @d()
|
|
call void @d()
|
|
call void @d()
|
|
br label %exit
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Intended layout:
|
|
; The chain-of-triangles based duplicating produces the layout
|
|
; test1
|
|
; test2
|
|
; test3
|
|
; optional1
|
|
; optional2
|
|
; optional3
|
|
; exit
|
|
; even for 50/50 branches.
|
|
; Tail duplication puts test n+1 at the end of optional n
|
|
; so optional1 includes a copy of test2 at the end, and branches
|
|
; to test3 (at the top) or falls through to optional 2.
|
|
; The CHECK statements check for the whole string of tests
|
|
; and then check that the correct test has been duplicated into the end of
|
|
; the optional blocks and that the optional blocks are in the correct order.
|
|
;CHECK-LABEL: straight_test_50:
|
|
; test1 may have been merged with entry
|
|
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
|
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
|
|
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: # %test2
|
|
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
|
|
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
|
|
;CHECK: blr
|
|
;CHECK-NEXT: .[[OPT1LABEL]]:
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
|
|
;CHECK-NEXT: .[[OPT2LABEL]]:
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
|
|
;CHECK-NEXT: .[[OPT3LABEL]]:
|
|
;CHECK: b .[[EXITLABEL]]
|
|
|
|
define void @straight_test_50(i32 %tag) {
|
|
entry:
|
|
br label %test1
|
|
test1:
|
|
%tagbit1 = and i32 %tag, 1
|
|
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
|
br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
|
|
optional1:
|
|
call void @a()
|
|
br label %test2
|
|
test2:
|
|
%tagbit2 = and i32 %tag, 2
|
|
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
|
br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
|
|
optional2:
|
|
call void @b()
|
|
br label %test3
|
|
test3:
|
|
%tagbit3 = and i32 %tag, 4
|
|
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
|
br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
|
|
optional3:
|
|
call void @c()
|
|
br label %exit
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Intended layout:
|
|
; The chain-of-triangles based duplicating produces the layout when 3
|
|
; instructions are allowed for tail-duplication.
|
|
; test1
|
|
; test2
|
|
; test3
|
|
; optional1
|
|
; optional2
|
|
; optional3
|
|
; exit
|
|
;
|
|
; Otherwise it produces the layout:
|
|
; test1
|
|
; optional1
|
|
; test2
|
|
; optional2
|
|
; test3
|
|
; optional3
|
|
; exit
|
|
|
|
;CHECK-LABEL: straight_test_3_instr_test:
|
|
; test1 may have been merged with entry
|
|
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
|
;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
|
|
;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
|
|
|
|
;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O3-NEXT: # %test2
|
|
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
|
|
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
|
|
;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
|
|
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
|
|
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
|
|
;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
|
|
;CHECK-O3: blr
|
|
;CHECK-O3-NEXT: .[[OPT1LABEL]]:
|
|
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
|
|
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
|
|
;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
|
|
;CHECK-O3-NEXT: .[[OPT2LABEL]]:
|
|
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
|
|
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
|
|
;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
|
|
;CHECK-O3-NEXT: .[[OPT3LABEL]]:
|
|
;CHECK-O3: b .[[EXITLABEL]]
|
|
|
|
;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O2-NEXT: # %optional1
|
|
;CHECK-O2: .[[TEST2LABEL]]: # %test2
|
|
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
|
|
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
|
|
;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O2-NEXT: # %optional2
|
|
;CHECK-O2: .[[TEST3LABEL]]: # %test3
|
|
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
|
|
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
|
|
;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
|
|
;CHECK-O2-NEXT: # %optional3
|
|
;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
|
|
;CHECK-O2: blr
|
|
|
|
|
|
define void @straight_test_3_instr_test(i32 %tag) {
|
|
entry:
|
|
br label %test1
|
|
test1:
|
|
%tagbit1 = and i32 %tag, 3
|
|
%tagbit1eq0 = icmp eq i32 %tagbit1, 2
|
|
br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
|
|
optional1:
|
|
call void @a()
|
|
br label %test2
|
|
test2:
|
|
%tagbit2 = and i32 %tag, 12
|
|
%tagbit2eq0 = icmp eq i32 %tagbit2, 8
|
|
br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
|
|
optional2:
|
|
call void @b()
|
|
br label %test3
|
|
test3:
|
|
%tagbit3 = and i32 %tag, 48
|
|
%tagbit3eq0 = icmp eq i32 %tagbit3, 32
|
|
br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
|
|
optional3:
|
|
call void @c()
|
|
br label %exit
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Intended layout:
|
|
; The chain-based outlining produces the layout
|
|
; entry
|
|
; --- Begin loop ---
|
|
; for.latch
|
|
; for.check
|
|
; test1
|
|
; test2
|
|
; test3
|
|
; test4
|
|
; optional1
|
|
; optional2
|
|
; optional3
|
|
; optional4
|
|
; --- End loop ---
|
|
; exit
|
|
; The CHECK statements check for the whole string of tests and exit block,
|
|
; and then check that the correct test has been duplicated into the end of
|
|
; the optional blocks and that the optional blocks are in the correct order.
|
|
;CHECK-LABEL: loop_test:
|
|
;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
|
|
;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
|
|
;CHECK: addi
|
|
;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
|
|
;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
|
|
;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
|
|
;CHECK: # %bb.{{[0-9]+}}: # %test1
|
|
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
|
|
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
|
|
;CHECK-NEXT: # %test2
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
|
|
;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
|
|
;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
|
|
;CHECK: [[OPT1LABEL]]
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
|
|
;CHECK-NEXT: .[[OPT2LABEL]]
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
|
|
;CHECK-NEXT: .[[OPT3LABEL]]
|
|
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
|
|
;CHECK: [[OPT4LABEL]]:
|
|
;CHECK: b .[[LATCHLABEL]]
|
|
define void @loop_test(i32* %tags, i32 %count) {
|
|
entry:
|
|
br label %for.check
|
|
for.check:
|
|
%count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
|
|
%done.count = icmp ugt i32 %count.loop, 0
|
|
%tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
|
|
%tag = load i32, i32* %tag_ptr
|
|
%done.tag = icmp eq i32 %tag, 0
|
|
%done = and i1 %done.count, %done.tag
|
|
br i1 %done, label %test1, label %exit, !prof !1
|
|
test1:
|
|
%tagbit1 = and i32 %tag, 1
|
|
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
|
br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
|
|
optional1:
|
|
call void @a()
|
|
call void @a()
|
|
call void @a()
|
|
call void @a()
|
|
br label %test2
|
|
test2:
|
|
%tagbit2 = and i32 %tag, 2
|
|
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
|
br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
|
|
optional2:
|
|
call void @b()
|
|
call void @b()
|
|
call void @b()
|
|
call void @b()
|
|
br label %test3
|
|
test3:
|
|
%tagbit3 = and i32 %tag, 4
|
|
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
|
br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
|
|
optional3:
|
|
call void @c()
|
|
call void @c()
|
|
call void @c()
|
|
call void @c()
|
|
br label %test4
|
|
test4:
|
|
%tagbit4 = and i32 %tag, 8
|
|
%tagbit4eq0 = icmp eq i32 %tagbit4, 0
|
|
br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
|
|
optional4:
|
|
call void @d()
|
|
call void @d()
|
|
call void @d()
|
|
call void @d()
|
|
br label %for.latch
|
|
for.latch:
|
|
%count.sub = sub i32 %count.loop, 1
|
|
br label %for.check
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; The block then2 is not unavoidable, meaning it does not dominate the exit.
|
|
; But since it can be tail-duplicated, it should be placed as a fallthrough from
|
|
; test2 and copied. The purpose here is to make sure that the tail-duplication
|
|
; code is independent of the outlining code, which works by choosing the
|
|
; "unavoidable" blocks.
|
|
; CHECK-LABEL: avoidable_test:
|
|
; CHECK: # %bb.{{[0-9]+}}: # %entry
|
|
; CHECK: andi.
|
|
; CHECK: # %bb.{{[0-9]+}}: # %test2
|
|
; Make sure then2 falls through from test2
|
|
; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
|
|
; CHECK: # %bb.{{[0-9]+}}: # %then2
|
|
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
|
|
; CHECK: # %else1
|
|
; CHECK: bl a
|
|
; CHECK: bl a
|
|
; Make sure then2 was copied into else1
|
|
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
|
|
; CHECK: # %end1
|
|
; CHECK: bl d
|
|
; CHECK: # %else2
|
|
; CHECK: bl c
|
|
; CHECK: # %end2
|
|
define void @avoidable_test(i32 %tag) {
|
|
entry:
|
|
br label %test1
|
|
test1:
|
|
%tagbit1 = and i32 %tag, 1
|
|
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
|
br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
|
|
else1:
|
|
call void @a()
|
|
call void @a()
|
|
br label %then2
|
|
test2:
|
|
%tagbit2 = and i32 %tag, 2
|
|
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
|
br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
|
|
then2:
|
|
%tagbit3 = and i32 %tag, 4
|
|
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
|
br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
|
|
else2:
|
|
call void @c()
|
|
br label %end2
|
|
end2:
|
|
ret void
|
|
end1:
|
|
call void @d()
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: trellis_test
|
|
; The number in the block labels is the expected block frequency given the
|
|
; probabilities annotated. There is a conflict in the b;c->d;e trellis that
|
|
; should be resolved as c->e;b->d.
|
|
; The d;e->f;g trellis should be resolved as e->g;d->f.
|
|
; The f;g->h;i trellis should be resolved as f->i;g->h.
|
|
; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
|
|
; h->j->ret
|
|
; CHECK: # %bb.{{[0-9]+}}: # %entry
|
|
; CHECK: # %bb.{{[0-9]+}}: # %c10
|
|
; CHECK: # %e9
|
|
; CHECK: # %g10
|
|
; CHECK: # %h10
|
|
; CHECK: # %j8
|
|
; CHECK: # %ret
|
|
; CHECK: # %b6
|
|
; CHECK: # %d7
|
|
; CHECK: # %f6
|
|
; CHECK: # %i6
|
|
define void @trellis_test(i32 %tag) {
|
|
entry:
|
|
br label %a16
|
|
a16:
|
|
call void @a()
|
|
call void @a()
|
|
%tagbits.a = and i32 %tag, 3
|
|
%tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
|
|
br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
|
|
c10:
|
|
call void @c()
|
|
call void @c()
|
|
%tagbits.c = and i32 %tag, 12
|
|
%tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
|
|
; Both of these edges should be hotter than the other incoming edge
|
|
; for e9 or d7
|
|
br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
|
|
e9:
|
|
call void @e()
|
|
call void @e()
|
|
%tagbits.e = and i32 %tag, 48
|
|
%tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
|
|
br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
|
|
g10:
|
|
call void @g()
|
|
call void @g()
|
|
%tagbits.g = and i32 %tag, 192
|
|
%tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
|
|
br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
|
|
i6:
|
|
call void @i()
|
|
call void @i()
|
|
%tagbits.i = and i32 %tag, 768
|
|
%tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
|
|
br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
|
|
b6:
|
|
call void @b()
|
|
call void @b()
|
|
%tagbits.b = and i32 %tag, 12
|
|
%tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
|
|
br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
|
|
d7:
|
|
call void @d()
|
|
call void @d()
|
|
%tagbits.d = and i32 %tag, 48
|
|
%tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
|
|
br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
|
|
f6:
|
|
call void @f()
|
|
call void @f()
|
|
%tagbits.f = and i32 %tag, 192
|
|
%tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
|
|
br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
|
|
h10:
|
|
call void @h()
|
|
call void @h()
|
|
%tagbits.h = and i32 %tag, 768
|
|
%tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
|
|
br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
|
|
j8:
|
|
call void @j()
|
|
call void @j()
|
|
br label %ret
|
|
ret:
|
|
ret void
|
|
}
|
|
|
|
; Verify that we still consider tail-duplication opportunities if we find a
|
|
; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
|
|
; of both F and G. The basic trellis algorithm picks the F->G edge, but after
|
|
; checking, it's profitable to duplicate G into F. The weights here are not
|
|
; really important. They are there to help make the test stable.
|
|
; CHECK-LABEL: trellis_then_dup_test
|
|
; CHECK: # %bb.{{[0-9]+}}: # %entry
|
|
; CHECK: # %bb.{{[0-9]+}}: # %b
|
|
; CHECK: # %d
|
|
; CHECK: # %g
|
|
; CHECK: # %ret1
|
|
; CHECK: # %c
|
|
; CHECK: # %e
|
|
; CHECK: # %f
|
|
; CHECK: # %ret2
|
|
; CHECK: # %ret
|
|
define void @trellis_then_dup_test(i32 %tag) {
|
|
entry:
|
|
br label %a
|
|
a:
|
|
call void @a()
|
|
call void @a()
|
|
%tagbits.a = and i32 %tag, 3
|
|
%tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
|
|
br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
|
|
b:
|
|
call void @b()
|
|
call void @b()
|
|
%tagbits.b = and i32 %tag, 12
|
|
%tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
|
|
br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
|
|
d:
|
|
call void @d()
|
|
call void @d()
|
|
%tagbits.d = and i32 %tag, 48
|
|
%tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
|
|
br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
|
|
f:
|
|
call void @f()
|
|
call void @f()
|
|
br label %g
|
|
g:
|
|
%tagbits.g = and i32 %tag, 192
|
|
%tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
|
|
br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
|
|
c:
|
|
call void @c()
|
|
call void @c()
|
|
%tagbits.c = and i32 %tag, 12
|
|
%tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
|
|
br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
|
|
e:
|
|
call void @e()
|
|
call void @e()
|
|
%tagbits.e = and i32 %tag, 48
|
|
%tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
|
|
br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
|
|
ret1:
|
|
call void @a()
|
|
br label %ret
|
|
ret2:
|
|
call void @b()
|
|
br label %ret
|
|
ret:
|
|
ret void
|
|
}
|
|
|
|
; Verify that we did not mis-identify triangle trellises if it is not
|
|
; really a triangle.
|
|
; CHECK-LABEL: trellis_no_triangle
|
|
; CHECK: # %bb.{{[0-9]+}}: # %entry
|
|
; CHECK: # %bb.{{[0-9]+}}: # %b
|
|
; CHECK: # %d
|
|
; CHECK: # %ret
|
|
; CHECK: # %c
|
|
; CHECK: # %e
|
|
define void @trellis_no_triangle(i32 %tag) {
|
|
entry:
|
|
br label %a
|
|
a:
|
|
call void @a()
|
|
call void @a()
|
|
%tagbits.a = and i32 %tag, 3
|
|
%tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
|
|
br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
|
|
b:
|
|
call void @b()
|
|
call void @b()
|
|
%tagbits.b = and i32 %tag, 12
|
|
%tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
|
|
br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
|
|
d:
|
|
call void @d()
|
|
call void @d()
|
|
%tagbits.d = and i32 %tag, 48
|
|
%tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
|
|
br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
|
|
c:
|
|
call void @c()
|
|
call void @c()
|
|
%tagbits.c = and i32 %tag, 12
|
|
%tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
|
|
br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
|
|
e:
|
|
call void @e()
|
|
call void @e()
|
|
br label %ret
|
|
ret:
|
|
call void @f()
|
|
ret void
|
|
}
|
|
|
|
declare void @a()
|
|
declare void @b()
|
|
declare void @c()
|
|
declare void @d()
|
|
declare void @e()
|
|
declare void @f()
|
|
declare void @g()
|
|
declare void @h()
|
|
declare void @i()
|
|
declare void @j()
|
|
|
|
!1 = !{!"branch_weights", i32 5, i32 3}
|
|
!2 = !{!"branch_weights", i32 50, i32 50}
|
|
!3 = !{!"branch_weights", i32 6, i32 4}
|
|
!4 = !{!"branch_weights", i32 7, i32 2}
|
|
!5 = !{!"branch_weights", i32 2, i32 8}
|
|
!6 = !{!"branch_weights", i32 3, i32 4}
|
|
!7 = !{!"branch_weights", i32 4, i32 2}
|
|
!8 = !{!"branch_weights", i32 98, i32 2}
|
|
!9 = !{!"branch_weights", i32 97, i32 1}
|
|
!10 = !{!"branch_weights", i32 96, i32 2}
|