llvm-project/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll

; RUN: llc < %s -march=x86-64 -relocation-model=pic | FileCheck %s
; This test ensures that the TwoAddressInstruction pass chooses the proper operands to
; merge and therefore generates fewer mov instructions.

; Loop bound: loaded once in the entry block of @foo and @goo and compared
; against the incremented index on every iteration.
@M = common global i32 0, align 4
; Running sum: loaded before the loop and stored back after it in both functions.
@total = common global i32 0, align 4
; Target of the volatile store executed on every iteration of @goo's loop.
@g = common global i32 0, align 4

; Function Attrs: nounwind uwtable
; NOTE(review): the "Function Attrs" line above looks stale -- the define below
; carries no attribute group.
;
; @foo adds i/2 to a running sum for i = 0 .. M-1 (signed) and writes the sum
; back to @total once after the loop. The loop is skipped when M <= 0.
define void @foo() {
entry:
  ; Guard: only enter the loop when the loaded bound is positive.
  %0 = load i32, i32* @M, align 4
  %cmp3 = icmp sgt i32 %0, 0
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; @total is read once here; inside the loop the sum lives only in SSA values.
  %total.promoted = load i32, i32* @total, align 4
  br label %for.body

; Check that only one mov will be generated in the kernel loop.
; The directives below first match the label of the block annotated %for.body
; (captured as LOOP1). Between that label and the back-edge "jl" they accept
; exactly one movl: the one whose destination register (captured as REG1) is
; subsequently the operand of "shrl $31". Any other instruction containing
; "mov" before, between, or after those two matches fails the test.
; CHECK-LABEL: foo:
; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]
; CHECK-NOT: mov
; CHECK: shrl $31, [[REG1]]
; CHECK-NOT: mov
; CHECK: jl [[LOOP1]]
for.body:                                         ; preds = %for.body.lr.ph, %for.body
  ; %add5 is the running sum and %i.04 the induction variable; both are fed
  ; from the preheader on entry and from this block on the back edge.
  %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
  %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %div = sdiv i32 %i.04, 2
  ; Reduction add: its result feeds %add5 on the next iteration. Presumably
  ; this is the two-address instruction whose operand choice the test is
  ; about -- confirm against the pass change this test accompanies.
  %add = add nsw i32 %div, %add5
  %inc = add nuw nsw i32 %i.04, 1
  ; Continue while the incremented index is still below the bound loaded in entry.
  %cmp = icmp slt i32 %inc, %0
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  ; Single write-back of the final sum after the loop.
  store i32 %add, i32* @total, align 4
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  ret void
}

; Function Attrs: nounwind uwtable
; NOTE(review): the "Function Attrs" line above looks stale -- the define below
; carries no attribute group.
;
; @goo runs the same reduction loop as @foo (sum of i/2 for i = 0 .. M-1,
; written back to @total after the loop) but additionally performs a volatile
; store of the running sum to @g on every iteration.
define void @goo() {
entry:
  ; Guard: only enter the loop when the loaded bound is positive.
  %0 = load i32, i32* @M, align 4
  %cmp3 = icmp sgt i32 %0, 0
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; @total is read once here; inside the loop the sum lives only in SSA values.
  %total.promoted = load i32, i32* @total, align 4
  br label %for.body

; Check that only two movs are generated in the kernel loop.
; The directives below match the label of the block annotated %for.body
; (captured as LOOP2), then a movl into a register (captured as REG2) that is
; subsequently the operand of "shrl $31", then one more movl, then the
; back-edge "jl". No other instruction containing "mov" may appear before the
; first movl, between it and the shrl, or between the shrl and the second movl.
; The second movl is presumably the volatile store to @g -- TODO confirm
; against actual llc output.
; NOTE(review): nothing forbids further mov instructions between the second
; movl and the jl, so these directives enforce "at least two" rather than
; strictly "only two".
; CHECK-LABEL: goo:
; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]
; CHECK-NOT: mov
; CHECK: shrl $31, [[REG2]]
; CHECK-NOT: mov
; CHECK: movl {{.*}}
; CHECK: jl [[LOOP2]]
for.body:                                         ; preds = %for.body.lr.ph, %for.body
  ; %add5 is the running sum and %i.04 the induction variable; both are fed
  ; from the preheader on entry and from this block on the back edge.
  %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
  %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %div = sdiv i32 %i.04, 2
  ; Reduction add: its result feeds %add5 on the next iteration and is also
  ; the value stored to @g below.
  %add = add nsw i32 %div, %add5
  ; Volatile, so this store must be emitted on every iteration.
  store volatile i32 %add, i32* @g, align 4
  %inc = add nuw nsw i32 %i.04, 1
  ; Continue while the incremented index is still below the bound loaded in entry.
  %cmp = icmp slt i32 %inc, %0
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  ; Single write-back of the final sum after the loop.
  store i32 %add, i32* @total, align 4
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  ret void
}
Update twoaddr-coalesce-3.ll to run on darwin and linux machines: a) Default relocation model differences, b) Different numbers of # in comments llvm-svn: 231178 2015-03-04 07:56:20 +08:00			`; RUN: llc < %s -march=x86-64 -relocation-model=pic \| FileCheck %s`
Fix a problem where the TwoAddressInstructionPass generates redundant register moves in a loop. From: int M, total; void foo() { int i; for (i = 0; i < M; i++) { total = total + i / 2; } } This is the kernel loop: .LBB0_2: # %for.body =>This Inner Loop Header: Depth=1 movl %edx, %esi movl %ecx, %edx shrl $31, %edx addl %ecx, %edx sarl %edx addl %esi, %edx incl %ecx cmpl %eax, %ecx jl .LBB0_2 -------------------------- The first mov insn "movl %edx, %esi" could be removed if we change "addl %esi, %edx" to "addl %edx, %esi". The IR before TwoAddressInstructionPass is: BB#2: derived from LLVM BB %for.body Predecessors according to CFG: BB#1 BB#2 %vreg3<def> = COPY %vreg12<kill>; GR32:%vreg3,%vreg12 %vreg2<def> = COPY %vreg11<kill>; GR32:%vreg2,%vreg11 %vreg7<def,tied1> = SHR32ri %vreg3<tied0>, 31, %EFLAGS<imp-def,dead>; GR32:%vreg7,%vreg3 %vreg8<def,tied1> = ADD32rr %vreg3<tied0>, %vreg7<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg8,%vreg3,%vreg7 %vreg9<def,tied1> = SAR32r1 %vreg8<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg8 %vreg4<def,tied1> = ADD32rr %vreg9<kill,tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg4,%vreg9,%vreg2 %vreg5<def,tied1> = INC64_32r %vreg3<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg5,%vreg3 CMP32rr %vreg5, %vreg0, %EFLAGS<imp-def>; GR32:%vreg5,%vreg0 %vreg11<def> = COPY %vreg4; GR32:%vreg11,%vreg4 %vreg12<def> = COPY %vreg5<kill>; GR32:%vreg12,%vreg5 JL_4 <BB#2>, %EFLAGS<imp-use,kill> Now TwoAddressInstructionPass will choose vreg9 to be tied with vreg4. However, it doesn't see that there is a copy from vreg4 to vreg11 and another copy from vreg11 to vreg2 inside the loop body. To remove those copies, it is necessary to choose vreg2 to be tied with vreg4 instead of vreg9. This code pattern commonly appears when there is a reduction operation in a loop. So check for a reversed copy chain and, if we encounter one, commute the add instruction so that we can avoid a copy. Patch by Wei Mi. 
http://reviews.llvm.org/D7806 llvm-svn: 231148 2015-03-04 06:03:03 +08:00			`; This test is to ensure the TwoAddrInstruction pass chooses the proper operands to`
			`; merge and generates fewer mov insns.`

			`@M = common global i32 0, align 4`
			`@total = common global i32 0, align 4`
			`@g = common global i32 0, align 4`

			`; Function Attrs: nounwind uwtable`
			`define void @foo() {`
			`entry:`
			`%0 = load i32, i32* @M, align 4`
			`%cmp3 = icmp sgt i32 %0, 0`
			`br i1 %cmp3, label %for.body.lr.ph, label %for.end`

			`for.body.lr.ph: ; preds = %entry`
			`%total.promoted = load i32, i32* @total, align 4`
			`br label %for.body`

			`; Check that only one mov will be generated in the kernel loop.`
			`; CHECK-LABEL: foo:`
Update twoaddr-coalesce-3.ll to run on darwin and linux machines: a) Default relocation model differences, b) Different numbers of # in comments llvm-svn: 231178 2015-03-04 07:56:20 +08:00			`; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body`
Fix a problem where the TwoAddressInstructionPass generates redundant register moves in a loop. From: int M, total; void foo() { int i; for (i = 0; i < M; i++) { total = total + i / 2; } } This is the kernel loop: .LBB0_2: # %for.body =>This Inner Loop Header: Depth=1 movl %edx, %esi movl %ecx, %edx shrl $31, %edx addl %ecx, %edx sarl %edx addl %esi, %edx incl %ecx cmpl %eax, %ecx jl .LBB0_2 -------------------------- The first mov insn "movl %edx, %esi" could be removed if we change "addl %esi, %edx" to "addl %edx, %esi". The IR before TwoAddressInstructionPass is: BB#2: derived from LLVM BB %for.body Predecessors according to CFG: BB#1 BB#2 %vreg3<def> = COPY %vreg12<kill>; GR32:%vreg3,%vreg12 %vreg2<def> = COPY %vreg11<kill>; GR32:%vreg2,%vreg11 %vreg7<def,tied1> = SHR32ri %vreg3<tied0>, 31, %EFLAGS<imp-def,dead>; GR32:%vreg7,%vreg3 %vreg8<def,tied1> = ADD32rr %vreg3<tied0>, %vreg7<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg8,%vreg3,%vreg7 %vreg9<def,tied1> = SAR32r1 %vreg8<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg8 %vreg4<def,tied1> = ADD32rr %vreg9<kill,tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg4,%vreg9,%vreg2 %vreg5<def,tied1> = INC64_32r %vreg3<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg5,%vreg3 CMP32rr %vreg5, %vreg0, %EFLAGS<imp-def>; GR32:%vreg5,%vreg0 %vreg11<def> = COPY %vreg4; GR32:%vreg11,%vreg4 %vreg12<def> = COPY %vreg5<kill>; GR32:%vreg12,%vreg5 JL_4 <BB#2>, %EFLAGS<imp-use,kill> Now TwoAddressInstructionPass will choose vreg9 to be tied with vreg4. However, it doesn't see that there is a copy from vreg4 to vreg11 and another copy from vreg11 to vreg2 inside the loop body. To remove those copies, it is necessary to choose vreg2 to be tied with vreg4 instead of vreg9. This code pattern commonly appears when there is a reduction operation in a loop. So check for a reversed copy chain and, if we encounter one, commute the add instruction so that we can avoid a copy. Patch by Wei Mi. 
http://reviews.llvm.org/D7806 llvm-svn: 231148 2015-03-04 06:03:03 +08:00			`; CHECK-NOT: mov`
			`; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]`
			`; CHECK-NOT: mov`
			`; CHECK: shrl $31, [[REG1]]`
			`; CHECK-NOT: mov`
			`; CHECK: jl [[LOOP1]]`
			`for.body: ; preds = %for.body.lr.ph, %for.body`
			`%add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]`
			`%i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]`
			`%div = sdiv i32 %i.04, 2`
			`%add = add nsw i32 %div, %add5`
			`%inc = add nuw nsw i32 %i.04, 1`
			`%cmp = icmp slt i32 %inc, %0`
			`br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge`

			`for.cond.for.end_crit_edge: ; preds = %for.body`
			`store i32 %add, i32* @total, align 4`
			`br label %for.end`

			`for.end: ; preds = %for.cond.for.end_crit_edge, %entry`
			`ret void`
			`}`

			`; Function Attrs: nounwind uwtable`
			`define void @goo() {`
			`entry:`
			`%0 = load i32, i32* @M, align 4`
			`%cmp3 = icmp sgt i32 %0, 0`
			`br i1 %cmp3, label %for.body.lr.ph, label %for.end`

			`for.body.lr.ph: ; preds = %entry`
			`%total.promoted = load i32, i32* @total, align 4`
			`br label %for.body`

			`; Check that only two mov will be generated in the kernel loop.`
			`; CHECK-LABEL: goo:`
Update twoaddr-coalesce-3.ll to run on darwin and linux machines: a) Default relocation model differences, b) Different numbers of # in comments llvm-svn: 231178 2015-03-04 07:56:20 +08:00			`; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body`
Fix a problem where the TwoAddressInstructionPass generates redundant register moves in a loop. From: int M, total; void foo() { int i; for (i = 0; i < M; i++) { total = total + i / 2; } } This is the kernel loop: .LBB0_2: # %for.body =>This Inner Loop Header: Depth=1 movl %edx, %esi movl %ecx, %edx shrl $31, %edx addl %ecx, %edx sarl %edx addl %esi, %edx incl %ecx cmpl %eax, %ecx jl .LBB0_2 -------------------------- The first mov insn "movl %edx, %esi" could be removed if we change "addl %esi, %edx" to "addl %edx, %esi". The IR before TwoAddressInstructionPass is: BB#2: derived from LLVM BB %for.body Predecessors according to CFG: BB#1 BB#2 %vreg3<def> = COPY %vreg12<kill>; GR32:%vreg3,%vreg12 %vreg2<def> = COPY %vreg11<kill>; GR32:%vreg2,%vreg11 %vreg7<def,tied1> = SHR32ri %vreg3<tied0>, 31, %EFLAGS<imp-def,dead>; GR32:%vreg7,%vreg3 %vreg8<def,tied1> = ADD32rr %vreg3<tied0>, %vreg7<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg8,%vreg3,%vreg7 %vreg9<def,tied1> = SAR32r1 %vreg8<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg8 %vreg4<def,tied1> = ADD32rr %vreg9<kill,tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg4,%vreg9,%vreg2 %vreg5<def,tied1> = INC64_32r %vreg3<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg5,%vreg3 CMP32rr %vreg5, %vreg0, %EFLAGS<imp-def>; GR32:%vreg5,%vreg0 %vreg11<def> = COPY %vreg4; GR32:%vreg11,%vreg4 %vreg12<def> = COPY %vreg5<kill>; GR32:%vreg12,%vreg5 JL_4 <BB#2>, %EFLAGS<imp-use,kill> Now TwoAddressInstructionPass will choose vreg9 to be tied with vreg4. However, it doesn't see that there is a copy from vreg4 to vreg11 and another copy from vreg11 to vreg2 inside the loop body. To remove those copies, it is necessary to choose vreg2 to be tied with vreg4 instead of vreg9. This code pattern commonly appears when there is a reduction operation in a loop. So check for a reversed copy chain and, if we encounter one, commute the add instruction so that we can avoid a copy. Patch by Wei Mi. 
http://reviews.llvm.org/D7806 llvm-svn: 231148 2015-03-04 06:03:03 +08:00			`; CHECK-NOT: mov`
			`; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]`
			`; CHECK-NOT: mov`
			`; CHECK: shrl $31, [[REG2]]`
			`; CHECK-NOT: mov`
Weaken the check for a specific movl on the twoaddr-coalesce-3 test - we only care that there are two moves in the loop and not which part is relative to which register anyhow. llvm-svn: 231191 2015-03-04 09:19:17 +08:00			`; CHECK: movl {{.*}}`
Fix a problem where the TwoAddressInstructionPass generates redundant register moves in a loop. From: int M, total; void foo() { int i; for (i = 0; i < M; i++) { total = total + i / 2; } } This is the kernel loop: .LBB0_2: # %for.body =>This Inner Loop Header: Depth=1 movl %edx, %esi movl %ecx, %edx shrl $31, %edx addl %ecx, %edx sarl %edx addl %esi, %edx incl %ecx cmpl %eax, %ecx jl .LBB0_2 -------------------------- The first mov insn "movl %edx, %esi" could be removed if we change "addl %esi, %edx" to "addl %edx, %esi". The IR before TwoAddressInstructionPass is: BB#2: derived from LLVM BB %for.body Predecessors according to CFG: BB#1 BB#2 %vreg3<def> = COPY %vreg12<kill>; GR32:%vreg3,%vreg12 %vreg2<def> = COPY %vreg11<kill>; GR32:%vreg2,%vreg11 %vreg7<def,tied1> = SHR32ri %vreg3<tied0>, 31, %EFLAGS<imp-def,dead>; GR32:%vreg7,%vreg3 %vreg8<def,tied1> = ADD32rr %vreg3<tied0>, %vreg7<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg8,%vreg3,%vreg7 %vreg9<def,tied1> = SAR32r1 %vreg8<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg8 %vreg4<def,tied1> = ADD32rr %vreg9<kill,tied0>, %vreg2<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg4,%vreg9,%vreg2 %vreg5<def,tied1> = INC64_32r %vreg3<kill,tied0>, %EFLAGS<imp-def,dead>; GR32:%vreg5,%vreg3 CMP32rr %vreg5, %vreg0, %EFLAGS<imp-def>; GR32:%vreg5,%vreg0 %vreg11<def> = COPY %vreg4; GR32:%vreg11,%vreg4 %vreg12<def> = COPY %vreg5<kill>; GR32:%vreg12,%vreg5 JL_4 <BB#2>, %EFLAGS<imp-use,kill> Now TwoAddressInstructionPass will choose vreg9 to be tied with vreg4. However, it doesn't see that there is a copy from vreg4 to vreg11 and another copy from vreg11 to vreg2 inside the loop body. To remove those copies, it is necessary to choose vreg2 to be tied with vreg4 instead of vreg9. This code pattern commonly appears when there is a reduction operation in a loop. So check for a reversed copy chain and, if we encounter one, commute the add instruction so that we can avoid a copy. Patch by Wei Mi. 
http://reviews.llvm.org/D7806 llvm-svn: 231148 2015-03-04 06:03:03 +08:00			`; CHECK: jl [[LOOP2]]`
			`for.body: ; preds = %for.body.lr.ph, %for.body`
			`%add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]`
			`%i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]`
			`%div = sdiv i32 %i.04, 2`
			`%add = add nsw i32 %div, %add5`
			`store volatile i32 %add, i32* @g, align 4`
			`%inc = add nuw nsw i32 %i.04, 1`
			`%cmp = icmp slt i32 %inc, %0`
			`br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge`

			`for.cond.for.end_crit_edge: ; preds = %for.body`
			`store i32 %add, i32* @total, align 4`
			`br label %for.end`

			`for.end: ; preds = %for.cond.for.end_crit_edge, %entry`
			`ret void`
			`}`