From 2feb7e56e2872d0ea55f9cf8fd1a46f2a08b81ea Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 28 May 2019 20:39:39 +0000 Subject: [PATCH] [DAGCombiner][X86][AArch64][AMDGPU] (x + C) - y -> (x - y) + C fold. Try 2 Summary: The main motivation is shown by all these `neg` instructions that are now created. In particular, the `@reg32_lshr_by_negated_unfolded_sub_b` test. AArch64 test changes all look good (`neg` created), or neutral. X86 changes look neutral (vectors), or good (`neg` / `xor eax, eax` created). I'm not sure about `X86/ragreedy-hoist-spill.ll`, it looks like the spill is now hoisted into preheader (which should still be good?), 2 4-byte reloads become 1 8-byte reload, and are elsewhere, but I'm not sure how that affects that loop. I'm unable to interpret AMDGPU change, looks neutral-ish? This is hopefully a step towards solving [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]]. https://rise4fun.com/Alive/pkdq (we are missing more patterns, I'll submit them later) This is a recommit, originally committed in rL361852, but reverted to investigate test-suite compile-time hangs. 
Reviewers: craig.topper, RKSimon, spatel, arsenm Reviewed By: RKSimon Subscribers: bjope, qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62223 llvm-svn: 361871 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++ llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 6 +- .../CodeGen/AArch64/sink-addsub-of-const.ll | 16 ++-- .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 21 ++--- llvm/test/CodeGen/X86/combine-add.ll | 4 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 ++++++++++--------- llvm/test/CodeGen/X86/shift-amount-mod.ll | 9 +- llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 20 ++--- llvm/test/CodeGen/X86/zext-sext.ll | 21 ++--- 9 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d53ee3134d55..06c2daa90bf1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2923,6 +2923,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) return V; + // Hoist one-use addition by constant: (x + C) - y -> (x - y) + C + if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N0.getOperand(1))) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); + } + // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' // rather than 'sub 0/1' (the sext should get folded). 
// sub X, (zext i1 Y) --> add X, (sext i1 Y) diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll index 6daef644761b..d349eb09f735 100644 --- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll +++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll @@ -486,8 +486,7 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind { define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind { ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: neg w8, w1 ; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsr w0, w0, w8 ; CHECK-NEXT: ret @@ -500,8 +499,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind { ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: sub x8, x8, x1 +; CHECK-NEXT: neg x8, x1 ; CHECK-NEXT: sub x8, x8, x2 ; CHECK-NEXT: lsr x0, x0, x8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll index c571dac94b81..8886954623f7 100644 --- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: sink_add_of_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w0, w1 -; CHECK-NEXT: add w8, w8, #32 // =32 -; CHECK-NEXT: sub w0, w8, w2 +; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: add w0, w8, #32 // =32 ; CHECK-NEXT: ret %t0 = sub i32 %a, %b %t1 = add i32 %t0, 32 ; constant always on RHS @@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: sink_sub_of_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w0, w1 -; CHECK-NEXT: sub w8, w8, #32 // =32 -; 
CHECK-NEXT: sub w0, w8, w2 +; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: sub w0, w8, #32 // =32 ; CHECK-NEXT: ret %t0 = sub i32 %a, %b %t1 = sub i32 %t0, 32 @@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: sink_sub_from_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w1, w0 -; CHECK-NEXT: add w8, w8, #32 // =32 -; CHECK-NEXT: sub w0, w8, w2 +; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: add w0, w8, #32 // =32 ; CHECK-NEXT: ret %t0 = sub i32 %a, %b %t1 = sub i32 32, %t0 @@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v3.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %b %t1 = add <4 x i32> %t0, ; constant always on RHS @@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: add v0.4s, v0.4s, v3.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %b %t1 = sub <4 x i32> , %t0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll index 20c84c5b6327..71c8f6926c1f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -15,10 +15,11 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { ; VARIANT0-NEXT: v_mov_b32_e32 v2, 0 ; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 -; VARIANT0-NEXT: s_add_i32 s2, s2, -1 -; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VARIANT0-NEXT: s_waitcnt expcnt(0) +; 
VARIANT0-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; VARIANT0-NEXT: s_waitcnt vmcnt(0) ; VARIANT0-NEXT: s_barrier -; VARIANT0-NEXT: v_sub_i32_e32 v3, vcc, s2, v0 +; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, -1, v0 ; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 ; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 @@ -36,12 +37,12 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { ; VARIANT1-NEXT: v_mov_b32_e32 v2, 0 ; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 -; VARIANT1-NEXT: s_add_i32 s2, s2, -1 +; VARIANT1-NEXT: s_waitcnt expcnt(0) +; VARIANT1-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; VARIANT1-NEXT: s_barrier -; VARIANT1-NEXT: v_sub_i32_e32 v3, vcc, s2, v0 +; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, -1, v0 ; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 -; VARIANT1-NEXT: s_waitcnt expcnt(0) ; VARIANT1-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 ; VARIANT1-NEXT: s_waitcnt vmcnt(0) ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 @@ -59,8 +60,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { ; VARIANT2-NEXT: global_store_dword v[1:2], v0, off ; VARIANT2-NEXT: s_waitcnt vmcnt(0) ; VARIANT2-NEXT: s_barrier -; VARIANT2-NEXT: s_add_i32 s0, s0, -1 -; VARIANT2-NEXT: v_sub_u32_e32 v3, s0, v0 +; VARIANT2-NEXT: v_sub_u32_e32 v0, s0, v0 +; VARIANT2-NEXT: v_add_u32_e32 v3, -1, v0 ; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] ; VARIANT2-NEXT: v_mov_b32_e32 v0, s3 @@ -82,8 +83,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { ; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc ; VARIANT3-NEXT: global_store_dword v[1:2], v0, off ; VARIANT3-NEXT: s_barrier -; VARIANT3-NEXT: s_add_i32 s0, s0, -1 -; VARIANT3-NEXT: v_sub_u32_e32 v3, s0, v0 +; VARIANT3-NEXT: 
v_sub_u32_e32 v0, s0, v0 +; VARIANT3-NEXT: v_add_u32_e32 v3, -1, v0 ; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] ; VARIANT3-NEXT: v_mov_b32_e32 v0, s3 diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll index 6f5f1370e6b4..1d20fcf33d74 100644 --- a/llvm/test/CodeGen/X86/combine-add.ll +++ b/llvm/test/CodeGen/X86/combine-add.ll @@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) { ; SSE-LABEL: combine_vec_add_sub_sub: ; SSE: # %bb.0: -; SSE-NEXT: paddd {{.*}}(%rip), %xmm0 ; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: psubd %xmm1, %xmm0 +; SSE-NEXT: paddd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_sub: ; AVX: # %bb.0: -; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = sub <4 x i32> %a, %b %2 = sub <4 x i32> , %d diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index c9a577dbaa92..fd3d83ed2cbe 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 -; CHECK-NEXT: movq %rdx, %r14 -; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: movq %rdx, %rbx +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,10 +78,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: imulq $1040, %r14, %rax ## imm = 
0x410 +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx -; CHECK-NEXT: movl $1, %r14d +; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 @@ -91,48 +92,47 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: testb %r14b, %r14b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r12), %eax +; CHECK-NEXT: leal 1(%r14), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %ecx +; CHECK-NEXT: movl $-1, %r13d ; CHECK-NEXT: movslq (%rsi,%rax,4), %rax ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r12d, %ecx +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: 
movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r13 +; CHECK-NEXT: ## implicit-def: $r12 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r13 +; CHECK-NEXT: ## implicit-def: $r12 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: js LBB0_55 ; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: movq %rax, %r13 +; CHECK-NEXT: movq %rax, %r12 ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 @@ -157,16 +157,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: je LBB0_34 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r13), %rax +; CHECK-NEXT: leaq 1(%r12), %rax ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_29 ; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r13 +; CHECK-NEXT: incq %r12 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; CHECK-NEXT: addl $-324, %eax ## imm = 0xFEBC +; CHECK-NEXT: leal -324(%r13), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -176,11 +175,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload +; CHECK-NEXT: cmpl $11, %r13d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## 
%bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload +; CHECK-NEXT: cmpl $24, %r13d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -195,8 +194,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movb $0, (%r13) -; CHECK-NEXT: movl %r12d, %ecx +; CHECK-NEXT: movb $0, (%r12) +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 @@ -208,22 +207,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %ecx ## imm = 0x10C +; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %ecx +; CHECK-NEXT: movl $2, %r13d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %ecx +; CHECK-NEXT: movl $20, %r13d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r12), %eax +; CHECK-NEXT: leal -268(%r14), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 @@ -233,12 +232,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r12d, %ecx +; CHECK-NEXT: movl %r14d, %r13d 
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: decl %r14d -; CHECK-NEXT: testl %r14d, %r14d -; CHECK-NEXT: movl %ecx, %r12d +; CHECK-NEXT: decl %r15d +; CHECK-NEXT: testl %r15d, %r15d +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jg LBB0_13 ; CHECK-NEXT: jmp LBB0_22 ; CHECK-NEXT: .p2align 4, 0x90 @@ -255,27 +254,28 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %ecx -; CHECK-NEXT: cmpl $16, %ecx +; CHECK-NEXT: incl %r13d +; CHECK-NEXT: cmpl $16, %r13d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %ecx, %eax +; CHECK-NEXT: btl %r13d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: addq $8189, %r15 ## imm = 0x1FFD -; CHECK-NEXT: subq %rbx, %r15 -; CHECK-NEXT: addq _syHistory@{{.*}}(%rip), %r15 +; CHECK-NEXT: subq %rbp, %rbx +; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax +; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: decq %r15 +; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 @@ -302,7 +302,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_55: ## %if.then.i ; CHECK-NEXT: ud2 ; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit -; CHECK-NEXT: movq %r15, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx 
## 8-byte Reload +; CHECK-NEXT: movq %rbx, %rbp ; CHECK-NEXT: jmp LBB0_48 ; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ; CHECK-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll index 6c268d8a27f4..e8af5f66d36c 100644 --- a/llvm/test/CodeGen/X86/shift-amount-mod.ll +++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll @@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl $32, %ecx +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw ; ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b: ; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl $32, %ecx -; X64-NEXT: subl %esi, %ecx +; X64-NEXT: negl %ecx ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrl %cl, %eax @@ -1139,9 +1139,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl $64, %ecx +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: addb $64, %cl ; X32-NEXT: movl %esi, %edx ; X32-NEXT: shrl %cl, %edx ; X32-NEXT: shrdl %cl, %esi, %eax diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll index 2ffbfcb56b2f..37a3dcbd0e4a 100644 --- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll +++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll @@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) { 
; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl $32, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax +; X32-NEXT: addl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: sink_add_of_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: subl %esi, %edi +; X64-NEXT: subl %edx, %edi ; X64-NEXT: leal 32(%rdi), %eax -; X64-NEXT: subl %edx, %eax ; X64-NEXT: retq %t0 = sub i32 %a, %b %t1 = add i32 %t0, 32 ; constant always on RHS @@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) { ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl $-32, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax +; X32-NEXT: addl $-32, %eax ; X32-NEXT: retl ; ; X64-LABEL: sink_sub_of_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: subl %esi, %edi +; X64-NEXT: subl %edx, %edi ; X64-NEXT: leal -32(%rdi), %eax -; X64-NEXT: subl %edx, %eax ; X64-NEXT: retq %t0 = sub i32 %a, %b %t1 = sub i32 %t0, 32 @@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) { ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl $32, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax +; X32-NEXT: addl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: sink_sub_from_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: subl %edi, %esi +; X64-NEXT: subl %edx, %esi ; X64-NEXT: leal 32(%rsi), %eax -; X64-NEXT: subl %edx, %eax ; X64-NEXT: retq %t0 = sub i32 %a, %b %t1 = sub i32 32, %t0 @@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x ; X32-LABEL: vec_sink_add_of_const_to_sub: ; X32: # %bb.0: ; X32-NEXT: psubd %xmm1, %xmm0 -; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0 ; X32-NEXT: psubd %xmm2, %xmm0 +; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0 ; X32-NEXT: 
retl ; ; X64-LABEL: vec_sink_add_of_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: psubd %xmm1, %xmm0 -; X64-NEXT: paddd {{.*}}(%rip), %xmm0 ; X64-NEXT: psubd %xmm2, %xmm0 +; X64-NEXT: paddd {{.*}}(%rip), %xmm0 ; X64-NEXT: retq %t0 = sub <4 x i32> %a, %b %t1 = add <4 x i32> %t0, ; constant always on RHS @@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 ; X32-LABEL: vec_sink_sub_from_const_to_sub: ; X32: # %bb.0: ; X32-NEXT: psubd %xmm0, %xmm1 -; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1 ; X32-NEXT: psubd %xmm2, %xmm1 +; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1 ; X32-NEXT: movdqa %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: vec_sink_sub_from_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: psubd %xmm0, %xmm1 -; X64-NEXT: paddd {{.*}}(%rip), %xmm1 ; X64-NEXT: psubd %xmm2, %xmm1 +; X64-NEXT: paddd {{.*}}(%rip), %xmm1 ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq %t0 = sub <4 x i32> %a, %b diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll index 7034378a880b..84096e3b6805 100644 --- a/llvm/test/CodeGen/X86/zext-sext.ll +++ b/llvm/test/CodeGen/X86/zext-sext.ll @@ -15,30 +15,27 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind { ; CHECK-NEXT: subq %rax, %rsi ; CHECK-NEXT: movq (%rdx), %rax ; CHECK-NEXT: movswl 8(%rdi), %edx -; CHECK-NEXT: movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B ; CHECK-NEXT: movswl (%rax,%rsi,2), %eax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: imull %edx, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6 ; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6 -; CHECK-NEXT: movslq %eax, %r8 +; CHECK-NEXT: movslq %eax, %rdi ; CHECK-NEXT: setl %dl ; CHECK-NEXT: cmpl $2138875573, %eax # imm = 0x7F7CA6B5 -; CHECK-NEXT: movq %r8, %r9 +; CHECK-NEXT: movq %rdi, %r8 ; CHECK-NEXT: leal -1(%rdx,%rdx), %edx ; CHECK-NEXT: cmovlel %edx, %esi -; CHECK-NEXT: subq %rax, %r9 -; CHECK-NEXT: addq %r8, %rdi +; CHECK-NEXT: subq 
%rax, %r8 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $1, %esi -; CHECK-NEXT: cmovneq %rax, %r9 -; CHECK-NEXT: testl %r8d, %r8d -; CHECK-NEXT: cmovnsq %rax, %r9 -; CHECK-NEXT: movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF -; CHECK-NEXT: subq %r9, %rdi -; CHECK-NEXT: addq (%rcx), %rdi -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: cmovneq %rax, %r8 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsq %rax, %r8 +; CHECK-NEXT: movq (%rcx), %rax +; CHECK-NEXT: subq %r8, %rdi +; CHECK-NEXT: leaq -2138875574(%rax,%rdi), %rax ; CHECK-NEXT: movq %rax, (%rcx) ; CHECK-NEXT: retq entry: