From 2fea3fe41c5a177d019dd99fb1b43d767eccde24 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 9 Jun 2020 17:35:45 +0100 Subject: [PATCH] [MachineScheduler] Update available queue on the first mop of a new cycle If a resource can be held for multiple cycles in the schedule model then an instruction can be placed into the available queue, another instruction can be scheduled, but the first will not be taken back out if the two instructions hazard. To fix this make sure that we update the available queue even on the first MOp of a cycle, pushing available instructions back into the pending queue if they now conflict. This happens with some downstream schedules we have around MVE instruction scheduling where we use ResourceCycles=[2] to show the instruction executing over two beats. Apparently the test changes here are OK too. Differential Revision: https://reviews.llvm.org/D76909 --- llvm/lib/CodeGen/MachineScheduler.cpp | 16 +- .../CodeGen/AArch64/misched-fusion-aes.ll | 4 +- .../CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 2 +- .../CodeGen/PowerPC/2008-10-28-f128-i32.ll | 126 +++---- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 6 +- .../CodeGen/PowerPC/aix32-cc-abi-vaarg.ll | 18 +- .../PowerPC/fp128-bitcast-after-operation.ll | 4 +- llvm/test/CodeGen/PowerPC/inc-of-add.ll | 338 +++++++++--------- llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll | 2 +- .../ppcf128-constrained-fp-intrinsics.ll | 14 +- llvm/test/CodeGen/PowerPC/pr43976.ll | 6 +- llvm/test/CodeGen/PowerPC/spe.ll | 6 +- llvm/test/CodeGen/PowerPC/sub-of-not.ll | 338 +++++++++--------- .../umulo-128-legalisation-lowering.ll | 70 ++-- llvm/test/CodeGen/PowerPC/vec_splat.ll | 156 ++++---- 15 files changed, 553 insertions(+), 553 deletions(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index a68899191374..0f21c97a30f6 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2424,16 +2424,14 @@ SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; } + ++I; } for (unsigned i = 0; Available.empty(); ++i) { // FIXME: Re-enable assert once PR20057 is resolved. diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index 70038e934c9f..95a419bd7398 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -79,7 +79,7 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesea: ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesmc [[VA]], [[VA]] +; CHECK: aesmc [[VA]], [[VA]] ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesmc [[VB]], [[VB]] ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} @@ -163,7 +163,7 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesda: ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesimc [[VA]], [[VA]] +; CHECK: aesimc [[VA]], [[VA]] ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesimc [[VB]], [[VB]] ; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} diff --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll index d216cf59bde2..9af68e7d8012 100644 --- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll +++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll @@ -2,7 +2,7 @@ define i32 @foo() nounwind { entry: -; CHECK: cntlzw 3, 4 +; CHECK: cntlzw 3, 3 %retval = alloca i32, align 4 ; [#uses=2] %temp = alloca i32, align 4 ; [#uses=2] %ctz_x = alloca i32, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index 7897d1c6b8a5..028904fc3200 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -9,29 +9,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stwu 1, -464(1) ; CHECK-NEXT: mfcr 12 ; CHECK-NEXT: stw 29, 412(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: lis 3, .LCPI0_0@ha +; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 12, 408(1) ; CHECK-NEXT: stfd 2, 376(1) +; CHECK-NEXT: lwz 4, 380(1) ; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: stw 4, 396(1) +; CHECK-NEXT: lwz 4, 376(1) +; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) ; CHECK-NEXT: stfd 1, 384(1) +; CHECK-NEXT: stw 4, 392(1) +; CHECK-NEXT: fcmpu 0, 2, 27 +; CHECK-NEXT: lwz 4, 388(1) +; CHECK-NEXT: fcmpu 1, 1, 27 +; CHECK-NEXT: lwz 3, 384(1) +; CHECK-NEXT: crand 20, 6, 0 +; CHECK-NEXT: cror 20, 4, 20 ; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: lwz 4, 380(1) -; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) -; CHECK-NEXT: lwz 3, 384(1) -; CHECK-NEXT: stw 4, 396(1) -; CHECK-NEXT: fcmpu 0, 2, 27 -; CHECK-NEXT: lwz 4, 376(1) -; CHECK-NEXT: fcmpu 1, 1, 27 -; CHECK-NEXT: crand 20, 6, 0 -; CHECK-NEXT: cror 20, 4, 20 -; CHECK-NEXT: stw 4, 392(1) -; CHECK-NEXT: stw 3, 400(1) -; CHECK-NEXT: lwz 4, 388(1) ; CHECK-NEXT: stw 4, 404(1) +; CHECK-NEXT: stw 3, 400(1) ; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 @@ -41,54 +41,53 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lis 3, 15856 ; CHECK-NEXT: stw 3, 336(1) -; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: stfd 0, 304(1) -; CHECK-NEXT: stw 29, 340(1) -; CHECK-NEXT: stw 29, 332(1) -; CHECK-NEXT: stw 29, 328(1) ; CHECK-NEXT: lwz 3, 308(1) -; CHECK-NEXT: stfd 1, 296(1) -; CHECK-NEXT: lfd 3, 336(1) -; CHECK-NEXT: lfd 4, 328(1) +; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: stw 3, 324(1) ; CHECK-NEXT: lwz 3, 304(1) +; CHECK-NEXT: stfd 1, 296(1) ; CHECK-NEXT: stw 3, 320(1) ; CHECK-NEXT: lwz 3, 300(1) -; CHECK-NEXT: lfd 31, 320(1) +; CHECK-NEXT: stw 29, 340(1) ; CHECK-NEXT: stw 3, 316(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 296(1) +; CHECK-NEXT: stw 29, 332(1) ; CHECK-NEXT: stw 3, 312(1) +; CHECK-NEXT: stw 29, 328(1) +; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) +; CHECK-NEXT: lfd 3, 336(1) +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: lfd 4, 328(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: stw 3, 368(1) -; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: stw 29, 372(1) -; CHECK-NEXT: stw 29, 364(1) -; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 368(1) -; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stfd 2, 288(1) ; CHECK-NEXT: stw 3, 356(1) ; CHECK-NEXT: lwz 3, 280(1) +; CHECK-NEXT: stw 29, 372(1) ; CHECK-NEXT: stw 3, 352(1) ; CHECK-NEXT: lwz 3, 292(1) -; CHECK-NEXT: lfd 1, 352(1) +; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 3, 348(1) ; CHECK-NEXT: lwz 3, 288(1) +; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: stw 3, 344(1) +; CHECK-NEXT: lfd 3, 368(1) +; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 ; CHECK-NEXT: lis 3, .LCPI0_1@ha -; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 @@ -102,6 +101,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) ; CHECK-NEXT: fctiwz 0, 0 ; CHECK-NEXT: stfd 0, 152(1) +; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: lwz 3, 164(1) ; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 4, 156(1) @@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: lis 3, 17392 ; CHECK-NEXT: stfd 1, 208(1) -; CHECK-NEXT: stw 3, 240(1) -; CHECK-NEXT: stfd 2, 200(1) -; CHECK-NEXT: stw 29, 244(1) -; CHECK-NEXT: stw 29, 236(1) -; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 240(1) -; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lwz 3, 212(1) ; CHECK-NEXT: cmpwi 2, 30, 0 +; CHECK-NEXT: stfd 2, 200(1) ; CHECK-NEXT: stw 3, 228(1) ; CHECK-NEXT: lwz 3, 208(1) +; CHECK-NEXT: stw 29, 244(1) ; CHECK-NEXT: stw 3, 224(1) ; CHECK-NEXT: lwz 3, 204(1) -; CHECK-NEXT: lfd 1, 224(1) +; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 3, 220(1) ; CHECK-NEXT: lwz 3, 200(1) +; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: stw 3, 216(1) +; CHECK-NEXT: lfd 3, 240(1) +; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd ; CHECK-NEXT: blt 2, .LBB0_7 @@ -150,9 +150,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 ; CHECK-NEXT: stfd 1, 184(1) -; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 188(1) +; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: stw 3, 260(1) ; CHECK-NEXT: lwz 3, 184(1) @@ -165,10 +165,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: stfd 2, 176(1) -; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: fcmpu 1, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) ; CHECK-NEXT: fcmpu 0, 1, 27 +; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: crandc 20, 2, 4 ; CHECK-NEXT: stw 3, 268(1) ; CHECK-NEXT: lwz 3, 176(1) @@ -184,27 +184,27 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: cror 20, 1, 3 ; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: # %bb.11: # %bb2 -; CHECK-NEXT: fneg 28, 31 -; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: fneg 29, 31 +; CHECK-NEXT: stfd 29, 48(1) ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stw 3, 80(1) -; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: fneg 28, 30 ; CHECK-NEXT: lwz 3, 52(1) -; CHECK-NEXT: stfd 29, 40(1) ; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: stw 29, 84(1) -; CHECK-NEXT: stw 29, 76(1) -; CHECK-NEXT: stw 29, 72(1) +; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: stw 3, 68(1) -; CHECK-NEXT: lfd 3, 80(1) -; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: lwz 3, 44(1) -; CHECK-NEXT: lfd 1, 64(1) +; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 3, 60(1) ; CHECK-NEXT: lwz 3, 40(1) +; CHECK-NEXT: stw 29, 72(1) ; CHECK-NEXT: stw 3, 56(1) +; CHECK-NEXT: lfd 3, 80(1) +; CHECK-NEXT: lfd 4, 72(1) +; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -220,12 +220,12 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) ; CHECK-NEXT: lis 3, .LCPI0_3@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: fadd 2, 28, 29 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 24(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 36(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 28(1) @@ -244,22 +244,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) +; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) -; CHECK-NEXT: stfd 30, 104(1) -; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: lwz 3, 116(1) -; CHECK-NEXT: stw 4, 144(1) -; CHECK-NEXT: lfd 4, 136(1) +; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: stw 3, 132(1) -; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lwz 3, 112(1) +; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 128(1) ; CHECK-NEXT: lwz 3, 108(1) -; CHECK-NEXT: lfd 1, 128(1) +; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: stw 3, 124(1) ; CHECK-NEXT: lwz 3, 104(1) +; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: stw 3, 120(1) +; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -278,9 +278,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fadd 2, 30, 31 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 88(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 100(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 92(1) @@ -300,8 +300,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 12, 408(1) ; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: mtcrf 32, 12 # cr2 +; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 468(1) ; CHECK-NEXT: addi 1, 1, 464 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index d155a7881225..52070aa9063d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -704,8 +704,8 @@ declare void @test_vararg(i32, ...) ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -773,8 +773,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -844,8 +844,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 9f521788a3fc..c276d4ccc395 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -68,15 +68,15 @@ ; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r5, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) -; 32BIT-DAG: renamable $r5 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.1) -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW renamable $r5, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) -; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) +; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 +; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: (store 4 into %ir.1) +; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 +; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0) +; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) +; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) +; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 54ceccd9c59a..fa57f50cb43d 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -28,9 +28,9 @@ entry: ; PPC32-DAG: stfd 2, 16(1) ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) -; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]] ; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]] ; PPC32: blr @@ -68,9 +68,9 @@ entry: ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) -; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-NOT: BARRIER ; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768 +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768 ; PPC32: blr %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index fa03379a3c30..90004143326f 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) -; PPC32-NEXT: add 7, 21, 7 -; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 -; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) ; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill +; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) +; PPC32-NEXT: lbz 21, 135(1) +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) +; PPC32-NEXT: add 10, 21, 10 ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 -; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 ; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: add 7, 25, 7 +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll index 42cbb30318bc..5fae34f212cc 100644 --- a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll +++ b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll @@ -17,9 +17,9 @@ entry: ; argument put on stack. ; CHECK-NOT: mr 8, 4 ; CHECK: stw 6, 16(1) +; CHECK: stw 7, 20(1) ; CHECK: stw 5, 12(1) ; CHECK: stw 4, 8(1) -; CHECK: stw 7, 20(1) declare i32 @printf(i8* nocapture readonly, ...) diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index c9d9cf870e49..b87f1a682e25 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1442,19 +1442,19 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: mr 29, 3 ; PC64-NEXT: li 3, 0 ; PC64-NEXT: stfd 31, 168(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: mr 30, 4 ; PC64-NEXT: lfs 31, 0(29) ; PC64-NEXT: std 3, 8(4) ; PC64-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: lfs 30, .LCPI32_0@toc@l(3) ; PC64-NEXT: fmr 1, 31 ; PC64-NEXT: fmr 3, 31 +; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill ; PC64-NEXT: fmr 2, 30 ; PC64-NEXT: fmr 4, 30 +; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: stfd 31, 0(4) ; PC64-NEXT: bl __gcc_qadd ; PC64-NEXT: nop @@ -1475,14 +1475,14 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: nop ; PC64-NEXT: frsp 0, 1 ; PC64-NEXT: stfs 0, 0(29) -; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: ld 29, 120(1) # 8-byte Folded Reload ; PC64-NEXT: stfd 1, -16(30) ; PC64-NEXT: stfd 2, -8(30) ; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: addi 1, 1, 176 ; PC64-NEXT: ld 0, 16(1) ; PC64-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll index 91722283f4ae..9dc1a52c567f 100644 --- a/llvm/test/CodeGen/PowerPC/pr43976.ll +++ b/llvm/test/CodeGen/PowerPC/pr43976.ll @@ -10,11 +10,11 @@ define dso_local signext i32 @b() local_unnamed_addr #0 { ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -144(r1) ; CHECK-NEXT: addis r3, r2, a@toc@ha -; CHECK-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: lfd f0, a@toc@l(r3) -; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r4) ; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: lfd f0, a@toc@l(r3) +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: sldi r4, r4, 63 +; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r3) ; CHECK-NEXT: fsub f2, f0, f1 ; CHECK-NEXT: fctidz f2, f2 ; CHECK-NEXT: stfd f2, 128(r1) diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index d2400be43cb4..1c4c7a339817 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1297,6 +1297,8 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; CHECK-NEXT: li 5, 256 ; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1313,8 +1315,6 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 340(1) # 4-byte Folded Reload @@ -1392,8 +1392,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup ; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 52(1) diff --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll index db92a3eb1bee..d2b55aaf7ac8 100644 --- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll +++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) -; PPC32-NEXT: add 7, 21, 7 -; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 -; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) ; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill +; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) +; PPC32-NEXT: lbz 21, 135(1) +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) +; PPC32-NEXT: add 10, 21, 10 ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 -; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 ; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: add 7, 25, 7 +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index c0a8a76c7f1a..815d5b7443e4 100644 --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,23 +5,23 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start -; PPC64-NEXT: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 5, 3, 0 -; PPC64-NEXT: mulhdu. 9, 3, 6 -; PPC64-NEXT: mulld 3, 3, 6 +; PPC64-NEXT: mulhdu. 8, 3, 6 ; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: mulld 8, 5, 4 +; PPC64-NEXT: cmpdi 3, 0 +; PPC64-NEXT: mulld 3, 3, 6 +; PPC64-NEXT: cmpdi 5, 5, 0 ; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: cmpdi 5, 0 -; PPC64-NEXT: crnor 20, 2, 22 -; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: crnor 20, 22, 2 ; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: cmpldi 3, 0 ; PPC64-NEXT: add 3, 8, 3 ; PPC64-NEXT: cmpld 6, 3, 8 ; PPC64-NEXT: crandc 21, 24, 2 ; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: mulhdu. 5, 5, 4 ; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: crnor 20, 20, 21 ; PPC64-NEXT: mulld 4, 4, 6 ; PPC64-NEXT: bc 12, 20, .LBB0_2 @@ -38,13 +38,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 0, 4(1) ; PPC32-NEXT: stwu 1, -80(1) ; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill ; PPC32-NEXT: mfcr 12 -; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 29, 7 +; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 27, 4 +; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 29, 7 +; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 30, 8 ; PPC32-NEXT: mr 26, 3 ; PPC32-NEXT: li 3, 0 ; PPC32-NEXT: li 4, 0 @@ -54,30 +54,36 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 12, 28(1) -; PPC32-NEXT: mr 28, 9 ; PPC32-NEXT: mr 23, 6 +; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 24, 5 +; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 25, 10 +; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 28, 9 +; PPC32-NEXT: stw 12, 28(1) ; PPC32-NEXT: bl __multi3 ; PPC32-NEXT: mr 7, 4 ; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: mullw 10, 28, 27 -; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: mullw 11, 28, 27 +; PPC32-NEXT: mullw 12, 26, 25 +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: cmplwi 7, 11, 0 ; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: add 12, 9, 21 +; PPC32-NEXT: cmplw 6, 12, 9 +; PPC32-NEXT: mulhwu 10, 27, 25 ; PPC32-NEXT: mullw 0, 30, 23 ; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: add 10, 11, 10 ; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 11, 9, 21 -; PPC32-NEXT: add 0, 12, 10 -; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: add 0, 10, 11 +; PPC32-NEXT: adde 8, 0, 12 ; PPC32-NEXT: addc 4, 7, 4 ; PPC32-NEXT: adde 8, 3, 8 ; PPC32-NEXT: xor 22, 4, 7 @@ -85,21 +91,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: or. 22, 22, 20 ; PPC32-NEXT: mcrf 1, 0 ; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 ; PPC32-NEXT: crnor 8, 22, 2 ; PPC32-NEXT: mulhwu. 23, 29, 23 -; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mcrf 5, 0 ; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: cmplw 6, 11, 9 -; PPC32-NEXT: cmplwi 7, 10, 0 ; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: cmplw 3, 0, 10 +; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: mulhwu. 9, 24, 30 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: cmplw 4, 7 ; PPC32-NEXT: cmplw 7, 8, 3 ; PPC32-NEXT: crand 12, 30, 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll index 7c048ff37108..0e6626bbce23 100644 --- a/llvm/test/CodeGen/PowerPC/vec_splat.ll +++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll @@ -10,17 +10,17 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { ; G3-LABEL: splat: ; G3: # %bb.0: -; G3-NEXT: lfs 0, 0(4) +; G3-NEXT: lfs 0, 12(4) ; G3-NEXT: lfs 2, 8(4) ; G3-NEXT: lfs 3, 4(4) -; G3-NEXT: lfs 4, 12(4) ; G3-NEXT: fadds 0, 0, 1 -; G3-NEXT: fadds 2, 2, 1 -; G3-NEXT: fadds 3, 3, 1 -; G3-NEXT: fadds 1, 4, 1 -; G3-NEXT: stfs 1, 12(3) -; G3-NEXT: stfs 2, 8(3) -; G3-NEXT: stfs 3, 4(3) +; G3-NEXT: lfs 4, 0(4) +; G3-NEXT: stfs 0, 12(3) +; G3-NEXT: fadds 0, 2, 1 +; G3-NEXT: stfs 0, 8(3) +; G3-NEXT: fadds 0, 3, 1 +; G3-NEXT: stfs 0, 4(3) +; G3-NEXT: fadds 0, 4, 1 ; G3-NEXT: stfs 0, 0(3) ; G3-NEXT: blr ; @@ -49,18 +49,18 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_i4: ; G3: # %bb.0: -; G3-NEXT: lwz 6, 0(4) +; G3-NEXT: lwz 6, 12(4) ; G3-NEXT: lwz 7, 8(4) ; G3-NEXT: lwz 8, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: add 6, 6, 5 -; G3-NEXT: add 8, 8, 5 -; G3-NEXT: add 7, 7, 5 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 6, 12(3) +; G3-NEXT: add 6, 7, 5 +; G3-NEXT: stw 6, 8(3) +; G3-NEXT: add 6, 8, 5 ; G3-NEXT: add 4, 4, 5 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 7, 8(3) -; G3-NEXT: stw 8, 4(3) -; G3-NEXT: stw 6, 0(3) +; G3-NEXT: stw 6, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_i4: @@ -88,18 +88,18 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i32: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) +; G3-NEXT: lwz 5, 12(4) ; G3-NEXT: lwz 6, 8(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 5, 5, -1 -; G3-NEXT: addi 7, 7, -1 -; G3-NEXT: addi 6, 6, -1 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 5, 12(3) +; G3-NEXT: addi 5, 6, -1 +; G3-NEXT: stw 5, 8(3) +; G3-NEXT: addi 5, 7, -1 ; G3-NEXT: addi 4, 4, -1 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: stw 5, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i32: @@ -118,22 +118,22 @@ define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i16: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) -; G3-NEXT: lwz 6, 8(4) +; G3-NEXT: lwz 5, 8(4) +; G3-NEXT: lwz 6, 0(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 5, 5, 1 -; G3-NEXT: addi 7, 7, 1 +; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 6, 6, 1 +; G3-NEXT: addi 7, 7, 1 ; G3-NEXT: addi 4, 4, 1 -; G3-NEXT: addis 5, 5, 1 -; G3-NEXT: addis 7, 7, 1 -; G3-NEXT: addis 6, 6, 1 ; G3-NEXT: addis 4, 4, 1 ; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: addis 4, 5, 1 +; G3-NEXT: stw 4, 8(3) +; G3-NEXT: addis 4, 7, 1 +; G3-NEXT: stw 4, 4(3) +; G3-NEXT: addis 4, 6, 1 +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i16: @@ -189,58 +189,60 @@ define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; G3-LABEL: spltish: ; G3: # %bb.0: ; G3-NEXT: stwu 1, -48(1) +; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; G3-NEXT: lbz 5, 0(4) +; G3-NEXT: lbz 30, 15(4) +; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; G3-NEXT: lbz 29, 13(4) +; G3-NEXT: stw 28, 32(1) # 4-byte Folded Spill +; G3-NEXT: lbz 28, 11(4) +; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill +; G3-NEXT: lbz 27, 9(4) +; G3-NEXT: stw 24, 16(1) # 4-byte Folded Spill ; G3-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; G3-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; G3-NEXT: stw 28, 32(1) # 4-byte Folded Spill -; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; G3-NEXT: lbz 5, 5(4) -; G3-NEXT: lbz 6, 3(4) -; G3-NEXT: lbz 7, 1(4) -; G3-NEXT: lbz 8, 0(4) -; G3-NEXT: lbz 9, 2(4) -; G3-NEXT: lbz 10, 4(4) -; G3-NEXT: lbz 11, 6(4) -; G3-NEXT: lbz 12, 8(4) -; G3-NEXT: lbz 0, 10(4) -; G3-NEXT: addi 7, 7, -15 -; G3-NEXT: lbz 30, 12(4) -; G3-NEXT: lbz 29, 14(4) -; G3-NEXT: lbz 28, 15(4) -; G3-NEXT: lbz 27, 13(4) -; G3-NEXT: lbz 26, 11(4) -; G3-NEXT: lbz 25, 9(4) -; G3-NEXT: addi 6, 6, -15 -; G3-NEXT: lbz 4, 7(4) -; G3-NEXT: addi 5, 5, -15 -; G3-NEXT: addi 25, 25, -15 -; G3-NEXT: addi 26, 26, -15 -; G3-NEXT: addi 4, 4, -15 -; G3-NEXT: addi 27, 27, -15 -; G3-NEXT: addi 28, 28, -15 -; G3-NEXT: stb 29, 14(3) -; G3-NEXT: stb 30, 12(3) -; G3-NEXT: stb 0, 10(3) -; G3-NEXT: stb 12, 8(3) -; G3-NEXT: stb 11, 6(3) -; G3-NEXT: stb 10, 4(3) -; G3-NEXT: stb 9, 2(3) -; G3-NEXT: stb 8, 0(3) -; G3-NEXT: stb 28, 15(3) -; G3-NEXT: stb 27, 13(3) -; G3-NEXT: stb 26, 11(3) -; G3-NEXT: stb 25, 9(3) -; G3-NEXT: stb 4, 7(3) +; G3-NEXT: lbz 6, 2(4) +; G3-NEXT: lbz 7, 4(4) +; G3-NEXT: lbz 8, 6(4) +; G3-NEXT: lbz 9, 8(4) +; G3-NEXT: lbz 10, 10(4) +; G3-NEXT: lbz 11, 12(4) +; G3-NEXT: lbz 12, 14(4) +; G3-NEXT: lbz 26, 7(4) +; G3-NEXT: lbz 25, 5(4) +; G3-NEXT: lbz 24, 3(4) +; G3-NEXT: lbz 4, 1(4) +; G3-NEXT: stb 5, 0(3) +; G3-NEXT: addi 5, 30, -15 +; G3-NEXT: stb 5, 15(3) +; G3-NEXT: addi 5, 29, -15 +; G3-NEXT: stb 5, 13(3) +; G3-NEXT: addi 5, 28, -15 +; G3-NEXT: stb 5, 11(3) +; G3-NEXT: addi 5, 27, -15 +; G3-NEXT: stb 5, 9(3) +; G3-NEXT: addi 5, 26, -15 +; G3-NEXT: stb 5, 7(3) +; G3-NEXT: addi 5, 25, -15 ; G3-NEXT: stb 5, 5(3) -; G3-NEXT: stb 6, 3(3) -; G3-NEXT: stb 7, 1(3) +; G3-NEXT: addi 5, 24, -15 +; G3-NEXT: addi 4, 4, -15 +; G3-NEXT: stb 12, 14(3) +; G3-NEXT: stb 11, 12(3) +; G3-NEXT: stb 10, 10(3) +; G3-NEXT: stb 9, 8(3) +; G3-NEXT: stb 8, 6(3) +; G3-NEXT: stb 7, 4(3) +; G3-NEXT: stb 6, 2(3) +; G3-NEXT: stb 5, 3(3) +; G3-NEXT: stb 4, 1(3) ; G3-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; G3-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; G3-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; G3-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; G3-NEXT: lwz 26, 24(1) # 4-byte Folded Reload ; G3-NEXT: lwz 25, 20(1) # 4-byte Folded Reload +; G3-NEXT: lwz 24, 16(1) # 4-byte Folded Reload ; G3-NEXT: addi 1, 1, 48 ; G3-NEXT: blr ;