From 04377a81aee14fc8807aad90b0c1fdfdcb4a3883 Mon Sep 17 00:00:00 2001 From: Zheng Chen Date: Fri, 14 Feb 2020 01:42:43 -0500 Subject: [PATCH] [PowerPC] Set instruction count as the first priority of LSR. On PowerPC, set instruction count as the first priority of the LSR cost model by default. Add an option, ppc-lsr-no-insns-cost, to switch back to the default LSR cost model. Reviewed By: steven.zhang, jsji Differential Revision: https://reviews.llvm.org/D72683 --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 17 +++++++++ .../Target/PowerPC/PPCTargetTransformInfo.h | 2 ++ llvm/test/CodeGen/PowerPC/addi-licm.ll | 5 +-- llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll | 35 ++++++++++++------- llvm/test/CodeGen/PowerPC/unal-altivec.ll | 13 +++---- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index e05699cc95ec..202272829018 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -33,6 +33,10 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions")); +static cl::opt +LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), + cl::desc("Do not add instruction count to lsr cost model")); + // The latency of mtctr is only justified if there are more than 4 // comparisons that will be removed as a result. static cl::opt @@ -967,3 +971,16 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, *BI = HWLoopInfo.ExitBranch; return true; } + +bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) { + // PowerPC default behaviour here is "instruction number 1st priority". + // If LsrNoInsnsCost is set, call default implementation. 
+ if (!LsrNoInsnsCost) + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, + C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, + C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); + else + return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); +} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 35388d14f606..b89e585b0806 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -63,6 +63,8 @@ public: TargetLibraryInfo *LibInfo); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2); /// @} diff --git a/llvm/test/CodeGen/PowerPC/addi-licm.ll b/llvm/test/CodeGen/PowerPC/addi-licm.ll index 24c9805f1343..17484cfe9bdc 100644 --- a/llvm/test/CodeGen/PowerPC/addi-licm.ll +++ b/llvm/test/CodeGen/PowerPC/addi-licm.ll @@ -17,9 +17,10 @@ entry: ; CHECK-LABEL: @foo ; CHECK: addi [[REG1:[0-9]+]], 1, ; CHECK: addi [[REG2:[0-9]+]], 1, +; CHECK: li [[REG3:[0-9]+]], 0 ; CHECK: %for.body.i -; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG1]]) -; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG2]]) +; CHECK-DAG: lfsx {{[0-9]+}}, [[REG1]], [[REG3]] +; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]], [[REG3]] ; CHECK: blr ; PIP-LABEL: @foo diff --git a/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll b/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll index 4e81811a56f3..29d51e49ab24 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-insns-cost.ll @@ -1,5 +1,7 @@ -; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs < %s \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs < %s \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=INST +; RUN: 
llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-lsr-no-insns-cost=true \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=REG ; void test(unsigned *a, unsigned *b, unsigned *c) ; { @@ -10,16 +12,25 @@ ; compile with -fno-unroll-loops define void @lsr-insts-cost(i32* %0, i32* %1, i32* %2) { -; CHECK-LABEL: lsr-insts-cost -; CHECK: .LBB0_4: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lxvd2x vs34, 0, r3 -; CHECK-NEXT: lxvd2x vs35, 0, r4 -; CHECK-NEXT: addi r4, r4, 16 -; CHECK-NEXT: addi r3, r3, 16 -; CHECK-NEXT: vadduwm v2, v3, v2 -; CHECK-NEXT: stxvd2x vs34, 0, r5 -; CHECK-NEXT: addi r5, r5, 16 -; CHECK-NEXT: bdnz .LBB0_4 +; INST-LABEL: lsr-insts-cost +; INST: .LBB0_4: # =>This Inner Loop Header: Depth=1 +; INST-NEXT: lxvd2x vs34, r3, r6 +; INST-NEXT: lxvd2x vs35, r4, r6 +; INST-NEXT: vadduwm v2, v3, v2 +; INST-NEXT: stxvd2x vs34, r5, r6 +; INST-NEXT: addi r6, r6, 16 +; INST-NEXT: bdnz .LBB0_4 +; +; REG-LABEL: lsr-insts-cost +; REG: .LBB0_4: # =>This Inner Loop Header: Depth=1 +; REG-NEXT: lxvd2x vs34, 0, r3 +; REG-NEXT: lxvd2x vs35, 0, r4 +; REG-NEXT: addi r4, r4, 16 +; REG-NEXT: addi r3, r3, 16 +; REG-NEXT: vadduwm v2, v3, v2 +; REG-NEXT: stxvd2x vs34, 0, r5 +; REG-NEXT: addi r5, r5, 16 +; REG-NEXT: bdnz .LBB0_4 %4 = getelementptr i32, i32* %2, i64 1024 %5 = getelementptr i32, i32* %0, i64 1024 %6 = getelementptr i32, i32* %1, i64 1024 diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec.ll b/llvm/test/CodeGen/PowerPC/unal-altivec.ll index 081d582b7414..a804b35052be 100644 --- a/llvm/test/CodeGen/PowerPC/unal-altivec.ll +++ b/llvm/test/CodeGen/PowerPC/unal-altivec.ll @@ -29,14 +29,15 @@ vector.body: ; preds = %vector.body, %vecto br i1 %10, label %for.end, label %vector.body ; CHECK: @foo -; CHECK-DAG: li [[C16:[0-9]+]], 16 +; CHECK-DAG: li [[C0:[0-9]+]], 0 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: -; CHECK-DAG: lvx [[LD1:[0-9]+]], 0, [[C0:[0-9]+]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[C0]], [[C16]] -; CHECK-DAG: 
lvsl [[MASK1:[0-9]+]], 0, [[C0]] -; CHECK-DAG: vperm [[VR1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] -; CHECK-DAG: vaddfp {{[0-9]+}}, [[VR1]], [[CNST]] +; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] +; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]] +; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], +; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] +; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] ; CHECK: blr for.end: ; preds = %vector.body