From 8ae0f88b139c77068573abce0a619757952f9f52 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Date: Mon, 3 Dec 2018 14:30:18 +0000
Subject: [PATCH] [SystemZ::TTI]  Return zero cost for ICmp that becomes Load
 And Test.

When a loaded value that has multiple users is compared with 0, the load
and the compare are emitted as a single Load and Test instruction. The
load is not folded in this case (it has multiple users), but the separate
compare instruction is eliminated.

This patch returns 0 cost for the icmp in these cases.

Review: Ulrich Weigand
https://reviews.llvm.org/D55111

llvm-svn: 348141
---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 10 ++++++++
 .../CostModel/SystemZ/load-and-test.ll        | 25 +++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/SystemZ/load-and-test.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 6155ba4b5c82..129610fe095b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -839,6 +839,16 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   else { // Scalar
     switch (Opcode) {
     case Instruction::ICmp: {
+      // A loaded value with multiple users that is compared with 0 becomes a
+      // Load and Test. The load is then not foldable, so the ICmp costs 0.
+      unsigned ScalarBits = ValTy->getScalarSizeInBits();
+      if (I != nullptr && ScalarBits >= 32)
+        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+                C->isZero())
+              return 0;
+
       unsigned Cost = 1;
       if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
         Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
diff --git a/llvm/test/Analysis/CostModel/SystemZ/load-and-test.ll b/llvm/test/Analysis/CostModel/SystemZ/load-and-test.ll
new file mode 100644
index 000000000000..f0695688889e
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/load-and-test.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Test that an icmp that becomes part of a Load and Test gets a cost of 0.
+
+define i64 @fun0(i64* %Src, i64 %Arg) {
+  %Ld1 = load i64, i64* %Src
+  %Cmp = icmp eq i64 %Ld1, 0
+  %S   = select i1 %Cmp, i64 %Arg, i64 %Ld1
+  ret i64 %S
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun0':
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %Ld1 = load i64, i64* %Src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %Cmp = icmp eq i64 %Ld1, 0
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %S = select
+}
+
+define i32 @fun1(i32* %Src, i32 %Arg) {
+  %Ld1 = load i32, i32* %Src
+  %Cmp = icmp eq i32 %Ld1, 0
+  %S   = select i1 %Cmp, i32 %Arg, i32 %Ld1
+  ret i32 %S
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun1':
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %Ld1 = load i32, i32* %Src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %Cmp = icmp eq i32 %Ld1, 0
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %S = select
+}