[SystemZ] Utilize Test Data Class instructions.

This adds a new SystemZ-specific intrinsic, llvm.s390.tdc.f(32|64|128),
which maps straight to the test data class instructions.  A new IR pass
is added to recognize instructions that can be converted to TDC and
perform the necessary replacements.

Differential Revision: http://reviews.llvm.org/D21949

llvm-svn: 275016
This commit is contained in:
Marcin Koscielnicki 2016-07-10 14:41:22 +00:00
parent 303326541b
commit cf7cc724a7
13 changed files with 1003 additions and 4 deletions

View File

@ -374,3 +374,14 @@ let TargetPrefix = "s390" in {
[llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
//
// Misc intrinsics
//
//===----------------------------------------------------------------------===//
// llvm.s390.tdc.*: Test Data Class.
//   operand 0: the floating-point value to classify (overloaded on FP type)
//   operand 1: i64 class mask
//   result:    i32
// Declared IntrNoMem, i.e. it neither reads nor writes memory.
let TargetPrefix = "s390" in {
def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty],
[IntrNoMem]>;
}

View File

@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
SystemZTargetTransformInfo.cpp
SystemZTDC.cpp
)
add_subdirectory(AsmParser)

View File

@ -36,10 +36,6 @@ We don't use the BRANCH ON INDEX instructions.
--
We don't use the TEST DATA CLASS instructions.
--
We only use MVC, XC and CLC for constant-length block operations.
We could extend them to variable-length operations too,
using EXECUTE RELATIVE LONG.

View File

@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
const unsigned CCMASK_VCMP_NONE = CCMASK_3;
const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
// Condition-code mask assignments for Test Data Class.
// CC 0 is reported when the operand's class is not selected by the mask,
// CC 1 when it is; CCMASK_TDC is the set of condition codes TDC can produce.
const unsigned CCMASK_TDC_NOMATCH = CCMASK_0;
const unsigned CCMASK_TDC_MATCH = CCMASK_1;
const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH;
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
@ -94,6 +99,41 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
// Mask assignments for TDC
// Twelve single-bit masks, one per (class, sign) combination.  For every
// class the minus bit is the plus bit shifted right by one, so a mask of
// plus classes can be mirrored onto the minus classes with "Mask >> 1".
const unsigned TDCMASK_ZERO_PLUS = 0x800;
const unsigned TDCMASK_ZERO_MINUS = 0x400;
const unsigned TDCMASK_NORMAL_PLUS = 0x200;
const unsigned TDCMASK_NORMAL_MINUS = 0x100;
const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080;
const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
const unsigned TDCMASK_INFINITY_PLUS = 0x020;
const unsigned TDCMASK_INFINITY_MINUS = 0x010;
const unsigned TDCMASK_QNAN_PLUS = 0x008;
const unsigned TDCMASK_QNAN_MINUS = 0x004;
const unsigned TDCMASK_SNAN_PLUS = 0x002;
const unsigned TDCMASK_SNAN_MINUS = 0x001;
// Both zeros (0xC00).
const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS;
// Values that compare greater than zero: excludes +0 and NaNs (0x2A0).
const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS |
TDCMASK_SUBNORMAL_PLUS |
TDCMASK_INFINITY_PLUS;
// Values that compare less than zero: excludes -0 and NaNs (0x150).
const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS |
TDCMASK_SUBNORMAL_MINUS |
TDCMASK_INFINITY_MINUS;
// Every NaN, quiet or signaling, either sign (0x00F).
const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS |
TDCMASK_QNAN_MINUS |
TDCMASK_SNAN_PLUS |
TDCMASK_SNAN_MINUS;
// Every class with the plus sign, including +0 and plus NaNs (0xAAA).
const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE |
TDCMASK_ZERO_PLUS |
TDCMASK_QNAN_PLUS |
TDCMASK_SNAN_PLUS;
// Every class with the minus sign, including -0 and minus NaNs (0x555).
const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE |
TDCMASK_ZERO_MINUS |
TDCMASK_QNAN_MINUS |
TDCMASK_SNAN_MINUS;
// All twelve class bits (0xFFF).
const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS;
// Number of bits in a vector register.
const unsigned VectorBits = 128;
@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm
#endif

View File

@ -1444,6 +1444,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_tdc:
Opcode = SystemZISD::TDC;
CCValid = SystemZ::CCMASK_TDC;
return true;
default:
return false;
}

View File

@ -0,0 +1,382 @@
//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass looks for instructions that can be replaced by a Test Data Class
// instruction, and replaces them when profitable.
//
// Roughly, the following rules are recognized:
//
// 1: fcmp pred X, 0 -> tdc X, mask
// 2: fcmp pred X, +-inf -> tdc X, mask
// 3: fcmp pred X, +-minnorm -> tdc X, mask
// 4: tdc (fabs X), mask -> tdc X, newmask
// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [i.e. signbit]
// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)), 0 -> tdc X, mask/~mask
// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
//
// The pass works in 4 steps:
//
// 1. All fcmp and icmp instructions in a function are checked for a match
// with rules 1-3 and 5-7. Their TDC equivalents are stored in
// the ConvertedInsts mapping. If the operand of a fcmp instruction is
// a fabs, it's also folded according to rule 4.
// 2. All and/or/xor i1 instructions both of whose operands have already
// been mapped are themselves mapped according to rules 8-10.
// LogicOpsWorklist holds the instructions that still need checking.
// 3. All mapped instructions that are considered worthy of conversion (i.e.
// replacing them will actually simplify the final code) are replaced
// with a call to the s390.tdc intrinsic.
// 4. All intermediate results of replaced instructions are removed if unused.
//
// Instructions that match rules 1-3 are considered unworthy of conversion
// on their own (since a comparison instruction is superior), but are mapped
// in the hopes of folding the result using rules 4 and 8-10 (likely removing
// the original comparison in the process).
//
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include <deque>
#include <set>
using namespace llvm;
namespace llvm {
void initializeSystemZTDCPassPass(PassRegistry&);
}
namespace {
// Function pass that rewrites comparisons expressible as a floating-point
// class test into calls to the llvm.s390.tdc intrinsic.
class SystemZTDCPass : public FunctionPass {
public:
  static char ID;

  SystemZTDCPass() : FunctionPass(ID) {
    initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  // Every instruction found to be equivalent to a TDC, in discovery order.
  // The mapped value is the triple (value to test, class mask, worthy flag).
  MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;

  // Pending and/or/xor i1 instructions that may be foldable.  runOnFunction
  // pops entries from the back.
  std::vector<BinaryOperator *> LogicOpsWorklist;

  // Helper instructions (fabs, bitcast, pre-existing tdc calls) seen through
  // while matching; they are deleted at the end if nothing uses them anymore.
  std::set<Instruction *> PossibleJunk;

  // Attempts to map an fcmp instruction to a TDC.
  void convertFCmp(CmpInst &I);

  // Attempts to map an icmp instruction to a TDC.
  void convertICmp(CmpInst &I);

  // Attempts to map an i1 and/or/xor whose two operands are both already
  // present in ConvertedInsts.
  void convertLogicOp(BinaryOperator &I);

  // Records I as equivalent to "tdc V, Mask" and schedules every i1
  // and/or/xor user of I for a later folding attempt.
  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
    ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
    for (auto *U : I->users()) {
      auto *Logic = dyn_cast<BinaryOperator>(U);
      // Only scalar i1 binary operators are of interest.
      if (!Logic || !Logic->getType()->isIntegerTy(1))
        continue;
      switch (Logic->getOpcode()) {
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        LogicOpsWorklist.push_back(Logic);
        break;
      default:
        break;
      }
    }
  }
};
} // end anonymous namespace
// Storage for the pass identifier that the constructor hands to FunctionPass.
char SystemZTDCPass::ID = 0;
// Defines initializeSystemZTDCPassPass() and registers the pass under the
// argument name "systemz-tdc" with the given description.
INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
"SystemZ Test Data Class optimization", false, false)
// Factory used by the SystemZ target's pass configuration; returns a newly
// allocated pass instance.
FunctionPass *llvm::createSystemZTDCPass() {
return new SystemZTDCPass();
}
// Tries to convert a fcmp instruction (rules 1-3 of the file header), folding
// a fabs on the compared value into the mask (rule 4).  On success the mapping
// is recorded through converted(); otherwise the instruction is left alone.
void SystemZTDCPass::convertFCmp(CmpInst &I) {
  Value *Op0 = I.getOperand(0);
  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
  auto Pred = I.getPredicate();
  // Only comparisons with consts are interesting.
  if (!Const)
    return;
  // The tdc intrinsic is only provided for f32, f64 and f128.  Refuse every
  // other scalar FP type, the same way convertICmp restricts the source type
  // of the bitcast, so that no unselectable intrinsic call is ever created.
  Type *Ty = Op0->getType();
  if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isFP128Ty())
    return;
  // Compute the smallest normal number (and its negation).
  auto &Sem = Ty->getFltSemantics();
  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
  APFloat NegSmallest = Smallest;
  NegSmallest.changeSign();
  // Check if Const is one of our recognized consts.  WhichConst is the row
  // index into Masks below: 0 = zero, 1 = +inf, 2 = -inf, 3 = +minnorm,
  // 4 = -minnorm.
  int WhichConst;
  if (Const->isZero()) {
    // All comparisons with 0 can be converted.
    WhichConst = 0;
  } else if (Const->isInfinity()) {
    // Likewise for infinities.
    WhichConst = Const->isNegative() ? 2 : 1;
  } else if (Const->isExactlyValue(Smallest)) {
    // For Smallest, we cannot do EQ separately from GT: the "equal" and
    // "greater" predicate bits must be both set or both clear.
    if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
        (Pred & CmpInst::FCMP_OGE) != 0)
      return;
    WhichConst = 3;
  } else if (Const->isExactlyValue(NegSmallest)) {
    // Likewise for NegSmallest, we cannot do EQ separately from LT.
    if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
        (Pred & CmpInst::FCMP_OLE) != 0)
      return;
    WhichConst = 4;
  } else {
    // Not one of our special constants.
    return;
  }
  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
  static const int Masks[][4] = {
    { // 0
      SystemZ::TDCMASK_ZERO,              // eq
      SystemZ::TDCMASK_POSITIVE,          // gt
      SystemZ::TDCMASK_NEGATIVE,          // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // inf
      SystemZ::TDCMASK_INFINITY_PLUS,     // eq
      0,                                  // gt
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -inf
      SystemZ::TDCMASK_INFINITY_MINUS,    // eq
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      0,                                  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      (SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
      SystemZ::TDCMASK_NAN,               // un
    }
  };
  // Construct the mask as a combination of the partial masks.  Each of the
  // four tests below picks out one component (equal, greater, less,
  // unordered) of the predicate.
  int Mask = 0;
  if (Pred & CmpInst::FCMP_OEQ)
    Mask |= Masks[WhichConst][0];
  if (Pred & CmpInst::FCMP_OGT)
    Mask |= Masks[WhichConst][1];
  if (Pred & CmpInst::FCMP_OLT)
    Mask |= Masks[WhichConst][2];
  if (Pred & CmpInst::FCMP_UNO)
    Mask |= Masks[WhichConst][3];
  // A lone fcmp is unworthy of tdc conversion on its own, but may become
  // worthy if combined with fabs.
  bool Worthy = false;
  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
    Function *F = CI->getCalledFunction();
    if (F && F->getIntrinsicID() == Intrinsic::fabs) {
      // Fold with fabs - adjust the mask appropriately.  Only the plus
      // classes can match the fabs result; each of them is then mirrored
      // onto its minus counterpart, which sits one bit lower in the mask.
      Mask &= SystemZ::TDCMASK_PLUS;
      Mask |= Mask >> 1;
      Op0 = CI->getArgOperand(0);
      // A combination of fcmp with fabs is a win, unless the constant
      // involved is 0 (which is handled by later passes).
      Worthy = WhichConst != 0;
      PossibleJunk.insert(CI);
    }
  }
  converted(&I, Op0, Mask, Worthy);
}
// Tries to map an icmp to a TDC.  Two shapes are recognized: a sign-bit test
// on the integer image of a float (rules 5 and 6 of the file header) and a
// comparison of an existing tdc intrinsic call against zero (rule 7).
void SystemZTDCPass::convertICmp(CmpInst &I) {
  // Every rule handled here compares against an integer constant.
  auto *RHS = dyn_cast<ConstantInt>(I.getOperand(1));
  if (!RHS)
    return;
  Value *LHS = I.getOperand(0);
  auto Pred = I.getPredicate();

  if (auto *Cast = dyn_cast<BitCastInst>(LHS)) {
    // Sign-bit extraction: only bitcasts from f32, f64 or f128 qualify.
    Type *SrcTy = Cast->getSrcTy();
    bool IsTDCType =
        SrcTy->isFloatTy() || SrcTy->isDoubleTy() || SrcTy->isFP128Ty();
    if (!IsTDCType)
      return;
    int Mask;
    if (Pred == CmpInst::ICMP_SLT && RHS->isZero())
      Mask = SystemZ::TDCMASK_MINUS; // "< 0": sign bit is set.
    else if (Pred == CmpInst::ICMP_SGT && RHS->isMinusOne())
      Mask = SystemZ::TDCMASK_PLUS;  // "> -1": sign bit is clear.
    else
      return;                        // Some other comparison; not ours.
    PossibleJunk.insert(Cast);
    converted(&I, Cast->getOperand(0), Mask, true);
    return;
  }

  // Otherwise the left-hand side has to be a direct call to the tdc
  // intrinsic, compared against zero, with a constant mask.
  auto *Call = dyn_cast<CallInst>(LHS);
  if (!Call)
    return;
  Function *Callee = Call->getCalledFunction();
  if (!Callee || Callee->getIntrinsicID() != Intrinsic::s390_tdc)
    return;
  if (!RHS->isZero())
    return;
  auto *MaskC = dyn_cast<ConstantInt>(Call->getArgOperand(1));
  if (!MaskC)
    return;

  // Only the defined class bits take part in later mask arithmetic.
  int Mask = MaskC->getZExtValue();
  Mask &= SystemZ::TDCMASK_ALL;
  if (Pred == CmpInst::ICMP_EQ)
    Mask ^= SystemZ::TDCMASK_ALL; // "== 0" is the complementary class test.
  else if (Pred != CmpInst::ICMP_NE)
    return;                       // Neither eq nor ne - leave it alone.

  PossibleJunk.insert(Call);
  // Re-expressing an existing tdc call gains nothing by itself, so the entry
  // is recorded as unworthy; it exists to feed the and/or/xor folding.
  converted(&I, Call->getArgOperand(0), Mask, false);
}
// Folds an i1 and/or/xor of two already-mapped instructions into one TDC
// (rules 8-10 of the file header).  Nothing happens unless both operands test
// the very same value.
void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
  // runOnFunction only calls this once both operands are present in
  // ConvertedInsts; take copies of their entries.
  const auto LHS = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
  const auto RHS = ConvertedInsts[cast<Instruction>(I.getOperand(1))];

  Value *Tested = std::get<0>(LHS);
  if (Tested != std::get<0>(RHS))
    return;

  const int MaskL = std::get<1>(LHS);
  const int MaskR = std::get<1>(RHS);
  int Combined;
  switch (I.getOpcode()) {
  case Instruction::And:
    Combined = MaskL & MaskR;
    break;
  case Instruction::Or:
    Combined = MaskL | MaskR;
    break;
  case Instruction::Xor:
    Combined = MaskL ^ MaskR;
    break;
  default:
    llvm_unreachable("Unknown op in convertLogicOp");
  }
  // A folded logic operation is always recorded as worthy, whatever the
  // worthiness of its operands.
  converted(&I, Tested, Combined, true);
}
// Runs the four steps described in the file header on F.  Returns true if
// any instruction was replaced or erased.
bool SystemZTDCPass::runOnFunction(Function &F) {
  // The containers are members, so drop whatever an earlier run left behind.
  ConvertedInsts.clear();
  LogicOpsWorklist.clear();
  PossibleJunk.clear();

  // Step 1: map every fcmp and icmp that has a TDC equivalent.
  for (auto &Inst : instructions(F)) {
    switch (Inst.getOpcode()) {
    case Instruction::FCmp:
      convertFCmp(cast<CmpInst>(Inst));
      break;
    case Instruction::ICmp:
      convertICmp(cast<CmpInst>(Inst));
      break;
    default:
      break;
    }
  }

  // Nothing matched, so nothing can be folded either.
  if (ConvertedInsts.empty())
    return false;

  // Step 2: drain the worklist of and/or/xor candidates.  Folding one of
  // them may push its own logic users, so keep going until it is empty.
  while (!LogicOpsWorklist.empty()) {
    BinaryOperator *Logic = LogicOpsWorklist.back();
    LogicOpsWorklist.pop_back();
    auto *LHS = dyn_cast<Instruction>(Logic->getOperand(0));
    auto *RHS = dyn_cast<Instruction>(Logic->getOperand(1));
    // Fold only when both operands are mapped and the op itself is not yet.
    bool Ready = ConvertedInsts.count(LHS) && ConvertedInsts.count(RHS) &&
                 !ConvertedInsts.count(Logic);
    if (Ready)
      convertLogicOp(*Logic);
  }

  // Step 3: perform the replacements, newest mapping first.  Earlier entries
  // have often been folded into later ones and become unused along the way.
  Module &M = *F.getParent();
  auto &Ctx = M.getContext();
  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  bool MadeChange = false;
  for (auto &Entry : reverse(ConvertedInsts)) {
    Instruction *Old = Entry.first;
    Value *Tested = std::get<0>(Entry.second);
    const int Mask = std::get<1>(Entry.second);
    const bool Worthy = std::get<2>(Entry.second);

    const bool StillUsed = !Old->user_empty();
    // A used but unworthy instruction stays exactly as it is.
    if (StillUsed && !Worthy)
      continue;

    if (StillUsed) {
      // Emit "tdc(Tested, Mask) != 0" right before the old instruction and
      // redirect all of its users to the new comparison.
      Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
                                                 Tested->getType());
      IRBuilder<> IRB(Old);
      Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
      Instruction *TDC = IRB.CreateCall(TDCFunc, {Tested, MaskVal});
      Value *NewCmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
      Old->replaceAllUsesWith(NewCmp);
    }

    // Whether it was unused all along or has just been replaced, it goes.
    Old->eraseFromParent();
    MadeChange = true;
  }

  if (!MadeChange)
    return false;

  // Step 4: remove the helper instructions (fabs, bitcast, old tdc calls)
  // that no longer have any user.
  for (auto *Junk : PossibleJunk)
    if (Junk->user_empty())
      Junk->eraseFromParent();
  return true;
}

View File

@ -122,6 +122,9 @@ public:
} // end anonymous namespace
// Schedules the SystemZ-specific IR pass ahead of the common IR passes.
void SystemZPassConfig::addIRPasses() {
  // The TDC rewrite is only scheduled when optimization is enabled.
  const bool Optimizing = getOptLevel() != CodeGenOpt::None;
  if (Optimizing)
    addPass(createSystemZTDCPass());
  TargetPassConfig::addIRPasses();
}

View File

@ -0,0 +1,95 @@
; Test the Test Data Class instruction, selected manually via the intrinsic.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i32 @llvm.s390.tdc.f32(float, i64)
declare i32 @llvm.s390.tdc.f64(double, i64)
declare i32 @llvm.s390.tdc.f128(fp128, i64)
; Check using as i32 - f32
define i32 @f1(float %x) {
; CHECK-LABEL: f1
; CHECK: tceb %f0, 123
; CHECK: ipm %r2
; CHECK: srl %r2, 28
%res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
ret i32 %res
}
; Check using as i32 - f64
define i32 @f2(double %x) {
; CHECK-LABEL: f2
; CHECK: tcdb %f0, 123
; CHECK: ipm %r2
; CHECK: srl %r2, 28
%res = call i32 @llvm.s390.tdc.f64(double %x, i64 123)
ret i32 %res
}
; Check using as i32 - f128
define i32 @f3(fp128 %x) {
; CHECK-LABEL: f3
; CHECK: ld %f0, 0(%r2)
; CHECK: ld %f2, 8(%r2)
; CHECK: tcxb %f0, 123
; CHECK: ipm %r2
; CHECK: srl %r2, 28
%res = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 123)
ret i32 %res
}
declare void @g()
; Check branch
define void @f4(float %x) {
; CHECK-LABEL: f4
; CHECK: tceb %f0, 123
; CHECK: jgl g
; CHECK: br %r14
%res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
%cond = icmp ne i32 %res, 0
br i1 %cond, label %call, label %exit
call:
tail call void @g()
br label %exit
exit:
ret void
}
; Check branch negated
define void @f5(float %x) {
; CHECK-LABEL: f5
; CHECK: tceb %f0, 123
; CHECK: jge g
; CHECK: br %r14
%res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
%cond = icmp eq i32 %res, 0
br i1 %cond, label %call, label %exit
call:
tail call void @g()
br label %exit
exit:
ret void
}
; Check non-const mask
define void @f6(float %x, i64 %y) {
; CHECK-LABEL: f6
; CHECK: tceb %f0, 0(%r2)
; CHECK: jge g
; CHECK: br %r14
%res = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
%cond = icmp eq i32 %res, 0
br i1 %cond, label %call, label %exit
call:
tail call void @g()
br label %exit
exit:
ret void
}

View File

@ -0,0 +1,96 @@
; Test the Test Data Class instruction logic operation folding.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i32 @llvm.s390.tdc.f32(float, i64)
declare i32 @llvm.s390.tdc.f64(double, i64)
declare i32 @llvm.s390.tdc.f128(fp128, i64)
; Check using or i1
; Both calls test %x, so the two masks are merged: 3 | 6 = 7.
define i32 @f1(float %x) {
; CHECK-LABEL: f1
; CHECK: tceb %f0, 7
; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
%a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
%b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp ne i32 %b, 0
%res = or i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Check using and i1
; Both calls test %x, so the two masks are intersected: 3 & 6 = 2.
define i32 @f2(double %x) {
; CHECK-LABEL: f2
; CHECK: tcdb %f0, 2
; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
%a = call i32 @llvm.s390.tdc.f64(double %x, i64 3)
%b = call i32 @llvm.s390.tdc.f64(double %x, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp ne i32 %b, 0
%res = and i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Check using xor i1
; Both calls test %x, so the two masks are xor-ed: 3 ^ 6 = 5.
define i32 @f3(fp128 %x) {
; CHECK-LABEL: f3
; CHECK: tcxb %f0, 5
; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
%a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
%b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp ne i32 %b, 0
%res = xor i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Check using xor i1 - negated test
; The "icmp eq" inverts mask 6 within the 12 class bits (0xFFF ^ 6 = 0xFF9);
; xor with mask 3 then gives 0xFFA = 4090.
define i32 @f4(fp128 %x) {
; CHECK-LABEL: f4
; CHECK: tcxb %f0, 4090
; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
%a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
%b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp eq i32 %b, 0
%res = xor i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Check different first args
define i32 @f5(float %x, float %y) {
; CHECK-LABEL: f5
; CHECK-NOT: tceb {{%f[0-9]+}}, 5
; CHECK-DAG: tceb %f0, 3
; CHECK-DAG: tceb %f2, 6
%a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
%b = call i32 @llvm.s390.tdc.f32(float %y, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp ne i32 %b, 0
%res = xor i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Non-const mask (not supported)
define i32 @f6(float %x, i64 %y) {
; CHECK-LABEL: f6
; CHECK-DAG: tceb %f0, 0(%r2)
; CHECK-DAG: tceb %f0, 6
%a = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
%b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
%a1 = icmp ne i32 %a, 0
%b1 = icmp ne i32 %b, 0
%res = xor i1 %a1, %b1
%xres = zext i1 %res to i32
ret i32 %xres
}

View File

@ -0,0 +1,139 @@
; Test the Test Data Class instruction logic operation conversion from
; compares.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
; Compare with 0 (unworthy)
define i32 @f1(float %x) {
; CHECK-LABEL: f1
; CHECK-NOT: tceb
; CHECK: ltebr {{%f[0-9]+}}, %f0
; CHECK-NOT: tceb
%res = fcmp ugt float %x, 0.0
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with 0 (unworthy)
define i32 @f2(float %x) {
; CHECK-LABEL: f2
; CHECK-NOT: tceb
; CHECK: lpebr {{%f[0-9]+}}, %f0
; CHECK-NOT: tceb
%y = call float @llvm.fabs.f32(float %x)
%res = fcmp ugt float %y, 0.0
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare with inf (unworthy)
define i32 @f3(float %x) {
; CHECK-LABEL: f3
; CHECK-NOT: tceb
; CHECK: ceb %f0, 0(%r{{[0-9]+}})
; CHECK-NOT: tceb
%res = fcmp ult float %x, 0x7ff0000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with inf
; fabs(x) ult +inf holds for everything except the two infinities, NaNs
; included: 0xFFF without 0x030 = 0xFCF = 4047.
define i32 @f4(float %x) {
; CHECK-LABEL: f4
; CHECK: tceb %f0, 4047
%y = call float @llvm.fabs.f32(float %x)
%res = fcmp ult float %y, 0x7ff0000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare with minnorm (unworthy)
define i32 @f5(float %x) {
; CHECK-LABEL: f5
; CHECK-NOT: tceb
; CHECK: ceb %f0, 0(%r{{[0-9]+}})
; CHECK-NOT: tceb
%res = fcmp ult float %x, 0x3810000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with minnorm
; fabs(x) ult minnorm selects zeros (0xC00), subnormals (0x0C0) and NaNs
; (0x00F) of either sign: 0xCCF = 3279.
define i32 @f6(float %x) {
; CHECK-LABEL: f6
; CHECK: tceb %f0, 3279
%y = call float @llvm.fabs.f32(float %x)
%res = fcmp ult float %y, 0x3810000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with minnorm, unsupported condition
; ugt would have to tell "equal to minnorm" apart from "greater than
; minnorm"; both fall into the normal class, so no class mask can express it
; and the pass leaves the compare alone.
define i32 @f7(float %x) {
; CHECK-LABEL: f7
; CHECK-NOT: tceb
; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
; CHECK-NOT: tceb
%y = call float @llvm.fabs.f32(float %x)
%res = fcmp ugt float %y, 0x3810000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with unsupported constant
define i32 @f8(float %x) {
; CHECK-LABEL: f8
; CHECK-NOT: tceb
; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
; CHECK-NOT: tceb
%y = call float @llvm.fabs.f32(float %x)
%res = fcmp ult float %y, 0x3ff0000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with minnorm - double
define i32 @f9(double %x) {
; CHECK-LABEL: f9
; CHECK: tcdb %f0, 3279
%y = call double @llvm.fabs.f64(double %x)
%res = fcmp ult double %y, 0x0010000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs with minnorm - long double
define i32 @f10(fp128 %x) {
; CHECK-LABEL: f10
; CHECK: tcxb %f0, 3279
%y = call fp128 @llvm.fabs.f128(fp128 %x)
%res = fcmp ult fp128 %y, 0xL00000000000000000001000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs for one with inf - clang's isfinite
; "one" is ordered and not equal, so the result is true for zeros, normals
; and subnormals of either sign: 0xC00 | 0x300 | 0x0C0 = 0xFC0 = 4032.
define i32 @f11(double %x) {
; CHECK-LABEL: f11
; CHECK: tcdb %f0, 4032
%y = call double @llvm.fabs.f64(double %x)
%res = fcmp one double %y, 0x7ff0000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare fabs for oeq with inf - clang's isinf
; Expected mask is both infinities: 0x020 | 0x010 = 0x030 = 48.
define i32 @f12(double %x) {
; CHECK-LABEL: f12
; CHECK: tcdb %f0, 48
%y = call double @llvm.fabs.f64(double %x)
%res = fcmp oeq double %y, 0x7ff0000000000000
%xres = zext i1 %res to i32
ret i32 %xres
}

View File

@ -0,0 +1,85 @@
; Test the Test Data Class instruction logic operation conversion from
; signbit extraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
;
; Extract sign bit.
; "slt 0" on the integer image is true when the sign bit is set; the expected
; mask is every minus class: 0x555 = 1365.
define i32 @f1(float %x) {
; CHECK-LABEL: f1
; CHECK: tceb %f0, 1365
%cast = bitcast float %x to i32
%res = icmp slt i32 %cast, 0
%xres = zext i1 %res to i32
ret i32 %xres
}
; Extract negated sign bit.
; "sgt -1" on the integer image is true when the sign bit is clear; the
; expected mask is every plus class: 0xAAA = 2730.
define i32 @f2(float %x) {
; CHECK-LABEL: f2
; CHECK: tceb %f0, 2730
%cast = bitcast float %x to i32
%res = icmp sgt i32 %cast, -1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Extract sign bit.
define i32 @f3(double %x) {
; CHECK-LABEL: f3
; CHECK: tcdb %f0, 1365
%cast = bitcast double %x to i64
%res = icmp slt i64 %cast, 0
%xres = zext i1 %res to i32
ret i32 %xres
}
; Extract negated sign bit.
define i32 @f4(double %x) {
; CHECK-LABEL: f4
; CHECK: tcdb %f0, 2730
%cast = bitcast double %x to i64
%res = icmp sgt i64 %cast, -1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Extract sign bit.
define i32 @f5(fp128 %x) {
; CHECK-LABEL: f5
; CHECK: tcxb %f0, 1365
%cast = bitcast fp128 %x to i128
%res = icmp slt i128 %cast, 0
%xres = zext i1 %res to i32
ret i32 %xres
}
; Extract negated sign bit.
define i32 @f6(fp128 %x) {
; CHECK-LABEL: f6
; CHECK: tcxb %f0, 2730
%cast = bitcast fp128 %x to i128
%res = icmp sgt i128 %cast, -1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Wrong const.
; "slt" is only recognized against 0; against -1 it is not a sign-bit test,
; so no TDC instruction may be emitted.
define i32 @f7(float %x) {
; CHECK-LABEL: f7
; CHECK-NOT: tceb
%cast = bitcast float %x to i32
%res = icmp slt i32 %cast, -1
%xres = zext i1 %res to i32
ret i32 %xres
}
; Wrong pred.
; Only slt/sgt on a bitcast are recognized; an equality compare is not a
; sign-bit test, so no TDC instruction may be emitted.
define i32 @f8(float %x) {
; CHECK-LABEL: f8
; CHECK-NOT: tceb
%cast = bitcast float %x to i32
%res = icmp eq i32 %cast, 0
%xres = zext i1 %res to i32
ret i32 %xres
}

View File

@ -0,0 +1,97 @@
; Test the Test Data Class instruction logic operation conversion from
; compares, combined with signbit or other compares to ensure worthiness.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
;
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
; Compare with 0, extract sign bit
; sign bit set (0x555) or "ugt 0" (positive 0x2A0 | NaN 0x00F = 0x2AF):
; 0x555 | 0x2AF = 0x7FF = 2047, i.e. everything except +0.
define i32 @f1(float %x) {
; CHECK-LABEL: f1
; CHECK: tceb %f0, 2047
%cast = bitcast float %x to i32
%sign = icmp slt i32 %cast, 0
%fcmp = fcmp ugt float %x, 0.0
%res = or i1 %sign, %fcmp
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare with inf, extract negated sign bit
; sign bit clear (0xAAA) and "ult +inf" (all classes but +inf, 0xFDF):
; 0xAAA & 0xFDF = 0xA8A = 2698, i.e. every plus class except +inf.
define i32 @f2(float %x) {
; CHECK-LABEL: f2
; CHECK: tceb %f0, 2698
%cast = bitcast float %x to i32
%sign = icmp sgt i32 %cast, -1
%fcmp = fcmp ult float %x, 0x7ff0000000000000
%res = and i1 %sign, %fcmp
%xres = zext i1 %res to i32
ret i32 %xres
}
; Compare with minnorm, extract negated sign bit
; sign bit clear (0xAAA) and "olt minnorm" (zeros, negatives and +subnormal,
; 0xDD0): 0xAAA & 0xDD0 = 0x880 = 2176, i.e. +0 and +subnormal.
define i32 @f3(float %x) {
; CHECK-LABEL: f3
; CHECK: tceb %f0, 2176
%cast = bitcast float %x to i32
%sign = icmp sgt i32 %cast, -1
%fcmp = fcmp olt float %x, 0x3810000000000000
%res = and i1 %sign, %fcmp
%xres = zext i1 %res to i32
ret i32 %xres
}
; Test float isnormal, from clang.
; ord (0xFF0) & fabs ult inf (0xFCF) & fabs uge minnorm (0x33F)
; = 0x300 = 768, i.e. normal numbers of either sign.
define i32 @f4(float %x) {
; CHECK-LABEL: f4
; CHECK: tceb %f0, 768
%y = call float @llvm.fabs.f32(float %x)
%ord = fcmp ord float %x, 0.0
%a = fcmp ult float %y, 0x7ff0000000000000
%b = fcmp uge float %y, 0x3810000000000000
%c = and i1 %a, %b
%res = and i1 %ord, %c
%xres = zext i1 %res to i32
ret i32 %xres
}
; Check for negative 0.
; sign bit set (0x555) and "oeq 0" (both zeros, 0xC00):
; 0x555 & 0xC00 = 0x400 = 1024, i.e. exactly -0.
define i32 @f5(float %x) {
; CHECK-LABEL: f5
; CHECK: tceb %f0, 1024
%cast = bitcast float %x to i32
%sign = icmp slt i32 %cast, 0
%fcmp = fcmp oeq float %x, 0.0
%res = and i1 %sign, %fcmp
%xres = zext i1 %res to i32
ret i32 %xres
}
; Test isnormal, from clang.
; Same three conditions as the float isnormal test, on double and with the
; "and" operations associated differently; the expected mask is again
; 0x300 = 768 (normal numbers of either sign).
define i32 @f6(double %x) {
; CHECK-LABEL: f6
; CHECK: tcdb %f0, 768
%y = call double @llvm.fabs.f64(double %x)
%ord = fcmp ord double %x, 0.0
%a = fcmp ult double %y, 0x7ff0000000000000
%b = fcmp uge double %y, 0x0010000000000000
%c = and i1 %ord, %a
%res = and i1 %b, %c
%xres = zext i1 %res to i32
ret i32 %xres
}
; Test isinf || isnan, from clang.
; fabs oeq inf (both infinities, 0x030) or uno (all NaNs, 0x00F):
; 0x030 | 0x00F = 0x03F = 63.
define i32 @f7(double %x) {
; CHECK-LABEL: f7
; CHECK: tcdb %f0, 63
%y = call double @llvm.fabs.f64(double %x)
%a = fcmp oeq double %y, 0x7ff0000000000000
%b = fcmp uno double %x, 0.0
%res = or i1 %a, %b
%xres = zext i1 %res to i32
ret i32 %xres
}

View File

@ -0,0 +1,48 @@
; Test the Test Data Class instruction, as used by fpclassify.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
;
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
; fpclassify-style cascade.  The function returns 5 when %x compares equal to
; zero, 1 when it is a NaN, 2 when it is an infinity, and otherwise 3 or 4
; depending on whether fabs(%x) is at least the smallest normal double.
; The two plain compares against 0.0 are expected to stay ordinary compare
; instructions; only the two fabs-based compares are expected to become tcdb.
define i32 @fpc(double %x) {
entry:
; CHECK-LABEL: fpc
; CHECK: lhi %r2, 5
; CHECK: ltdbr %f0, %f0
; CHECK: je [[RET:.L.*]]
%testeq = fcmp oeq double %x, 0.000000e+00
br i1 %testeq, label %ret, label %nonzero
nonzero:
; CHECK: lhi %r2, 1
; CHECK: cdbr %f0, %f0
; CHECK: jo [[RET]]
%testnan = fcmp uno double %x, 0.000000e+00
br i1 %testnan, label %ret, label %nonzeroord
nonzeroord:
; Mask 48 = 0x030: either infinity.
; CHECK: lhi %r2, 2
; CHECK: tcdb %f0, 48
; CHECK: jl [[RET]]
%abs = tail call double @llvm.fabs.f64(double %x)
%testinf = fcmp oeq double %abs, 0x7FF0000000000000
br i1 %testinf, label %ret, label %finite
finite:
; Mask 831 = 0x33F: normals (0x300), infinities (0x030) and NaNs (0x00F),
; which is what "fabs uge minnorm" selects.
; CHECK: lhi %r2, 3
; CHECK: tcdb %f0, 831
; CHECK: blr %r14
; CHECK: lhi %r2, 4
%testnormal = fcmp uge double %abs, 0x10000000000000
%finres = select i1 %testnormal, i32 3, i32 4
br label %ret
ret:
; CHECK: [[RET]]:
; CHECK: br %r14
%res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
ret i32 %res
}