2005-01-07 15:44:53 +08:00
|
|
|
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
|
2005-04-22 06:55:34 +08:00
|
|
|
//
|
2005-01-07 15:44:53 +08:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-30 04:36:04 +08:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-04-22 06:55:34 +08:00
|
|
|
//
|
2005-01-07 15:44:53 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This implements the TargetLowering class.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Target/TargetLowering.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/BitVector.h"
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2016-04-14 09:10:42 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
2008-05-13 03:56:52 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2010-01-26 14:28:43 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
2016-04-14 09:10:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2005-01-07 15:44:53 +08:00
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
2014-01-06 08:43:20 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
|
|
#include "llvm/MC/MCExpr.h"
|
2009-07-12 04:10:48 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2006-01-30 12:09:27 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Target/TargetLoweringObjectFile.h"
|
|
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
#include "llvm/Target/TargetRegisterInfo.h"
|
2014-08-05 05:25:23 +08:00
|
|
|
#include "llvm/Target/TargetSubtargetInfo.h"
|
2010-12-20 04:43:38 +08:00
|
|
|
#include <cctype>
|
2005-01-07 15:44:53 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-11-14 05:29:21 +08:00
|
|
|
/// NOTE: The TargetMachine owns TLOF.
|
|
|
|
TargetLowering::TargetLowering(const TargetMachine &tm)
|
|
|
|
: TargetLoweringBase(tm) {}
|
2005-01-16 15:28:11 +08:00
|
|
|
|
2005-12-20 14:22:03 +08:00
|
|
|
/// Default implementation of the node-name query: \p Opcode is ignored and a
/// null pointer is always returned.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
|
2005-12-22 07:05:39 +08:00
|
|
|
|
2016-06-27 06:13:55 +08:00
|
|
|
bool TargetLowering::isPositionIndependent() const {
|
2016-06-29 04:13:36 +08:00
|
|
|
return getTargetMachine().isPositionIndependent();
|
2016-06-27 06:13:55 +08:00
|
|
|
}
|
|
|
|
|
2013-01-09 21:18:15 +08:00
|
|
|
/// Check whether a given call node is in tail position within its function. If
|
|
|
|
/// so, it sets Chain to the input chain of the tail call.
|
|
|
|
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
|
|
|
|
SDValue &Chain) const {
|
|
|
|
const Function *F = DAG.getMachineFunction().getFunction();
|
|
|
|
|
|
|
|
// Conservatively require the attributes of the call to match those of
|
|
|
|
// the return. Ignore noalias because it doesn't affect the call sequence.
|
2013-01-19 05:53:16 +08:00
|
|
|
AttributeSet CallerAttrs = F->getAttributes();
|
|
|
|
if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
|
2013-01-09 21:18:15 +08:00
|
|
|
.removeAttribute(Attribute::NoAlias).hasAttributes())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// It's not safe to eliminate the sign / zero extension of the return value.
|
2013-01-19 05:53:16 +08:00
|
|
|
if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
|
|
|
|
CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
|
2013-01-09 21:18:15 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check if the only use is a function return node.
|
|
|
|
return isUsedByReturnOnly(Node, Chain);
|
|
|
|
}
|
|
|
|
|
2016-04-14 09:10:42 +08:00
|
|
|
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
|
|
|
|
const uint32_t *CallerPreservedMask,
|
|
|
|
const SmallVectorImpl<CCValAssign> &ArgLocs,
|
|
|
|
const SmallVectorImpl<SDValue> &OutVals) const {
|
|
|
|
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
|
|
|
|
const CCValAssign &ArgLoc = ArgLocs[I];
|
|
|
|
if (!ArgLoc.isRegLoc())
|
|
|
|
continue;
|
|
|
|
unsigned Reg = ArgLoc.getLocReg();
|
|
|
|
// Only look at callee saved registers.
|
|
|
|
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
|
|
|
|
continue;
|
|
|
|
// Check that we pass the value used for the caller.
|
|
|
|
// (We look for a CopyFromReg reading a virtual register that is used
|
|
|
|
// for the function live-in value of register Reg)
|
|
|
|
SDValue Value = OutVals[I];
|
|
|
|
if (Value->getOpcode() != ISD::CopyFromReg)
|
|
|
|
return false;
|
|
|
|
unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
|
|
|
|
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-11-01 01:18:24 +08:00
|
|
|
/// \brief Set CallLoweringInfo attribute flags based on a call instruction
|
|
|
|
/// and called function attributes.
|
|
|
|
void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
|
|
|
|
unsigned AttrIdx) {
|
|
|
|
isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
|
|
|
|
isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
|
|
|
|
isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
|
|
|
|
isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
|
|
|
|
isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
|
|
|
|
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
|
2014-02-01 07:50:57 +08:00
|
|
|
isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
|
2013-11-01 01:18:24 +08:00
|
|
|
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
|
2016-03-30 01:37:21 +08:00
|
|
|
isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
|
2016-04-02 05:41:15 +08:00
|
|
|
isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
|
2013-11-01 01:18:24 +08:00
|
|
|
Alignment = CS->getParamAlignment(AttrIdx);
|
|
|
|
}
|
2013-01-09 21:18:15 +08:00
|
|
|
|
|
|
|
/// Generate a libcall taking the given operands as arguments and returning a
|
|
|
|
/// result of type RetVT.
|
2013-08-14 01:54:56 +08:00
|
|
|
std::pair<SDValue, SDValue>
|
2016-06-12 23:39:02 +08:00
|
|
|
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
|
|
|
|
ArrayRef<SDValue> Ops, bool isSigned,
|
|
|
|
const SDLoc &dl, bool doesNotReturn,
|
2013-08-14 01:54:56 +08:00
|
|
|
bool isReturnValueUsed) const {
|
2013-01-09 21:18:15 +08:00
|
|
|
TargetLowering::ArgListTy Args;
|
2015-10-23 01:05:00 +08:00
|
|
|
Args.reserve(Ops.size());
|
2013-01-09 21:18:15 +08:00
|
|
|
|
|
|
|
TargetLowering::ArgListEntry Entry;
|
2015-10-23 01:05:00 +08:00
|
|
|
for (SDValue Op : Ops) {
|
|
|
|
Entry.Node = Op;
|
2013-01-09 21:18:15 +08:00
|
|
|
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
|
2015-10-23 01:05:00 +08:00
|
|
|
Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
|
|
|
|
Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
|
2013-01-09 21:18:15 +08:00
|
|
|
Args.push_back(Entry);
|
|
|
|
}
|
2015-10-25 16:14:05 +08:00
|
|
|
|
2015-03-27 06:46:58 +08:00
|
|
|
if (LC == RTLIB::UNKNOWN_LIBCALL)
|
|
|
|
report_fatal_error("Unsupported library call operation!");
|
2015-07-09 10:09:04 +08:00
|
|
|
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
|
|
|
|
getPointerTy(DAG.getDataLayout()));
|
2013-01-09 21:18:15 +08:00
|
|
|
|
|
|
|
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
|
2014-05-18 05:50:17 +08:00
|
|
|
TargetLowering::CallLoweringInfo CLI(DAG);
|
2015-03-23 20:28:13 +08:00
|
|
|
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
|
2014-05-18 05:50:17 +08:00
|
|
|
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
|
2016-06-22 20:54:25 +08:00
|
|
|
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
|
2014-05-18 05:50:17 +08:00
|
|
|
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
|
2015-03-23 20:28:13 +08:00
|
|
|
.setSExtResult(signExtend).setZExtResult(!signExtend);
|
2013-08-14 01:54:56 +08:00
|
|
|
return LowerCallTo(CLI);
|
2013-01-09 21:18:15 +08:00
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Soften the operands of a comparison. This code is shared among BR_CC,
|
|
|
|
/// SELECT_CC, and SETCC handlers.
|
2013-01-09 21:18:15 +08:00
|
|
|
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
|
|
|
|
SDValue &NewLHS, SDValue &NewRHS,
|
|
|
|
ISD::CondCode &CCCode,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SDLoc &dl) const {
|
2016-02-04 22:43:50 +08:00
|
|
|
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
|
2013-01-09 21:18:15 +08:00
|
|
|
&& "Unsupported setcc type!");
|
|
|
|
|
|
|
|
// Expand into one or more soft-fp libcall(s).
|
|
|
|
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
|
2015-07-15 16:39:35 +08:00
|
|
|
bool ShouldInvertCC = false;
|
2013-01-09 21:18:15 +08:00
|
|
|
switch (CCCode) {
|
|
|
|
case ISD::SETEQ:
|
|
|
|
case ISD::SETOEQ:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETNE:
|
|
|
|
case ISD::SETUNE:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::UNE_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETGE:
|
|
|
|
case ISD::SETOGE:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OGE_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETLT:
|
|
|
|
case ISD::SETOLT:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OLT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETLE:
|
|
|
|
case ISD::SETOLE:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OLE_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETGT:
|
|
|
|
case ISD::SETOGT:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OGT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETUO:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::UO_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETO:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::O_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
2015-07-15 16:39:35 +08:00
|
|
|
case ISD::SETONE:
|
|
|
|
// SETONE = SETOLT | SETOGT
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OLT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
|
2015-07-15 16:39:35 +08:00
|
|
|
LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OGT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
|
2015-07-15 16:39:35 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETUEQ:
|
2013-01-09 21:18:15 +08:00
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::UO_F64 :
|
2016-08-16 05:46:19 +08:00
|
|
|
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
|
2015-07-15 16:39:35 +08:00
|
|
|
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
|
2015-07-15 16:39:35 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// Invert CC for unordered comparisons
|
|
|
|
ShouldInvertCC = true;
|
2013-01-09 21:18:15 +08:00
|
|
|
switch (CCCode) {
|
|
|
|
case ISD::SETULT:
|
2015-07-15 16:39:35 +08:00
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OGE_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETULE:
|
2015-07-15 16:39:35 +08:00
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OGT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
|
2015-07-15 16:39:35 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETUGT:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OLE_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
2015-07-15 16:39:35 +08:00
|
|
|
case ISD::SETUGE:
|
|
|
|
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
|
2016-02-04 22:43:50 +08:00
|
|
|
(VT == MVT::f64) ? RTLIB::OLT_F64 :
|
|
|
|
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
|
2013-01-09 21:18:15 +08:00
|
|
|
break;
|
|
|
|
default: llvm_unreachable("Do not know how to soften this setcc!");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use the target specific return value for comparions lib calls.
|
|
|
|
EVT RetVT = getCmpLibcallReturnType();
|
2015-09-22 19:15:07 +08:00
|
|
|
SDValue Ops[2] = {NewLHS, NewRHS};
|
2015-10-23 01:05:00 +08:00
|
|
|
NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
|
|
|
|
dl).first;
|
2015-04-28 22:05:47 +08:00
|
|
|
NewRHS = DAG.getConstant(0, dl, RetVT);
|
2015-07-15 16:39:35 +08:00
|
|
|
|
2013-01-09 21:18:15 +08:00
|
|
|
CCCode = getCmpLibcallCC(LC1);
|
2015-07-15 16:39:35 +08:00
|
|
|
if (ShouldInvertCC)
|
|
|
|
CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
|
|
|
|
|
2013-01-09 21:18:15 +08:00
|
|
|
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
|
2015-07-09 10:09:04 +08:00
|
|
|
SDValue Tmp = DAG.getNode(
|
|
|
|
ISD::SETCC, dl,
|
|
|
|
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
|
|
|
|
NewLHS, NewRHS, DAG.getCondCode(CCCode));
|
2015-10-23 01:05:00 +08:00
|
|
|
NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
|
|
|
|
dl).first;
|
2015-07-09 10:09:04 +08:00
|
|
|
NewLHS = DAG.getNode(
|
|
|
|
ISD::SETCC, dl,
|
|
|
|
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
|
|
|
|
NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
|
2013-01-09 21:18:15 +08:00
|
|
|
NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
|
|
|
|
NewRHS = SDValue();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Return the entry encoding for a jump table in the current function. The
|
|
|
|
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
|
2010-01-26 07:26:13 +08:00
|
|
|
unsigned TargetLowering::getJumpTableEncoding() const {
|
|
|
|
// In non-pic modes, just use the address of a block.
|
2016-06-27 06:30:06 +08:00
|
|
|
if (!isPositionIndependent())
|
2010-01-26 07:26:13 +08:00
|
|
|
return MachineJumpTableInfo::EK_BlockAddress;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2010-01-26 07:26:13 +08:00
|
|
|
// In PIC mode, if the target supports a GPRel32 directive, use it.
|
2014-04-14 08:51:57 +08:00
|
|
|
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
|
2010-01-26 07:26:13 +08:00
|
|
|
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2010-01-26 07:26:13 +08:00
|
|
|
// Otherwise, use a label difference.
|
|
|
|
return MachineJumpTableInfo::EK_LabelDifference32;
|
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// Return the node jump table entries are relative to.
///
/// For the GP-relative encodings (32 or 64 bit) this is the global offset
/// table node of pointer type; for every other encoding it is \p Table
/// itself.
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  switch (getJumpTableEncoding()) {
  case MachineJumpTableInfo::EK_GPRel64BlockAddress:
  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
    // If our PIC model is GP relative, use the global offset table as the
    // base.
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
  default:
    return Table;
  }
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// This returns the relocation base for the given PIC jumptable, the same as
|
|
|
|
/// getPICJumpTableRelocBase, but as an MCExpr.
|
2010-01-26 13:30:30 +08:00
|
|
|
const MCExpr *
|
2010-01-26 14:28:43 +08:00
|
|
|
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
|
|
|
|
unsigned JTI,MCContext &Ctx) const{
|
2010-01-26 13:58:28 +08:00
|
|
|
// The normal PIC reloc base is the label at the start of the jump table.
|
2015-05-30 09:25:56 +08:00
|
|
|
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
|
2010-01-26 13:30:30 +08:00
|
|
|
}
|
|
|
|
|
Teach DAGCombine to fold constant offsets into GlobalAddress nodes,
and add a TargetLowering hook for it to use to determine when this
is legal (i.e. not in PIC mode, etc.)
This allows instruction selection to emit folded constant offsets
in more cases, such as the included testcase, eliminating the need
for explicit arithmetic instructions.
This eliminates the need for the C++ code in X86ISelDAGToDAG.cpp
that attempted to achieve the same effect, but wasn't as effective.
Also, fix handling of offsets in GlobalAddressSDNodes in several
places, including changing GlobalAddressSDNode's offset from
int to int64_t.
The Mips, Alpha, Sparc, and CellSPU targets appear to be
unaware of GlobalAddress offsets currently, so set the hook to
false on those targets.
llvm-svn: 57748
2008-10-18 10:06:02 +08:00
|
|
|
bool
|
|
|
|
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
2016-06-25 02:48:36 +08:00
|
|
|
const TargetMachine &TM = getTargetMachine();
|
|
|
|
const GlobalValue *GV = GA->getGlobal();
|
Teach DAGCombine to fold constant offsets into GlobalAddress nodes,
and add a TargetLowering hook for it to use to determine when this
is legal (i.e. not in PIC mode, etc.)
This allows instruction selection to emit folded constant offsets
in more cases, such as the included testcase, eliminating the need
for explicit arithmetic instructions.
This eliminates the need for the C++ code in X86ISelDAGToDAG.cpp
that attempted to achieve the same effect, but wasn't as effective.
Also, fix handling of offsets in GlobalAddressSDNodes in several
places, including changing GlobalAddressSDNode's offset from
int to int64_t.
The Mips, Alpha, Sparc, and CellSPU targets appear to be
unaware of GlobalAddress offsets currently, so set the hook to
false on those targets.
llvm-svn: 57748
2008-10-18 10:06:02 +08:00
|
|
|
|
2016-06-25 02:48:36 +08:00
|
|
|
// If the address is not even local to this DSO we will have to load it from
|
|
|
|
// a got and then add the offset.
|
2016-06-28 07:15:57 +08:00
|
|
|
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
|
2016-06-25 02:48:36 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// If the code is position independent we will have to add a base register.
|
2016-06-27 06:38:44 +08:00
|
|
|
if (isPositionIndependent())
|
2016-06-25 02:48:36 +08:00
|
|
|
return false;
|
Teach DAGCombine to fold constant offsets into GlobalAddress nodes,
and add a TargetLowering hook for it to use to determine when this
is legal (i.e. not in PIC mode, etc.)
This allows instruction selection to emit folded constant offsets
in more cases, such as the included testcase, eliminating the need
for explicit arithmetic instructions.
This eliminates the need for the C++ code in X86ISelDAGToDAG.cpp
that attempted to achieve the same effect, but wasn't as effective.
Also, fix handling of offsets in GlobalAddressSDNodes in several
places, including changing GlobalAddressSDNode's offset from
int to int64_t.
The Mips, Alpha, Sparc, and CellSPU targets appear to be
unaware of GlobalAddress offsets currently, so set the hook to
false on those targets.
llvm-svn: 57748
2008-10-18 10:06:02 +08:00
|
|
|
|
2016-06-25 02:48:36 +08:00
|
|
|
// Otherwise we can do it.
|
|
|
|
return true;
|
Teach DAGCombine to fold constant offsets into GlobalAddress nodes,
and add a TargetLowering hook for it to use to determine when this
is legal (i.e. not in PIC mode, etc.)
This allows instruction selection to emit folded constant offsets
in more cases, such as the included testcase, eliminating the need
for explicit arithmetic instructions.
This eliminates the need for the C++ code in X86ISelDAGToDAG.cpp
that attempted to achieve the same effect, but wasn't as effective.
Also, fix handling of offsets in GlobalAddressSDNodes in several
places, including changing GlobalAddressSDNode's offset from
int to int64_t.
The Mips, Alpha, Sparc, and CellSPU targets appear to be
unaware of GlobalAddress offsets currently, so set the hook to
false on those targets.
llvm-svn: 57748
2008-10-18 10:06:02 +08:00
|
|
|
}
|
|
|
|
|
2006-02-04 10:13:02 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Optimization Methods
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Check to see if the specified operand of the specified instruction is a
|
|
|
|
/// constant integer. If so, check to see if there are any bits set in the
|
|
|
|
/// constant that are not demanded. If so, shrink the constant and return true.
|
2010-11-23 11:31:01 +08:00
|
|
|
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
|
2008-02-27 08:25:32 +08:00
|
|
|
const APInt &Demanded) {
|
2013-05-25 10:42:55 +08:00
|
|
|
SDLoc dl(Op);
|
2009-03-04 08:18:06 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// FIXME: ISD::SELECT, ISD::SELECT_CC
|
2009-01-29 09:59:02 +08:00
|
|
|
switch (Op.getOpcode()) {
|
2006-02-17 05:11:51 +08:00
|
|
|
default: break;
|
|
|
|
case ISD::XOR:
|
2009-03-04 08:18:06 +08:00
|
|
|
case ISD::AND:
|
|
|
|
case ISD::OR: {
|
|
|
|
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
|
|
|
|
if (!C) return false;
|
|
|
|
|
|
|
|
if (Op.getOpcode() == ISD::XOR &&
|
|
|
|
(C->getAPIntValue() | (~Demanded)).isAllOnesValue())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// if we can expand it to have all bits set, do it
|
|
|
|
if (C->getAPIntValue().intersects(~Demanded)) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2009-03-04 08:18:06 +08:00
|
|
|
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
|
|
|
|
DAG.getConstant(Demanded &
|
2010-11-23 11:31:01 +08:00
|
|
|
C->getAPIntValue(),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, VT));
|
2009-03-04 08:18:06 +08:00
|
|
|
return CombineTo(Op, New);
|
|
|
|
}
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-03-04 08:18:06 +08:00
|
|
|
}
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
|
|
|
|
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
|
|
|
|
/// generalized for targets with other types of implicit widening casts.
|
2016-06-12 23:39:02 +08:00
|
|
|
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
|
|
|
|
unsigned BitWidth,
|
|
|
|
const APInt &Demanded,
|
|
|
|
const SDLoc &dl) {
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
assert(Op.getNumOperands() == 2 &&
|
|
|
|
"ShrinkDemandedOp only supports binary operators!");
|
|
|
|
assert(Op.getNode()->getNumValues() == 1 &&
|
|
|
|
"ShrinkDemandedOp only supports nodes with one result!");
|
|
|
|
|
2014-05-29 17:19:07 +08:00
|
|
|
// Early return, as this function cannot handle vector types.
|
|
|
|
if (Op.getValueType().isVector())
|
|
|
|
return false;
|
|
|
|
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
// Don't do this if the node has another user, which may require the
|
|
|
|
// full value.
|
|
|
|
if (!Op.getNode()->hasOneUse())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Search for the smallest integer type with free casts to and from
|
|
|
|
// Op's type. For expedience, just check power-of-2 integer types.
|
|
|
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
2012-12-19 15:39:08 +08:00
|
|
|
unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
|
|
|
|
unsigned SmallVTBits = DemandedSize;
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
if (!isPowerOf2_32(SmallVTBits))
|
|
|
|
SmallVTBits = NextPowerOf2(SmallVTBits);
|
|
|
|
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
|
2009-08-12 08:36:31 +08:00
|
|
|
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
|
|
|
|
TLI.isZExtFree(SmallVT, Op.getValueType())) {
|
|
|
|
// We found a type with free casts.
|
|
|
|
SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
|
|
|
|
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
|
|
|
|
Op.getNode()->getOperand(0)),
|
|
|
|
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
|
|
|
|
Op.getNode()->getOperand(1)));
|
2012-12-19 15:39:08 +08:00
|
|
|
bool NeedZext = DemandedSize > SmallVTBits;
|
|
|
|
SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
|
|
|
|
dl, Op.getValueType(), X);
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
return CombineTo(Op, Z);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
TargetLowering: Add SimplifyDemandedBits() helper to TargetLoweringOpt
Summary:
The main purpose of this new helper is to enable simplifying operations that
have multiple uses. SimplifyDemandedBits does not handle multiple uses
currently, and this new function makes it possible to optimize:
and v1, v0, 0xffffff
mul24 v2, v1, v1 ; Multiply ignoring high 8-bits.
To:
mul24 v2, v0, v0
Where before this would not be optimized, because v1 has multiple uses.
Reviewers: bogner, arsenm
Subscribers: nhaehnle, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D24964
llvm-svn: 284266
2016-10-15 03:14:26 +08:00
|
|
|
bool
|
|
|
|
TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
|
|
|
|
unsigned OpIdx,
|
|
|
|
const APInt &Demanded,
|
|
|
|
DAGCombinerInfo &DCI) {
|
|
|
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
|
|
|
SDValue Op = User->getOperand(OpIdx);
|
|
|
|
APInt KnownZero, KnownOne;
|
|
|
|
|
|
|
|
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
|
|
|
|
*this, 0, true))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
|
|
// Old will not always be the same as Op. For example:
|
|
|
|
//
|
|
|
|
// Demanded = 0xffffff
|
|
|
|
// Op = i64 truncate (i32 and x, 0xffffff)
|
|
|
|
// In this case simplify demand bits will want to replace the 'and' node
|
|
|
|
// with the value 'x', which will give us:
|
|
|
|
// Old = i32 and x, 0xffffff
|
|
|
|
// New = x
|
|
|
|
if (Old.hasOneUse()) {
|
|
|
|
// For the one use case, we just commit the change.
|
|
|
|
DCI.CommitTargetLoweringOpt(*this);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If Old has more than one use then it must be Op, because the
|
|
|
|
// AssumeSingleUse flag is not propogated to recursive calls of
|
|
|
|
// SimplifyDemanded bits, so the only node with multiple use that
|
|
|
|
// it will attempt to combine will be opt.
|
|
|
|
assert(Old == Op);
|
|
|
|
|
|
|
|
SmallVector <SDValue, 4> NewOps;
|
|
|
|
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
|
|
|
|
if (i == OpIdx) {
|
|
|
|
NewOps.push_back(New);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
NewOps.push_back(User->getOperand(i));
|
|
|
|
}
|
|
|
|
DAG.UpdateNodeOperands(User, NewOps);
|
|
|
|
// Op has less users now, so we may be able to perform additional combines
|
|
|
|
// with it.
|
|
|
|
DCI.AddToWorklist(Op.getNode());
|
|
|
|
// User's operands have been updated, so we may be able to do new combines
|
|
|
|
// with it.
|
|
|
|
DCI.AddToWorklist(User);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Look at Op. At this point, we know that only the DemandedMask bits of the
|
|
|
|
/// result of Op are ever used downstream. If we can use this information to
|
|
|
|
/// simplify Op, create a new simplified DAG node and return true, returning the
|
|
|
|
/// original and new nodes in Old and New. Otherwise, analyze the expression and
|
|
|
|
/// return a mask of KnownOne and KnownZero bits for the expression (used to
|
|
|
|
/// simplify the caller). The KnownZero/One bits may only be accurate for those
|
|
|
|
/// bits in the DemandedMask.
|
2008-07-28 05:46:04 +08:00
|
|
|
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
2008-02-27 08:25:32 +08:00
|
|
|
const APInt &DemandedMask,
|
|
|
|
APInt &KnownZero,
|
|
|
|
APInt &KnownOne,
|
2006-02-17 05:11:51 +08:00
|
|
|
TargetLoweringOpt &TLO,
|
TargetLowering: Add SimplifyDemandedBits() helper to TargetLoweringOpt
Summary:
The main purpose of this new helper is to enable simplifying operations that
have multiple uses. SimplifyDemandedBits does not handle multiple uses
currently, and this new function makes it possible to optimize:
and v1, v0, 0xffffff
mul24 v2, v1, v1 ; Multiply ignoring high 8-bits.
To:
mul24 v2, v0, v0
Where before this would not be optimized, because v1 has multiple uses.
Reviewers: bogner, arsenm
Subscribers: nhaehnle, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D24964
llvm-svn: 284266
2016-10-15 03:14:26 +08:00
|
|
|
unsigned Depth,
|
|
|
|
bool AssumeSingleUse) const {
|
2008-02-27 08:25:32 +08:00
|
|
|
unsigned BitWidth = DemandedMask.getBitWidth();
|
2016-09-14 23:43:44 +08:00
|
|
|
assert(Op.getScalarValueSizeInBits() == BitWidth &&
|
2008-02-27 08:25:32 +08:00
|
|
|
"Mask size mismatches value type size!");
|
|
|
|
APInt NewMask = DemandedMask;
|
2013-05-25 10:42:55 +08:00
|
|
|
SDLoc dl(Op);
|
2015-07-09 10:09:20 +08:00
|
|
|
auto &DL = TLO.DAG.getDataLayout();
|
2008-02-27 08:25:32 +08:00
|
|
|
|
|
|
|
// Don't know anything.
|
|
|
|
KnownZero = KnownOne = APInt(BitWidth, 0);
|
2007-05-18 02:19:23 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Other users may use these bits.
|
TargetLowering: Add SimplifyDemandedBits() helper to TargetLoweringOpt
Summary:
The main purpose of this new helper is to enable simplifying operations that
have multiple uses. SimplifyDemandedBits does not handle multiple uses
currently, and this new function makes it possible to optimize:
and v1, v0, 0xffffff
mul24 v2, v1, v1 ; Multiply ignoring high 8-bits.
To:
mul24 v2, v0, v0
Where before this would not be optimized, because v1 has multiple uses.
Reviewers: bogner, arsenm
Subscribers: nhaehnle, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D24964
llvm-svn: 284266
2016-10-15 03:14:26 +08:00
|
|
|
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
|
2006-02-17 05:11:51 +08:00
|
|
|
if (Depth != 0) {
|
2010-11-23 11:31:01 +08:00
|
|
|
// If not at the root, Just compute the KnownZero/KnownOne bits to
|
2006-02-17 05:11:51 +08:00
|
|
|
// simplify things downstream.
|
2014-05-15 05:14:37 +08:00
|
|
|
TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
|
2006-02-17 05:11:51 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// If this is the root being simplified, allow it to have multiple uses,
|
2008-02-27 08:25:32 +08:00
|
|
|
// just set the NewMask to all bits.
|
|
|
|
NewMask = APInt::getAllOnesValue(BitWidth);
|
2010-11-23 11:31:01 +08:00
|
|
|
} else if (DemandedMask == 0) {
|
2006-02-17 05:11:51 +08:00
|
|
|
// Not demanding any bits from Op.
|
2016-03-15 02:09:43 +08:00
|
|
|
if (!Op.isUndef())
|
2009-02-07 07:05:02 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
|
2006-02-04 06:24:05 +08:00
|
|
|
return false;
|
2006-02-17 05:11:51 +08:00
|
|
|
} else if (Depth == 6) { // Limit search depth.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
|
2006-02-04 06:24:05 +08:00
|
|
|
switch (Op.getOpcode()) {
|
2006-02-17 05:11:51 +08:00
|
|
|
case ISD::Constant:
|
|
|
|
// We know all of the bits for a constant!
|
2012-04-04 20:51:34 +08:00
|
|
|
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
|
|
|
|
KnownZero = ~KnownOne;
|
2006-02-27 07:36:02 +08:00
|
|
|
return false; // Don't fall through, will infinitely loop.
|
2016-09-08 20:57:51 +08:00
|
|
|
case ISD::BUILD_VECTOR:
|
|
|
|
// Collect the known bits that are shared by every constant vector element.
|
|
|
|
KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
|
|
|
|
for (SDValue SrcOp : Op->ops()) {
|
|
|
|
if (!isa<ConstantSDNode>(SrcOp)) {
|
|
|
|
// We can only handle all constant values - bail out with no known bits.
|
|
|
|
KnownZero = KnownOne = APInt(BitWidth, 0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
|
|
|
|
KnownZero2 = ~KnownOne2;
|
|
|
|
|
|
|
|
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
|
|
|
|
if (KnownOne2.getBitWidth() != BitWidth) {
|
|
|
|
assert(KnownOne2.getBitWidth() > BitWidth &&
|
|
|
|
KnownZero2.getBitWidth() > BitWidth &&
|
|
|
|
"Expected BUILD_VECTOR implicit truncation");
|
|
|
|
KnownOne2 = KnownOne2.trunc(BitWidth);
|
|
|
|
KnownZero2 = KnownZero2.trunc(BitWidth);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Known bits are the values that are shared by every element.
|
|
|
|
// TODO: support per-element known bits.
|
|
|
|
KnownOne &= KnownOne2;
|
|
|
|
KnownZero &= KnownZero2;
|
|
|
|
}
|
|
|
|
return false; // Don't fall through, will infinitely loop.
|
2006-02-04 06:24:05 +08:00
|
|
|
case ISD::AND:
|
2006-02-27 08:36:27 +08:00
|
|
|
// If the RHS is a constant, check to see if the LHS would be zero without
|
|
|
|
// using the bits from the RHS. Below, we use knowledge about the RHS to
|
|
|
|
// simplify the LHS, here we're using information from the LHS to simplify
|
|
|
|
// the RHS.
|
|
|
|
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt LHSZero, LHSOne;
|
2011-01-11 05:53:07 +08:00
|
|
|
// Do not increment Depth here; that can cause an infinite loop.
|
2014-05-15 05:14:37 +08:00
|
|
|
TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
|
2006-02-27 08:36:27 +08:00
|
|
|
// If the LHS already has zeros where RHSC does, this and is dead.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
|
2006-02-27 08:36:27 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
|
|
|
// If any of the set bits in the RHS are known zero on the LHS, shrink
|
|
|
|
// the constant.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
|
2006-02-27 08:36:27 +08:00
|
|
|
return true;
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero2, KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If all of the demanded bits are known one on one side, return the other.
|
|
|
|
// These bits cannot contribute to the result of the 'and'.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(1));
|
|
|
|
// If all of the demanded bits in the inputs are known zeros, return zero.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
|
2015-04-28 22:05:47 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
|
2006-02-17 05:11:51 +08:00
|
|
|
// If the RHS is a constant, see if we can simplify it.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return true;
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
// If the operation can be done in a smaller type, do so.
|
2010-06-24 22:30:44 +08:00
|
|
|
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
return true;
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Output known-1 bits are only known if set in both the LHS & RHS.
|
|
|
|
KnownOne &= KnownOne2;
|
|
|
|
// Output known-0 are known to be clear if zero in either the LHS | RHS.
|
|
|
|
KnownZero |= KnownZero2;
|
|
|
|
break;
|
|
|
|
case ISD::OR:
|
2010-11-23 11:31:01 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero2, KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If all of the demanded bits are known zero on one side, return the other.
|
|
|
|
// These bits cannot contribute to the result of the 'or'.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(1));
|
|
|
|
// If all of the potentially set bits on one side are known to be set on
|
|
|
|
// the other side, just use the 'other' side.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(1));
|
|
|
|
// If the RHS is a constant, see if we can simplify it.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (TLO.ShrinkDemandedConstant(Op, NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return true;
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
// If the operation can be done in a smaller type, do so.
|
2010-06-24 22:30:44 +08:00
|
|
|
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
return true;
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Output known-0 bits are only known if clear in both the LHS & RHS.
|
|
|
|
KnownZero &= KnownZero2;
|
|
|
|
// Output known-1 are known to be set if set in either the LHS | RHS.
|
|
|
|
KnownOne |= KnownOne2;
|
|
|
|
break;
|
|
|
|
case ISD::XOR:
|
2010-11-23 11:31:01 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If all of the demanded bits are known zero on one side, return the other.
|
|
|
|
// These bits cannot contribute to the result of the 'xor'.
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((KnownZero & NewMask) == NewMask)
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((KnownZero2 & NewMask) == NewMask)
|
2006-02-17 05:11:51 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(1));
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
// If the operation can be done in a smaller type, do so.
|
2010-06-24 22:30:44 +08:00
|
|
|
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
return true;
|
|
|
|
|
2006-11-28 05:50:02 +08:00
|
|
|
// If all of the unknown bits are known to be zero on one side or the other
|
|
|
|
// (but not both) turn this into an *inclusive* or.
|
2012-09-27 18:14:43 +08:00
|
|
|
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
|
2008-02-27 08:25:32 +08:00
|
|
|
if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
|
2009-02-07 05:50:26 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
|
2006-11-28 05:50:02 +08:00
|
|
|
Op.getOperand(0),
|
|
|
|
Op.getOperand(1)));
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Output known-0 bits are known if clear or set in both the LHS & RHS.
|
|
|
|
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
|
|
|
|
// Output known-1 are known to be set if set in only one of the LHS, RHS.
|
|
|
|
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If all of the demanded bits on one side are known, and all of the set
|
|
|
|
// bits on that side are also known to be set on the other side, turn this
|
|
|
|
// into an AND, as we know the bits will be cleared.
|
2012-09-27 18:14:43 +08:00
|
|
|
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
|
2012-04-18 06:23:10 +08:00
|
|
|
// NB: it is okay if more bits are known than are requested
|
2013-07-08 08:37:03 +08:00
|
|
|
if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
|
2012-04-18 06:23:10 +08:00
|
|
|
if (KnownOne == KnownOne2) { // set bits are the same on both sides
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT);
|
2010-11-23 11:31:01 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
|
2009-02-03 08:47:48 +08:00
|
|
|
Op.getOperand(0), ANDC));
|
2006-02-17 05:11:51 +08:00
|
|
|
}
|
2006-02-04 06:24:05 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If the RHS is a constant, see if we can simplify it.
|
2008-04-07 05:23:02 +08:00
|
|
|
// for XOR, we prefer to force bits to 1 if they will make a -1.
|
|
|
|
// if we can't force bits, try to shrink constant
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
|
|
|
APInt Expanded = C->getAPIntValue() | (~NewMask);
|
|
|
|
// if we can expand it to have all bits set, do it
|
|
|
|
if (Expanded.isAllOnesValue()) {
|
|
|
|
if (Expanded != C->getAPIntValue()) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2009-02-03 08:47:48 +08:00
|
|
|
SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(Expanded, dl, VT));
|
2008-04-07 05:23:02 +08:00
|
|
|
return TLO.CombineTo(Op, New);
|
|
|
|
}
|
|
|
|
// if it already has all the bits set, nothing to change
|
|
|
|
// but don't shrink either!
|
|
|
|
} else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero = KnownZeroOut;
|
|
|
|
KnownOne = KnownOneOut;
|
|
|
|
break;
|
|
|
|
case ISD::SELECT:
|
2010-11-23 11:31:01 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
|
|
|
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If the operands are constants, see if we can simplify them.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (TLO.ShrinkDemandedConstant(Op, NewMask))
|
2006-02-17 05:11:51 +08:00
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// Only known if known in both the LHS and RHS.
|
|
|
|
KnownOne &= KnownOne2;
|
|
|
|
KnownZero &= KnownZero2;
|
|
|
|
break;
|
|
|
|
case ISD::SELECT_CC:
|
2010-11-23 11:31:01 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
|
|
|
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
|
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If the operands are constants, see if we can simplify them.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (TLO.ShrinkDemandedConstant(Op, NewMask))
|
2006-02-27 07:36:02 +08:00
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Only known if known in both the LHS and RHS.
|
|
|
|
KnownOne &= KnownOne2;
|
|
|
|
KnownZero &= KnownZero2;
|
2006-02-04 06:24:05 +08:00
|
|
|
break;
|
|
|
|
case ISD::SHL:
|
2006-02-17 05:11:51 +08:00
|
|
|
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned ShAmt = SA->getZExtValue();
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue InOp = Op.getOperand(0);
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
// If the shift count is an invalid immediate, don't do anything.
|
|
|
|
if (ShAmt >= BitWidth)
|
|
|
|
break;
|
|
|
|
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
|
|
|
|
// single shift. We can do this if the bottom bits (which are shifted
|
|
|
|
// out) are never demanded.
|
|
|
|
if (InOp.getOpcode() == ISD::SRL &&
|
|
|
|
isa<ConstantSDNode>(InOp.getOperand(1))) {
|
2008-02-27 08:25:32 +08:00
|
|
|
if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
unsigned Opc = ISD::SHL;
|
|
|
|
int Diff = ShAmt-C1;
|
|
|
|
if (Diff < 0) {
|
|
|
|
Diff = -Diff;
|
|
|
|
Opc = ISD::SRL;
|
2010-11-23 11:31:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
SDValue NewSA =
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2009-02-03 08:47:48 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
InOp.getOperand(0), NewSA));
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
}
|
|
|
|
|
2010-07-24 02:03:30 +08:00
|
|
|
if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-07-24 02:03:30 +08:00
|
|
|
|
|
|
|
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
|
|
|
|
// are not demanded. This will likely allow the anyext to be folded away.
|
|
|
|
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
|
|
|
|
SDValue InnerOp = InOp.getNode()->getOperand(0);
|
|
|
|
EVT InnerVT = InnerOp.getValueType();
|
2011-12-09 09:16:26 +08:00
|
|
|
unsigned InnerBits = InnerVT.getSizeInBits();
|
|
|
|
if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
|
2010-07-24 02:03:30 +08:00
|
|
|
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
|
2015-07-09 10:09:20 +08:00
|
|
|
EVT ShTy = getShiftAmountTy(InnerVT, DL);
|
2010-07-24 05:08:12 +08:00
|
|
|
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
|
|
|
|
ShTy = InnerVT;
|
2010-07-24 02:03:30 +08:00
|
|
|
SDValue NarrowShl =
|
|
|
|
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(ShAmt, dl, ShTy));
|
2010-07-24 02:03:30 +08:00
|
|
|
return
|
|
|
|
TLO.CombineTo(Op,
|
|
|
|
TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
|
|
|
|
NarrowShl));
|
|
|
|
}
|
2013-10-16 18:26:19 +08:00
|
|
|
// Repeat the SHL optimization above in cases where an extension
|
|
|
|
// intervenes: (shl (anyext (shr x, c1)), c2) to
|
|
|
|
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
|
|
|
|
// aren't demanded (as above) and that the shifted upper c1 bits of
|
|
|
|
// x aren't demanded.
|
|
|
|
if (InOp.hasOneUse() &&
|
|
|
|
InnerOp.getOpcode() == ISD::SRL &&
|
|
|
|
InnerOp.hasOneUse() &&
|
|
|
|
isa<ConstantSDNode>(InnerOp.getOperand(1))) {
|
|
|
|
uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
|
|
|
|
->getZExtValue();
|
|
|
|
if (InnerShAmt < ShAmt &&
|
|
|
|
InnerShAmt < InnerBits &&
|
|
|
|
NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
|
|
|
|
NewMask.trunc(ShAmt) == 0) {
|
|
|
|
SDValue NewSA =
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
|
2013-10-16 18:26:19 +08:00
|
|
|
Op.getOperand(1).getValueType());
|
|
|
|
EVT VT = Op.getValueType();
|
|
|
|
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
|
|
|
|
InnerOp.getOperand(0));
|
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
|
|
|
|
NewExt, NewSA));
|
|
|
|
}
|
|
|
|
}
|
2010-07-24 02:03:30 +08:00
|
|
|
}
|
|
|
|
|
2008-09-13 00:56:44 +08:00
|
|
|
KnownZero <<= SA->getZExtValue();
|
|
|
|
KnownOne <<= SA->getZExtValue();
|
2008-02-27 08:25:32 +08:00
|
|
|
// low bits known zero.
|
2008-09-13 00:56:44 +08:00
|
|
|
KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
|
2006-02-17 05:11:51 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ISD::SRL:
|
|
|
|
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned ShAmt = SA->getZExtValue();
|
2008-06-06 20:08:01 +08:00
|
|
|
unsigned VTSize = VT.getSizeInBits();
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue InOp = Op.getOperand(0);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
// If the shift count is an invalid immediate, don't do anything.
|
|
|
|
if (ShAmt >= BitWidth)
|
|
|
|
break;
|
|
|
|
|
2015-06-27 00:59:31 +08:00
|
|
|
APInt InDemandedMask = (NewMask << ShAmt);
|
|
|
|
|
|
|
|
// If the shift is exact, then it does demand the low bits (and knows that
|
|
|
|
// they are zero).
|
|
|
|
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
|
|
|
|
InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
|
|
|
|
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
|
|
|
|
// single shift. We can do this if the top bits (which are shifted out)
|
|
|
|
// are never demanded.
|
|
|
|
if (InOp.getOpcode() == ISD::SHL &&
|
|
|
|
isa<ConstantSDNode>(InOp.getOperand(1))) {
|
2008-02-27 08:25:32 +08:00
|
|
|
if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
unsigned Opc = ISD::SRL;
|
|
|
|
int Diff = ShAmt-C1;
|
|
|
|
if (Diff < 0) {
|
|
|
|
Diff = -Diff;
|
|
|
|
Opc = ISD::SHL;
|
2010-11-23 11:31:01 +08:00
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue NewSA =
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
|
2009-02-03 08:47:48 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
|
Fold (x << c1)>> c2 into a single shift if the bits shifted out aren't used.
This compiles:
int baz(long long a) { return (short)(((int)(a >>24)) >> 9); }
into:
_baz:
srwi r2, r3, 1
extsh r3, r2
blr
on PPC, instead of:
_baz:
slwi r2, r3, 8
srwi r2, r2, 9
extsh r3, r2
blr
GCC produces:
_baz:
srwi r10,r4,24
insrwi r10,r3,24,0
srawi r9,r3,24
srawi r3,r10,9
extsh r3,r3
blr
This implements CodeGen/PowerPC/shl_elim.ll
llvm-svn: 36221
2007-04-18 05:14:16 +08:00
|
|
|
InOp.getOperand(0), NewSA));
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
}
|
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// Compute the new bits that are at the top now.
|
2015-06-27 00:59:31 +08:00
|
|
|
if (SimplifyDemandedBits(InOp, InDemandedMask,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2008-02-27 08:25:32 +08:00
|
|
|
KnownZero = KnownZero.lshr(ShAmt);
|
|
|
|
KnownOne = KnownOne.lshr(ShAmt);
|
2006-06-14 00:52:37 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
|
2006-06-14 00:52:37 +08:00
|
|
|
KnownZero |= HighBits; // High bits known zero.
|
2006-02-17 05:11:51 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ISD::SRA:
|
2009-01-29 09:59:02 +08:00
|
|
|
// If this is an arithmetic shift right and only the low-bit is set, we can
|
|
|
|
// always convert this into a logical shr, even if the shift amount is
|
|
|
|
// variable. The low bit of the shift cannot be an input sign bit unless
|
|
|
|
// the shift amount is >= the size of the datatype, which is undefined.
|
2011-12-09 09:16:26 +08:00
|
|
|
if (NewMask == 1)
|
2010-04-17 14:13:15 +08:00
|
|
|
return TLO.CombineTo(Op,
|
|
|
|
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
|
|
|
|
Op.getOperand(0), Op.getOperand(1)));
|
2009-01-29 09:59:02 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned ShAmt = SA->getZExtValue();
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
// If the shift count is an invalid immediate, don't do anything.
|
|
|
|
if (ShAmt >= BitWidth)
|
|
|
|
break;
|
|
|
|
|
|
|
|
APInt InDemandedMask = (NewMask << ShAmt);
|
2006-05-09 01:22:53 +08:00
|
|
|
|
2015-06-27 00:59:31 +08:00
|
|
|
// If the shift is exact, then it does demand the low bits (and knows that
|
|
|
|
// they are zero).
|
|
|
|
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
|
|
|
|
InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
|
|
|
|
|
2006-05-09 01:22:53 +08:00
|
|
|
// If any of the demanded bits are produced by the sign extension, we also
|
|
|
|
// demand the input sign bit.
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
|
|
|
|
if (HighBits.intersects(NewMask))
|
2016-09-14 23:21:00 +08:00
|
|
|
InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits());
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-05-09 01:22:53 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2008-02-27 08:25:32 +08:00
|
|
|
KnownZero = KnownZero.lshr(ShAmt);
|
|
|
|
KnownOne = KnownOne.lshr(ShAmt);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
// Handle the sign bit, adjusted to where it is now in the mask.
|
|
|
|
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-17 05:11:51 +08:00
|
|
|
// If the input sign bit is known to be zero, or if none of the top bits
|
|
|
|
// are demanded, turn this into an unsigned shift right.
|
2015-06-26 22:51:49 +08:00
|
|
|
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
|
|
|
|
SDNodeFlags Flags;
|
|
|
|
Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
|
|
|
|
return TLO.CombineTo(Op,
|
|
|
|
TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
|
|
|
|
Op.getOperand(1), &Flags));
|
|
|
|
}
|
2013-10-17 19:16:57 +08:00
|
|
|
|
|
|
|
int Log2 = NewMask.exactLogBase2();
|
|
|
|
if (Log2 >= 0) {
|
|
|
|
// The bit must come from the sign.
|
|
|
|
SDValue NewSA =
|
2015-04-28 22:05:47 +08:00
|
|
|
TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
|
2013-10-17 19:16:57 +08:00
|
|
|
Op.getOperand(1).getValueType());
|
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
|
|
|
|
Op.getOperand(0), NewSA));
|
2006-02-17 05:11:51 +08:00
|
|
|
}
|
2013-10-17 19:16:57 +08:00
|
|
|
|
|
|
|
if (KnownOne.intersects(SignBit))
|
|
|
|
// New bits are known one.
|
|
|
|
KnownOne |= HighBits;
|
2006-02-04 06:24:05 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ISD::SIGN_EXTEND_INREG: {
|
2012-01-16 03:27:55 +08:00
|
|
|
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
|
|
|
|
|
|
|
|
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
|
|
|
|
// If we only care about the highest bit, don't bother shifting right.
|
2015-02-18 17:43:40 +08:00
|
|
|
if (MsbMask == NewMask) {
|
2016-09-14 23:21:00 +08:00
|
|
|
unsigned ShAmt = ExVT.getScalarSizeInBits();
|
2012-01-16 03:27:55 +08:00
|
|
|
SDValue InOp = Op.getOperand(0);
|
2016-09-14 23:21:00 +08:00
|
|
|
unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
|
2015-02-18 17:43:40 +08:00
|
|
|
bool AlreadySignExtended =
|
|
|
|
TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
|
|
|
|
// However if the input is already sign extended we expect the sign
|
|
|
|
// extension to be dropped altogether later and do not simplify.
|
|
|
|
if (!AlreadySignExtended) {
|
|
|
|
// Compute the correct shift amount type, which must be getShiftAmountTy
|
|
|
|
// for scalar types after legalization.
|
|
|
|
EVT ShiftAmtTy = Op.getValueType();
|
|
|
|
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
|
2015-07-09 10:09:20 +08:00
|
|
|
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
|
2015-02-18 17:43:40 +08:00
|
|
|
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
|
|
|
|
ShiftAmtTy);
|
2015-02-18 17:43:40 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
|
|
|
|
Op.getValueType(), InOp,
|
|
|
|
ShiftAmt));
|
|
|
|
}
|
2012-01-16 03:27:55 +08:00
|
|
|
}
|
2006-02-17 05:11:51 +08:00
|
|
|
|
2010-11-23 11:31:01 +08:00
|
|
|
// Sign extension. Compute the demanded bits in the result that are not
|
2006-02-17 05:11:51 +08:00
|
|
|
// present in the input.
|
2010-01-09 10:13:55 +08:00
|
|
|
APInt NewBits =
|
|
|
|
APInt::getHighBitsSet(BitWidth,
|
2016-09-14 23:21:00 +08:00
|
|
|
BitWidth - ExVT.getScalarSizeInBits());
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If none of the extended bits are demanded, eliminate the sextinreg.
|
2010-08-02 12:42:25 +08:00
|
|
|
if ((NewBits & NewMask) == 0)
|
2006-02-27 07:36:02 +08:00
|
|
|
return TLO.CombineTo(Op, Op.getOperand(0));
|
|
|
|
|
2010-12-07 16:25:19 +08:00
|
|
|
APInt InSignBit =
|
2016-09-14 23:21:00 +08:00
|
|
|
APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth);
|
2010-01-09 10:13:55 +08:00
|
|
|
APInt InputDemandedBits =
|
|
|
|
APInt::getLowBitsSet(BitWidth,
|
2016-09-14 23:21:00 +08:00
|
|
|
ExVT.getScalarSizeInBits()) &
|
2010-01-09 10:13:55 +08:00
|
|
|
NewMask;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// Since the sign extended bits are demanded, we know that the sign
|
2006-02-17 05:11:51 +08:00
|
|
|
// bit is demanded.
|
2006-02-27 07:36:02 +08:00
|
|
|
InputDemandedBits |= InSignBit;
|
2006-02-17 05:11:51 +08:00
|
|
|
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
|
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
2006-02-04 06:24:05 +08:00
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2006-02-17 05:11:51 +08:00
|
|
|
|
|
|
|
// If the sign bit of the input is known set or clear, then we know the
|
|
|
|
// top bits of the result.
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If the input sign bit is known zero, convert this into a zero extension.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (KnownZero.intersects(InSignBit))
|
2016-09-09 21:31:52 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
|
|
|
|
Op.getOperand(0), dl, ExVT.getScalarType()));
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownOne |= NewBits;
|
|
|
|
KnownZero &= ~NewBits;
|
2006-02-27 07:36:02 +08:00
|
|
|
} else { // Input sign bit unknown
|
2006-02-17 05:11:51 +08:00
|
|
|
KnownZero &= ~NewBits;
|
|
|
|
KnownOne &= ~NewBits;
|
2006-02-04 06:24:05 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-05-13 01:14:48 +08:00
|
|
|
case ISD::BUILD_PAIR: {
|
|
|
|
EVT HalfVT = Op.getOperand(0).getValueType();
|
|
|
|
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
|
|
|
|
|
|
|
|
APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
|
|
|
|
APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
|
|
|
|
|
|
|
|
APInt KnownZeroLo, KnownOneLo;
|
|
|
|
APInt KnownZeroHi, KnownOneHi;
|
|
|
|
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo,
|
|
|
|
KnownOneLo, TLO, Depth + 1))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi,
|
|
|
|
KnownOneHi, TLO, Depth + 1))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
KnownZero = KnownZeroLo.zext(BitWidth) |
|
|
|
|
KnownZeroHi.zext(BitWidth).shl(HalfBitWidth);
|
|
|
|
|
|
|
|
KnownOne = KnownOneLo.zext(BitWidth) |
|
|
|
|
KnownOneHi.zext(BitWidth).shl(HalfBitWidth);
|
|
|
|
break;
|
|
|
|
}
|
2006-02-27 07:36:02 +08:00
|
|
|
case ISD::ZERO_EXTEND: {
|
2016-09-14 23:43:44 +08:00
|
|
|
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
|
2010-12-07 16:25:19 +08:00
|
|
|
APInt InMask = NewMask.trunc(OperandBitWidth);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If none of the top bits are demanded, convert this into an any_extend.
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt NewBits =
|
|
|
|
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
|
|
|
|
if (!NewBits.intersects(NewMask))
|
2009-02-03 08:47:48 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
|
2010-11-23 11:31:01 +08:00
|
|
|
Op.getValueType(),
|
2006-02-27 07:36:02 +08:00
|
|
|
Op.getOperand(0)));
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2010-12-07 16:25:19 +08:00
|
|
|
KnownZero = KnownZero.zext(BitWidth);
|
|
|
|
KnownOne = KnownOne.zext(BitWidth);
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownZero |= NewBits;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case ISD::SIGN_EXTEND: {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT InVT = Op.getOperand(0).getValueType();
|
2016-09-14 23:21:00 +08:00
|
|
|
unsigned InBits = InVT.getScalarSizeInBits();
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
|
2008-03-12 05:29:43 +08:00
|
|
|
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt NewBits = ~InMask & NewMask;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If none of the top bits are demanded, convert this into an any_extend.
|
|
|
|
if (NewBits == 0)
|
2009-02-03 08:47:48 +08:00
|
|
|
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
|
|
|
|
Op.getValueType(),
|
|
|
|
Op.getOperand(0)));
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// Since some of the sign extended bits are demanded, we know that the sign
|
|
|
|
// bit is demanded.
|
2008-02-27 08:25:32 +08:00
|
|
|
APInt InDemandedBits = InMask & NewMask;
|
2006-02-27 07:36:02 +08:00
|
|
|
InDemandedBits |= InSignBit;
|
2010-12-07 16:25:19 +08:00
|
|
|
InDemandedBits = InDemandedBits.trunc(InBits);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-12-07 16:25:19 +08:00
|
|
|
KnownZero = KnownZero.zext(BitWidth);
|
|
|
|
KnownOne = KnownOne.zext(BitWidth);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If the sign bit is known zero, convert this to a zero extend.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (KnownZero.intersects(InSignBit))
|
2009-02-03 08:47:48 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
|
2010-11-23 11:31:01 +08:00
|
|
|
Op.getValueType(),
|
2006-02-27 07:36:02 +08:00
|
|
|
Op.getOperand(0)));
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If the sign bit is known one, the top bits match.
|
2008-02-27 08:25:32 +08:00
|
|
|
if (KnownOne.intersects(InSignBit)) {
|
2012-04-04 20:51:34 +08:00
|
|
|
KnownOne |= NewBits;
|
|
|
|
assert((KnownZero & NewBits) == 0);
|
2006-02-27 07:36:02 +08:00
|
|
|
} else { // Otherwise, top bits aren't known.
|
2012-04-04 20:51:34 +08:00
|
|
|
assert((KnownOne & NewBits) == 0);
|
|
|
|
assert((KnownZero & NewBits) == 0);
|
2006-02-27 07:36:02 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case ISD::ANY_EXTEND: {
|
2016-09-14 23:43:44 +08:00
|
|
|
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
|
2010-12-07 16:25:19 +08:00
|
|
|
APInt InMask = NewMask.trunc(OperandBitWidth);
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2010-12-07 16:25:19 +08:00
|
|
|
KnownZero = KnownZero.zext(BitWidth);
|
|
|
|
KnownOne = KnownOne.zext(BitWidth);
|
2006-02-27 07:36:02 +08:00
|
|
|
break;
|
|
|
|
}
|
2006-05-06 06:32:12 +08:00
|
|
|
case ISD::TRUNCATE: {
|
2006-05-06 08:11:52 +08:00
|
|
|
// Simplify the input, using demanded bit information, and compute the known
|
|
|
|
// zero/one bits live out.
|
2016-09-14 23:43:44 +08:00
|
|
|
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
|
2010-12-07 16:25:19 +08:00
|
|
|
APInt TruncMask = NewMask.zext(OperandBitWidth);
|
2008-02-27 08:25:32 +08:00
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
|
2006-05-06 06:32:12 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-12-07 16:25:19 +08:00
|
|
|
KnownZero = KnownZero.trunc(BitWidth);
|
|
|
|
KnownOne = KnownOne.trunc(BitWidth);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-05-06 08:11:52 +08:00
|
|
|
// If the input is only used by this truncate, see if we can shrink it based
|
|
|
|
// on the known demanded bits.
|
2008-08-29 05:40:38 +08:00
|
|
|
if (Op.getOperand(0).getNode()->hasOneUse()) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue In = Op.getOperand(0);
|
2006-05-06 08:11:52 +08:00
|
|
|
switch (In.getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case ISD::SRL:
|
|
|
|
// Shrink SRL by a constant if none of the high bits shifted in are
|
|
|
|
// demanded.
|
2010-04-17 14:13:15 +08:00
|
|
|
if (TLO.LegalTypes() &&
|
|
|
|
!isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
|
|
|
|
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
|
|
|
|
// undesirable.
|
|
|
|
break;
|
|
|
|
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
|
|
|
|
if (!ShAmt)
|
|
|
|
break;
|
2011-04-14 07:22:23 +08:00
|
|
|
SDValue Shift = In.getOperand(1);
|
|
|
|
if (TLO.LegalTypes()) {
|
|
|
|
uint64_t ShVal = ShAmt->getZExtValue();
|
2015-07-09 10:09:20 +08:00
|
|
|
Shift = TLO.DAG.getConstant(ShVal, dl,
|
|
|
|
getShiftAmountTy(Op.getValueType(), DL));
|
2011-04-14 07:22:23 +08:00
|
|
|
}
|
|
|
|
|
2010-04-17 14:13:15 +08:00
|
|
|
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
|
|
|
|
OperandBitWidth - BitWidth);
|
2010-12-07 16:25:19 +08:00
|
|
|
HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
|
2010-04-17 14:13:15 +08:00
|
|
|
|
|
|
|
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
|
|
|
|
// None of the shifted in bits are needed. Add a truncate of the
|
|
|
|
// shift input, then shift it.
|
|
|
|
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
|
2010-11-23 11:31:01 +08:00
|
|
|
Op.getValueType(),
|
2010-04-17 14:13:15 +08:00
|
|
|
In.getOperand(0));
|
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
|
|
|
|
Op.getValueType(),
|
2010-11-23 11:31:01 +08:00
|
|
|
NewTrunc,
|
2011-04-14 07:22:23 +08:00
|
|
|
Shift));
|
2006-05-06 08:11:52 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2006-05-06 06:32:12 +08:00
|
|
|
break;
|
|
|
|
}
|
2006-02-27 07:36:02 +08:00
|
|
|
case ISD::AssertZext: {
|
2011-09-03 08:26:49 +08:00
|
|
|
// AssertZext demands all of the high bits, plus any of the low bits
|
|
|
|
// demanded by its users.
|
|
|
|
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
|
|
|
|
APInt InMask = APInt::getLowBitsSet(BitWidth,
|
|
|
|
VT.getSizeInBits());
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
|
2006-02-27 07:36:02 +08:00
|
|
|
KnownZero, KnownOne, TLO, Depth+1))
|
|
|
|
return true;
|
2010-11-23 11:31:01 +08:00
|
|
|
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
|
2010-06-04 04:21:33 +08:00
|
|
|
|
2008-02-27 08:25:32 +08:00
|
|
|
KnownZero |= ~InMask & NewMask;
|
2006-02-27 07:36:02 +08:00
|
|
|
break;
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
case ISD::BITCAST:
|
2011-06-07 00:44:31 +08:00
|
|
|
// If this is an FP->Int bitcast and if the sign bit is the only
|
|
|
|
// thing demanded, turn this into a FGETSIGN.
|
2011-12-15 10:07:20 +08:00
|
|
|
if (!TLO.LegalOperations() &&
|
|
|
|
!Op.getValueType().isVector() &&
|
2011-11-10 06:25:12 +08:00
|
|
|
!Op.getOperand(0).getValueType().isVector() &&
|
2016-09-15 00:05:51 +08:00
|
|
|
NewMask == APInt::getSignBit(Op.getValueSizeInBits()) &&
|
2011-06-12 22:56:55 +08:00
|
|
|
Op.getOperand(0).getValueType().isFloatingPoint()) {
|
2011-06-07 00:44:31 +08:00
|
|
|
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
|
|
|
|
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
|
[X86] Part 1 to fix x86-64 fp128 calling convention.
Almost all these changes are conditioned and only apply to the new
x86-64 f128 type configuration, which will be enabled in a follow up
patch. They are required together to make new f128 work. If there is
any error, we should fix or revert them as a whole.
These changes should have no impact on current configurations.
* Relax type legalization checks to accept new f128 type configuration,
whose TypeAction is TypeSoftenFloat, not TypeLegal, but also has
TLI.isTypeLegal true.
* Relax GetSoftenedFloat to return in some cases f128 type SDValue,
which is TLI.isTypeLegal but not "softened" to i128 node.
* Allow customized FABS, FNEG, FCOPYSIGN on new f128 type configuration,
to generate optimized bitwise operators for libm functions.
* Enhance related Lower* functions to handle f128 type.
* Enhance DAGTypeLegalizer::run, SoftenFloatResult, and related functions
to keep new f128 type in register, and convert f128 operators to library calls.
* Fix Combiner, Emitter, Legalizer routines that did not handle f128 type.
* Add ExpandConstant to handle i128 constants, ExpandNode
to handle ISD::Constant node.
* Add one more parameter to getCommonSubClass and firstCommonClass,
to guarantee that returned common sub class will contain the specified
simple value type.
This extra parameter is used by EmitCopyFromReg in InstrEmitter.cpp.
* Fix infinite loop in getTypeLegalizationCost when f128 is the value type.
* Fix printOperand to handle null operand.
* Enhance ISD::BITCAST node to handle f128 constant.
* Expand new f128 type for BR_CC, SELECT_CC, SELECT, SETCC nodes.
* Enhance X86AsmPrinter to emit f128 values in comments.
Differential Revision: http://reviews.llvm.org/D15134
llvm-svn: 254653
2015-12-04 06:02:40 +08:00
|
|
|
if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
|
|
|
|
Op.getOperand(0).getValueType() != MVT::f128) {
|
|
|
|
// Cannot eliminate/lower SHL for f128 yet.
|
2011-06-07 00:44:31 +08:00
|
|
|
EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
|
2007-12-23 05:35:38 +08:00
|
|
|
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
|
|
|
|
// place. We expect the SHL to be eliminated by other optimizations.
|
2011-06-02 02:32:25 +08:00
|
|
|
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
|
2016-09-15 00:05:51 +08:00
|
|
|
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
|
2011-06-07 00:44:31 +08:00
|
|
|
if (!OpVTLegal && OpVTSizeInBits > 32)
|
2011-06-02 02:32:25 +08:00
|
|
|
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
|
2016-09-15 00:05:51 +08:00
|
|
|
unsigned ShVal = Op.getValueSizeInBits() - 1;
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
|
2011-05-20 02:48:20 +08:00
|
|
|
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
|
|
|
|
Op.getValueType(),
|
2007-12-23 05:35:38 +08:00
|
|
|
Sign, ShAmt));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
Implement support for modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
case ISD::ADD:
|
|
|
|
case ISD::MUL:
|
|
|
|
case ISD::SUB: {
|
|
|
|
// Add, Sub, and Mul don't demand any bits in positions beyond that
|
|
|
|
// of the highest bit demanded of them.
|
|
|
|
APInt LoMask = APInt::getLowBitsSet(BitWidth,
|
|
|
|
BitWidth - NewMask.countLeadingZeros());
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
|
|
|
|
KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
|
|
|
if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
|
|
|
|
KnownOne2, TLO, Depth+1))
|
|
|
|
return true;
|
|
|
|
// See if the operation should be performed at a smaller bit width.
|
2010-06-24 22:30:44 +08:00
|
|
|
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
return true;
|
2016-08-18 04:30:52 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
llvm-svn: 68576
2009-04-08 08:15:30 +08:00
|
|
|
}
|
2008-05-06 08:53:29 +08:00
|
|
|
default:
|
2014-05-15 05:14:37 +08:00
|
|
|
// Just use computeKnownBits to compute output bits.
|
|
|
|
TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
|
2006-02-27 09:00:42 +08:00
|
|
|
break;
|
2006-02-04 06:24:05 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-27 07:36:02 +08:00
|
|
|
// If we know the value of all of the demanded bits, return this as a
|
|
|
|
// constant.
|
2015-05-21 02:54:02 +08:00
|
|
|
if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
|
|
|
|
// Avoid folding to a constant if any OpaqueConstant is involved.
|
|
|
|
const SDNode *N = Op.getNode();
|
|
|
|
for (SDNodeIterator I = SDNodeIterator::begin(N),
|
|
|
|
E = SDNodeIterator::end(N); I != E; ++I) {
|
|
|
|
SDNode *Op = *I;
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
|
|
|
|
if (C->isOpaque())
|
|
|
|
return false;
|
|
|
|
}
|
2015-04-28 22:05:47 +08:00
|
|
|
return TLO.CombineTo(Op,
|
|
|
|
TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
|
2015-05-21 02:54:02 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-02-04 06:24:05 +08:00
|
|
|
return false;
|
|
|
|
}
|
2006-01-30 12:09:27 +08:00
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Determine which of the bits specified in Mask are known to be either zero or
|
|
|
|
/// one and return them in the KnownZero/KnownOne bitsets.
|
2014-05-15 05:14:37 +08:00
|
|
|
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
|
|
|
APInt &KnownZero,
|
|
|
|
APInt &KnownOne,
|
|
|
|
const SelectionDAG &DAG,
|
|
|
|
unsigned Depth) const {
|
2006-04-02 14:19:46 +08:00
|
|
|
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
|
2006-01-30 12:09:27 +08:00
|
|
|
"Should use MaskedValueIsZero if you don't know whether Op"
|
|
|
|
" is a target node!");
|
2012-04-04 20:51:34 +08:00
|
|
|
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
|
2005-12-22 07:05:39 +08:00
|
|
|
}
|
2006-01-27 04:37:03 +08:00
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// This method can be implemented by targets that want to expose additional
|
|
|
|
/// information about sign bits to the DAG Combiner.
|
2008-07-28 05:46:04 +08:00
|
|
|
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
|
2014-04-05 04:13:13 +08:00
|
|
|
const SelectionDAG &,
|
2006-05-06 17:27:13 +08:00
|
|
|
unsigned Depth) const {
|
|
|
|
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
|
|
|
|
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
|
|
|
|
"Should use ComputeNumSignBits if you don't know whether Op"
|
|
|
|
" is a target node!");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-04-02 02:13:22 +08:00
|
|
|
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
|
[x86] Revert r212324 which was too aggressive w.r.t. allowing undef
lanes in vector splats.
The core problem here is that undef lanes can't *unilaterally* be
considered to contribute to splats. Their handling needs to be more
cautious. There is also a reported failure of the nightly testers
(thanks Tobias!) that may well stem from the same core issue. I'm going
to fix this theoretical issue, factor the APIs a bit better, and then
verify that I don't see anything bad with Tobias's reduction from the
test suite before recommitting.
Original commit message for r212324:
[x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
any constant, constant FP, or undef splat and to tolerate any undef
lanes in a splat, then replace all uses of isSplatVector in X86's
lowering with it.
This fixes issues where undef lanes in an otherwise splat vector would
prevent the splat logic from firing. It is a touch more awkward to use
this interface, but it is much more accurate. Suggestions for better
interface structuring welcome.
With this fix, the code generated with the widening legalization
strategy for widen_cast-4.ll is *dramatically* improved as the special
lowering strategies for a v16i8 SRA kick in even though the high lanes
are undef.
We also get a slightly different choice for broadcasting an aligned
memory location, and use vpshufd instead of vbroadcastss. This looks
like a minor win for pipelining and domain crossing, but a minor loss
for the number of micro-ops. I suspect its a wash, but folks can
easily tweak the lowering if they want.
llvm-svn: 212475
2014-07-08 03:03:32 +08:00
|
|
|
if (!CN) {
|
|
|
|
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
|
|
|
|
if (!BV)
|
|
|
|
return false;
|
|
|
|
|
2014-07-09 08:41:34 +08:00
|
|
|
BitVector UndefElements;
|
|
|
|
CN = BV->getConstantSplatNode(&UndefElements);
|
2014-07-08 15:44:15 +08:00
|
|
|
// Only interested in constant splats, and we don't try to handle undef
|
|
|
|
// elements in identifying boolean constants.
|
2014-07-09 08:41:34 +08:00
|
|
|
if (!CN || UndefElements.none())
|
2014-07-08 15:44:15 +08:00
|
|
|
return false;
|
[x86] Revert r212324 which was too aggressive w.r.t. allowing undef
lanes in vector splats.
The core problem here is that undef lanes can't *unilaterally* be
considered to contribute to splats. Their handling needs to be more
cautious. There is also a reported failure of the nightly testers
(thanks Tobias!) that may well stem from the same core issue. I'm going
to fix this theoretical issue, factor the APIs a bit better, and then
verify that I don't see anything bad with Tobias's reduction from the
test suite before recommitting.
Original commit message for r212324:
[x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
any constant, constant FP, or undef splat and to tolerate any undef
lanes in a splat, then replace all uses of isSplatVector in X86's
lowering with it.
This fixes issues where undef lanes in an otherwise splat vector would
prevent the splat logic from firing. It is a touch more awkward to use
this interface, but it is much more accurate. Suggestions for better
interface structuring welcome.
With this fix, the code generated with the widening legalization
strategy for widen_cast-4.ll is *dramatically* improved as the special
lowering strategies for a v16i8 SRA kick in even though the high lanes
are undef.
We also get a slightly different choice for broadcasting an aligned
memory location, and use vpshufd instead of vbroadcastss. This looks
like a minor win for pipelining and domain crossing, but a minor loss
for the number of micro-ops. I suspect its a wash, but folks can
easily tweak the lowering if they want.
llvm-svn: 212475
2014-07-08 03:03:32 +08:00
|
|
|
}
|
2014-04-02 02:13:22 +08:00
|
|
|
|
2014-07-10 18:18:12 +08:00
|
|
|
switch (getBooleanContents(N->getValueType(0))) {
|
2014-04-02 02:13:22 +08:00
|
|
|
case UndefinedBooleanContent:
|
|
|
|
return CN->getAPIntValue()[0];
|
|
|
|
case ZeroOrOneBooleanContent:
|
|
|
|
return CN->isOne();
|
|
|
|
case ZeroOrNegativeOneBooleanContent:
|
|
|
|
return CN->isAllOnesValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm_unreachable("Invalid boolean contents");
|
|
|
|
}
|
|
|
|
|
[DAGCombine] Make sext(setcc) combine respect getBooleanContents
We used to combine "sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)"
Instead, we should combine to (select (setcc x, y, cc), T, 0) where the value
of T is 1 or -1, depending on the type of the setcc, and getBooleanContents()
for the type if it is not i1.
This fixes PR28504.
llvm-svn: 277371
2016-08-02 03:39:49 +08:00
|
|
|
/// Build the constant of type \p VT that represents boolean "true".
///
/// The constant is 1 when getBooleanContents(VT) is ZeroOrOneBooleanContent,
/// and all ones (at the scalar element width of \p VT) for every other
/// boolean-contents setting.  The node is created in \p DAG at location
/// \p DL.
SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
                                        const SDLoc &DL) const {
  const unsigned ScalarBits = VT.getScalarSizeInBits();

  if (getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent)
    return DAG.getConstant(APInt(ScalarBits, 1), DL, VT);

  return DAG.getConstant(APInt::getAllOnesValue(ScalarBits), DL, VT);
}
|
|
|
|
|
2014-04-02 02:13:22 +08:00
|
|
|
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
|
[x86] Revert r212324 which was too aggressive w.r.t. allowing undef
lanes in vector splats.
The core problem here is that undef lanes can't *unilaterally* be
considered to contribute to splats. Their handling needs to be more
cautious. There is also a reported failure of the nightly testers
(thanks Tobias!) that may well stem from the same core issue. I'm going
to fix this theoretical issue, factor the APIs a bit better, and then
verify that I don't see anything bad with Tobias's reduction from the
test suite before recommitting.
Original commit message for r212324:
[x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
any constant, constant FP, or undef splat and to tolerate any undef
lanes in a splat, then replace all uses of isSplatVector in X86's
lowering with it.
This fixes issues where undef lanes in an otherwise splat vector would
prevent the splat logic from firing. It is a touch more awkward to use
this interface, but it is much more accurate. Suggestions for better
interface structuring welcome.
With this fix, the code generated with the widening legalization
strategy for widen_cast-4.ll is *dramatically* improved as the special
lowering strategies for a v16i8 SRA kick in even though the high lanes
are undef.
We also get a slightly different choice for broadcasting an aligned
memory location, and use vpshufd instead of vbroadcastss. This looks
like a minor win for pipelining and domain crossing, but a minor loss
for the number of micro-ops. I suspect its a wash, but folks can
easily tweak the lowering if they want.
llvm-svn: 212475
2014-07-08 03:03:32 +08:00
|
|
|
if (!CN) {
|
|
|
|
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
|
|
|
|
if (!BV)
|
|
|
|
return false;
|
|
|
|
|
2014-07-09 08:41:34 +08:00
|
|
|
BitVector UndefElements;
|
|
|
|
CN = BV->getConstantSplatNode(&UndefElements);
|
2014-07-08 15:44:15 +08:00
|
|
|
// Only interested in constant splats, and we don't try to handle undef
|
|
|
|
// elements in identifying boolean constants.
|
2014-07-09 08:41:34 +08:00
|
|
|
if (!CN || UndefElements.none())
|
2014-07-08 15:44:15 +08:00
|
|
|
return false;
|
[x86] Revert r212324 which was too aggressive w.r.t. allowing undef
lanes in vector splats.
The core problem here is that undef lanes can't *unilaterally* be
considered to contribute to splats. Their handling needs to be more
cautious. There is also a reported failure of the nightly testers
(thanks Tobias!) that may well stem from the same core issue. I'm going
to fix this theoretical issue, factor the APIs a bit better, and then
verify that I don't see anything bad with Tobias's reduction from the
test suite before recommitting.
Original commit message for r212324:
[x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
any constant, constant FP, or undef splat and to tolerate any undef
lanes in a splat, then replace all uses of isSplatVector in X86's
lowering with it.
This fixes issues where undef lanes in an otherwise splat vector would
prevent the splat logic from firing. It is a touch more awkward to use
this interface, but it is much more accurate. Suggestions for better
interface structuring welcome.
With this fix, the code generated with the widening legalization
strategy for widen_cast-4.ll is *dramatically* improved as the special
lowering strategies for a v16i8 SRA kick in even though the high lanes
are undef.
We also get a slightly different choice for broadcasting an aligned
memory location, and use vpshufd instead of vbroadcastss. This looks
like a minor win for pipelining and domain crossing, but a minor loss
for the number of micro-ops. I suspect its a wash, but folks can
easily tweak the lowering if they want.
llvm-svn: 212475
2014-07-08 03:03:32 +08:00
|
|
|
}
|
2014-04-02 02:13:22 +08:00
|
|
|
|
2014-07-10 18:18:12 +08:00
|
|
|
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
|
2014-04-02 02:13:22 +08:00
|
|
|
return !CN->getAPIntValue()[0];
|
|
|
|
|
|
|
|
return CN->isNullValue();
|
|
|
|
}
|
|
|
|
|
TargetLowering: Improve handling of (setcc ([sz]ext x) 0, cc) in SimplifySetCC
Summary:
When SimplifySetCC sees a setcc node that compares the result of a
value extension operation with a constant, it tries to simplify the
setcc node by eliminating the extension and shrinking the constant.
If shrinking the inputs to setcc is deemed not desirable by the target
(e.g. the target does not want a setcc comparing i1 values), then it
is still possible to optimize this sequence in some cases.
This patch adds the following combines to SimplifySetCC when shrinking setcc
inputs is not desirable:
(setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> (setcc (x, y, cc))
(setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> (setcc (x, y, !cc))
There are no tests for this yet, but once AMDGPU correctly implements
TargetLowering::isTypeDesirableForOp(), this new combine will be
exercised by the existing CodeGen/AMDGPU/setcc-opt.ll test.
Reviewers: resistor, arsenm
Subscribers: jroelofs, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15034
llvm-svn: 258067
2016-01-19 03:55:21 +08:00
|
|
|
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
|
|
|
|
bool SExt) const {
|
|
|
|
if (VT == MVT::i1)
|
|
|
|
return N->isOne();
|
|
|
|
|
|
|
|
TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
|
|
|
|
switch (Cnt) {
|
|
|
|
case TargetLowering::ZeroOrOneBooleanContent:
|
|
|
|
// An extended value of 1 is always true, unless its original type is i1,
|
|
|
|
// in which case it will be sign extended to -1.
|
|
|
|
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
|
|
|
|
case TargetLowering::UndefinedBooleanContent:
|
|
|
|
case TargetLowering::ZeroOrNegativeOneBooleanContent:
|
|
|
|
return N->isAllOnesValue() && SExt;
|
|
|
|
}
|
2016-01-19 06:54:46 +08:00
|
|
|
llvm_unreachable("Unexpected enumeration.");
|
TargetLowering: Improve handling of (setcc ([sz]ext x) 0, cc) in SimplifySetCC
Summary:
When SimplifySetCC sees a setcc node that compares the result of a
value extension operation with a constant, it tries to simplify the
setcc node by eliminating the extension and shrinking the constant.
If shrinking the inputs to setcc is deemed not desirable by the target
(e.g. the target does not want a setcc comparing i1 values), then it
is still possible to optimize this sequence in some cases.
This patch adds the following combines to SimplifySetCC when shrinking setcc
inputs is not desirable:
(setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> (setcc (x, y, cc))
(setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> (setcc (x, Y, !cc))
There are no tests for this yet, but once AMDGPU correctly implements
TargetLowering::isTypeDesirableForOp(), this new combine will be
exercised by the existing CodeGen/AMDGPU/setcc-opt.ll test.
Reviewers: resistor, arsenm
Subscribers: jroelofs, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15034
llvm-svn: 258067
2016-01-19 03:55:21 +08:00
|
|
|
}
|
|
|
|
|
2016-05-10 00:42:50 +08:00
|
|
|
/// This helper function of SimplifySetCC tries to optimize the comparison when
|
|
|
|
/// either operand of the SetCC node is a bitwise-and instruction.
|
|
|
|
SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
|
|
|
|
ISD::CondCode Cond,
|
|
|
|
DAGCombinerInfo &DCI,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SDLoc &DL) const {
|
2016-05-07 23:03:40 +08:00
|
|
|
// Match these patterns in any of their permutations:
|
|
|
|
// (X & Y) == Y
|
|
|
|
// (X & Y) != Y
|
|
|
|
if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
|
|
|
|
std::swap(N0, N1);
|
|
|
|
|
2016-05-10 00:42:50 +08:00
|
|
|
EVT OpVT = N0.getValueType();
|
|
|
|
if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
|
2016-05-07 23:03:40 +08:00
|
|
|
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
|
|
|
|
return SDValue();
|
|
|
|
|
|
|
|
SDValue X, Y;
|
|
|
|
if (N0.getOperand(0) == N1) {
|
|
|
|
X = N0.getOperand(1);
|
|
|
|
Y = N0.getOperand(0);
|
|
|
|
} else if (N0.getOperand(1) == N1) {
|
|
|
|
X = N0.getOperand(0);
|
|
|
|
Y = N0.getOperand(1);
|
|
|
|
} else {
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
2016-05-10 00:42:50 +08:00
|
|
|
SelectionDAG &DAG = DCI.DAG;
|
|
|
|
SDValue Zero = DAG.getConstant(0, DL, OpVT);
|
2016-05-19 23:53:52 +08:00
|
|
|
if (DAG.isKnownToBeAPowerOfTwo(Y)) {
|
2016-05-10 00:42:50 +08:00
|
|
|
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
|
|
|
|
// Note that where Y is variable and is known to have at most one bit set
|
|
|
|
// (for example, if it is Z & 1) we cannot do this; the expressions are not
|
|
|
|
// equivalent when Y == 0.
|
|
|
|
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
|
|
|
|
if (DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(Cond, N0.getSimpleValueType()))
|
|
|
|
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
|
|
|
|
} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
|
|
|
|
// If the target supports an 'and-not' or 'and-complement' logic operation,
|
|
|
|
// try to use that to make a comparison operation more efficient.
|
|
|
|
// But don't do this transform if the mask is a single bit because there are
|
|
|
|
// more efficient ways to deal with that case (for example, 'bt' on x86 or
|
|
|
|
// 'rlwinm' on PPC).
|
|
|
|
|
|
|
|
// Bail out if the compare operand that we want to turn into a zero is
|
|
|
|
// already a zero (otherwise, infinite loop).
|
|
|
|
auto *YConst = dyn_cast<ConstantSDNode>(Y);
|
|
|
|
if (YConst && YConst->isNullValue())
|
|
|
|
return SDValue();
|
|
|
|
|
|
|
|
// Transform this into: ~X & Y == 0.
|
|
|
|
SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
|
|
|
|
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
|
|
|
|
return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
|
|
|
|
}
|
2016-05-07 23:03:40 +08:00
|
|
|
|
2016-05-10 00:42:50 +08:00
|
|
|
return SDValue();
|
2016-05-07 23:03:40 +08:00
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Try to simplify a setcc built with the specified operands and cc. If it is
|
|
|
|
/// unable to simplify it, return a null SDValue.
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
|
|
|
ISD::CondCode Cond, bool foldBooleans,
|
|
|
|
DAGCombinerInfo &DCI,
|
|
|
|
const SDLoc &dl) const {
|
2007-02-09 06:13:59 +08:00
|
|
|
SelectionDAG &DAG = DCI.DAG;
|
|
|
|
|
|
|
|
// These setcc operations always fold.
|
|
|
|
switch (Cond) {
|
|
|
|
default: break;
|
|
|
|
case ISD::SETFALSE:
|
2015-04-28 22:05:47 +08:00
|
|
|
case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
|
2007-02-09 06:13:59 +08:00
|
|
|
case ISD::SETTRUE:
|
2013-09-06 20:38:12 +08:00
|
|
|
case ISD::SETTRUE2: {
|
2014-07-10 18:18:12 +08:00
|
|
|
TargetLowering::BooleanContent Cnt =
|
|
|
|
getBooleanContents(N0->getValueType(0));
|
2013-09-06 20:38:12 +08:00
|
|
|
return DAG.getConstant(
|
2015-04-28 22:05:47 +08:00
|
|
|
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
|
|
|
|
VT);
|
2013-09-06 20:38:12 +08:00
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
2011-04-15 13:18:47 +08:00
|
|
|
// Ensure that the constant occurs on the RHS, and fold constant
|
|
|
|
// comparisons.
|
2013-09-28 10:50:38 +08:00
|
|
|
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
|
|
|
|
if (isa<ConstantSDNode>(N0.getNode()) &&
|
|
|
|
(DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
|
|
|
|
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
|
2011-06-18 04:41:29 +08:00
|
|
|
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
|
2008-03-04 06:22:56 +08:00
|
|
|
const APInt &C1 = N1C->getAPIntValue();
|
2009-07-27 07:47:17 +08:00
|
|
|
|
|
|
|
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
|
|
|
|
// equality comparison, then we're just comparing whether X itself is
|
|
|
|
// zero.
|
|
|
|
if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
|
|
|
|
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
|
|
|
|
N0.getOperand(1).getOpcode() == ISD::Constant) {
|
2010-01-08 04:58:44 +08:00
|
|
|
const APInt &ShAmt
|
|
|
|
= cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
|
2016-09-15 00:05:51 +08:00
|
|
|
ShAmt == Log2_32(N0.getValueSizeInBits())) {
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
|
|
|
|
// (srl (ctlz x), 5) == 0 -> X != 0
|
|
|
|
// (srl (ctlz x), 5) != 1 -> X != 0
|
|
|
|
Cond = ISD::SETNE;
|
|
|
|
} else {
|
|
|
|
// (srl (ctlz x), 5) != 0 -> X == 0
|
|
|
|
// (srl (ctlz x), 5) == 1 -> X == 0
|
|
|
|
Cond = ISD::SETEQ;
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
|
2009-07-27 07:47:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
|
|
|
|
Zero, Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2008-11-07 09:28:02 +08:00
|
|
|
|
2011-01-17 20:04:57 +08:00
|
|
|
SDValue CTPOP = N0;
|
|
|
|
// Look through truncs that don't change the value of a ctpop.
|
|
|
|
if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
|
|
|
|
CTPOP = N0.getOperand(0);
|
|
|
|
|
|
|
|
if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
|
2016-09-15 00:05:51 +08:00
|
|
|
(N0 == CTPOP ||
|
|
|
|
N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
|
2011-01-17 20:04:57 +08:00
|
|
|
EVT CTVT = CTPOP.getValueType();
|
|
|
|
SDValue CTOp = CTPOP.getOperand(0);
|
|
|
|
|
|
|
|
// (ctpop x) u< 2 -> (x & x-1) == 0
|
|
|
|
// (ctpop x) u> 1 -> (x & x-1) != 0
|
|
|
|
if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
|
|
|
|
SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(1, dl, CTVT));
|
2011-01-17 20:04:57 +08:00
|
|
|
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
|
|
|
|
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
|
2011-01-17 20:04:57 +08:00
|
|
|
}
|
|
|
|
|
2012-09-27 18:14:43 +08:00
|
|
|
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
|
2011-01-17 20:04:57 +08:00
|
|
|
}
|
|
|
|
|
2011-04-23 02:47:44 +08:00
|
|
|
// (zext x) == C --> x == (trunc C)
|
2014-12-22 00:48:42 +08:00
|
|
|
// (sext x) == C --> x == (trunc C)
|
|
|
|
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
|
|
|
|
DCI.isBeforeLegalize() && N0->hasOneUse()) {
|
2011-04-23 02:47:44 +08:00
|
|
|
unsigned MinBits = N0.getValueSizeInBits();
|
2014-12-22 00:48:42 +08:00
|
|
|
SDValue PreExt;
|
|
|
|
bool Signed = false;
|
2011-04-23 02:47:44 +08:00
|
|
|
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
|
|
|
|
// ZExt
|
|
|
|
MinBits = N0->getOperand(0).getValueSizeInBits();
|
2014-12-22 00:48:42 +08:00
|
|
|
PreExt = N0->getOperand(0);
|
2011-04-23 02:47:44 +08:00
|
|
|
} else if (N0->getOpcode() == ISD::AND) {
|
|
|
|
// DAGCombine turns costly ZExts into ANDs
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
|
2011-04-23 02:47:44 +08:00
|
|
|
if ((C->getAPIntValue()+1).isPowerOf2()) {
|
|
|
|
MinBits = C->getAPIntValue().countTrailingOnes();
|
2014-12-22 00:48:42 +08:00
|
|
|
PreExt = N0->getOperand(0);
|
2011-04-23 02:47:44 +08:00
|
|
|
}
|
2014-12-22 00:48:42 +08:00
|
|
|
} else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
|
|
|
|
// SExt
|
|
|
|
MinBits = N0->getOperand(0).getValueSizeInBits();
|
|
|
|
PreExt = N0->getOperand(0);
|
|
|
|
Signed = true;
|
2015-12-30 05:49:08 +08:00
|
|
|
} else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
|
2014-12-22 00:48:42 +08:00
|
|
|
// ZEXTLOAD / SEXTLOAD
|
2011-04-23 02:47:44 +08:00
|
|
|
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
|
|
|
|
MinBits = LN0->getMemoryVT().getSizeInBits();
|
2014-12-22 00:48:42 +08:00
|
|
|
PreExt = N0;
|
|
|
|
} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
|
|
|
|
Signed = true;
|
|
|
|
MinBits = LN0->getMemoryVT().getSizeInBits();
|
|
|
|
PreExt = N0;
|
2011-04-23 02:47:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-22 00:48:42 +08:00
|
|
|
// Figure out how many bits we need to preserve this constant.
|
|
|
|
unsigned ReqdBits = Signed ?
|
|
|
|
C1.getBitWidth() - C1.getNumSignBits() + 1 :
|
|
|
|
C1.getActiveBits();
|
|
|
|
|
2012-06-02 18:20:22 +08:00
|
|
|
// Make sure we're not losing bits from the constant.
|
2013-05-21 16:51:09 +08:00
|
|
|
if (MinBits > 0 &&
|
2014-12-22 00:48:42 +08:00
|
|
|
MinBits < C1.getBitWidth() &&
|
|
|
|
MinBits >= ReqdBits) {
|
2011-04-23 02:47:44 +08:00
|
|
|
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
|
|
|
|
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
|
|
|
|
// Will get folded away.
|
2014-12-22 00:48:42 +08:00
|
|
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
|
2016-07-19 15:14:21 +08:00
|
|
|
if (MinBits == 1 && C1 == 1)
|
|
|
|
// Invert the condition.
|
|
|
|
return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
|
|
|
|
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
|
2011-04-23 02:47:44 +08:00
|
|
|
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
|
|
|
|
}
|
TargetLowering: Improve handling of (setcc ([sz]ext x) 0, cc) in SimplifySetCC
Summary:
When SimplifySetCC sees a setcc node that compares the result of a
value extension operation with a constant, it tries to simplify the
setcc node by eliminating the extension and shrinking the constant.
If shrinking the inputs to setcc is deemed not desirable by the target
(e.g. the target does not want a setcc comparing i1 values), then it
is still possible to optimize this sequence in some cases.
This patch adds the following combines to SimplifySetCC when shrinking setcc
inputs is not desirable:
(setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> (setcc (x, y, cc))
(setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> (setcc (x, Y, !cc))
There are no tests for this yet, but once AMDGPU correctly implements
TargetLowering::isTypeDesirableForOp(), this new combine will be
exercised by the existing CodeGen/AMDGPU/setcc-opt.ll test.
Reviewers: resistor, arsenm
Subscribers: jroelofs, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15034
llvm-svn: 258067
2016-01-19 03:55:21 +08:00
|
|
|
|
|
|
|
// If truncating the setcc operands is not desirable, we can still
|
|
|
|
// simplify the expression in some cases:
|
|
|
|
// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
|
|
|
|
// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
|
|
|
|
// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
|
|
|
|
// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
|
|
|
|
// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
|
|
|
|
// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
|
|
|
|
SDValue TopSetCC = N0->getOperand(0);
|
|
|
|
unsigned N0Opc = N0->getOpcode();
|
|
|
|
bool SExt = (N0Opc == ISD::SIGN_EXTEND);
|
|
|
|
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
|
|
|
|
TopSetCC.getOpcode() == ISD::SETCC &&
|
|
|
|
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
|
|
|
|
(isConstFalseVal(N1C) ||
|
|
|
|
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
|
|
|
|
|
|
|
|
bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
|
|
|
|
(!N1C->isNullValue() && Cond == ISD::SETNE);
|
|
|
|
|
|
|
|
if (!Inverse)
|
|
|
|
return TopSetCC;
|
|
|
|
|
|
|
|
ISD::CondCode InvCond = ISD::getSetCCInverse(
|
|
|
|
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
|
|
|
|
TopSetCC.getOperand(0).getValueType().isInteger());
|
|
|
|
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
|
|
|
|
TopSetCC.getOperand(1),
|
|
|
|
InvCond);
|
|
|
|
|
|
|
|
}
|
2011-04-23 02:47:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
// If the LHS is '(and load, const)', the RHS is 0,
|
|
|
|
// the test is for equality or unsigned, and all 1 bits of the const are
|
|
|
|
// in the same partial word, see if we can shorten the load.
|
|
|
|
if (DCI.isBeforeLegalize() &&
|
2013-09-25 06:50:14 +08:00
|
|
|
!ISD::isSignedIntSetCC(Cond) &&
|
2009-07-27 07:47:17 +08:00
|
|
|
N0.getOpcode() == ISD::AND && C1 == 0 &&
|
|
|
|
N0.getNode()->hasOneUse() &&
|
|
|
|
isa<LoadSDNode>(N0.getOperand(0)) &&
|
|
|
|
N0.getOperand(0).getNode()->hasOneUse() &&
|
|
|
|
isa<ConstantSDNode>(N0.getOperand(1))) {
|
|
|
|
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
|
2010-01-08 04:58:44 +08:00
|
|
|
APInt bestMask;
|
2009-07-27 07:47:17 +08:00
|
|
|
unsigned bestWidth = 0, bestOffset = 0;
|
2010-01-08 04:58:44 +08:00
|
|
|
if (!Lod->isVolatile() && Lod->isUnindexed()) {
|
2016-09-15 00:05:51 +08:00
|
|
|
unsigned origWidth = N0.getValueSizeInBits();
|
2010-01-08 04:58:44 +08:00
|
|
|
unsigned maskWidth = origWidth;
|
2010-11-23 11:31:01 +08:00
|
|
|
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
|
2009-07-27 07:47:17 +08:00
|
|
|
// 8 bits, but have to be careful...
|
|
|
|
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
|
|
|
|
origWidth = Lod->getMemoryVT().getSizeInBits();
|
2010-01-08 04:58:44 +08:00
|
|
|
const APInt &Mask =
|
|
|
|
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
|
2009-07-27 07:47:17 +08:00
|
|
|
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
|
2010-01-08 04:58:44 +08:00
|
|
|
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
|
2009-07-27 07:47:17 +08:00
|
|
|
for (unsigned offset=0; offset<origWidth/width; offset++) {
|
|
|
|
if ((newMask & Mask) == Mask) {
|
2015-07-08 03:07:19 +08:00
|
|
|
if (!DAG.getDataLayout().isLittleEndian())
|
2009-07-27 07:47:17 +08:00
|
|
|
bestOffset = (origWidth/width - offset - 1) * (width/8);
|
|
|
|
else
|
|
|
|
bestOffset = (uint64_t)offset * (width/8);
|
2010-01-08 04:58:44 +08:00
|
|
|
bestMask = Mask.lshr(offset * (width/8) * 8);
|
2009-07-27 07:47:17 +08:00
|
|
|
bestWidth = width;
|
|
|
|
break;
|
2008-11-07 09:28:02 +08:00
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
newMask = newMask << width;
|
2008-11-07 09:28:02 +08:00
|
|
|
}
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
|
|
|
if (bestWidth) {
|
2011-04-14 12:12:47 +08:00
|
|
|
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
|
2009-07-27 07:47:17 +08:00
|
|
|
if (newVT.isRound()) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrType = Lod->getOperand(1).getValueType();
|
2009-07-27 07:47:17 +08:00
|
|
|
SDValue Ptr = Lod->getBasePtr();
|
|
|
|
if (bestOffset != 0)
|
|
|
|
Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(bestOffset, dl, PtrType));
|
2009-07-27 07:47:17 +08:00
|
|
|
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
|
[SelectionDAG] Get rid of bool parameters in SelectionDAG::getLoad, getStore, and friends.
Summary:
Instead, we take a single flags arg (a bitset).
Also add a default 0 alignment, and change the order of arguments so the
alignment comes before the flags.
This greatly simplifies many callsites, and fixes a bug in
AMDGPUISelLowering, wherein the order of the args to getLoad was
inverted. It also greatly simplifies the process of adding another flag
to getLoad.
Reviewers: chandlerc, tstellarAMD
Subscribers: jholewinski, arsenm, jyknight, dsanders, nemanjai, llvm-commits
Differential Revision: http://reviews.llvm.org/D22249
llvm-svn: 275592
2016-07-16 02:27:10 +08:00
|
|
|
SDValue NewLoad = DAG.getLoad(
|
|
|
|
newVT, dl, Lod->getChain(), Ptr,
|
|
|
|
Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT,
|
2009-07-27 07:47:17 +08:00
|
|
|
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
|
2010-01-08 04:58:44 +08:00
|
|
|
DAG.getConstant(bestMask.trunc(bestWidth),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, newVT)),
|
|
|
|
DAG.getConstant(0LL, dl, newVT), Cond);
|
2008-11-07 09:28:02 +08:00
|
|
|
}
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2008-11-11 05:22:06 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
|
|
|
|
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
|
2016-09-15 00:05:51 +08:00
|
|
|
unsigned InSize = N0.getOperand(0).getValueSizeInBits();
|
2007-02-09 06:13:59 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
// If the comparison constant has bits in the upper part, the
|
|
|
|
// zero-extended value could never match.
|
|
|
|
if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
|
|
|
|
C1.getBitWidth() - InSize))) {
|
2007-02-09 06:13:59 +08:00
|
|
|
switch (Cond) {
|
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETUGE:
|
2015-04-28 22:05:47 +08:00
|
|
|
case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
|
2007-02-09 06:13:59 +08:00
|
|
|
case ISD::SETULT:
|
2009-07-27 07:47:17 +08:00
|
|
|
case ISD::SETULE:
|
2015-04-28 22:05:47 +08:00
|
|
|
case ISD::SETNE: return DAG.getConstant(1, dl, VT);
|
2009-07-27 07:47:17 +08:00
|
|
|
case ISD::SETGT:
|
|
|
|
case ISD::SETGE:
|
|
|
|
// True if the sign bit of C1 is set.
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(C1.isNegative(), dl, VT);
|
2009-07-27 07:47:17 +08:00
|
|
|
case ISD::SETLT:
|
|
|
|
case ISD::SETLE:
|
|
|
|
// True if the sign bit of C1 isn't set.
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(C1.isNonNegative(), dl, VT);
|
2007-02-09 06:13:59 +08:00
|
|
|
default:
|
2009-07-27 07:47:17 +08:00
|
|
|
break;
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
|
|
|
|
// Otherwise, we can perform the comparison with the low bits.
|
|
|
|
switch (Cond) {
|
|
|
|
case ISD::SETEQ:
|
|
|
|
case ISD::SETNE:
|
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETUGE:
|
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETULE: {
|
2012-12-11 19:14:33 +08:00
|
|
|
EVT newVT = N0.getOperand(0).getValueType();
|
2009-07-27 07:47:17 +08:00
|
|
|
if (DCI.isBeforeLegalizeOps() ||
|
|
|
|
(isOperationLegal(ISD::SETCC, newVT) &&
|
2014-05-08 02:26:58 +08:00
|
|
|
getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT NewSetCCVT =
|
|
|
|
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
|
2015-04-28 22:05:47 +08:00
|
|
|
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
|
2014-05-08 02:26:58 +08:00
|
|
|
|
|
|
|
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
|
|
|
|
NewConst, Cond);
|
2014-07-10 18:18:12 +08:00
|
|
|
return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
|
2014-05-08 02:26:58 +08:00
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break; // todo, be more careful with signed comparisons
|
|
|
|
}
|
|
|
|
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
|
2010-02-27 15:36:59 +08:00
|
|
|
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
|
2009-07-27 07:47:17 +08:00
|
|
|
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ExtDstTy = N0.getValueType();
|
2009-07-27 07:47:17 +08:00
|
|
|
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
|
|
|
|
|
2010-07-30 14:44:31 +08:00
|
|
|
// If the constant doesn't fit into the number of bits for the source of
|
|
|
|
// the sign extension, it is impossible for both sides to be equal.
|
|
|
|
if (C1.getMinSignedBits() > ExtSrcTyBits)
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
SDValue ZextOp;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT Op0Ty = N0.getOperand(0).getValueType();
|
2009-07-27 07:47:17 +08:00
|
|
|
if (Op0Ty == ExtSrcTy) {
|
|
|
|
ZextOp = N0.getOperand(0);
|
2007-02-09 06:13:59 +08:00
|
|
|
} else {
|
2009-07-27 07:47:17 +08:00
|
|
|
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
|
|
|
|
ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(Imm, dl, Op0Ty));
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
|
|
|
DCI.AddToWorklist(ZextOp.getNode());
|
|
|
|
// Otherwise, make this a use of a zext.
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, ZextOp,
|
2009-07-27 07:47:17 +08:00
|
|
|
DAG.getConstant(C1 & APInt::getLowBitsSet(
|
|
|
|
ExtDstTyBits,
|
2010-11-23 11:31:01 +08:00
|
|
|
ExtSrcTyBits),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, ExtDstTy),
|
2009-07-27 07:47:17 +08:00
|
|
|
Cond);
|
|
|
|
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
|
|
|
|
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
|
|
|
|
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
|
2010-02-27 15:36:59 +08:00
|
|
|
if (N0.getOpcode() == ISD::SETCC &&
|
|
|
|
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
|
2010-01-08 04:58:44 +08:00
|
|
|
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
|
2009-07-27 07:47:17 +08:00
|
|
|
if (TrueWhenTrue)
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
|
2009-07-27 07:47:17 +08:00
|
|
|
// Invert the condition.
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
|
2010-11-23 11:31:01 +08:00
|
|
|
CC = ISD::getSetCCInverse(CC,
|
2009-07-27 07:47:17 +08:00
|
|
|
N0.getOperand(0).getValueType().isInteger());
|
2013-09-28 10:50:38 +08:00
|
|
|
if (DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
|
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2010-02-27 15:36:59 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((N0.getOpcode() == ISD::XOR ||
|
2010-11-23 11:31:01 +08:00
|
|
|
(N0.getOpcode() == ISD::AND &&
|
2009-07-27 07:47:17 +08:00
|
|
|
N0.getOperand(0).getOpcode() == ISD::XOR &&
|
|
|
|
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
|
|
|
|
isa<ConstantSDNode>(N0.getOperand(1)) &&
|
|
|
|
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
|
|
|
|
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
|
|
|
|
// can only do this if the top bits are known zero.
|
|
|
|
unsigned BitWidth = N0.getValueSizeInBits();
|
|
|
|
if (DAG.MaskedValueIsZero(N0,
|
|
|
|
APInt::getHighBitsSet(BitWidth,
|
|
|
|
BitWidth-1))) {
|
|
|
|
// Okay, get the un-inverted input value.
|
|
|
|
SDValue Val;
|
|
|
|
if (N0.getOpcode() == ISD::XOR)
|
|
|
|
Val = N0.getOperand(0);
|
|
|
|
else {
|
2010-11-23 11:31:01 +08:00
|
|
|
assert(N0.getOpcode() == ISD::AND &&
|
2009-07-27 07:47:17 +08:00
|
|
|
N0.getOperand(0).getOpcode() == ISD::XOR);
|
|
|
|
// ((X^1)&1)^1 -> X & 1
|
|
|
|
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
|
|
|
|
N0.getOperand(0).getOperand(0),
|
|
|
|
N0.getOperand(1));
|
|
|
|
}
|
2010-02-27 15:36:59 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, Val, N1,
|
|
|
|
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
|
|
|
|
}
|
2010-02-27 15:36:59 +08:00
|
|
|
} else if (N1C->getAPIntValue() == 1 &&
|
|
|
|
(VT == MVT::i1 ||
|
2014-07-10 18:18:12 +08:00
|
|
|
getBooleanContents(N0->getValueType(0)) ==
|
|
|
|
ZeroOrOneBooleanContent)) {
|
2010-02-27 15:36:59 +08:00
|
|
|
SDValue Op0 = N0;
|
|
|
|
if (Op0.getOpcode() == ISD::TRUNCATE)
|
|
|
|
Op0 = Op0.getOperand(0);
|
|
|
|
|
|
|
|
if ((Op0.getOpcode() == ISD::XOR) &&
|
|
|
|
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
|
|
|
|
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
|
|
|
|
// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
|
|
|
|
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
|
|
|
|
return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
|
|
|
|
Cond);
|
2012-12-19 14:12:28 +08:00
|
|
|
}
|
|
|
|
if (Op0.getOpcode() == ISD::AND &&
|
|
|
|
isa<ConstantSDNode>(Op0.getOperand(1)) &&
|
|
|
|
cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
|
2010-02-27 15:36:59 +08:00
|
|
|
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
|
2010-05-01 20:52:34 +08:00
|
|
|
if (Op0.getValueType().bitsGT(VT))
|
2010-02-27 15:36:59 +08:00
|
|
|
Op0 = DAG.getNode(ISD::AND, dl, VT,
|
|
|
|
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(1, dl, VT));
|
2010-05-01 20:52:34 +08:00
|
|
|
else if (Op0.getValueType().bitsLT(VT))
|
|
|
|
Op0 = DAG.getNode(ISD::AND, dl, VT,
|
|
|
|
DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(1, dl, VT));
|
2010-05-01 20:52:34 +08:00
|
|
|
|
2010-02-27 15:36:59 +08:00
|
|
|
return DAG.getSetCC(dl, VT, Op0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, Op0.getValueType()),
|
2010-02-27 15:36:59 +08:00
|
|
|
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
|
|
|
|
}
|
2012-12-19 14:12:28 +08:00
|
|
|
if (Op0.getOpcode() == ISD::AssertZext &&
|
|
|
|
cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
|
|
|
|
return DAG.getSetCC(dl, VT, Op0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, Op0.getValueType()),
|
2012-12-19 14:12:28 +08:00
|
|
|
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
APInt MinVal, MaxVal;
|
|
|
|
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
|
|
|
|
if (ISD::isSignedIntSetCC(Cond)) {
|
|
|
|
MinVal = APInt::getSignedMinValue(OperandBitSize);
|
|
|
|
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
|
|
|
|
} else {
|
|
|
|
MinVal = APInt::getMinValue(OperandBitSize);
|
|
|
|
MaxVal = APInt::getMaxValue(OperandBitSize);
|
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
|
|
|
|
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
|
2015-04-28 22:05:47 +08:00
|
|
|
if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
|
2014-03-26 00:09:21 +08:00
|
|
|
// X >= C0 --> X > (C0 - 1)
|
|
|
|
APInt C = C1 - 1;
|
|
|
|
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
|
|
|
|
if ((DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
|
|
|
|
(!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
|
|
|
|
isLegalICmpImmediate(C.getSExtValue())))) {
|
2014-01-25 10:02:55 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(C, dl, N1.getValueType()),
|
2014-03-26 00:09:21 +08:00
|
|
|
NewCC);
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
|
2009-07-27 07:47:17 +08:00
|
|
|
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
|
2015-04-28 22:05:47 +08:00
|
|
|
if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
|
2014-03-26 00:09:21 +08:00
|
|
|
// X <= C0 --> X < (C0 + 1)
|
|
|
|
APInt C = C1 + 1;
|
|
|
|
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
|
|
|
|
if ((DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
|
|
|
|
(!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
|
|
|
|
isLegalICmpImmediate(C.getSExtValue())))) {
|
2014-01-25 10:02:55 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(C, dl, N1.getValueType()),
|
2014-03-26 00:09:21 +08:00
|
|
|
NewCC);
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(0, dl, VT); // X < MIN --> false
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(1, dl, VT); // X >= MIN --> true
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(0, dl, VT); // X > MAX --> false
|
2009-07-27 07:47:17 +08:00
|
|
|
if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(1, dl, VT); // X <= MAX --> true
|
2009-07-27 07:47:17 +08:00
|
|
|
|
|
|
|
// Canonicalize setgt X, Min --> setne X, Min
|
|
|
|
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
|
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
|
|
|
|
// Canonicalize setlt X, Max --> setne X, Max
|
|
|
|
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
|
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
|
|
|
|
|
|
|
|
// If we have setult X, 1, turn it into seteq X, 0
|
|
|
|
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(MinVal, dl, N0.getValueType()),
|
2009-07-27 07:47:17 +08:00
|
|
|
ISD::SETEQ);
|
|
|
|
// If we have setugt X, Max-1, turn it into seteq X, Max
|
2012-12-19 14:43:58 +08:00
|
|
|
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(MaxVal, dl, N0.getValueType()),
|
2009-07-27 07:47:17 +08:00
|
|
|
ISD::SETEQ);
|
|
|
|
|
|
|
|
// If we have "setcc X, C0", check to see if we can shrink the immediate
|
|
|
|
// by changing cc.
|
|
|
|
|
|
|
|
// SETUGT X, SINTMAX -> SETLT X, 0
|
2010-11-23 11:31:01 +08:00
|
|
|
if (Cond == ISD::SETUGT &&
|
2009-07-27 07:47:17 +08:00
|
|
|
C1 == APInt::getSignedMaxValue(OperandBitSize))
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, N1.getValueType()),
|
2009-07-27 07:47:17 +08:00
|
|
|
ISD::SETLT);
|
|
|
|
|
|
|
|
// SETULT X, SINTMIN -> SETGT X, -1
|
|
|
|
if (Cond == ISD::SETULT &&
|
|
|
|
C1 == APInt::getSignedMinValue(OperandBitSize)) {
|
|
|
|
SDValue ConstMinusOne =
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
|
2009-07-27 07:47:17 +08:00
|
|
|
N1.getValueType());
|
|
|
|
return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fold bit comparisons when we can.
|
|
|
|
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
|
2010-01-07 03:38:29 +08:00
|
|
|
(VT == N0.getValueType() ||
|
|
|
|
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
|
2015-07-09 10:09:04 +08:00
|
|
|
N0.getOpcode() == ISD::AND) {
|
|
|
|
auto &DL = DAG.getDataLayout();
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT ShiftTy = DCI.isBeforeLegalize()
|
|
|
|
? getPointerTy(DL)
|
2015-07-09 10:09:20 +08:00
|
|
|
: getShiftAmountTy(N0.getValueType(), DL);
|
2009-07-27 07:47:17 +08:00
|
|
|
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
|
|
|
|
// Perform the xform if the AND RHS is a single bit.
|
2010-01-08 04:58:44 +08:00
|
|
|
if (AndRHS->getAPIntValue().isPowerOf2()) {
|
2010-01-07 03:38:29 +08:00
|
|
|
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
|
|
|
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
|
|
|
|
ShiftTy)));
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2010-01-08 04:58:44 +08:00
|
|
|
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
|
2009-07-27 07:47:17 +08:00
|
|
|
// (X & 8) == 8 --> (X & 8) >> 3
|
|
|
|
// Perform the xform if C1 is a single bit.
|
|
|
|
if (C1.isPowerOf2()) {
|
2010-01-07 03:38:29 +08:00
|
|
|
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
|
|
|
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(C1.logBase2(), dl,
|
|
|
|
ShiftTy)));
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
}
|
2009-07-27 07:47:17 +08:00
|
|
|
}
|
2015-07-09 10:09:04 +08:00
|
|
|
}
|
2012-07-17 14:53:39 +08:00
|
|
|
|
2012-07-17 15:47:50 +08:00
|
|
|
if (C1.getMinSignedBits() <= 64 &&
|
|
|
|
!isLegalICmpImmediate(C1.getSExtValue())) {
|
2012-07-17 14:53:39 +08:00
|
|
|
// (X & -256) == 256 -> (X >> 8) == 1
|
|
|
|
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
|
|
|
|
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
2012-07-17 14:53:39 +08:00
|
|
|
const APInt &AndRHSC = AndRHS->getAPIntValue();
|
|
|
|
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
|
|
|
|
unsigned ShiftBits = AndRHSC.countTrailingZeros();
|
2015-07-09 10:09:04 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
|
|
|
EVT ShiftTy = DCI.isBeforeLegalize()
|
|
|
|
? getPointerTy(DL)
|
2015-07-09 10:09:20 +08:00
|
|
|
: getShiftAmountTy(N0.getValueType(), DL);
|
2012-07-17 14:53:39 +08:00
|
|
|
EVT CmpTy = N0.getValueType();
|
|
|
|
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(ShiftBits, dl,
|
|
|
|
ShiftTy));
|
|
|
|
SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
|
2012-07-17 14:53:39 +08:00
|
|
|
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
|
|
|
|
}
|
|
|
|
}
|
2012-07-17 16:31:11 +08:00
|
|
|
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
|
|
|
|
Cond == ISD::SETULE || Cond == ISD::SETUGT) {
|
|
|
|
bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
|
|
|
|
// X < 0x100000000 -> (X >> 32) < 1
|
|
|
|
// X >= 0x100000000 -> (X >> 32) >= 1
|
|
|
|
// X <= 0x0ffffffff -> (X >> 32) < 1
|
|
|
|
// X > 0x0ffffffff -> (X >> 32) >= 1
|
|
|
|
unsigned ShiftBits;
|
|
|
|
APInt NewC = C1;
|
|
|
|
ISD::CondCode NewCond = Cond;
|
|
|
|
if (AdjOne) {
|
|
|
|
ShiftBits = C1.countTrailingOnes();
|
|
|
|
NewC = NewC + 1;
|
|
|
|
NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
|
|
|
|
} else {
|
|
|
|
ShiftBits = C1.countTrailingZeros();
|
|
|
|
}
|
|
|
|
NewC = NewC.lshr(ShiftBits);
|
2015-05-21 01:21:09 +08:00
|
|
|
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
|
|
|
|
isLegalICmpImmediate(NewC.getSExtValue())) {
|
2015-07-09 10:09:04 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
|
|
|
EVT ShiftTy = DCI.isBeforeLegalize()
|
|
|
|
? getPointerTy(DL)
|
2015-07-09 10:09:20 +08:00
|
|
|
: getShiftAmountTy(N0.getValueType(), DL);
|
2012-07-17 16:31:11 +08:00
|
|
|
EVT CmpTy = N0.getValueType();
|
|
|
|
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(ShiftBits, dl, ShiftTy));
|
|
|
|
SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
|
2012-07-17 16:31:11 +08:00
|
|
|
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
|
|
|
|
}
|
2012-07-17 14:53:39 +08:00
|
|
|
}
|
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
if (isa<ConstantFPSDNode>(N0.getNode())) {
|
2007-02-09 06:13:59 +08:00
|
|
|
// Constant fold or commute setcc.
|
2009-02-03 08:47:48 +08:00
|
|
|
SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
|
2008-08-29 05:40:38 +08:00
|
|
|
if (O.getNode()) return O;
|
2015-12-30 05:49:08 +08:00
|
|
|
} else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
|
Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant. This allows us to compile isnan to:
_foo:
fcmpu cr7, f1, f1
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
instead of:
LCPI1_0: ; float
.space 4
_foo:
lis r2, ha16(LCPI1_0)
lfs f0, lo16(LCPI1_0)(r2)
fcmpu cr7, f1, f0
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 45405
2007-12-29 16:37:08 +08:00
|
|
|
// If the RHS of an FP comparison is a constant, simplify it away in
|
|
|
|
// some cases.
|
|
|
|
if (CFP->getValueAPF().isNaN()) {
|
|
|
|
// If an operand is known to be a nan, we can fold it.
|
|
|
|
switch (ISD::getUnorderedFlavor(Cond)) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unknown flavor!");
|
Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant. This allows us to compile isnan to:
_foo:
fcmpu cr7, f1, f1
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
instead of:
LCPI1_0: ; float
.space 4
_foo:
lis r2, ha16(LCPI1_0)
lfs f0, lo16(LCPI1_0)(r2)
fcmpu cr7, f1, f0
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 45405
2007-12-29 16:37:08 +08:00
|
|
|
case 0: // Known false.
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(0, dl, VT);
|
Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant. This allows us to compile isnan to:
_foo:
fcmpu cr7, f1, f1
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
instead of:
LCPI1_0: ; float
.space 4
_foo:
lis r2, ha16(LCPI1_0)
lfs f0, lo16(LCPI1_0)(r2)
fcmpu cr7, f1, f0
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 45405
2007-12-29 16:37:08 +08:00
|
|
|
case 1: // Known true.
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(1, dl, VT);
|
2007-12-31 05:21:10 +08:00
|
|
|
case 2: // Undefined.
|
2009-02-07 07:05:02 +08:00
|
|
|
return DAG.getUNDEF(VT);
|
Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant. This allows us to compile isnan to:
_foo:
fcmpu cr7, f1, f1
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
instead of:
LCPI1_0: ; float
.space 4
_foo:
lis r2, ha16(LCPI1_0)
lfs f0, lo16(LCPI1_0)(r2)
fcmpu cr7, f1, f0
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 45405
2007-12-29 16:37:08 +08:00
|
|
|
}
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant. This allows us to compile isnan to:
_foo:
fcmpu cr7, f1, f1
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
instead of:
LCPI1_0: ; float
.space 4
_foo:
lis r2, ha16(LCPI1_0)
lfs f0, lo16(LCPI1_0)(r2)
fcmpu cr7, f1, f0
mfcr r2
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 45405
2007-12-29 16:37:08 +08:00
|
|
|
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
|
|
|
|
// constant if knowing that the operand is non-nan is enough. We prefer to
|
|
|
|
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
|
|
|
|
// materialize 0.0.
|
|
|
|
if (Cond == ISD::SETO || Cond == ISD::SETUO)
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N0, Cond);
|
2009-09-26 23:24:17 +08:00
|
|
|
|
|
|
|
// If the condition is not legal, see if we can find an equivalent one
|
|
|
|
// which is legal.
|
2012-12-19 18:19:55 +08:00
|
|
|
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
|
2009-09-26 23:24:17 +08:00
|
|
|
// If the comparison was an awkward floating-point == or != and one of
|
|
|
|
// the comparison operands is infinity or negative infinity, convert the
|
|
|
|
// condition to a less-awkward <= or >=.
|
|
|
|
if (CFP->getValueAPF().isInfinity()) {
|
|
|
|
if (CFP->getValueAPF().isNegative()) {
|
|
|
|
if (Cond == ISD::SETOEQ &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
|
|
|
|
if (Cond == ISD::SETUEQ &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
|
|
|
|
if (Cond == ISD::SETUNE &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
|
|
|
|
if (Cond == ISD::SETONE &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
|
|
|
|
} else {
|
|
|
|
if (Cond == ISD::SETOEQ &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
|
|
|
|
if (Cond == ISD::SETUEQ &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
|
|
|
|
if (Cond == ISD::SETUNE &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
|
|
|
|
if (Cond == ISD::SETONE &&
|
2012-12-19 18:19:55 +08:00
|
|
|
isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
|
2009-09-26 23:24:17 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (N0 == N1) {
|
2012-07-05 17:32:46 +08:00
|
|
|
// The sext(setcc()) => setcc() optimization relies on the appropriate
|
|
|
|
// constant being emitted.
|
2012-09-06 19:13:55 +08:00
|
|
|
uint64_t EqVal = 0;
|
2014-07-10 18:18:12 +08:00
|
|
|
switch (getBooleanContents(N0.getValueType())) {
|
2012-07-05 17:32:46 +08:00
|
|
|
case UndefinedBooleanContent:
|
|
|
|
case ZeroOrOneBooleanContent:
|
|
|
|
EqVal = ISD::isTrueWhenEqual(Cond);
|
|
|
|
break;
|
|
|
|
case ZeroOrNegativeOneBooleanContent:
|
|
|
|
EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2007-02-09 06:13:59 +08:00
|
|
|
// We can always fold X == X for integer setcc's.
|
2012-04-04 04:11:24 +08:00
|
|
|
if (N0.getValueType().isInteger()) {
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(EqVal, dl, VT);
|
2012-04-04 04:11:24 +08:00
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
unsigned UOF = ISD::getUnorderedFlavor(Cond);
|
|
|
|
if (UOF == 2) // FP operators that are undefined on NaNs.
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(EqVal, dl, VT);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
|
2015-04-28 22:05:47 +08:00
|
|
|
return DAG.getConstant(EqVal, dl, VT);
|
2007-02-09 06:13:59 +08:00
|
|
|
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
|
|
|
|
// if it is not already.
|
|
|
|
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
|
2012-08-01 02:07:43 +08:00
|
|
|
if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
|
2012-12-19 18:09:26 +08:00
|
|
|
getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
|
2008-06-06 20:08:01 +08:00
|
|
|
N0.getValueType().isInteger()) {
|
2007-02-09 06:13:59 +08:00
|
|
|
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
|
|
|
|
N0.getOpcode() == ISD::XOR) {
|
|
|
|
// Simplify (X+Y) == (X+Z) --> Y == Z
|
|
|
|
if (N0.getOpcode() == N1.getOpcode()) {
|
|
|
|
if (N0.getOperand(0) == N1.getOperand(0))
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (N0.getOperand(1) == N1.getOperand(1))
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
|
|
|
|
// If X op Y == Y op X, try other combinations.
|
|
|
|
if (N0.getOperand(0) == N1.getOperand(1))
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
|
2009-02-03 08:47:48 +08:00
|
|
|
Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (N0.getOperand(1) == N1.getOperand(0))
|
2010-11-23 11:31:01 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
|
2009-02-03 08:47:48 +08:00
|
|
|
Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2012-04-06 04:30:20 +08:00
|
|
|
// If RHS is a legal immediate value for a compare instruction, we need
|
|
|
|
// to be careful about increasing register pressure needlessly.
|
|
|
|
bool LegalRHSImm = false;
|
|
|
|
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
|
|
|
|
if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
2007-02-09 06:13:59 +08:00
|
|
|
// Turn (X+C1) == C2 --> X == C2-C1
|
2008-08-29 05:40:38 +08:00
|
|
|
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0),
|
2008-09-13 00:56:44 +08:00
|
|
|
DAG.getConstant(RHSC->getAPIntValue()-
|
|
|
|
LHSR->getAPIntValue(),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, N0.getValueType()), Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2012-09-27 18:14:43 +08:00
|
|
|
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
|
2007-02-09 06:13:59 +08:00
|
|
|
if (N0.getOpcode() == ISD::XOR)
|
|
|
|
// If we know that all of the inverted bits are zero, don't bother
|
|
|
|
// performing the inversion.
|
2008-02-26 05:11:39 +08:00
|
|
|
if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
|
|
|
|
return
|
2009-02-03 08:47:48 +08:00
|
|
|
DAG.getSetCC(dl, VT, N0.getOperand(0),
|
2008-02-26 05:11:39 +08:00
|
|
|
DAG.getConstant(LHSR->getAPIntValue() ^
|
|
|
|
RHSC->getAPIntValue(),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, N0.getValueType()),
|
2008-02-26 05:11:39 +08:00
|
|
|
Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2007-02-09 06:13:59 +08:00
|
|
|
// Turn (C1-X) == C2 --> X == C1-C2
|
2015-12-30 05:49:08 +08:00
|
|
|
if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
|
2008-08-29 05:40:38 +08:00
|
|
|
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
|
2008-02-26 05:11:39 +08:00
|
|
|
return
|
2009-02-03 08:47:48 +08:00
|
|
|
DAG.getSetCC(dl, VT, N0.getOperand(1),
|
2008-02-26 05:11:39 +08:00
|
|
|
DAG.getConstant(SUBC->getAPIntValue() -
|
|
|
|
RHSC->getAPIntValue(),
|
2015-04-28 22:05:47 +08:00
|
|
|
dl, N0.getValueType()),
|
2008-02-26 05:11:39 +08:00
|
|
|
Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
}
|
2012-04-06 04:30:20 +08:00
|
|
|
|
|
|
|
// Could RHSC fold directly into a compare?
|
|
|
|
if (RHSC->getValueType(0).getSizeInBits() <= 64)
|
|
|
|
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Simplify (X+Z) == X --> Z == 0
|
2012-04-06 04:30:20 +08:00
|
|
|
// Don't do this if X is an immediate that can fold into a cmp
|
|
|
|
// instruction and X+Z has other uses. It could be an induction variable
|
|
|
|
// chain, and the transform would increase register pressure.
|
|
|
|
if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
|
|
|
|
if (N0.getOperand(0) == N1)
|
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, N0.getValueType()), Cond);
|
2012-04-06 04:30:20 +08:00
|
|
|
if (N0.getOperand(1) == N1) {
|
|
|
|
if (DAG.isCommutativeBinOp(N0.getOpcode()))
|
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, N0.getValueType()),
|
|
|
|
Cond);
|
2012-12-19 14:43:58 +08:00
|
|
|
if (N0.getNode()->hasOneUse()) {
|
2012-04-06 04:30:20 +08:00
|
|
|
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
|
2015-07-09 10:09:20 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
2012-04-06 04:30:20 +08:00
|
|
|
// (Z-X) == X --> Z == X<<1
|
2015-07-09 10:09:20 +08:00
|
|
|
SDValue SH = DAG.getNode(
|
|
|
|
ISD::SHL, dl, N1.getValueType(), N1,
|
|
|
|
DAG.getConstant(1, dl,
|
|
|
|
getShiftAmountTy(N1.getValueType(), DL)));
|
2012-04-06 04:30:20 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
|
|
|
DCI.AddToWorklist(SH.getNode());
|
|
|
|
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
|
|
|
|
}
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
|
|
|
|
N1.getOpcode() == ISD::XOR) {
|
|
|
|
// Simplify X == (X+Z) --> Z == 0
|
2012-12-19 14:43:58 +08:00
|
|
|
if (N1.getOperand(0) == N0)
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N1.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, N1.getValueType()), Cond);
|
2012-12-19 14:43:58 +08:00
|
|
|
if (N1.getOperand(1) == N0) {
|
|
|
|
if (DAG.isCommutativeBinOp(N1.getOpcode()))
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, N1.getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, dl, N1.getValueType()), Cond);
|
2012-12-19 14:43:58 +08:00
|
|
|
if (N1.getNode()->hasOneUse()) {
|
2007-02-09 06:13:59 +08:00
|
|
|
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
|
2015-07-09 10:09:20 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
2007-02-09 06:13:59 +08:00
|
|
|
// X == (Z-X) --> X<<1 == Z
|
2015-07-09 10:09:20 +08:00
|
|
|
SDValue SH = DAG.getNode(
|
|
|
|
ISD::SHL, dl, N1.getValueType(), N0,
|
|
|
|
DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL)));
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(SH.getNode());
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-01-29 09:59:02 +08:00
|
|
|
|
2016-05-10 00:42:50 +08:00
|
|
|
if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
|
|
|
|
return V;
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Fold away ALL boolean setcc's.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Temp;
|
2009-08-12 04:47:22 +08:00
|
|
|
if (N0.getValueType() == MVT::i1 && foldBooleans) {
|
2007-02-09 06:13:59 +08:00
|
|
|
switch (Cond) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unknown integer setcc!");
|
2009-01-23 01:39:32 +08:00
|
|
|
case ISD::SETEQ: // X == Y -> ~(X^Y)
|
2009-08-12 04:47:22 +08:00
|
|
|
Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
|
|
|
|
N0 = DAG.getNOT(dl, Temp, MVT::i1);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Temp.getNode());
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
|
|
|
case ISD::SETNE: // X != Y --> (X^Y)
|
2009-08-12 04:47:22 +08:00
|
|
|
N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
2009-01-23 01:39:32 +08:00
|
|
|
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
|
|
|
|
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
|
2009-08-12 04:47:22 +08:00
|
|
|
Temp = DAG.getNOT(dl, N0, MVT::i1);
|
|
|
|
N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Temp.getNode());
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
2009-01-23 01:39:32 +08:00
|
|
|
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
|
|
|
|
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
|
2009-08-12 04:47:22 +08:00
|
|
|
Temp = DAG.getNOT(dl, N1, MVT::i1);
|
|
|
|
N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Temp.getNode());
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
2009-01-23 01:39:32 +08:00
|
|
|
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
|
|
|
|
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
|
2009-08-12 04:47:22 +08:00
|
|
|
Temp = DAG.getNOT(dl, N0, MVT::i1);
|
|
|
|
N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Temp.getNode());
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
2009-01-23 01:39:32 +08:00
|
|
|
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
|
|
|
|
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
|
2009-08-12 04:47:22 +08:00
|
|
|
Temp = DAG.getNOT(dl, N1, MVT::i1);
|
|
|
|
N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
|
2007-02-09 06:13:59 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-08-12 04:47:22 +08:00
|
|
|
if (VT != MVT::i1) {
|
2007-02-09 06:13:59 +08:00
|
|
|
if (!DCI.isCalledByLegalizer())
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(N0.getNode());
|
2007-02-09 06:13:59 +08:00
|
|
|
// FIXME: If running after legalize, we probably can't do this.
|
2009-02-03 08:47:48 +08:00
|
|
|
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
return N0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Could not fold it.
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2007-02-09 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Returns true (and the GlobalValue and the offset) if the node is a
|
|
|
|
/// GlobalAddress + offset.
|
2011-02-14 06:25:43 +08:00
|
|
|
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
|
2008-05-13 03:56:52 +08:00
|
|
|
int64_t &Offset) const {
|
2015-12-30 06:00:37 +08:00
|
|
|
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
|
2008-06-10 06:05:52 +08:00
|
|
|
GA = GASD->getGlobal();
|
|
|
|
Offset += GASD->getOffset();
|
2008-05-13 03:56:52 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (N->getOpcode() == ISD::ADD) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue N1 = N->getOperand(0);
|
|
|
|
SDValue N2 = N->getOperand(1);
|
2008-08-29 05:40:38 +08:00
|
|
|
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
|
2015-12-30 06:00:37 +08:00
|
|
|
if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
|
2008-09-27 05:54:37 +08:00
|
|
|
Offset += V->getSExtValue();
|
2008-05-13 03:56:52 +08:00
|
|
|
return true;
|
|
|
|
}
|
2008-08-29 05:40:38 +08:00
|
|
|
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
|
2015-12-30 06:00:37 +08:00
|
|
|
if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
|
2008-09-27 05:54:37 +08:00
|
|
|
Offset += V->getSExtValue();
|
2008-05-13 03:56:52 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-02-26 05:41:48 +08:00
|
|
|
|
2008-05-13 03:56:52 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-09-22 19:13:55 +08:00
|
|
|
/// Default implementation of the DAG-combine hook: it performs no
/// optimization on \p N and always returns an empty SDValue.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Nothing is combined here; an empty SDValue is returned unconditionally.
  return SDValue();
}
|
|
|
|
|
2006-02-04 10:13:02 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Inline Assembler Implementation Methods
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// Classify an inline-asm constraint string.
///
/// Single-letter constraints are mapped to C_RegisterClass, C_Memory or
/// C_Other according to the table below.  A brace-enclosed constraint is
/// C_Memory when it is exactly "{memory}" and C_Register otherwise.
/// Everything else is C_Unknown.
TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  const unsigned Len = Constraint.size();

  if (Len == 1) {
    switch (Constraint[0]) {
    case 'r':
      return C_RegisterClass;

    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;

    case 'i': // Simple Integer or Relocatable Constant
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
    case 's': // Relocatable Constant
    case 'p': // Address.
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;

    default:
      // Unrecognized single letters fall through to the checks below.
      break;
    }
  }

  // Anything that is not of the form "{...}" is unknown at this point.
  const bool IsBraced =
      Len > 1 && Constraint[0] == '{' && Constraint[Len - 1] == '}';
  if (!IsBraced)
    return C_Unknown;

  // "{memory}" is the one braced constraint that is not a register name.
  if (Constraint == "{memory}")
    return C_Memory;
  return C_Register;
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Try to replace an X constraint, which matches anything, with another that
|
|
|
|
/// has more specific requirements based on the type of the corresponding
|
|
|
|
/// operand.
|
2009-08-11 06:56:29 +08:00
|
|
|
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
|
2008-06-06 20:08:01 +08:00
|
|
|
if (ConstraintVT.isInteger())
|
2008-04-27 07:02:14 +08:00
|
|
|
return "r";
|
2008-06-06 20:08:01 +08:00
|
|
|
if (ConstraintVT.isFloatingPoint())
|
2008-04-27 07:02:14 +08:00
|
|
|
return "f"; // works for many targets
|
2014-04-14 08:51:57 +08:00
|
|
|
return nullptr;
|
2008-01-29 10:21:21 +08:00
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Lower the specified operand into the Ops vector.
|
|
|
|
/// If it is invalid, don't add anything to Ops.
|
2008-07-28 05:46:04 +08:00
|
|
|
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
|
2011-06-03 07:16:42 +08:00
|
|
|
std::string &Constraint,
|
2008-07-28 05:46:04 +08:00
|
|
|
std::vector<SDValue> &Ops,
|
2008-04-27 07:02:14 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2011-06-18 04:41:29 +08:00
|
|
|
|
2011-06-03 07:16:42 +08:00
|
|
|
if (Constraint.length() > 1) return;
|
2011-06-18 04:41:29 +08:00
|
|
|
|
2011-06-03 07:16:42 +08:00
|
|
|
char ConstraintLetter = Constraint[0];
|
2006-02-04 10:13:02 +08:00
|
|
|
switch (ConstraintLetter) {
|
2007-02-17 14:00:35 +08:00
|
|
|
default: break;
|
2007-11-06 05:20:28 +08:00
|
|
|
case 'X': // Allows any operand; labels (basic block) use this.
|
|
|
|
if (Op.getOpcode() == ISD::BasicBlock) {
|
|
|
|
Ops.push_back(Op);
|
|
|
|
return;
|
|
|
|
}
|
2016-08-18 04:30:52 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2006-02-04 10:13:02 +08:00
|
|
|
case 'i': // Simple Integer or Relocatable Constant
|
|
|
|
case 'n': // Simple Integer
|
2007-11-06 05:20:28 +08:00
|
|
|
case 's': { // Relocatable Constant
|
2007-05-04 00:54:34 +08:00
|
|
|
// These operands are interested in values of the form (GV+C), where C may
|
|
|
|
// be folded in as an offset of GV, or it may be explicitly added. Also, it
|
|
|
|
// is possible and fine if either GV or C are missing.
|
|
|
|
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
|
|
|
|
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2007-05-04 00:54:34 +08:00
|
|
|
// If we have "(add GV, C)", pull out GV/C
|
|
|
|
if (Op.getOpcode() == ISD::ADD) {
|
|
|
|
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
|
|
|
|
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
|
2014-04-14 08:51:57 +08:00
|
|
|
if (!C || !GA) {
|
2007-05-04 00:54:34 +08:00
|
|
|
C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
|
|
|
|
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
|
|
|
|
}
|
2016-02-19 06:09:30 +08:00
|
|
|
if (!C || !GA) {
|
|
|
|
C = nullptr;
|
|
|
|
GA = nullptr;
|
|
|
|
}
|
2007-05-04 00:54:34 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2007-05-04 00:54:34 +08:00
|
|
|
// If we find a valid operand, map to the TargetXXX version so that the
|
|
|
|
// value itself doesn't get selected.
|
|
|
|
if (GA) { // Either &GV or &GV+C
|
|
|
|
if (ConstraintLetter != 'n') {
|
|
|
|
int64_t Offs = GA->getOffset();
|
2008-09-13 00:56:44 +08:00
|
|
|
if (C) Offs += C->getZExtValue();
|
2010-11-23 11:31:01 +08:00
|
|
|
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
|
2013-05-25 10:42:55 +08:00
|
|
|
C ? SDLoc(C) : SDLoc(),
|
2007-08-25 08:47:38 +08:00
|
|
|
Op.getValueType(), Offs));
|
2007-05-04 00:54:34 +08:00
|
|
|
}
|
2015-07-14 00:36:22 +08:00
|
|
|
return;
|
2007-05-04 00:54:34 +08:00
|
|
|
}
|
|
|
|
if (C) { // just C, no GV.
|
2007-02-17 14:00:35 +08:00
|
|
|
// Simple constants are not allowed for 's'.
|
2007-08-25 08:47:38 +08:00
|
|
|
if (ConstraintLetter != 's') {
|
2009-02-13 04:58:09 +08:00
|
|
|
// gcc prints these as sign extended. Sign extend value to 64 bits
|
|
|
|
// now; without this it would get ZExt'd later in
|
|
|
|
// ScheduleDAGSDNodes::EmitNode, which is very generic.
|
|
|
|
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
|
2015-04-28 22:05:47 +08:00
|
|
|
SDLoc(C), MVT::i64));
|
2007-08-25 08:47:38 +08:00
|
|
|
}
|
2015-07-14 00:36:22 +08:00
|
|
|
return;
|
2007-02-17 14:00:35 +08:00
|
|
|
}
|
|
|
|
break;
|
2006-02-04 10:13:02 +08:00
|
|
|
}
|
2007-05-04 00:54:34 +08:00
|
|
|
}
|
2006-02-04 10:13:02 +08:00
|
|
|
}
|
|
|
|
|
2015-02-27 06:38:43 +08:00
|
|
|
std::pair<unsigned, const TargetRegisterClass *>
|
|
|
|
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
|
2015-07-06 03:29:18 +08:00
|
|
|
StringRef Constraint,
|
2015-02-27 06:38:43 +08:00
|
|
|
MVT VT) const {
|
2013-10-13 11:08:49 +08:00
|
|
|
if (Constraint.empty() || Constraint[0] != '{')
|
2014-04-14 08:51:57 +08:00
|
|
|
return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
|
2006-02-01 09:29:47 +08:00
|
|
|
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
|
|
|
|
|
|
|
|
// Remove the braces from around the name.
|
2009-11-13 04:36:59 +08:00
|
|
|
StringRef RegName(Constraint.data()+1, Constraint.size()-2);
|
2006-02-22 08:56:39 +08:00
|
|
|
|
2012-12-19 01:50:58 +08:00
|
|
|
std::pair<unsigned, const TargetRegisterClass*> R =
|
2014-04-14 08:51:57 +08:00
|
|
|
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
|
2012-12-19 01:50:58 +08:00
|
|
|
|
2006-02-22 08:56:39 +08:00
|
|
|
// Figure out which register class contains this reg.
|
2008-02-11 02:45:23 +08:00
|
|
|
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
|
2006-02-22 08:56:39 +08:00
|
|
|
E = RI->regclass_end(); RCI != E; ++RCI) {
|
|
|
|
const TargetRegisterClass *RC = *RCI;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
|
|
|
// If none of the value types for this register class are valid, we
|
2006-02-23 07:00:51 +08:00
|
|
|
// can't use it. For example, 64-bit reg classes on 32-bit targets.
|
2011-10-12 09:24:51 +08:00
|
|
|
if (!isLegalRC(RC))
|
|
|
|
continue;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
|
|
|
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
|
2006-02-22 08:56:39 +08:00
|
|
|
I != E; ++I) {
|
2016-04-12 00:21:12 +08:00
|
|
|
if (RegName.equals_lower(RI->getRegAsmName(*I))) {
|
2012-12-19 01:50:58 +08:00
|
|
|
std::pair<unsigned, const TargetRegisterClass*> S =
|
|
|
|
std::make_pair(*I, RC);
|
|
|
|
|
|
|
|
// If this register class has the requested value type, return it,
|
|
|
|
// otherwise keep searching and return the first class found
|
|
|
|
// if no other is found which explicitly has the requested type.
|
|
|
|
if (RC->hasType(VT))
|
|
|
|
return S;
|
|
|
|
else if (!R.second)
|
|
|
|
R = S;
|
|
|
|
}
|
2006-02-22 08:56:39 +08:00
|
|
|
}
|
2006-01-27 04:37:03 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2012-12-19 01:50:58 +08:00
|
|
|
return R;
|
2006-01-27 04:37:03 +08:00
|
|
|
}
|
2006-03-14 07:18:16 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Constraint Selection.
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Return true of this is an input operand that is a matching constraint like
|
|
|
|
/// "4".
|
2008-10-18 00:47:46 +08:00
|
|
|
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
|
2008-10-18 00:21:11 +08:00
|
|
|
assert(!ConstraintCode.empty() && "No known constraint!");
|
2013-02-13 05:21:59 +08:00
|
|
|
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
|
2008-10-18 00:21:11 +08:00
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// If this is an input matching constraint, this method returns the output
|
|
|
|
/// operand it matches.
|
2008-10-18 00:21:11 +08:00
|
|
|
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
|
|
|
|
assert(!ConstraintCode.empty() && "No known constraint!");
|
|
|
|
return atoi(ConstraintCode.c_str());
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Split up the constraint string from the inline assembly value into the
|
|
|
|
/// specific constraints and their prefixes, and also tie in the associated
|
|
|
|
/// operand values.
|
2010-09-14 02:15:37 +08:00
|
|
|
/// If this returns an empty vector, and if the constraint string itself
|
|
|
|
/// isn't empty, there was an error parsing.
|
2015-02-27 06:38:43 +08:00
|
|
|
TargetLowering::AsmOperandInfoVector
|
2015-07-08 03:07:19 +08:00
|
|
|
TargetLowering::ParseConstraints(const DataLayout &DL,
|
|
|
|
const TargetRegisterInfo *TRI,
|
2015-02-27 06:38:43 +08:00
|
|
|
ImmutableCallSite CS) const {
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Information about all of the constraints.
|
2010-10-30 01:29:13 +08:00
|
|
|
AsmOperandInfoVector ConstraintOperands;
|
2010-09-14 02:15:37 +08:00
|
|
|
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
|
2010-09-22 06:04:54 +08:00
|
|
|
unsigned maCount = 0; // Largest number of multiple alternative constraints.
|
2010-09-14 02:15:37 +08:00
|
|
|
|
|
|
|
// Do a prepass over the constraints, canonicalizing them, and building up the
|
|
|
|
// ConstraintOperands list.
|
|
|
|
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
|
|
|
|
unsigned ResNo = 0; // ResNo - The result number of the next output.
|
|
|
|
|
2014-10-04 02:33:16 +08:00
|
|
|
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
|
|
|
|
ConstraintOperands.emplace_back(std::move(CI));
|
2010-09-14 02:15:37 +08:00
|
|
|
AsmOperandInfo &OpInfo = ConstraintOperands.back();
|
|
|
|
|
2010-09-22 06:04:54 +08:00
|
|
|
// Update multiple alternative constraint count.
|
|
|
|
if (OpInfo.multipleAlternatives.size() > maCount)
|
|
|
|
maCount = OpInfo.multipleAlternatives.size();
|
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
OpInfo.ConstraintVT = MVT::Other;
|
2010-09-14 02:15:37 +08:00
|
|
|
|
|
|
|
// Compute the value type for each operand.
|
|
|
|
switch (OpInfo.Type) {
|
|
|
|
case InlineAsm::isOutput:
|
|
|
|
// Indirect outputs just consume an argument.
|
|
|
|
if (OpInfo.isIndirect) {
|
|
|
|
OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The return value of the call is this value. As such, there is no
|
|
|
|
// corresponding argument.
|
|
|
|
assert(!CS.getType()->isVoidTy() &&
|
|
|
|
"Bad inline asm!");
|
2011-07-18 12:54:35 +08:00
|
|
|
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
|
2015-07-09 10:09:04 +08:00
|
|
|
OpInfo.ConstraintVT =
|
|
|
|
getSimpleValueType(DL, STy->getElementType(ResNo));
|
2010-09-14 02:15:37 +08:00
|
|
|
} else {
|
|
|
|
assert(ResNo == 0 && "Asm only has one result!");
|
2015-07-09 10:09:04 +08:00
|
|
|
OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
|
2010-09-14 02:15:37 +08:00
|
|
|
}
|
|
|
|
++ResNo;
|
|
|
|
break;
|
|
|
|
case InlineAsm::isInput:
|
|
|
|
OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
|
|
|
|
break;
|
|
|
|
case InlineAsm::isClobber:
|
|
|
|
// Nothing to do.
|
|
|
|
break;
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
if (OpInfo.CallOperandVal) {
|
2011-07-18 12:54:35 +08:00
|
|
|
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
|
2010-10-30 01:29:13 +08:00
|
|
|
if (OpInfo.isIndirect) {
|
2011-07-18 12:54:35 +08:00
|
|
|
llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
|
2010-10-30 01:29:13 +08:00
|
|
|
if (!PtrTy)
|
|
|
|
report_fatal_error("Indirect operand for inline asm not a pointer!");
|
|
|
|
OpTy = PtrTy->getElementType();
|
|
|
|
}
|
2011-06-18 04:41:29 +08:00
|
|
|
|
2011-05-10 04:04:43 +08:00
|
|
|
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
|
2011-07-18 12:54:35 +08:00
|
|
|
if (StructType *STy = dyn_cast<StructType>(OpTy))
|
2011-05-10 04:04:43 +08:00
|
|
|
if (STy->getNumElements() == 1)
|
|
|
|
OpTy = STy->getElementType(0);
|
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
// If OpTy is not a single value, it may be a struct/union that we
|
|
|
|
// can tile with integers.
|
|
|
|
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
|
2015-07-08 03:07:19 +08:00
|
|
|
unsigned BitSize = DL.getTypeSizeInBits(OpTy);
|
2010-10-30 01:29:13 +08:00
|
|
|
switch (BitSize) {
|
|
|
|
default: break;
|
|
|
|
case 1:
|
|
|
|
case 8:
|
|
|
|
case 16:
|
|
|
|
case 32:
|
|
|
|
case 64:
|
|
|
|
case 128:
|
2010-11-09 09:15:07 +08:00
|
|
|
OpInfo.ConstraintVT =
|
2012-12-19 23:19:11 +08:00
|
|
|
MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
|
2010-10-30 01:29:13 +08:00
|
|
|
break;
|
|
|
|
}
|
2012-10-10 00:06:12 +08:00
|
|
|
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
|
2015-07-08 03:07:19 +08:00
|
|
|
unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
|
2013-10-11 03:09:05 +08:00
|
|
|
OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
|
2010-10-30 01:29:13 +08:00
|
|
|
} else {
|
2012-12-19 23:19:11 +08:00
|
|
|
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
|
2010-10-30 01:29:13 +08:00
|
|
|
}
|
|
|
|
}
|
2010-09-14 02:15:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If we have multiple alternative constraints, select the best alternative.
|
2015-01-15 19:41:30 +08:00
|
|
|
if (!ConstraintOperands.empty()) {
|
2010-09-14 02:15:37 +08:00
|
|
|
if (maCount) {
|
|
|
|
unsigned bestMAIndex = 0;
|
|
|
|
int bestWeight = -1;
|
|
|
|
// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
|
|
|
|
int weight = -1;
|
|
|
|
unsigned maIndex;
|
|
|
|
// Compute the sums of the weights for each alternative, keeping track
|
|
|
|
// of the best (highest weight) one so far.
|
|
|
|
for (maIndex = 0; maIndex < maCount; ++maIndex) {
|
|
|
|
int weightSum = 0;
|
|
|
|
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
|
|
|
|
cIndex != eIndex; ++cIndex) {
|
|
|
|
AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
|
|
|
|
if (OpInfo.Type == InlineAsm::isClobber)
|
|
|
|
continue;
|
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
// If this is an output operand with a matching input operand,
|
|
|
|
// look up the matching input. If their types mismatch, e.g. one
|
|
|
|
// is an integer, the other is floating point, or their sizes are
|
|
|
|
// different, flag it as an maCantMatch.
|
2010-09-14 02:15:37 +08:00
|
|
|
if (OpInfo.hasMatchingInput()) {
|
|
|
|
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
|
|
|
|
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
|
|
|
|
if ((OpInfo.ConstraintVT.isInteger() !=
|
|
|
|
Input.ConstraintVT.isInteger()) ||
|
|
|
|
(OpInfo.ConstraintVT.getSizeInBits() !=
|
|
|
|
Input.ConstraintVT.getSizeInBits())) {
|
|
|
|
weightSum = -1; // Can't match.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
|
|
|
|
if (weight == -1) {
|
|
|
|
weightSum = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
weightSum += weight;
|
|
|
|
}
|
|
|
|
// Update best.
|
|
|
|
if (weightSum > bestWeight) {
|
|
|
|
bestWeight = weightSum;
|
|
|
|
bestMAIndex = maIndex;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now select chosen alternative in each constraint.
|
|
|
|
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
|
|
|
|
cIndex != eIndex; ++cIndex) {
|
|
|
|
AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
|
|
|
|
if (cInfo.Type == InlineAsm::isClobber)
|
|
|
|
continue;
|
|
|
|
cInfo.selectAlternative(bestMAIndex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check and hook up tied operands, choose constraint code to use.
|
|
|
|
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
|
|
|
|
cIndex != eIndex; ++cIndex) {
|
|
|
|
AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2010-09-14 02:15:37 +08:00
|
|
|
// If this is an output operand with a matching input operand, look up the
|
|
|
|
// matching input. If their types mismatch, e.g. one is an integer, the
|
|
|
|
// other is floating point, or their sizes are different, flag it as an
|
|
|
|
// error.
|
|
|
|
if (OpInfo.hasMatchingInput()) {
|
|
|
|
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
|
2010-10-30 01:29:13 +08:00
|
|
|
|
2010-09-14 02:15:37 +08:00
|
|
|
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
|
2015-02-27 06:38:43 +08:00
|
|
|
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
|
|
|
|
getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
|
|
|
|
OpInfo.ConstraintVT);
|
|
|
|
std::pair<unsigned, const TargetRegisterClass *> InputRC =
|
|
|
|
getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
|
|
|
|
Input.ConstraintVT);
|
2010-09-14 02:15:37 +08:00
|
|
|
if ((OpInfo.ConstraintVT.isInteger() !=
|
|
|
|
Input.ConstraintVT.isInteger()) ||
|
2011-07-15 04:13:52 +08:00
|
|
|
(MatchRC.second != InputRC.second)) {
|
2010-09-14 02:15:37 +08:00
|
|
|
report_fatal_error("Unsupported asm: input constraint"
|
|
|
|
" with a matching output constraint of"
|
|
|
|
" incompatible type!");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ConstraintOperands;
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Return an integer indicating how general CT is.
|
2008-04-27 08:09:47 +08:00
|
|
|
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
|
|
|
|
switch (CT) {
|
|
|
|
case TargetLowering::C_Other:
|
|
|
|
case TargetLowering::C_Unknown:
|
|
|
|
return 0;
|
|
|
|
case TargetLowering::C_Register:
|
|
|
|
return 1;
|
|
|
|
case TargetLowering::C_RegisterClass:
|
|
|
|
return 2;
|
|
|
|
case TargetLowering::C_Memory:
|
|
|
|
return 3;
|
|
|
|
}
|
2012-01-11 02:08:01 +08:00
|
|
|
llvm_unreachable("Invalid constraint type");
|
2008-04-27 08:09:47 +08:00
|
|
|
}
|
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
/// Examine constraint type and operand type and determine a weight value.
|
2010-09-14 02:15:37 +08:00
|
|
|
/// This object must already have been set up with the operand type
|
|
|
|
/// and the current alternative constraint selected.
|
2010-10-30 01:29:13 +08:00
|
|
|
TargetLowering::ConstraintWeight
|
|
|
|
TargetLowering::getMultipleConstraintMatchWeight(
|
2010-09-14 02:15:37 +08:00
|
|
|
AsmOperandInfo &info, int maIndex) const {
|
2010-10-30 01:29:13 +08:00
|
|
|
InlineAsm::ConstraintCodeVector *rCodes;
|
2010-09-22 06:04:54 +08:00
|
|
|
if (maIndex >= (int)info.multipleAlternatives.size())
|
|
|
|
rCodes = &info.Codes;
|
|
|
|
else
|
|
|
|
rCodes = &info.multipleAlternatives[maIndex].Codes;
|
2010-10-30 01:29:13 +08:00
|
|
|
ConstraintWeight BestWeight = CW_Invalid;
|
2010-09-14 02:15:37 +08:00
|
|
|
|
|
|
|
// Loop over the options, keeping track of the most general one.
|
2010-09-22 06:04:54 +08:00
|
|
|
for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
|
2010-10-30 01:29:13 +08:00
|
|
|
ConstraintWeight weight =
|
|
|
|
getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
|
2010-09-14 02:15:37 +08:00
|
|
|
if (weight > BestWeight)
|
|
|
|
BestWeight = weight;
|
|
|
|
}
|
|
|
|
|
|
|
|
return BestWeight;
|
|
|
|
}
|
|
|
|
|
2010-10-30 01:29:13 +08:00
|
|
|
/// Examine constraint type and operand type and determine a weight value.
|
2010-09-14 02:15:37 +08:00
|
|
|
/// This object must already have been set up with the operand type
|
|
|
|
/// and the current alternative constraint selected.
|
2010-10-30 01:29:13 +08:00
|
|
|
TargetLowering::ConstraintWeight
|
|
|
|
TargetLowering::getSingleConstraintMatchWeight(
|
2010-09-14 02:15:37 +08:00
|
|
|
AsmOperandInfo &info, const char *constraint) const {
|
2010-10-30 01:29:13 +08:00
|
|
|
ConstraintWeight weight = CW_Invalid;
|
2010-09-14 02:15:37 +08:00
|
|
|
Value *CallOperandVal = info.CallOperandVal;
|
|
|
|
// If we don't have a value, we can't do a match,
|
|
|
|
// but allow it at the lowest weight.
|
2014-04-14 08:51:57 +08:00
|
|
|
if (!CallOperandVal)
|
2010-10-30 01:29:13 +08:00
|
|
|
return CW_Default;
|
2010-09-14 02:15:37 +08:00
|
|
|
// Look at the constraint type.
|
|
|
|
switch (*constraint) {
|
|
|
|
case 'i': // immediate integer.
|
|
|
|
case 'n': // immediate integer with a known value.
|
2010-10-30 01:29:13 +08:00
|
|
|
if (isa<ConstantInt>(CallOperandVal))
|
|
|
|
weight = CW_Constant;
|
2010-09-14 02:15:37 +08:00
|
|
|
break;
|
|
|
|
case 's': // non-explicit intregal immediate.
|
2010-10-30 01:29:13 +08:00
|
|
|
if (isa<GlobalValue>(CallOperandVal))
|
|
|
|
weight = CW_Constant;
|
|
|
|
break;
|
|
|
|
case 'E': // immediate float if host format.
|
|
|
|
case 'F': // immediate float.
|
|
|
|
if (isa<ConstantFP>(CallOperandVal))
|
|
|
|
weight = CW_Constant;
|
2010-09-14 02:15:37 +08:00
|
|
|
break;
|
2010-10-30 01:29:13 +08:00
|
|
|
case '<': // memory operand with autodecrement.
|
|
|
|
case '>': // memory operand with autoincrement.
|
2010-09-14 02:15:37 +08:00
|
|
|
case 'm': // memory operand.
|
|
|
|
case 'o': // offsettable memory operand
|
|
|
|
case 'V': // non-offsettable memory operand
|
2010-10-30 01:29:13 +08:00
|
|
|
weight = CW_Memory;
|
2010-09-14 02:15:37 +08:00
|
|
|
break;
|
2010-10-30 01:29:13 +08:00
|
|
|
case 'r': // general register.
|
2010-09-14 02:15:37 +08:00
|
|
|
case 'g': // general register, memory operand or immediate integer.
|
2010-10-30 01:29:13 +08:00
|
|
|
// note: Clang converts "g" to "imr".
|
|
|
|
if (CallOperandVal->getType()->isIntegerTy())
|
|
|
|
weight = CW_Register;
|
2010-09-14 02:15:37 +08:00
|
|
|
break;
|
2010-10-30 01:29:13 +08:00
|
|
|
case 'X': // any operand.
|
2010-09-14 02:15:37 +08:00
|
|
|
default:
|
2010-10-30 01:29:13 +08:00
|
|
|
weight = CW_Default;
|
2010-09-14 02:15:37 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return weight;
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// If there are multiple different constraints that we could pick for this
|
|
|
|
/// operand (e.g. "imr") try to pick the 'best' one.
|
2008-04-27 09:49:46 +08:00
|
|
|
/// This is somewhat tricky: constraints fall into four classes:
|
2008-04-27 08:09:47 +08:00
|
|
|
/// Other -> immediates and magic values
|
|
|
|
/// Register -> one specific register
|
|
|
|
/// RegisterClass -> a group of regs
|
|
|
|
/// Memory -> memory
|
|
|
|
/// Ideally, we would pick the most specific constraint possible: if we have
|
|
|
|
/// something that fits into a register, we would pick it. The problem here
|
|
|
|
/// is that if we have something that could either be in a register or in
|
|
|
|
/// memory that use of the register could cause selection of *other*
|
|
|
|
/// operands to fail: they might only succeed if we pick memory. Because of
|
|
|
|
/// this the heuristic we use is:
|
|
|
|
///
|
|
|
|
/// 1) If there is an 'other' constraint, and if the operand is valid for
|
|
|
|
/// that constraint, use it. This makes us take advantage of 'i'
|
|
|
|
/// constraints when available.
|
|
|
|
/// 2) Otherwise, pick the most general constraint present. This prefers
|
|
|
|
/// 'm' over 'r', for example.
|
|
|
|
///
|
|
|
|
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
|
2010-06-26 05:55:36 +08:00
|
|
|
const TargetLowering &TLI,
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Op, SelectionDAG *DAG) {
|
2008-04-27 08:09:47 +08:00
|
|
|
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
|
|
|
|
unsigned BestIdx = 0;
|
|
|
|
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
|
|
|
|
int BestGenerality = -1;
|
2010-06-29 06:09:45 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
// Loop over the options, keeping track of the most general one.
|
|
|
|
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
|
|
|
|
TargetLowering::ConstraintType CType =
|
|
|
|
TLI.getConstraintType(OpInfo.Codes[i]);
|
2010-06-29 06:09:45 +08:00
|
|
|
|
2008-04-27 08:37:18 +08:00
|
|
|
// If this is an 'other' constraint, see if the operand is valid for it.
|
|
|
|
// For example, on X86 we might have an 'rI' constraint. If the operand
|
|
|
|
// is an integer in the range [0..31] we want to use I (saving a load
|
|
|
|
// of a register), otherwise we must use 'r'.
|
2008-08-29 05:40:38 +08:00
|
|
|
if (CType == TargetLowering::C_Other && Op.getNode()) {
|
2008-04-27 08:37:18 +08:00
|
|
|
assert(OpInfo.Codes[i].size() == 1 &&
|
|
|
|
"Unhandled multi-letter 'other' constraint");
|
2008-07-28 05:46:04 +08:00
|
|
|
std::vector<SDValue> ResultOps;
|
2011-06-03 07:16:42 +08:00
|
|
|
TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
|
2008-04-27 08:37:18 +08:00
|
|
|
ResultOps, *DAG);
|
|
|
|
if (!ResultOps.empty()) {
|
|
|
|
BestType = CType;
|
|
|
|
BestIdx = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2010-06-29 06:09:45 +08:00
|
|
|
// Things with matching constraints can only be registers, per gcc
|
|
|
|
// documentation. This mainly affects "g" constraints.
|
|
|
|
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
|
|
|
|
continue;
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
// This constraint letter is more general than the previous one, use it.
|
|
|
|
int Generality = getConstraintGenerality(CType);
|
|
|
|
if (Generality > BestGenerality) {
|
|
|
|
BestType = CType;
|
|
|
|
BestIdx = i;
|
|
|
|
BestGenerality = Generality;
|
|
|
|
}
|
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
|
|
|
|
OpInfo.ConstraintType = BestType;
|
|
|
|
}
|
|
|
|
|
2015-12-30 06:11:50 +08:00
|
|
|
/// Determines the constraint code and constraint type to use for the specific
|
|
|
|
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
|
2008-04-27 08:37:18 +08:00
|
|
|
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
|
2010-11-23 11:31:01 +08:00
|
|
|
SDValue Op,
|
2008-04-27 08:37:18 +08:00
|
|
|
SelectionDAG *DAG) const {
|
2008-04-27 08:09:47 +08:00
|
|
|
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
// Single-letter constraints ('r') are very common.
|
|
|
|
if (OpInfo.Codes.size() == 1) {
|
|
|
|
OpInfo.ConstraintCode = OpInfo.Codes[0];
|
|
|
|
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
|
|
|
|
} else {
|
2010-06-26 05:55:36 +08:00
|
|
|
ChooseConstraint(OpInfo, *this, Op, DAG);
|
2008-04-27 08:09:47 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
// 'X' matches anything.
|
|
|
|
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
|
|
|
|
// Labels and constants are handled elsewhere ('X' is the only thing
|
2009-07-08 07:26:33 +08:00
|
|
|
// that matches labels). For Functions, the type here is the type of
|
2009-07-21 07:27:39 +08:00
|
|
|
// the result, which is not what we want to look at; leave them alone.
|
|
|
|
Value *v = OpInfo.CallOperandVal;
|
2009-07-08 07:26:33 +08:00
|
|
|
if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
|
|
|
|
OpInfo.CallOperandVal = v;
|
2008-04-27 08:09:47 +08:00
|
|
|
return;
|
2009-07-08 07:26:33 +08:00
|
|
|
}
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2008-04-27 08:09:47 +08:00
|
|
|
// Otherwise, try to resolve it to something we know about by looking at
|
|
|
|
// the actual operand type.
|
|
|
|
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
|
|
|
|
OpInfo.ConstraintCode = Repl;
|
|
|
|
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-09 07:51:45 +08:00
|
|
|
/// \brief Given an exact SDIV by a constant, create a multiplication
|
Emit a more efficient magic number multiplication for exact sdivs.
We have to do this in DAGBuilder instead of DAGCombiner, because the exact bit is lost after building.
struct foo { char x[24]; };
long bar(struct foo *a, struct foo *b) { return a-b; }
is now compiled into
movl 4(%esp), %eax
subl 8(%esp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
instead of
movl 4(%esp), %eax
subl 8(%esp), %eax
movl $715827883, %ecx
imull %ecx
movl %edx, %eax
shrl $31, %eax
sarl $2, %edx
addl %eax, %edx
movl %edx, %eax
llvm-svn: 134695
2011-07-08 18:31:30 +08:00
|
|
|
/// with the multiplicative inverse of the constant.
|
2015-06-28 04:33:26 +08:00
|
|
|
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SDLoc &dl, SelectionDAG &DAG,
|
2015-06-28 04:33:26 +08:00
|
|
|
std::vector<SDNode *> &Created) {
|
Emit a more efficient magic number multiplication for exact sdivs.
We have to do this in DAGBuilder instead of DAGCombiner, because the exact bit is lost after building.
struct foo { char x[24]; };
long bar(struct foo *a, struct foo *b) { return a-b; }
is now compiled into
movl 4(%esp), %eax
subl 8(%esp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
instead of
movl 4(%esp), %eax
subl 8(%esp), %eax
movl $715827883, %ecx
imull %ecx
movl %edx, %eax
shrl $31, %eax
sarl $2, %edx
addl %eax, %edx
movl %edx, %eax
llvm-svn: 134695
2011-07-08 18:31:30 +08:00
|
|
|
assert(d != 0 && "Division by zero!");
|
|
|
|
|
|
|
|
// Shift the value upfront if it is even, so the LSB is one.
|
|
|
|
unsigned ShAmt = d.countTrailingZeros();
|
|
|
|
if (ShAmt) {
|
|
|
|
// TODO: For UDIV use SRL instead of SRA.
|
2015-05-06 22:03:22 +08:00
|
|
|
SDValue Amt =
|
2015-07-09 10:09:20 +08:00
|
|
|
DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
|
|
|
|
DAG.getDataLayout()));
|
2015-06-17 00:25:43 +08:00
|
|
|
SDNodeFlags Flags;
|
|
|
|
Flags.setExact(true);
|
|
|
|
Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
|
2015-06-28 04:33:26 +08:00
|
|
|
Created.push_back(Op1.getNode());
|
Emit a more efficient magic number multiplication for exact sdivs.
We have to do this in DAGBuilder instead of DAGCombiner, because the exact bit is lost after building.
struct foo { char x[24]; };
long bar(struct foo *a, struct foo *b) { return a-b; }
is now compiled into
movl 4(%esp), %eax
subl 8(%esp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
instead of
movl 4(%esp), %eax
subl 8(%esp), %eax
movl $715827883, %ecx
imull %ecx
movl %edx, %eax
shrl $31, %eax
sarl $2, %edx
addl %eax, %edx
movl %edx, %eax
llvm-svn: 134695
2011-07-08 18:31:30 +08:00
|
|
|
d = d.ashr(ShAmt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate the multiplicative inverse, using Newton's method.
|
|
|
|
APInt t, xn = d;
|
|
|
|
while ((t = d*xn) != 1)
|
|
|
|
xn *= APInt(d.getBitWidth(), 2) - t;
|
|
|
|
|
2015-06-28 04:33:26 +08:00
|
|
|
SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
|
|
|
|
SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
|
|
|
|
Created.push_back(Mul.getNode());
|
|
|
|
return Mul;
|
Emit a more efficient magic number multiplication for exact sdivs.
We have to do this in DAGBuilder instead of DAGCombiner, because the exact bit is lost after building.
struct foo { char x[24]; };
long bar(struct foo *a, struct foo *b) { return a-b; }
is now compiled into
movl 4(%esp), %eax
subl 8(%esp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
instead of
movl 4(%esp), %eax
subl 8(%esp), %eax
movl $715827883, %ecx
imull %ecx
movl %edx, %eax
shrl $31, %eax
sarl $2, %edx
addl %eax, %edx
movl %edx, %eax
llvm-svn: 134695
2011-07-08 18:31:30 +08:00
|
|
|
}
|
|
|
|
|
2015-08-25 10:31:21 +08:00
|
|
|
/// Default handling for a signed division by a power of two.
///
/// When the target reports integer division as cheap for N's result type
/// (given the attributes of the current function), N itself is returned so the
/// SDIV is kept as an SDIV; otherwise an empty SDValue is returned.
/// Divisor and Created are not used by this implementation.
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      std::vector<SDNode *> *Created) const {
  AttributeSet FnAttrs =
      DAG.getMachineFunction().getFunction()->getAttributes();
  const TargetLowering &Lowering = DAG.getTargetLoweringInfo();

  if (!Lowering.isIntDivCheap(N->getValueType(0), FnAttrs))
    return SDValue();

  // Lower SDIV as SDIV
  return SDValue(N, 0);
}
|
|
|
|
|
2013-06-09 07:51:45 +08:00
|
|
|
/// \brief Given an ISD::SDIV node expressing a divide by constant,
|
2006-05-17 01:42:15 +08:00
|
|
|
/// return a DAG expression to select that will generate the same value by
|
2014-09-16 03:47:44 +08:00
|
|
|
/// multiplying by a magic number.
|
|
|
|
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
|
2014-04-26 20:06:28 +08:00
|
|
|
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
|
|
|
|
SelectionDAG &DAG, bool IsAfterLegalization,
|
|
|
|
std::vector<SDNode *> *Created) const {
|
2014-09-16 05:52:51 +08:00
|
|
|
assert(Created && "No vector to hold sdiv ops.");
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = N->getValueType(0);
|
2013-05-25 10:42:55 +08:00
|
|
|
SDLoc dl(N);
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-05-17 01:42:15 +08:00
|
|
|
// Check to see if we can do this.
|
2008-11-30 14:35:39 +08:00
|
|
|
// FIXME: We should be more aggressive here.
|
|
|
|
if (!isTypeLegal(VT))
|
|
|
|
return SDValue();
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2015-06-28 04:33:26 +08:00
|
|
|
// If the sdiv has an 'exact' bit we can use a simpler lowering.
|
|
|
|
if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
|
|
|
|
return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
|
|
|
|
|
2014-04-26 20:06:28 +08:00
|
|
|
APInt::ms magics = Divisor.magic();
|
2010-11-23 11:31:01 +08:00
|
|
|
|
2006-05-17 01:42:15 +08:00
|
|
|
// Multiply the numerator (operand 0) by the magic value
|
2008-11-30 14:35:39 +08:00
|
|
|
// FIXME: We should support doing a MUL in a wider type
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Q;
|
2011-11-08 01:09:05 +08:00
|
|
|
if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
|
|
|
|
isOperationLegalOrCustom(ISD::MULHS, VT))
|
2009-02-03 08:47:48 +08:00
|
|
|
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(magics.m, dl, VT));
|
2011-11-08 01:09:05 +08:00
|
|
|
else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
|
|
|
|
isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
|
2009-02-03 08:47:48 +08:00
|
|
|
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
|
2007-10-09 02:33:35 +08:00
|
|
|
N->getOperand(0),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
|
2007-10-09 02:33:35 +08:00
|
|
|
else
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // No mulhs or equvialent
|
2006-05-17 01:42:15 +08:00
|
|
|
// If d > 0 and m < 0, add the numerator
|
2014-04-26 20:06:28 +08:00
|
|
|
if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
|
2009-02-03 08:47:48 +08:00
|
|
|
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(Q.getNode());
|
2006-05-17 01:42:15 +08:00
|
|
|
}
|
|
|
|
// If d < 0 and m > 0, subtract the numerator.
|
2014-04-26 20:06:28 +08:00
|
|
|
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
|
2009-02-03 08:47:48 +08:00
|
|
|
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(Q.getNode());
|
2006-05-17 01:42:15 +08:00
|
|
|
}
|
2015-07-09 10:09:20 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
2006-05-17 01:42:15 +08:00
|
|
|
// Shift right algebraic if shift value is nonzero
|
|
|
|
if (magics.s > 0) {
|
2015-07-09 10:09:20 +08:00
|
|
|
Q = DAG.getNode(
|
|
|
|
ISD::SRA, dl, VT, Q,
|
|
|
|
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(Q.getNode());
|
2006-05-17 01:42:15 +08:00
|
|
|
}
|
|
|
|
// Extract the sign bit and add it to the quotient
|
2015-07-09 10:09:20 +08:00
|
|
|
SDValue T =
|
|
|
|
DAG.getNode(ISD::SRL, dl, VT, Q,
|
|
|
|
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
|
|
|
|
getShiftAmountTy(Q.getValueType(), DL)));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(T.getNode());
|
2009-02-03 08:47:48 +08:00
|
|
|
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
|
2006-05-17 01:42:15 +08:00
|
|
|
}
|
|
|
|
|
2013-06-09 07:51:45 +08:00
|
|
|
/// \brief Given an ISD::UDIV node expressing a divide by constant,
|
2006-05-17 01:42:15 +08:00
|
|
|
/// return a DAG expression to select that will generate the same value by
|
2014-09-16 03:47:44 +08:00
|
|
|
/// multiplying by a magic number.
|
|
|
|
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
|
2014-04-26 20:06:28 +08:00
|
|
|
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
|
|
|
|
SelectionDAG &DAG, bool IsAfterLegalization,
|
|
|
|
std::vector<SDNode *> *Created) const {
|
2014-09-16 05:52:51 +08:00
|
|
|
assert(Created && "No vector to hold udiv ops.");
|
2014-10-29 23:23:11 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = N->getValueType(0);
|
2013-05-25 10:42:55 +08:00
|
|
|
SDLoc dl(N);
|
2015-07-09 10:09:20 +08:00
|
|
|
auto &DL = DAG.getDataLayout();
|
2008-11-30 14:02:26 +08:00
|
|
|
|
2006-05-17 01:42:15 +08:00
|
|
|
// Check to see if we can do this.
|
2008-11-30 14:02:26 +08:00
|
|
|
// FIXME: We should be more aggressive here.
|
|
|
|
if (!isTypeLegal(VT))
|
|
|
|
return SDValue();
|
|
|
|
|
|
|
|
// FIXME: We should use a narrower constant when the upper
|
|
|
|
// bits are known to be zero.
|
2014-04-26 20:06:28 +08:00
|
|
|
APInt::mu magics = Divisor.magicu();
|
BuildUDIV: If the divisor is even we can simplify the fixup of the multiplied value by introducing an early shift.
This allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
shrl $2, %edi
imulq $613566757, %rdi, %rax
shrq $32, %rax
ret
instead of
movl %edi, %eax
imulq $613566757, %rax, %rcx
shrq $32, %rcx
subl %ecx, %eax
shrl %eax
addl %ecx, %eax
shrl $4, %eax
on x86_64
llvm-svn: 127829
2011-03-18 04:39:14 +08:00
|
|
|
|
|
|
|
SDValue Q = N->getOperand(0);
|
|
|
|
|
|
|
|
// If the divisor is even, we can avoid using the expensive fixup by shifting
|
|
|
|
// the divided value upfront.
|
2014-04-26 20:06:28 +08:00
|
|
|
if (magics.a != 0 && !Divisor[0]) {
|
|
|
|
unsigned Shift = Divisor.countTrailingZeros();
|
2015-07-09 10:09:20 +08:00
|
|
|
Q = DAG.getNode(
|
|
|
|
ISD::SRL, dl, VT, Q,
|
|
|
|
DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(Q.getNode());
|
BuildUDIV: If the divisor is even we can simplify the fixup of the multiplied value by introducing an early shift.
This allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
shrl $2, %edi
imulq $613566757, %rdi, %rax
shrq $32, %rax
ret
instead of
movl %edi, %eax
imulq $613566757, %rax, %rcx
shrq $32, %rcx
subl %ecx, %eax
shrl %eax
addl %ecx, %eax
shrl $4, %eax
on x86_64
llvm-svn: 127829
2011-03-18 04:39:14 +08:00
|
|
|
|
|
|
|
// Get magic number for the shifted divisor.
|
2014-04-26 20:06:28 +08:00
|
|
|
magics = Divisor.lshr(Shift).magicu(Shift);
|
BuildUDIV: If the divisor is even we can simplify the fixup of the multiplied value by introducing an early shift.
This allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
shrl $2, %edi
imulq $613566757, %rdi, %rax
shrq $32, %rax
ret
instead of
movl %edi, %eax
imulq $613566757, %rax, %rcx
shrq $32, %rcx
subl %ecx, %eax
shrl %eax
addl %ecx, %eax
shrl $4, %eax
on x86_64
llvm-svn: 127829
2011-03-18 04:39:14 +08:00
|
|
|
assert(magics.a == 0 && "Should use cheap fixup now");
|
|
|
|
}
|
2008-11-30 14:02:26 +08:00
|
|
|
|
2006-05-17 01:42:15 +08:00
|
|
|
// Multiply the numerator (operand 0) by the magic value
|
2008-11-30 14:02:26 +08:00
|
|
|
// FIXME: We should support doing a MUL in a wider type
|
2011-11-08 01:09:05 +08:00
|
|
|
if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
|
|
|
|
isOperationLegalOrCustom(ISD::MULHU, VT))
|
2015-04-28 22:05:47 +08:00
|
|
|
Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
|
2011-11-08 01:09:05 +08:00
|
|
|
else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
|
|
|
|
isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
|
BuildUDIV: If the divisor is even we can simplify the fixup of the multiplied value by introducing an early shift.
This allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
shrl $2, %edi
imulq $613566757, %rdi, %rax
shrq $32, %rax
ret
instead of
movl %edi, %eax
imulq $613566757, %rax, %rcx
shrq $32, %rcx
subl %ecx, %eax
shrl %eax
addl %ecx, %eax
shrl $4, %eax
on x86_64
llvm-svn: 127829
2011-03-18 04:39:14 +08:00
|
|
|
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
|
2007-10-09 02:33:35 +08:00
|
|
|
else
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // No mulhu or equvialent
|
2014-09-16 05:52:51 +08:00
|
|
|
|
|
|
|
Created->push_back(Q.getNode());
|
2006-05-17 01:42:15 +08:00
|
|
|
|
|
|
|
if (magics.a == 0) {
|
2014-04-26 20:06:28 +08:00
|
|
|
assert(magics.s < Divisor.getBitWidth() &&
|
2008-11-30 14:02:26 +08:00
|
|
|
"We shouldn't generate an undefined shift!");
|
2015-07-09 10:09:20 +08:00
|
|
|
return DAG.getNode(
|
|
|
|
ISD::SRL, dl, VT, Q,
|
|
|
|
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
|
2006-05-17 01:42:15 +08:00
|
|
|
} else {
|
2009-02-03 08:47:48 +08:00
|
|
|
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(NPQ.getNode());
|
2015-07-09 10:09:20 +08:00
|
|
|
NPQ = DAG.getNode(
|
|
|
|
ISD::SRL, dl, VT, NPQ,
|
|
|
|
DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(NPQ.getNode());
|
2009-02-03 08:47:48 +08:00
|
|
|
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
|
2014-09-16 05:52:51 +08:00
|
|
|
Created->push_back(NPQ.getNode());
|
2015-07-09 10:09:20 +08:00
|
|
|
return DAG.getNode(
|
|
|
|
ISD::SRL, dl, VT, NPQ,
|
|
|
|
DAG.getConstant(magics.s - 1, dl,
|
|
|
|
getShiftAmountTy(NPQ.getValueType(), DL)));
|
2006-05-17 01:42:15 +08:00
|
|
|
}
|
|
|
|
}
|
2014-01-06 08:43:20 +08:00
|
|
|
|
|
|
|
bool TargetLowering::
|
|
|
|
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
|
|
|
|
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
|
|
|
|
DAG.getContext()->emitError("argument to '__builtin_return_address' must "
|
|
|
|
"be a constant integer");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2014-04-12 00:11:58 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Legalization Utilities
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// Expand the multiplication N (result type VT) into a low part Lo and a high
/// part Hi of type HiLoVT, using MULHU/MULHS/UMUL_LOHI/SMUL_LOHI on HiLoVT.
///
/// \param N       the multiply node; operands 0 and 1 are the factors.
/// \param Lo,Hi   outputs, written only on the paths that return true.
/// \param HiLoVT  type of each produced half.
/// \param DAG     DAG in which nodes are created.
/// \param LL,LH   low / high halves of operand 0, or empty values.
/// \param RL,RH   low / high halves of operand 1, or empty values.
///                The four halves must be either all set or all empty; empty
///                ones are derived here with TRUNCATE / SRL when those
///                operations are legal or custom.
/// \returns true if Lo and Hi were produced, false if no expansion applies.
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, SDValue LL, SDValue LH,
                               SDValue RL, SDValue RH) const {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  // Without any multiply-high flavour on HiLoVT there is nothing we can do.
  if (!(HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI))
    return false;

  const unsigned OuterBitSize = VT.getSizeInBits();
  const unsigned InnerBitSize = HiLoVT.getSizeInBits();
  const unsigned LHSSignBits = DAG.ComputeNumSignBits(N->getOperand(0));
  const unsigned RHSSignBits = DAG.ComputeNumSignBits(N->getOperand(1));

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  // Lo,Hi = <Opcode> LL, RL where the node yields both halves.
  auto EmitLoHi = [&](unsigned Opcode) {
    Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
    Hi = SDValue(Lo.getNode(), 1);
  };
  // Lo = mul LL, RL; Hi = <HighOpcode> LL, RL.
  auto EmitMulAndHigh = [&](unsigned HighOpcode) {
    Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
    Hi = DAG.getNode(HighOpcode, dl, HiLoVT, LL, RL);
  };

  // Derive the low halves by truncation when the caller did not supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
      DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
    // The inputs are both zero-extended.
    if (HasUMUL_LOHI) {
      // We can emit a umul_lohi.
      EmitLoHi(ISD::UMUL_LOHI);
      return true;
    }
    if (HasMULHU) {
      // We can emit a mulhu+mul.
      EmitMulAndHigh(ISD::MULHU);
      return true;
    }
  }

  if (LHSSignBits > InnerBitSize && RHSSignBits > InnerBitSize) {
    // The input values are both sign-extended.
    if (HasSMUL_LOHI) {
      // We can emit a smul_lohi.
      EmitLoHi(ISD::SMUL_LOHI);
      return true;
    }
    if (HasMULHS) {
      // We can emit a mulhs+mul.
      EmitMulAndHigh(ISD::MULHS);
      return true;
    }
  }

  // Derive the high halves (shift right by the width difference, then
  // truncate) when the caller did not supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    auto &DL = DAG.getDataLayout();
    unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
    SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
    LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General case: unsigned product of the low halves, with the cross terms
  // added into the high half afterwards.
  if (HasUMUL_LOHI)
    EmitLoHi(ISD::UMUL_LOHI); // Lo,Hi = umul LHS, RHS.
  else if (HasMULHU)
    EmitMulAndHigh(ISD::MULHU);
  else
    return false;

  RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
  LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
  Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
  Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
  return true;
}
|
2014-07-11 06:40:18 +08:00
|
|
|
|
|
|
|
/// Expand a floating-point to signed-integer conversion into integer
/// operations on the bit pattern of the source value.
///
/// \param Node    the conversion node; operand 0 is the source value.
/// \param Result  receives the expanded value when the function returns true.
/// \param DAG     DAG in which nodes are created.
/// \returns true on success; false for anything other than f32 -> i64.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  EVT VT = Node->getOperand(0).getValueType();
  EVT NVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (VT != MVT::f32 || NVT != MVT::i64)
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
  const unsigned SrcBits = VT.getSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcBits);

  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignBit(SrcBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as an integer of the same width.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));

  auto &DL = DAG.getDataLayout();
  EVT ShiftVT = getShiftAmountTy(IntVT, DL);

  // Exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue RawExponent = DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask);
  SDValue ExponentBits =
      DAG.getNode(ISD::SRL, dl, IntVT, RawExponent,
                  DAG.getZExtOrTrunc(ExponentLoBit, dl, ShiftVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = arithmetic shift of the isolated sign bit down to bit 0, then
  // widened to the result type.
  SDValue RawSign = DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask);
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, RawSign,
                             DAG.getZExtOrTrunc(SignLowBit, dl, ShiftVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);

  // R = (Bits & MantissaMask) | 0x00800000, widened to the result type.
  SDValue Mantissa = DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask);
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT, Mantissa,
                          DAG.getConstant(0x00800000, dl, IntVT));
  R = DAG.getZExtOrTrunc(R, dl, NVT);

  // Shift R left by (Exponent - 23) when Exponent > 23, otherwise right by
  // (23 - Exponent).
  SDValue LeftAmount = DAG.getZExtOrTrunc(
      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), dl, ShiftVT);
  SDValue ShiftedLeft = DAG.getNode(ISD::SHL, dl, NVT, R, LeftAmount);
  SDValue RightAmount = DAG.getZExtOrTrunc(
      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), dl, ShiftVT);
  SDValue ShiftedRight = DAG.getNode(ISD::SRL, dl, NVT, R, RightAmount);
  R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, ShiftedLeft, ShiftedRight,
                      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign.
  SDValue Flipped = DAG.getNode(ISD::XOR, dl, NVT, R, Sign);
  SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, Flipped, Sign);

  // A negative exponent selects a result of zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
  return true;
}
|
2015-07-29 00:24:05 +08:00
|
|
|
|
2016-03-31 05:15:10 +08:00
|
|
|
/// Replace a vector load with one scalar (extending) load per element and
/// rebuild the vector from the pieces.
///
/// \param LD   the vector load to split; its memory element type must be
///             byte sized.
/// \param DAG  DAG in which nodes are created.
/// \returns a merge of two values: the BUILD_VECTOR of the loaded elements
///          and a TokenFactor joining the chains of all scalar loads.
SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                            SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue ElementPtr = LD->getBasePtr();
  EVT MemVT = LD->getMemoryVT();
  ISD::LoadExtType ExtType = LD->getExtensionType();

  const unsigned NumElem = MemVT.getVectorNumElements();

  EVT MemEltVT = MemVT.getScalarType();
  EVT ResultVT = LD->getValueType(0);
  EVT ResultEltVT = ResultVT.getScalarType();

  // Distance in bytes between consecutive elements in memory.
  const unsigned Stride = MemEltVT.getSizeInBits() / 8;
  assert(MemEltVT.isByteSized());

  EVT PtrVT = ElementPtr.getValueType();

  SmallVector<SDValue, 8> Elements;
  SmallVector<SDValue, 8> ElementChains;

  // Every scalar load hangs off the original chain; Offset tracks the byte
  // offset of the current element from the start of the vector.
  unsigned Offset = 0;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx, Offset += Stride) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, ResultEltVT, Chain, ElementPtr,
                       LD->getPointerInfo().getWithOffset(Offset),
                       MemEltVT, MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Step the pointer to the next element.
    ElementPtr = DAG.getNode(ISD::ADD, SL, PtrVT, ElementPtr,
                             DAG.getConstant(Stride, SL, PtrVT));

    Elements.push_back(ScalarLoad.getValue(0));
    ElementChains.push_back(ScalarLoad.getValue(1));
  }

  SDValue NewChain =
      DAG.getNode(ISD::TokenFactor, SL, MVT::Other, ElementChains);
  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, ResultVT, Elements);

  return DAG.getMergeValues({ Value, NewChain }, SL);
}
|
|
|
|
|
2016-03-31 05:15:18 +08:00
|
|
|
// FIXME: This relies on each element having a byte size, otherwise the stride
|
|
|
|
// is 0 and just overwrites the same location. ExpandStore currently expects
|
|
|
|
// this broken behavior.
|
|
|
|
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
|
|
|
|
SelectionDAG &DAG) const {
|
|
|
|
SDLoc SL(ST);
|
|
|
|
|
|
|
|
SDValue Chain = ST->getChain();
|
|
|
|
SDValue BasePtr = ST->getBasePtr();
|
|
|
|
SDValue Value = ST->getValue();
|
|
|
|
EVT StVT = ST->getMemoryVT();
|
|
|
|
|
|
|
|
// The type of the data we want to save
|
|
|
|
EVT RegVT = Value.getValueType();
|
|
|
|
EVT RegSclVT = RegVT.getScalarType();
|
|
|
|
|
|
|
|
// The type of data as saved in memory.
|
|
|
|
EVT MemSclVT = StVT.getScalarType();
|
|
|
|
|
|
|
|
EVT PtrVT = BasePtr.getValueType();
|
|
|
|
|
|
|
|
// Store Stride in bytes
|
|
|
|
unsigned Stride = MemSclVT.getSizeInBits() / 8;
|
|
|
|
EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
|
|
|
|
unsigned NumElem = StVT.getVectorNumElements();
|
|
|
|
|
|
|
|
// Extract each of the elements from the original vector and save them into
|
|
|
|
// memory individually.
|
|
|
|
SmallVector<SDValue, 8> Stores;
|
|
|
|
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
|
|
|
|
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
|
|
|
|
DAG.getConstant(Idx, SL, IdxVT));
|
|
|
|
|
|
|
|
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
|
|
|
|
DAG.getConstant(Idx * Stride, SL, PtrVT));
|
|
|
|
|
|
|
|
// This scalar TruncStore may be illegal, but we legalize it later.
|
|
|
|
SDValue Store = DAG.getTruncStore(
|
[SelectionDAG] Get rid of bool parameters in SelectionDAG::getLoad, getStore, and friends.
Summary:
Instead, we take a single flags arg (a bitset).
Also add a default 0 alignment, and change the order of arguments so the
alignment comes before the flags.
This greatly simplifies many callsites, and fixes a bug in
AMDGPUISelLowering, wherein the order of the args to getLoad was
inverted. It also greatly simplifies the process of adding another flag
to getLoad.
Reviewers: chandlerc, tstellarAMD
Subscribers: jholewinski, arsenm, jyknight, dsanders, nemanjai, llvm-commits
Differential Revision: http://reviews.llvm.org/D22249
llvm-svn: 275592
2016-07-16 02:27:10 +08:00
|
|
|
Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
|
|
|
|
MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
|
|
|
|
ST->getMemOperand()->getFlags(), ST->getAAInfo());
|
2016-03-31 05:15:18 +08:00
|
|
|
|
|
|
|
Stores.push_back(Store);
|
|
|
|
}
|
|
|
|
|
|
|
|
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
|
|
|
|
}
|
|
|
|
|
2016-04-22 02:19:11 +08:00
|
|
|
// Expand an unaligned, unindexed load into operations that do not require the
// original alignment.
//
// Three strategies are used, depending on the loaded type:
//  * FP/vector result whose same-sized integer type and memory type are both
//    legal: either scalarize a vector load (when an integer load of that size
//    is neither legal nor custom), or load as an integer and bitcast.
//  * Other FP/vector results: copy the bytes register-by-register into a
//    stack temporary and reload from there with the original extension type.
//  * Scalar integers: load the two halves separately and recombine them with
//    SHL/OR.
//
// Returns the pair { loaded value, output chain }.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      // Only vectors can be scalarized: scalarizeVectorLoad queries the
      // vector element count, which is invalid for a scalar FP memory type.
      // Scalar FP loads fall through to the integer load + bitcast below.
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces, rounding up for a partial last piece.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);

    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
                                    MachinePointerInfo()));
      // Increment the pointers.
      Offset += RegBytes;
      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
                             StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
                                       MachinePointerInfo(), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo(), LoadedVT);

    // Hand back the reloaded value together with the TokenFactor of the
    // stack-slot stores as the output chain.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The half at the lower address is the low
  // half on little-endian targets and the high half on big-endian targets.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // Aggregate the two parts: Result = (Hi << NumBits) | Lo.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}
|
|
|
|
|
|
|
|
// Expand an unaligned, unindexed store into operations that do not require the
// original alignment.
//
// Three strategies are used, depending on the memory type:
//  * FP/vector store whose same-sized integer type is legal: either scalarize
//    a vector store (when an integer store of that size is neither legal nor
//    custom), or bitcast the value to an integer and store that.
//  * Other FP/vector stores: store the value to a stack temporary and copy it
//    register-by-register to the destination.
//  * Scalar integers: split the value into two halves and emit two truncating
//    stores.
//
// Returns the output chain of the expanded store.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  unsigned Alignment = ST->getAlignment();

  SDLoc dl(ST);
  if (ST->getMemoryVT().isFloatingPoint() ||
      ST->getMemoryVT().isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      // Only vectors can be scalarized: scalarizeVectorStore queries the
      // vector element count, which is invalid for a scalar FP memory type.
      // Scalar FP stores fall through to the bitcast + integer store below.
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          ST->getMemoryVT().isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);

        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    EVT StoredVT = ST->getMemoryVT();
    MVT RegVT =
      getRegisterType(*DAG.getContext(),
                      EVT::getIntegerVT(*DAG.getContext(),
                                        StoredVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces, rounding up for a partial last piece.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
                                      MachinePointerInfo(), StoredVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load =
          DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
                             StackPtr, StackPtrIncrement);
      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
                                  MachinePointerInfo(), MemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), MemVT,
                          MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(ST->getMemoryVT().isInteger() &&
         !ST->getMemoryVT().isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.  Lo is the value itself (the
  // truncating store keeps only its low half); Hi is the value shifted down.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.  The half at the lower address is the low half on
  // little-endian targets and the high half on big-endian targets.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  EVT PtrVT = Ptr.getValueType();
  Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                    DAG.getConstant(IncrementSize, dl, PtrVT));
  // The second half is only as aligned as the offset from the first allows.
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
|
|
|
|
|
2015-07-29 00:24:05 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Implementation of Emulated TLS Model
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Lower an access to the address of a TLS variable under the emulated TLS
// model. For a variable xyz this becomes a C-calling-convention call
//   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
// and the value returned by that call is the result.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Look up the "__emutls_v.<name>" control variable in the module that owns
  // the TLS global.
  const GlobalValue *GV = GA->getGlobal();
  std::string ControlVarName = ("__emutls_v." + GV->getName()).str();
  Module *OwningModule = const_cast<Module*>(GV->getParent());
  GlobalVariable *EmuTlsVar =
      OwningModule->getNamedGlobal(StringRef(ControlVarName));
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");

  // The call takes a single argument: the address of the control variable.
  ArgListEntry Arg;
  Arg.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Arg.Ty = VoidPtrType;
  ArgListTy CallArgs;
  CallArgs.push_back(Arg);

  SDValue Callee = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setCallee(CallingConv::C, VoidPtrType, Callee, std::move(CallArgs));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At least for X86 targets, maybe good for other targets too?
  MachineFrameInfo &FrameInfo = DAG.getMachineFunction().getFrameInfo();
  FrameInfo.setAdjustsStack(true);  // Is this only for X86 target?
  FrameInfo.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
|
2016-08-16 21:53:53 +08:00
|
|
|
|
|
|
|
// Rewrite (setcc X, 0, seteq) as (srl (ctlz X), log2(bitwidth of X)) when the
// target reports CTLZ as fast. Operands narrower than i32 are zero-extended to
// i32 first, and the final value is produced as an i32 via TRUNCATE.
//
// Returns an empty SDValue when CTLZ is not fast or when Op is not an
// equality comparison against a constant zero.
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();

  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  // Only "X == 0" with a constant right-hand side is handled.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!RHS || !RHS->isNullValue() || Cond != ISD::SETEQ)
    return SDValue();

  SDValue Src = Op.getOperand(0);
  EVT OpVT = Src.getValueType();
  if (OpVT.bitsLT(MVT::i32)) {
    OpVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op.getOperand(0));
  }

  // Shifting the leading-zero count right by log2(bitwidth) moves the bit
  // that represents a count equal to the bitwidth down to bit 0.
  unsigned ShiftBits = Log2_32(OpVT.getSizeInBits());
  SDValue LeadingZeros = DAG.getNode(ISD::CTLZ, dl, OpVT, Src);
  SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
  SDValue IsZero = DAG.getNode(ISD::SRL, dl, OpVT, LeadingZeros, ShiftAmt);
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, IsZero);
}
|