[X86] Remove X86 specific dag nodes for RDTSC/RDTSCP/RDPMC. NFCI

This patch removes the following dag node opcodes from namespace X86ISD:

RDTSC_DAG,
RDTSCP_DAG,
RDPMC_DAG

The logic that expands RDTSC/RDPMC/XGETBV intrinsics is basically the same. The
only differences are:

    RDTSC/RDTSCP don't implicitly read ECX.
    RDTSCP also implicitly writes ECX.

I moved the common expansion logic into a helper function with the goal to get
rid of code repetition. That helper is now used for the expansion of
RDTSC/RDTSCP/RDPMC/XGETBV intrinsics.

No functional change intended.

Differential Revision: https://reviews.llvm.org/D59547

llvm-svn: 356546
This commit is contained in:
Andrea Di Biagio 2019-03-20 11:21:15 +00:00
parent d4c80012c0
commit 624f5deff4
5 changed files with 66 additions and 144 deletions

View File

@ -22794,24 +22794,37 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
return SDValue(Res, 0);
}
/// Handles the lowering of builtin intrinsic that return the value
/// of the extended control register.
static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
/// Handles the lowering of builtin intrinsics with chain that return their
/// value into registers EDX:EAX.
/// If operand ScrReg is a valid register identifier, then operand 2 of N is
/// copied to SrcReg. The assumption is that SrcReg is an implicit input to
/// TargetOpcode.
/// Returns a Glue value which can be used to add extra copy-from-reg if the
/// expanded intrinsics implicitly defines extra registers (i.e. not just
/// EDX:EAX).
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
unsigned TargetOpcode,
unsigned SrcReg,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Glue;
// The ECX register is used to select the index of the XCR register to
// return.
SDValue Chain =
DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
if (SrcReg) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);
Glue = Chain.getValue(1);
}
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue N1Ops[] = {Chain, Glue};
SDNode *N1 = DAG.getMachineNode(
TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
Chain = SDValue(N1, 0);
// Reads the content of XCR and returns it in registers EDX:EAX.
SDValue LO, HI;
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
@ -22822,60 +22835,15 @@ static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
LO.getValue(2));
}
Chain = HI.getValue(1);
Glue = HI.getValue(2);
if (Subtarget.is64Bit()) {
// Merge the two 32-bit values into a 64-bit one..
// Merge the two 32-bit values into a 64-bit one.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
/// Handles the lowering of builtin intrinsics that read performance monitor
/// counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
// The ECX register is used to select the index of the performance counter
// to read.
SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
N->getOperand(2));
SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
// Reads the content of a 64-bit performance counter and returns it in the
// registers EDX:EAX.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
if (Subtarget.is64Bit()) {
// The EAX register is loaded with the low-order 32 bits. The EDX register
// is loaded with the supported high-order bits of the counter.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
return Glue;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
@ -22883,6 +22851,7 @@ static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
return Glue;
}
/// Handles the lowering of builtin intrinsics that read the time stamp counter
@ -22892,59 +22861,28 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
SDValue LO, HI;
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
SDValue Chain = HI.getValue(1);
SDValue TSC;
if (Subtarget.is64Bit()) {
// The EDX register is loaded with the high-order 32 bits of the MSR, and
// the EAX register is loaded with the low-order 32 bits.
TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
} else {
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
}
if (Opcode == X86ISD::RDTSCP_DAG) {
assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
Results.push_back(TSC);
Results.push_back(ecx);
Results.push_back(ecx.getValue(1));
SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
/* NoRegister */0, Subtarget,
Results);
if (Opcode != X86::RDTSCP)
return;
}
Results.push_back(TSC);
Results.push_back(Chain);
SDValue Chain = Results[1];
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
Results[1] = ecx;
Results.push_back(ecx.getValue(1));
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, DL);
}
@ -23145,15 +23083,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
case RDPMC: {
SmallVector<SDValue, 2> Results;
getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Get Extended Control Register.
case RDPMC:
// GetExtended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
// RDPMC uses ECX to select the index of the performance counter to read.
// XGETBV uses ECX to select the index of the XCR register to return.
// The result is stored into registers EDX:EAX.
expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
@ -27233,21 +27172,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
Results);
return;
case Intrinsic::x86_xgetbv:
return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
Results);
return;
}
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results);
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
@ -27526,9 +27467,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";

View File

@ -77,15 +77,6 @@ namespace llvm {
/// Same as call except it adds the NoTrack prefix.
NT_CALL,
/// This operation implements the lowering for readcyclecounter.
RDTSC_DAG,
/// X86 Read Time-Stamp Counter and Processor ID.
RDTSCP_DAG,
/// X86 Read Performance Monitoring Counters.
RDPMC_DAG,
/// X86 compare and logical compare instructions.
CMP, COMI, UCOMI,

View File

@ -205,13 +205,6 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;

View File

@ -14,10 +14,10 @@
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", []>, TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
// CPU flow control instructions
@ -410,7 +410,7 @@ let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;

View File

@ -305,16 +305,16 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdseed_16, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdseed_32, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86::RDTSC, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86::RDTSCP, 0),
X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};