[SystemZ] Simplify handling of 128-bit multiply/divide instruction

Several integer multiply/divide instructions require use of a
register pair as input and output.  This patch moves setting
up the input register pair from C++ code to TableGen, simplifying
the whole process and making it more easily extensible.

No functional change.

llvm-svn: 307155
This commit is contained in:
Ulrich Weigand 2017-07-05 13:17:31 +00:00
parent e2a68e96f0
commit 43579cf4a0
7 changed files with 105 additions and 105 deletions

View File

@ -2224,15 +2224,12 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1. Store the even register result
// and Opcode performs the GR128 operation. Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
unsigned Extend, unsigned Opcode, SDValue Op0,
SDValue Op1, SDValue &Even, SDValue &Odd) {
SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
SDValue(In128, 0), Op1);
unsigned Opcode, SDValue Op0, SDValue Op1,
SDValue &Even, SDValue &Odd) {
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
bool Is32Bit = is32Bit(VT);
Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
@ -2962,7 +2959,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
Op.getOperand(1), Ops[1], Ops[0]);
else {
// Do a full 128-bit multiplication based on UMUL_LOHI64:
// Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
//
// (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
//
@ -2980,10 +2977,10 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SDValue RL = Op.getOperand(1);
SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
// UMUL_LOHI64 returns the low result in the odd register and the high
// result in the even register. SMUL_LOHI is defined to return the
// low half first, so the results are in reverse order.
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
// SystemZISD::UMUL_LOHI returns the low result in the odd register and
// the high result in the even register. ISD::SMUL_LOHI is defined to
// return the low half first, so the results are in reverse order.
lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
LL, RL, Ops[1], Ops[0]);
SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
@ -3004,10 +3001,10 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
Op.getOperand(1), Ops[1], Ops[0]);
else
// UMUL_LOHI64 returns the low result in the odd register and the high
// result in the even register. UMUL_LOHI is defined to return the
// low half first, so the results are in reverse order.
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
// SystemZISD::UMUL_LOHI returns the low result in the odd register and
// the high result in the even register. ISD::UMUL_LOHI is defined to
// return the low half first, so the results are in reverse order.
lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
@ -3018,24 +3015,19 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
SDValue Op1 = Op.getOperand(1);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Opcode;
// We use DSGF for 32-bit division.
if (is32Bit(VT)) {
// We use DSGF for 32-bit division. This means the first operand must
// always be 64-bit, and the second operand should be 32-bit whenever
// that is possible, to improve performance.
if (is32Bit(VT))
Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
Opcode = SystemZISD::SDIVREM32;
} else if (DAG.ComputeNumSignBits(Op1) > 32) {
else if (DAG.ComputeNumSignBits(Op1) > 32)
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
Opcode = SystemZISD::SDIVREM32;
} else
Opcode = SystemZISD::SDIVREM64;
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
// input is "don't care". The instruction returns the remainder in
// the even register and the quotient in the odd register.
// DSG(F) returns the remainder in the even register and the
// quotient in the odd register.
SDValue Ops[2];
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
Op0, Op1, Ops[1], Ops[0]);
lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
@ -3044,16 +3036,11 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
// DL(G) uses a double-width dividend, so we need to clear the even
// register in the GR128 input. The instruction returns the remainder
// in the even register and the quotient in the odd register.
// DL(G) returns the remainder in the even register and the
// quotient in the odd register.
SDValue Ops[2];
if (is32Bit(VT))
lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
else
lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, DL);
}
@ -4669,11 +4656,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(POPCNT);
OPCODE(UMUL_LOHI64);
OPCODE(SDIVREM32);
OPCODE(SDIVREM64);
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
OPCODE(UMUL_LOHI);
OPCODE(SDIVREM);
OPCODE(UDIVREM);
OPCODE(MVC);
OPCODE(MVC_LOOP);
OPCODE(NC);
@ -5778,14 +5763,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
return DoneMBB;
}
// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
// Emit an extension from a GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care". SubReg is subreg_l32 when extending a GR32
// and subreg_l64 when extending a GR64.
// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
MachineBasicBlock *MBB,
bool ClearEven,
unsigned SubReg) const {
bool ClearEven) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@ -5808,7 +5791,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
In128 = NewIn128;
}
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
.addReg(In128).addReg(Src).addImm(SubReg);
.addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
MI.eraseFromParent();
return MBB;
@ -6172,12 +6155,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::CondStoreF64Inv:
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
case SystemZ::AEXT128_64:
return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
case SystemZ::ZEXT128_32:
return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
case SystemZ::ZEXT128_64:
return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
case SystemZ::AEXT128:
return emitExt128(MI, MBB, false);
case SystemZ::ZEXT128:
return emitExt128(MI, MBB, true);
case SystemZ::ATOMIC_SWAPW:
return emitAtomicLoadBinary(MI, MBB, 0, 0);

View File

@ -86,14 +86,11 @@ enum NodeType : unsigned {
// Count number of bits set in operand 0 per byte.
POPCNT,
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
UMUL_LOHI64,
SDIVREM32,
SDIVREM64,
UDIVREM32,
UDIVREM64,
// Wrappers around the ISD opcodes of the same name. The output is GR128.
// Input operands may be GR64 or GR32, depending on the instruction.
UMUL_LOHI,
SDIVREM,
UDIVREM,
// Use a series of MVCs to copy bytes from one memory location to another.
// The operands are:
@ -562,7 +559,7 @@ private:
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const;
bool ClearEven) const;
MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned BinOpcode, unsigned BitSize,

View File

@ -677,6 +677,22 @@ let Predicates = [FeatureLoadAndTrap] in {
def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
}
// Extend GR64s to GR128s.
let usesCustomInserter = 1 in
def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
//===----------------------------------------------------------------------===//
// "Any" extensions
//===----------------------------------------------------------------------===//
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
// Extend GR64s to GR128s.
let usesCustomInserter = 1 in
def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
@ -1216,13 +1232,17 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>;
def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>;
def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>;
def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2),
(MLGR (AEXT128 GR64:$src1), GR64:$src2)>;
// Multiplication of memory, producing two results.
def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>;
def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>;
def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>;
def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
(MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
//===----------------------------------------------------------------------===//
// Division and remainder
@ -1230,19 +1250,38 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
let hasSideEffects = 1 in { // Do not speculatively execute.
// Division and remainder, from registers.
def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>;
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>;
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>;
def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>;
def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>;
// Division and remainder, from memory.
def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>;
def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>;
def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>;
def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>;
}
def : Pat<(z_sdivrem GR64:$src1, GR32:$src2),
(DSGFR (AEXT128 GR64:$src1), GR32:$src2)>;
def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))),
(DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_sdivrem GR64:$src1, GR64:$src2),
(DSGR (AEXT128 GR64:$src1), GR64:$src2)>;
def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
(DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_udivrem GR32:$src1, GR32:$src2),
(DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
subreg_l32)), GR32:$src2)>;
def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))),
(DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
subreg_l32)), bdxaddr20only:$src2)>;
def : Pat<(z_udivrem GR64:$src1, GR64:$src2),
(DLGR (ZEXT128 GR64:$src1), GR64:$src2)>;
def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
(DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>;
//===----------------------------------------------------------------------===//
// Shifts
@ -1894,17 +1933,6 @@ def : Pat<(ctlz GR64:$src),
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
// Extend GR32s and GR64s to GR128s.
let usesCustomInserter = 1 in {
def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>;
def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
}
// Search a block of memory for a character.
let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;

View File

@ -36,14 +36,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisVT<1, untyped>,
SDTCisVT<2, i32>]>;
def SDT_ZGR128Binary64 : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisVT<1, untyped>,
SDTCisVT<2, i64>]>;
SDTCisInt<1>,
SDTCisInt<2>]>;
def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@ -185,11 +181,9 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
[SDNPInGlue]>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>;
def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;

View File

@ -694,8 +694,8 @@ def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>;
def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>;
// Extend
def : InstRW<[FXa], (instregex "AEXT128_64$")>;
def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>;
def : InstRW<[FXa], (instregex "AEXT128$")>;
def : InstRW<[FXa], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>;

View File

@ -627,8 +627,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
// Extend
def : InstRW<[FXU], (instregex "AEXT128_64$")>;
def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
def : InstRW<[FXU], (instregex "AEXT128$")>;
def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;

View File

@ -665,8 +665,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
// Extend
def : InstRW<[FXU], (instregex "AEXT128_64$")>;
def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
def : InstRW<[FXU], (instregex "AEXT128$")>;
def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;