Add MULX code generation support

llvm-svn: 164673
This commit is contained in:
Michael Liao 2012-09-26 08:22:37 +00:00
parent dd9602fe93
commit f9f7b5518a
4 changed files with 127 additions and 27 deletions

View File

@ -2184,13 +2184,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
@ -2202,13 +2205,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
}
unsigned LoReg, HiReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
unsigned SrcReg, LoReg, HiReg;
switch (Opc) {
default: llvm_unreachable("Unknown MUL opcode!");
case X86::IMUL8r:
case X86::MUL8r:
SrcReg = LoReg = X86::AL; HiReg = X86::AH;
break;
case X86::IMUL16r:
case X86::MUL16r:
SrcReg = LoReg = X86::AX; HiReg = X86::DX;
break;
case X86::IMUL32r:
case X86::MUL32r:
SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
break;
case X86::IMUL64r:
case X86::MUL64r:
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
break;
case X86::MULX32rr:
SrcReg = X86::EDX; LoReg = HiReg = 0;
break;
case X86::MULX64rr:
SrcReg = X86::RDX; LoReg = HiReg = 0;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@ -2220,22 +2241,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
SDValue ResHi, ResLo;
if (foldedLoad) {
SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
array_lengthof(Ops));
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
Chain = SDValue(CNode, 2);
InFlag = SDValue(CNode, 3);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
array_lengthof(Ops));
Chain = SDValue(CNode, 0);
InFlag = SDValue(CNode, 1);
}
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
ReplaceUses(N1.getValue(1), Chain);
} else {
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
InFlag = SDValue(CNode, 0);
SDValue Ops[] = { N1, InFlag };
if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
array_lengthof(Ops));
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
InFlag = SDValue(CNode, 2);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 0);
}
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@ -2260,19 +2306,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
if (ResLo.getNode() == 0) {
assert(LoReg && "Register for low half is not defined!");
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
InFlag);
InFlag = ResLo.getValue(2);
}
ReplaceUses(SDValue(Node, 0), ResLo);
DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
if (ResHi.getNode() == 0) {
assert(HiReg && "Register for high half is not defined!");
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
InFlag);
InFlag = ResHi.getValue(2);
}
ReplaceUses(SDValue(Node, 1), ResHi);
DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;

View File

@ -1140,6 +1140,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
// BMI/BMI2 foldable instructions
{ X86::MULX32rr, X86::MULX32rm, 0 },
{ X86::MULX64rr, X86::MULX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {

View File

@ -0,0 +1,22 @@
; RUN: llc -mcpu=core-avx2 -march=x86 < %s | FileCheck %s
; f1: zero-extends both i32 arguments to i64 and multiplies them. The FileCheck
; directives below expect llc to emit a mulxl instruction followed by ret.
define i64 @f1(i32 %a, i32 %b) {
%x = zext i32 %a to i64
%y = zext i32 %b to i64
%r = mul i64 %x, %y
; CHECK: f1
; CHECK: mulxl
; CHECK: ret
ret i64 %r
}
; f2: same widening multiply as f1, except the second operand is loaded from %p.
; The FileCheck directives below expect mulxl with a parenthesized (memory)
; first operand, i.e. the load folded into the multiply.
define i64 @f2(i32 %a, i32* %p) {
%b = load i32* %p
%x = zext i32 %a to i64
%y = zext i32 %b to i64
%r = mul i64 %x, %y
; Anchor on this function's own name so the memory-form match below is tied to
; the code generated for f2 rather than to a leftover mention of f1.
; CHECK: f2
; CHECK: mulxl ({{.+}}), %{{.+}}, %{{.+}}
; CHECK: ret
ret i64 %r
}

View File

@ -0,0 +1,22 @@
; RUN: llc -mcpu=core-avx2 -march=x86-64 < %s | FileCheck %s
; f1: zero-extends both i64 arguments to i128 and multiplies them. The FileCheck
; directives below expect llc to emit a mulxq instruction followed by ret.
define i128 @f1(i64 %a, i64 %b) {
%x = zext i64 %a to i128
%y = zext i64 %b to i128
%r = mul i128 %x, %y
; CHECK: f1
; CHECK: mulxq
; CHECK: ret
ret i128 %r
}
; f2: same widening multiply as f1, except the second operand is loaded from %p.
; The FileCheck directives below expect mulxq with a parenthesized (memory)
; first operand, i.e. the load folded into the multiply.
define i128 @f2(i64 %a, i64* %p) {
%b = load i64* %p
%x = zext i64 %a to i128
%y = zext i64 %b to i128
%r = mul i128 %x, %y
; Anchor on this function's own name so the memory-form match below is tied to
; the code generated for f2 rather than to a leftover mention of f1.
; CHECK: f2
; CHECK: mulxq ({{.+}}), %{{.+}}, %{{.+}}
; CHECK: ret
ret i128 %r
}