forked from OSchip/llvm-project
It turns out that when the ".with.overflow" intrinsics were added to the X86
backend, they were all implemented except umul. This one fell back to the default implementation, which did a hi/lo multiply and compared the top. Fix this to check the overflow flag that the 'mul' instruction sets, so we can avoid an explicit test. Now we compile:

    void *func(long count) {
      return new int[count];
    }

into:

    __Z4funcl:                  ## @_Z4funcl
      movl   $4, %ecx           ## encoding: [0xb9,0x04,0x00,0x00,0x00]
      movq   %rdi, %rax         ## encoding: [0x48,0x89,0xf8]
      mulq   %rcx               ## encoding: [0x48,0xf7,0xe1]
      seto   %cl                ## encoding: [0x0f,0x90,0xc1]
      testb  %cl, %cl           ## encoding: [0x84,0xc9]
      movq   $-1, %rdi          ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
      cmoveq %rax, %rdi         ## encoding: [0x48,0x0f,0x44,0xf8]
      jmp    __Znam             ## TAILCALL

instead of:

    __Z4funcl:                  ## @_Z4funcl
      movl   $4, %ecx           ## encoding: [0xb9,0x04,0x00,0x00,0x00]
      movq   %rdi, %rax         ## encoding: [0x48,0x89,0xf8]
      mulq   %rcx               ## encoding: [0x48,0xf7,0xe1]
      testq  %rdx, %rdx         ## encoding: [0x48,0x85,0xd2]
      movq   $-1, %rdi          ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
      cmoveq %rax, %rdi         ## encoding: [0x48,0x0f,0x44,0xf8]
      jmp    __Znam             ## TAILCALL

Other than the silly seto+test, this is using the o bit directly, so it's going in the right direction.

llvm-svn: 120935
This commit is contained in:
parent
183ddd8ed3
commit
364bb0a081
|
@ -1600,6 +1600,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||||
return RetVal;
|
return RetVal;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case X86ISD::UMUL: {
|
||||||
|
SDValue N0 = Node->getOperand(0);
|
||||||
|
SDValue N1 = Node->getOperand(1);
|
||||||
|
|
||||||
|
unsigned LoReg, HiReg;
|
||||||
|
switch (NVT.getSimpleVT().SimpleTy) {
|
||||||
|
default: llvm_unreachable("Unsupported VT!");
|
||||||
|
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break;
|
||||||
|
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break;
|
||||||
|
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break;
|
||||||
|
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||||
|
N0, SDValue()).getValue(1);
|
||||||
|
|
||||||
|
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
|
||||||
|
SDValue Ops[] = {N1, InFlag};
|
||||||
|
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
|
||||||
|
|
||||||
|
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
|
||||||
|
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
|
||||||
|
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
case ISD::SMUL_LOHI:
|
case ISD::SMUL_LOHI:
|
||||||
case ISD::UMUL_LOHI: {
|
case ISD::UMUL_LOHI: {
|
||||||
|
@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||||
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
|
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
|
||||||
array_lengthof(Ops));
|
array_lengthof(Ops));
|
||||||
InFlag = SDValue(CNode, 1);
|
InFlag = SDValue(CNode, 1);
|
||||||
|
|
||||||
// Update the chain.
|
// Update the chain.
|
||||||
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
||||||
} else {
|
} else {
|
||||||
InFlag =
|
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag);
|
||||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
|
InFlag = SDValue(CNode, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||||
|
|
|
@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
|
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
||||||
setOperationAction(ISD::SMULO, MVT::i32, Custom);
|
setOperationAction(ISD::SMULO, MVT::i32, Custom);
|
||||||
|
setOperationAction(ISD::UMULO, MVT::i32, Custom);
|
||||||
|
|
||||||
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
|
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
|
||||||
// handle type legalization for these operations here.
|
// handle type legalization for these operations here.
|
||||||
|
@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
|
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
|
||||||
setOperationAction(ISD::USUBO, MVT::i64, Custom);
|
setOperationAction(ISD::USUBO, MVT::i64, Custom);
|
||||||
setOperationAction(ISD::SMULO, MVT::i64, Custom);
|
setOperationAction(ISD::SMULO, MVT::i64, Custom);
|
||||||
|
setOperationAction(ISD::UMULO, MVT::i64, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Subtarget->is64Bit()) {
|
if (!Subtarget->is64Bit()) {
|
||||||
|
@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
return NewSetCC;
|
return NewSetCC;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for "(setcc) == / != 1" to avoid unncessary setcc.
|
// Look for "(setcc) == / != 1" to avoid unnecessary setcc.
|
||||||
if (Op0.getOpcode() == X86ISD::SETCC &&
|
if (Op0.getOpcode() == X86ISD::SETCC &&
|
||||||
Op1.getOpcode() == ISD::Constant &&
|
Op1.getOpcode() == ISD::Constant &&
|
||||||
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
|
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
|
||||||
|
@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue RHS = N->getOperand(1);
|
SDValue RHS = N->getOperand(1);
|
||||||
unsigned BaseOp = 0;
|
unsigned BaseOp = 0;
|
||||||
unsigned Cond = 0;
|
unsigned Cond = 0;
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
|
||||||
switch (Op.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
default: llvm_unreachable("Unknown ovf instruction!");
|
default: llvm_unreachable("Unknown ovf instruction!");
|
||||||
case ISD::SADDO:
|
case ISD::SADDO:
|
||||||
|
@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||||
BaseOp = X86ISD::SMUL;
|
BaseOp = X86ISD::SMUL;
|
||||||
Cond = X86::COND_O;
|
Cond = X86::COND_O;
|
||||||
break;
|
break;
|
||||||
case ISD::UMULO:
|
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
|
||||||
BaseOp = X86ISD::UMUL;
|
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
|
||||||
Cond = X86::COND_B;
|
MVT::i32);
|
||||||
break;
|
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
|
||||||
|
|
||||||
|
SDValue SetCC =
|
||||||
|
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||||
|
DAG.getConstant(X86::COND_O, MVT::i32),
|
||||||
|
SDValue(Sum.getNode(), 2));
|
||||||
|
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||||
|
return Sum;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also sets EFLAGS.
|
// Also sets EFLAGS.
|
||||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
|
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
|
||||||
SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
|
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
|
||||||
|
|
||||||
SDValue SetCC =
|
SDValue SetCC =
|
||||||
DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
|
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
|
||||||
DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
|
DAG.getConstant(Cond, MVT::i32),
|
||||||
|
SDValue(Sum.getNode(), 1));
|
||||||
|
|
||||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||||
return Sum;
|
return Sum;
|
||||||
|
|
|
@ -200,10 +200,12 @@ namespace llvm {
|
||||||
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
|
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
|
||||||
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
|
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
|
||||||
|
|
||||||
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
|
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
|
||||||
ADD, SUB, SMUL, UMUL,
|
ADD, SUB, SMUL,
|
||||||
INC, DEC, OR, XOR, AND,
|
INC, DEC, OR, XOR, AND,
|
||||||
|
|
||||||
|
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
|
||||||
|
|
||||||
// MUL_IMM - X86 specific multiply by immediate.
|
// MUL_IMM - X86 specific multiply by immediate.
|
||||||
MUL_IMM,
|
MUL_IMM,
|
||||||
|
|
||||||
|
|
|
@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
|
||||||
|
|
||||||
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
|
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
|
||||||
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
|
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
|
||||||
"mul{l}\t$src",
|
"mul{l}\t$src", // EAX,EDX = EAX*GR32
|
||||||
[]>; // EAX,EDX = EAX*GR32
|
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
|
||||||
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
|
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
|
||||||
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
|
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
|
||||||
"mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
|
"mul{q}\t$src", // RAX,RDX = RAX*GR64
|
||||||
|
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
|
||||||
|
|
||||||
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
|
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
|
||||||
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
|
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
|
||||||
|
|
|
@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
|
||||||
[SDTCisSameAs<0, 2>,
|
[SDTCisSameAs<0, 2>,
|
||||||
SDTCisSameAs<0, 3>,
|
SDTCisSameAs<0, 3>,
|
||||||
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||||
|
// RES1, RES2, FLAGS = op LHS, RHS
|
||||||
|
def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
|
||||||
|
[SDTCisSameAs<0, 1>,
|
||||||
|
SDTCisSameAs<0, 2>,
|
||||||
|
SDTCisSameAs<0, 3>,
|
||||||
|
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||||
def SDTX86BrCond : SDTypeProfile<0, 3,
|
def SDTX86BrCond : SDTypeProfile<0, 3,
|
||||||
[SDTCisVT<0, OtherVT>,
|
[SDTCisVT<0, OtherVT>,
|
||||||
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
|
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
|
||||||
|
@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
|
||||||
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
|
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
|
||||||
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
|
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
|
||||||
[SDNPCommutative]>;
|
[SDNPCommutative]>;
|
||||||
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
|
def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
|
||||||
[SDNPCommutative]>;
|
[SDNPCommutative]>;
|
||||||
|
|
||||||
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
|
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
|
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||||
|
|
||||||
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
||||||
define i1 @a(i32 %x) zeroext nounwind {
|
define i1 @a(i32 %x) zeroext nounwind {
|
||||||
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
|
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
|
||||||
%obil = extractvalue {i32, i1} %res, 1
|
%obil = extractvalue {i32, i1} %res, 1
|
||||||
ret i1 %obil
|
ret i1 %obil
|
||||||
|
|
||||||
|
; CHECK: a:
|
||||||
|
; CHECK: mull
|
||||||
|
; CHECK: seto %al
|
||||||
|
; CHECK: movzbl %al, %eax
|
||||||
|
; CHECK: ret
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue