forked from OSchip/llvm-project
[x86] fix FCOPYSIGN lowering to create constants instead of ConstantPool loads
This is similar to https://reviews.llvm.org/rL279958. By not prematurely lowering to loads, we should be able to more easily eliminate the 'or with zero' instructions seen in copysign-constant-magnitude.ll. We should also be able to extend this code to handle vectors. llvm-svn: 282312
This commit is contained in:
parent
100f99a94c
commit
0b36337d61
|
@ -14633,28 +14633,22 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
LLVMContext *Context = DAG.getContext();
|
||||
SDValue Mag = Op.getOperand(0);
|
||||
SDValue Sign = Op.getOperand(1);
|
||||
SDLoc dl(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT SignVT = Sign.getSimpleValueType();
|
||||
bool IsF128 = (VT == MVT::f128);
|
||||
|
||||
// If the sign operand is smaller, extend it first.
|
||||
if (SignVT.bitsLT(VT)) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
if (Sign.getSimpleValueType().bitsLT(VT))
|
||||
Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);
|
||||
SignVT = VT;
|
||||
}
|
||||
|
||||
// And if it is bigger, shrink it first.
|
||||
if (SignVT.bitsGT(VT)) {
|
||||
if (Sign.getSimpleValueType().bitsGT(VT))
|
||||
Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl));
|
||||
SignVT = VT;
|
||||
}
|
||||
|
||||
// At this point the operands and the result should have the same
|
||||
// type, and that won't be f80 since that is not custom lowered.
|
||||
bool IsF128 = (VT == MVT::f128);
|
||||
assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
|
||||
"Unexpected type in LowerFCOPYSIGN");
|
||||
|
||||
|
@ -14663,61 +14657,46 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
|||
(IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
|
||||
const unsigned SizeInBits = VT.getSizeInBits();
|
||||
|
||||
SmallVector<Constant *, 4> CV(
|
||||
VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
|
||||
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
|
||||
// Perform all logic operations as 16-byte vectors because there are no
|
||||
// scalar FP logic instructions in SSE.
|
||||
MVT LogicVT =
|
||||
(VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
|
||||
SDValue SignMask = DAG.getConstantFP(
|
||||
APFloat(Sem, APInt::getSignBit(SizeInBits)), dl, LogicVT);
|
||||
|
||||
// First, clear all bits but the sign bit from the second operand (sign).
|
||||
CV[0] = ConstantFP::get(*Context,
|
||||
APFloat(Sem, APInt::getSignBit(SizeInBits)));
|
||||
Constant *C = ConstantVector::get(CV);
|
||||
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
|
||||
SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
|
||||
|
||||
// Perform all logic operations as 16-byte vectors because there are no
|
||||
// scalar FP logic instructions in SSE. This allows load folding of the
|
||||
// constants into the logic instructions.
|
||||
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
|
||||
SDValue SignMask =
|
||||
DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
|
||||
/* Alignment = */ 16);
|
||||
if (!IsF128)
|
||||
Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
|
||||
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
|
||||
|
||||
// Next, clear the sign bit from the first operand (magnitude).
|
||||
// If it's a constant, we can clear it here.
|
||||
SDValue MagMask = DAG.getConstantFP(
|
||||
APFloat(Sem, ~APInt::getSignBit(SizeInBits)), dl, LogicVT);
|
||||
|
||||
// FIXME: This check shouldn't be necessary. Logic instructions with constant
|
||||
// operands should be folded!
|
||||
SDValue MagBits;
|
||||
if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
|
||||
APFloat APF = Op0CN->getValueAPF();
|
||||
// If the magnitude is a positive zero, the sign bit alone is enough.
|
||||
if (APF.isPosZero())
|
||||
return IsF128 ? SignBit :
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SignVT, SignBit,
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, SignBit,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
APF.clearSign();
|
||||
CV[0] = ConstantFP::get(*Context, APF);
|
||||
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
|
||||
} else {
|
||||
CV[0] = ConstantFP::get(*Context,
|
||||
APFloat(Sem, ~APInt::getSignBit(SizeInBits)));
|
||||
}
|
||||
C = ConstantVector::get(CV);
|
||||
CPIdx = DAG.getConstantPool(C, PtrVT, 16);
|
||||
SDValue MagMask =
|
||||
DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
|
||||
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
|
||||
/* Alignment = */ 16);
|
||||
// If the magnitude operand wasn't a constant, we need to AND out the sign.
|
||||
SDValue MagBits = MagMask;
|
||||
if (!isa<ConstantFPSDNode>(Mag)) {
|
||||
// If the magnitude operand wasn't a constant, we need to AND out the sign.
|
||||
if (!IsF128)
|
||||
Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
|
||||
MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
|
||||
}
|
||||
|
||||
// OR the magnitude value with the sign bit.
|
||||
SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
|
||||
return IsF128 ? Or :
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SignVT, Or,
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
|
|
|
@ -3,11 +3,11 @@
|
|||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; The assertions are *enhanced* from update_test_checks.ll to include
|
||||
; the constant load values because those are important.
|
||||
; the constant load values because those are important.
|
||||
|
||||
; CHECK: [[SIGNMASK1:L.+]]:
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
|
||||
define double @mag_pos0_double(double %x) nounwind {
|
||||
; CHECK-LABEL: mag_pos0_double:
|
||||
|
@ -21,15 +21,15 @@ define double @mag_pos0_double(double %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK2:L.+]]:
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
; CHECK: [[ZERO2:L.+]]:
|
||||
; CHECK-NEXT: .space 16
|
||||
|
||||
define double @mag_neg0_double(double %x) nounwind {
|
||||
; CHECK-LABEL: mag_neg0_double:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andps [[SIGNMASK2]](%rip), %xmm0
|
||||
; CHECK-NEXT: orps [[ZERO2]](%rip), %xmm0
|
||||
; CHECK-NEXT: movsd [[SIGNMASK2]](%rip), %xmm1
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; CHECK-NEXT: andps %xmm0, %xmm1
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%y = call double @copysign(double -0.0, double %x)
|
||||
|
@ -38,16 +38,17 @@ define double @mag_neg0_double(double %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK3:L.+]]:
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK: [[ONE3:L.+]]:
|
||||
; CHECK-NEXT: .quad 4607182418800017408 ## double 1
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
|
||||
define double @mag_pos1_double(double %x) nounwind {
|
||||
; CHECK-LABEL: mag_pos1_double:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andps [[SIGNMASK3]](%rip), %xmm0
|
||||
; CHECK-NEXT: orps [[ONE3]](%rip), %xmm0
|
||||
; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1
|
||||
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%y = call double @copysign(double 1.0, double %x)
|
||||
|
@ -56,10 +57,10 @@ define double @mag_pos1_double(double %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK4:L.+]]:
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
|
||||
; CHECK: [[ONE4:L.+]]:
|
||||
; CHECK-NEXT: .quad 4607182418800017408 ## double 1
|
||||
; CHECK-NEXT: .quad 0 ## double 0
|
||||
; CHECK-NEXT: .quad 4607182418800017408 ## double 1
|
||||
|
||||
define double @mag_neg1_double(double %x) nounwind {
|
||||
; CHECK-LABEL: mag_neg1_double:
|
||||
|
@ -74,9 +75,7 @@ define double @mag_neg1_double(double %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK5:L.+]]:
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
|
||||
define float @mag_pos0_float(float %x) nounwind {
|
||||
; CHECK-LABEL: mag_pos0_float:
|
||||
|
@ -90,17 +89,15 @@ define float @mag_pos0_float(float %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK6:L.+]]:
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK: [[ZERO6:L.+]]:
|
||||
; CHECK-NEXT: .space 16
|
||||
|
||||
define float @mag_neg0_float(float %x) nounwind {
|
||||
; CHECK-LABEL: mag_neg0_float:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andps [[SIGNMASK6]](%rip), %xmm0
|
||||
; CHECK-NEXT: orps [[ZERO6]](%rip), %xmm0
|
||||
; CHECK-NEXT: movss [[SIGNMASK6]](%rip), %xmm1
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
|
||||
; CHECK-NEXT: andps %xmm0, %xmm1
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%y = call float @copysignf(float -0.0, float %x)
|
||||
|
@ -109,20 +106,17 @@ define float @mag_neg0_float(float %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK7:L.+]]:
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK: [[ONE7:L.+]]:
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
|
||||
define float @mag_pos1_float(float %x) nounwind {
|
||||
; CHECK-LABEL: mag_pos1_float:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andps [[SIGNMASK7]](%rip), %xmm0
|
||||
; CHECK-NEXT: orps [[ONE7]](%rip), %xmm0
|
||||
; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%y = call float @copysignf(float 1.0, float %x)
|
||||
|
@ -131,14 +125,12 @@ define float @mag_pos1_float(float %x) nounwind {
|
|||
|
||||
; CHECK: [[SIGNMASK8:L.+]]:
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 2147483648 ## float -0
|
||||
; CHECK: [[ONE8:L.+]]:
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 0 ## float 0
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1
|
||||
|
||||
define float @mag_neg1_float(float %x) nounwind {
|
||||
; CHECK-LABEL: mag_neg1_float:
|
||||
|
|
|
@ -351,8 +351,10 @@ cleanup: ; preds = %entry, %if.then
|
|||
;
|
||||
; X64-LABEL: TestTruncCopysign:
|
||||
; X64: callq __trunctfdf2
|
||||
; X64-NEXT: movsd {{.*}}, %xmm1
|
||||
; X64-NEXT: movlhps %xmm1, %xmm1
|
||||
; X64-NEXT: andps {{.*}}, %xmm0
|
||||
; X64-NEXT: orps {{.*}}, %xmm0
|
||||
; X64-NEXT: orps %xmm1, %xmm0
|
||||
; X64-NEXT: callq __extenddftf2
|
||||
; X64: retq
|
||||
}
|
||||
|
|
|
@ -30,8 +30,10 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
|
|||
define float @pr26070() {
|
||||
; CHECK-LABEL: pr26070:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: andps {{.*}}(%rip), %xmm1
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%c = call float @copysignf(float 1.0, float undef) readnone
|
||||
|
|
Loading…
Reference in New Issue