forked from OSchip/llvm-project
X86: Constant fold converting vector setcc results to float.
Since the result of a SETCC for X86 is 0 or -1 in each lane, we can move unary operations, in this case [su]int_to_fp, through the mask operation and constant fold the operation away. Generally speaking:

  UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2)

where constant2 is UNARYOP(constant).

This implements the transform where UNARYOP is [su]int_to_fp.

For example, consider the simple function:

  define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
    %cmp = fcmp oeq <4 x float> %val, %test
    %ext = zext <4 x i1> %cmp to <4 x i32>
    %result = sitofp <4 x i32> %ext to <4 x float>
    ret <4 x float> %result
  }

Before this change, the SSE code is generated as:

  LCPI0_0:
    .long 1 ## 0x1
    .long 1 ## 0x1
    .long 1 ## 0x1
    .long 1 ## 0x1
    .section __TEXT,__text,regular,pure_instructions
    .globl _foo
    .align 4, 0x90
  _foo: ## @foo
    cmpeqps %xmm1, %xmm0
    andps LCPI0_0(%rip), %xmm0
    cvtdq2ps %xmm0, %xmm0
    retq

After, the code is improved to:

  LCPI0_0:
    .long 1065353216 ## float 1.000000e+00
    .long 1065353216 ## float 1.000000e+00
    .long 1065353216 ## float 1.000000e+00
    .long 1065353216 ## float 1.000000e+00
    .section __TEXT,__text,regular,pure_instructions
    .globl _foo
    .align 4, 0x90
  _foo: ## @foo
    cmpeqps %xmm1, %xmm0
    andps LCPI0_0(%rip), %xmm0
    retq

The cvtdq2ps has been constant folded away and the floating point 1.0f vector lanes are materialized directly via the ModRM operand of andps.

llvm-svn: 213342
This commit is contained in:
parent
f7502c4884
commit
b6535c32f5
|
@ -21847,8 +21847,59 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to fold a vector unary operation into the constant mask of a
/// compare-and-mask pattern.
///
/// Take advantage of vector comparisons producing 0 or -1 in each lane to
/// optimize away the operation when it's applied to a constant.
///
/// The general transformation is:
///    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
///       AND(VECTOR_CMP(x,y), constant2)
///    constant2 = UNARYOP(constant)
///
/// \param N   The unary operation node; its opcode is reused to build
///            constant2.
/// \param DAG The DAG in which the replacement nodes are created.
/// \returns The replacement value (of N's result type), or an empty SDValue
///          if the pattern does not match.
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
                                                         SelectionDAG &DAG) {
  // Early exit if this isn't a vector operation.
  EVT VT = N->getValueType(0);
  if (!VT.isVector())
    return SDValue();

  // Early exit if the operand of the unary operation isn't a bitwise AND
  // whose first operand is a vector comparison.
  SDValue Masked = N->getOperand(0);
  if (Masked->getOpcode() != ISD::AND ||
      Masked->getOperand(0)->getOpcode() != ISD::SETCC)
    return SDValue();

  // The rewrite below bitcasts between the result type of the unary operation
  // and the integer type of the AND. A bitcast cannot change the total number
  // of bits, so bail out when the unary operation changes the vector width
  // (e.g. <4 x i32> -> <4 x double>).
  if (VT.getSizeInBits() != Masked->getValueType(0).getSizeInBits())
    return SDValue();

  // Now check that the other operand of the AND is a constant splat. We could
  // make the transformation for non-constant splats as well, but it's unclear
  // that would be a benefit as it would not eliminate any operations, just
  // perform one more step in scalar code before moving to the vector unit.
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Masked->getOperand(1));
  if (!BV)
    return SDValue();

  // Bail out if the vector isn't a constant splat.
  if (!BV->getConstantSplatNode())
    return SDValue();

  // Everything checks out. Build up the new and improved node.
  SDLoc DL(N);
  EVT IntVT = BV->getValueType(0);

  // Create a new constant of the appropriate type for the transformed DAG by
  // applying the unary operation directly to the constant vector.
  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));

  // The AND node needs bitcasts to/from an integer vector type around it.
  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
  SDValue NewAnd =
      DAG.getNode(ISD::AND, DL, IntVT, Masked->getOperand(0), MaskConst);
  return DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
}
|
||||
|
||||
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86TargetLowering *XTLI) {
|
||||
// First try to optimize away the conversion entirely when it's
|
||||
// conditionally from a constant. Vectors only.
|
||||
SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
|
||||
if (Res != SDValue())
|
||||
return Res;
|
||||
|
||||
// Now move on to more general possibilities.
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
EVT InVT = Op0->getValueType(0);
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||
|
||||
; Checks the code generated for a sitofp of a zero-extended <4 x float>
; compare result: the constant-pool entry LCPI0_0 must hold four copies of
; 1.0f (0x3F800000 == 1065353216), and the function body must be exactly
; cmpeqps / andps / retq, i.e. no conversion instruction may appear between
; the andps and the return.
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
|
||||
; CHECK-LABEL: LCPI0_0
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
|
||||
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: cmpeqps %xmm1, %xmm0
|
||||
; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
%cmp = fcmp oeq <4 x float> %val, %test
|
||||
%ext = zext <4 x i1> %cmp to <4 x i32>
|
||||
%result = sitofp <4 x i32> %ext to <4 x float>
|
||||
ret <4 x float> %result
|
||||
}
|
Loading…
Reference in New Issue