X86: Do not select X86 custom vector nodes if operand types don't match

X86ISD::ADDSUB, X86ISD::(F)HADD, X86ISD::(F)HSUB should not be selected if the operand types do not match the result type because vector type legalization cannot deal with this for custom nodes. Testcase X86ISD::ADDSUB is attached. I could not create a testcase for the FHADD/FHSUB cases because of: https://llvm.org/bugs/show_bug.cgi?id=23296 Differential Revision: http://reviews.llvm.org/D9120 llvm-svn: 235367
2015-04-21 01:13:41 +00:00 · 2015-04-21 01:13:41 +00:00 · b6b5aaad98
parent 396db88958
commit b6b5aaad98
2 changed files with 30 additions and 5 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -5269,11 +5269,17 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
    unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();

    if (i * 2 < NumElts) {
-      if (V0.getOpcode() == ISD::UNDEF)
+      if (V0.getOpcode() == ISD::UNDEF) {
        V0 = Op0.getOperand(0);
+        if (V0.getValueType() != VT)
+          return false;
+      }
    } else {
-      if (V1.getOpcode() == ISD::UNDEF)
+      if (V1.getOpcode() == ISD::UNDEF) {
        V1 = Op0.getOperand(0);
+        if (V1.getValueType() != VT)
+          return false;
+      }
      if (i * 2 == NumElts)
        ExpectedVExtractIdx = BaseIdx;
    }
@ -5423,10 +5429,16 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
      SubFound = true;

    // Update InVec0 and InVec1.
-    if (InVec0.getOpcode() == ISD::UNDEF)
+    if (InVec0.getOpcode() == ISD::UNDEF) {
      InVec0 = Op0.getOperand(0);
-    if (InVec1.getOpcode() == ISD::UNDEF)
+      if (InVec0.getValueType() != VT)
+        return SDValue();
+    }
+    if (InVec1.getOpcode() == ISD::UNDEF) {
      InVec1 = Op1.getOperand(0);
+      if (InVec1.getValueType() != VT)
+        return SDValue();
+    }

    // Make sure that operands in input to each add/sub node always
    // come from a same pair of vectors.
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@ -315,4 +315,17 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NOT: addsubps
 ; CHECK: ret

-
+define <2 x float> @test_v2f32(<2 x float> %v0, <2 x float> %v1) {
+  %v2 = extractelement <2 x float> %v0, i32 0
+  %v3 = extractelement <2 x float> %v1, i32 0
+  %v4 = extractelement <2 x float> %v0, i32 1
+  %v5 = extractelement <2 x float> %v1, i32 1
+  %sub = fsub float %v2, %v3
+  %add = fadd float %v5, %v4
+  %res0 = insertelement <2 x float> undef, float %sub, i32 0
+  %res1 = insertelement <2 x float> %res0, float %add, i32 1
+  ret <2 x float> %res1
+}
+; CHECK-LABEL: test_v2f32
+; CHECK: addsubps %xmm1, %xmm0
+; CHECK-NEXT: retq