From 752ad8fde7160c51f21cfcf92d84c7f534259051 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sat, 24 Sep 2016 20:24:06 +0000
Subject: [PATCH] [x86] don't try to create a vector integer inst for an SSE1
 target (PR30512)

This bug was introduced with:
http://reviews.llvm.org/rL272511

We need to restrict the lowering to v4f32 comparisons because that's all SSE1
can handle.

This should fix:
https://llvm.org/bugs/show_bug.cgi?id=30512

llvm-svn: 282336
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  7 +--
 llvm/test/CodeGen/X86/sse1.ll           | 63 +++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 95f71eb2f884..3b4c5eaecca1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31064,9 +31064,10 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // For an SSE1-only target, lower to X86ISD::CMPP early to avoid scalarization
-  // via legalization because v4i32 is not a legal type.
-  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32)
+  // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
+  // to avoid scalarization via legalization because v4i32 is not a legal type.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
+      LHS.getValueType() == MVT::v4f32)
     return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll
index 3fb9cdb2d8f6..3ac6ea6e2b8c 100644
--- a/llvm/test/CodeGen/X86/sse1.ll
+++ b/llvm/test/CodeGen/X86/sse1.ll
@@ -148,3 +148,66 @@ define <4 x float> @PR28044(<4 x float> %a0, <4 x float> %a1) nounwind {
   ret <4 x float> %res
 }
 
+; Don't crash trying to do the impossible: SSE1 has no vector integer compare instruction, so we must scalarize.
+; https://llvm.org/bugs/show_bug.cgi?id=30512
+
+define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X32-LABEL: PR30512:
+; X32: # BB#0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: sete %cl
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: sete %dl
+; X32-NEXT: xorl %ebx, %ebx
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: sete %bl
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: sete %al
+; X32-NEXT: movl %eax, 12(%ebp)
+; X32-NEXT: movl %ebx, 8(%ebp)
+; X32-NEXT: movl %edx, 4(%ebp)
+; X32-NEXT: movl %ecx, (%ebp)
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl $4
+;
+; X64-LABEL: PR30512:
+; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %r9d, %esi
+; X64-NEXT: sete %al
+; X64-NEXT: xorl %esi, %esi
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT: sete %sil
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: sete %dl
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d
+; X64-NEXT: sete %cl
+; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: movl %edx, 8(%rdi)
+; X64-NEXT: movl %esi, 4(%rdi)
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: retq
+  %cmp = icmp eq <4 x i32> %x, %y
+  %zext = zext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %zext
+}
+
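For reference, the distinction the new guard draws can be written directly in IR:
SSE1 provides cmpps for a <4 x float> compare, but there is no packed compare for
<4 x i32>, so the integer case (as in @PR30512 above) has to be scalarized by
legalization. The following is a minimal sketch, not part of the patch; the
function names @cmp_v4f32 and @cmp_v4i32 are illustrative only.

; SSE1-only target: this fcmp can be lowered to X86ISD::CMPP (cmpps),
; which is what the guarded call to LowerVSETCC produces.
define <4 x i32> @cmp_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
  %cmp = fcmp oeq <4 x float> %a, %b
  %ext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ext
}

; SSE1-only target: no packed integer compare exists, so legalization
; scalarizes this into the cmpl/sete sequences seen in the test above.
define <4 x i32> @cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
  %cmp = icmp eq <4 x i32> %a, %b
  %ext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ext
}

Compiling a sketch like this with an SSE1-only configuration (for example,
llc -mtriple=i386-unknown-unknown -mattr=+sse,-sse2) should show the cmpps
sequence for the float case and the scalarized sequence for the integer case.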