diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7dce32766633..ff0f41eaf3c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1445,6 +1445,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FTRUNC, VT, Legal);
       setOperationAction(ISD::FRINT, VT, Legal);
       setOperationAction(ISD::FNEARBYINT, VT, Legal);
+
+      setOperationAction(ISD::SELECT, VT, Custom);
     }
 
     // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
@@ -1464,13 +1466,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
     setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
 
-    setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
-    setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
-    setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
-    setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
-    setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
-    setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
-
     for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
       setOperationAction(ISD::SMAX, VT, Legal);
       setOperationAction(ISD::UMAX, VT, Legal);
@@ -1484,6 +1479,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::ROTL, VT, Custom);
      setOperationAction(ISD::ROTR, VT, Custom);
       setOperationAction(ISD::SETCC, VT, Custom);
+      setOperationAction(ISD::SELECT, VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1704,6 +1700,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SADDSAT, VT, Legal);
       setOperationAction(ISD::USUBSAT, VT, Legal);
       setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::SELECT, VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index ed69a48a352a..e2f8215d3567 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -433,3 +433,107 @@ define <16 x i16> @pr31515(<16 x i1> %a, <16 x i1> %b, <16 x i16> %c) nounwind {
   ret <16 x i16> %res
 }
 
+define <32 x i16> @pr42355_v32i16(i1 %c, <32 x i16> %x, <32 x i16> %y) {
+; X86-AVX512F-LABEL: pr42355_v32i16:
+; X86-AVX512F:       # %bb.0:
+; X86-AVX512F-NEXT:    pushl %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX512F-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX512F-NEXT:    movl %esp, %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX512F-NEXT:    andl $-32, %esp
+; X86-AVX512F-NEXT:    subl $32, %esp
+; X86-AVX512F-NEXT:    testb $1, 8(%ebp)
+; X86-AVX512F-NEXT:    jne .LBB14_2
+; X86-AVX512F-NEXT:  # %bb.1:
+; X86-AVX512F-NEXT:    vmovaps 40(%ebp), %ymm1
+; X86-AVX512F-NEXT:    vmovaps %ymm2, %ymm0
+; X86-AVX512F-NEXT:  .LBB14_2:
+; X86-AVX512F-NEXT:    movl %ebp, %esp
+; X86-AVX512F-NEXT:    popl %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX512F-NEXT:    retl
+;
+; X64-AVX512F-LABEL: pr42355_v32i16:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    testb $1, %dil
+; X64-AVX512F-NEXT:    jne .LBB14_2
+; X64-AVX512F-NEXT:  # %bb.1:
+; X64-AVX512F-NEXT:    vmovaps %ymm2, %ymm0
+; X64-AVX512F-NEXT:    vmovaps %ymm3, %ymm1
+; X64-AVX512F-NEXT:  .LBB14_2:
+; X64-AVX512F-NEXT:    retq
+;
+; X86-AVX512BW-LABEL: pr42355_v32i16:
+; X86-AVX512BW:       # %bb.0:
+; X86-AVX512BW-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-AVX512BW-NEXT:    jne .LBB14_2
+; X86-AVX512BW-NEXT:  # %bb.1:
+; X86-AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
+; X86-AVX512BW-NEXT:  .LBB14_2:
+; X86-AVX512BW-NEXT:    retl
+;
+; X64-AVX512BW-LABEL: pr42355_v32i16:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    testb $1, %dil
+; X64-AVX512BW-NEXT:    jne .LBB14_2
+; X64-AVX512BW-NEXT:  # %bb.1:
+; X64-AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
+; X64-AVX512BW-NEXT:  .LBB14_2:
+; X64-AVX512BW-NEXT:    retq
+  %a = select i1 %c, <32 x i16> %x, <32 x i16> %y
+  ret <32 x i16> %a
+}
+
+define <64 x i8> @pr42355_v64i8(i1 %c, <64 x i8> %x, <64 x i8> %y) {
+; X86-AVX512F-LABEL: pr42355_v64i8:
+; X86-AVX512F:       # %bb.0:
+; X86-AVX512F-NEXT:    pushl %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX512F-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX512F-NEXT:    movl %esp, %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX512F-NEXT:    andl $-32, %esp
+; X86-AVX512F-NEXT:    subl $32, %esp
+; X86-AVX512F-NEXT:    testb $1, 8(%ebp)
+; X86-AVX512F-NEXT:    jne .LBB15_2
+; X86-AVX512F-NEXT:  # %bb.1:
+; X86-AVX512F-NEXT:    vmovaps 40(%ebp), %ymm1
+; X86-AVX512F-NEXT:    vmovaps %ymm2, %ymm0
+; X86-AVX512F-NEXT:  .LBB15_2:
+; X86-AVX512F-NEXT:    movl %ebp, %esp
+; X86-AVX512F-NEXT:    popl %ebp
+; X86-AVX512F-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX512F-NEXT:    retl
+;
+; X64-AVX512F-LABEL: pr42355_v64i8:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    testb $1, %dil
+; X64-AVX512F-NEXT:    jne .LBB15_2
+; X64-AVX512F-NEXT:  # %bb.1:
+; X64-AVX512F-NEXT:    vmovaps %ymm2, %ymm0
+; X64-AVX512F-NEXT:    vmovaps %ymm3, %ymm1
+; X64-AVX512F-NEXT:  .LBB15_2:
+; X64-AVX512F-NEXT:    retq
+;
+; X86-AVX512BW-LABEL: pr42355_v64i8:
+; X86-AVX512BW:       # %bb.0:
+; X86-AVX512BW-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-AVX512BW-NEXT:    jne .LBB15_2
+; X86-AVX512BW-NEXT:  # %bb.1:
+; X86-AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
+; X86-AVX512BW-NEXT:  .LBB15_2:
+; X86-AVX512BW-NEXT:    retl
+;
+; X64-AVX512BW-LABEL: pr42355_v64i8:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    testb $1, %dil
+; X64-AVX512BW-NEXT:    jne .LBB15_2
+; X64-AVX512BW-NEXT:  # %bb.1:
+; X64-AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
+; X64-AVX512BW-NEXT:  .LBB15_2:
+; X64-AVX512BW-NEXT:    retq
+  %a = select i1 %c, <64 x i8> %x, <64 x i8> %y
+  ret <64 x i8> %a
+}
+
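
For context on what the new CHECK lines verify: the scalar-condition select is lowered to a test-and-branch over plain register moves (testb + jne + vmovaps) rather than being scalarized. Without AVX512BW, v32i16/v64i8 are not legal register types and the operands are split into 256-bit halves, hence the ymm register pairs in the AVX512F checks; with AVX512BW a single zmm move suffices. A minimal hand-written LLVM IR sketch of the branch-diamond form the checks correspond to (the function name @select_as_branch is illustrative, not part of the patch):

define <32 x i16> @select_as_branch(i1 %c, <32 x i16> %x, <32 x i16> %y) {
entry:
  ; Branch on the scalar condition: fall through with %x when %c is set,
  ; so no per-element work is needed on either path.
  br i1 %c, label %done, label %take_y

take_y:                                       ; %c was false, so use %y
  br label %done

done:
  %a = phi <32 x i16> [ %x, %entry ], [ %y, %take_y ]
  ret <32 x i16> %a
}

Feeding either this form or the select form from the tests to llc with -mattr=+avx512f should produce the same test/branch/move sequence shown in the X64-AVX512F checks above.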