diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index df5e16c6a54b..62577162f1d4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1575,38 +1575,32 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }
 
-  // This block control legalization of v32i1/v64i1 which are available with
+  // This block controls legalization of v32i1 which is available with
   // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
-  // useBWIRegs.
+  // useBWIRegs. v64i1 is also controlled with useBWIRegs.
   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
     addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
-    addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
 
-    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
-      setOperationAction(ISD::ADD, VT, Custom);
-      setOperationAction(ISD::SUB, VT, Custom);
-      setOperationAction(ISD::MUL, VT, Custom);
-      setOperationAction(ISD::VSELECT, VT, Expand);
-      setOperationAction(ISD::UADDSAT, VT, Custom);
-      setOperationAction(ISD::SADDSAT, VT, Custom);
-      setOperationAction(ISD::USUBSAT, VT, Custom);
-      setOperationAction(ISD::SSUBSAT, VT, Custom);
+    setOperationAction(ISD::ADD, MVT::v32i1, Custom);
+    setOperationAction(ISD::SUB, MVT::v32i1, Custom);
+    setOperationAction(ISD::MUL, MVT::v32i1, Custom);
+    setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
+    setOperationAction(ISD::UADDSAT, MVT::v32i1, Custom);
+    setOperationAction(ISD::SADDSAT, MVT::v32i1, Custom);
+    setOperationAction(ISD::USUBSAT, MVT::v32i1, Custom);
+    setOperationAction(ISD::SSUBSAT, MVT::v32i1, Custom);
 
-      setOperationAction(ISD::TRUNCATE, VT, Custom);
-      setOperationAction(ISD::SETCC, VT, Custom);
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::SELECT, VT, Custom);
-      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-    }
+    setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
+    setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
+    setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
 
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
-    setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
-    for (auto VT : { MVT::v16i1, MVT::v32i1 })
-      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i1, Custom);
 
     // Extends from v32i1 masks to 256-bit vectors.
     setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
@@ -1696,6 +1690,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
       setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
     }
+
+    // Only support v64i1 if we support v64i8. Without v64i8 we won't have any
+    // operations that can produce these values other than concatenating
+    // v32i1 vectors together. And we don't have any masked operations that
+    // need a v64i1. By only making v64i1 legal when v64i8 is legal, we avoid
+    // having to lower arbitrary shuffles of v64i1, which require legal v64i8.
+    addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+
+    setOperationAction(ISD::ADD, MVT::v64i1, Custom);
+    setOperationAction(ISD::SUB, MVT::v64i1, Custom);
+    setOperationAction(ISD::MUL, MVT::v64i1, Custom);
+    setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
+    setOperationAction(ISD::UADDSAT, MVT::v64i1, Custom);
+    setOperationAction(ISD::SADDSAT, MVT::v64i1, Custom);
+    setOperationAction(ISD::USUBSAT, MVT::v64i1, Custom);
+    setOperationAction(ISD::SSUBSAT, MVT::v64i1, Custom);
+
+    setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
+    setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
+    setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
+
+    setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i1, Custom);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 8bfd448f201c..6e256c060d29 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1116,3 +1116,38 @@ define void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min
   store <16 x i8> %f, <16 x i8>* %q
   ret void
 }
+
+define <64 x i1> @v64i1_argument_return(<64 x i1> %x) "min-legal-vector-width"="256" {
+; CHECK-LABEL: v64i1_argument_return:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    retq
+  ret <64 x i1> %x
+}
+
+define void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-vector-width"="256" {
+; CHECK-LABEL: v64i1_shuffle:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
+; CHECK-NEXT:    vmovdqa 32(%rdi), %ymm1
+; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k0
+; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
+; CHECK-NEXT:    vpmovm2b %k1, %ymm2
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; CHECK-NEXT:    vpmovb2m %ymm2, %k1
+; CHECK-NEXT:    vpmovm2b %k0, %ymm2
+; CHECK-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; CHECK-NEXT:    vpmovb2m %ymm2, %k2
+; CHECK-NEXT:    vmovdqu8 %ymm1, 32(%rsi) {%k2}
+; CHECK-NEXT:    vmovdqu8 %ymm0, (%rsi) {%k1}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = load <64 x i8>, <64 x i8>* %x
+  %b = icmp eq <64 x i8> %a, zeroinitializer
+  %shuf = shufflevector <64 x i1> %b, <64 x i1> undef, <64 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30, i32 33, i32 32, i32 35, i32 34, i32 37, i32 36, i32 39, i32 38, i32 41, i32 40, i32 43, i32 42, i32 45, i32 44, i32 47, i32 46, i32 49, i32 48, i32 51, i32 50, i32 53, i32 52, i32 55, i32 54, i32 57, i32 56, i32 59, i32 58, i32 61, i32 60, i32 63, i32 62>
+  call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %a, <64 x i8>* %y, i32 1, <64 x i1> %shuf)
+  ret void
+}
+declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
+
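
Illustration only, not part of the patch: the new comment in X86ISelLowering.cpp notes that, when only 256-bit vectors are available, a <64 x i1> value is expected to arise from concatenating two <32 x i1> masks. The sketch below shows that IR pattern under the same "min-legal-vector-width"="256" attribute used by the tests; the function name @concat_two_v32i1 and its arguments are hypothetical and do not appear in min-legal-vector-width.ll.

; Two 256-bit compares each produce a <32 x i1> mask; a shufflevector with an
; identity mask concatenates them into one <64 x i1> (CONCAT_VECTORS in the DAG).
define <64 x i1> @concat_two_v32i1(<32 x i8> %a, <32 x i8> %b) "min-legal-vector-width"="256" {
  %ma = icmp eq <32 x i8> %a, zeroinitializer
  %mb = icmp eq <32 x i8> %b, zeroinitializer
  %m = shufflevector <32 x i1> %ma, <32 x i1> %mb, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <64 x i1> %m
}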