diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d528e4921978..956f57d97f0d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37137,21 +37137,27 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
                        DAG.getBitcast(MVT::i16, N0.getOperand(0)));
 
-  // Combine (bitcast (vbroadcast_load)) -> (vbroadcast_load). The memory VT
-  // determines the number of bits loaded.
+  // Canonicalize (bitcast (vbroadcast_load)) so that the output of the bitcast
+  // and the vbroadcast_load are both integer or both fp. In some cases this
+  // will remove the bitcast entirely.
   if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
-      VT.getScalarSizeInBits() == SrcVT.getScalarSizeInBits()) {
+      VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
     auto *BCast = cast<MemIntrinsicSDNode>(N0);
-    assert(VT.getScalarSizeInBits() == BCast->getMemoryVT().getSizeInBits() &&
-           "Unexpected load size!");
-    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
+    unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
+    MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize)
+                                     : MVT::getIntegerVT(MemSize);
+    MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize)
+                                      : MVT::getIntegerVT(SrcVTSize);
+    LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements());
+
+    SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
     SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
     SDValue ResNode =
         DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
-                                VT.getVectorElementType(),
-                                BCast->getMemOperand());
+                                MemVT, BCast->getMemOperand());
     DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
-    return ResNode;
+    return DAG.getBitcast(VT, ResNode);
   }
 
   // Since MMX types are special and don't usually play with other vector types,
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index b645098582ff..879a1643325d 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -2697,9 +2697,9 @@ define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x
 ; X86:       # %bb.0:
 ; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-NEXT:    vpbroadcastq (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x10]
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vmovdqa32 %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xca]
+; X86-NEXT:    vbroadcasti32x2 (%eax), %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x59,0x08]
+; X86-NEXT:    # ymm1 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
 ; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
 ; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
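Note (not part of the patch): a minimal standalone sketch of the type computation the
new combine performs, using LLVM's MVT helpers. The function name
recastBroadcastLoadTypes is hypothetical, introduced only to illustrate the logic.

// Sketch only; assumes it mirrors the combine above, it is not patch code.
// Given the bitcast result type VT and the vbroadcast_load result type SrcVT,
// rebuild the load's result and memory types in VT's domain (integer vs.
// floating point) so the trailing bitcast can often fold away.
#include "llvm/Support/MachineValueType.h"
#include <utility>

using namespace llvm;

static std::pair<MVT, MVT>
recastBroadcastLoadTypes(MVT VT, MVT SrcVT, MVT OldMemVT) { // hypothetical helper
  unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
  unsigned MemSize = OldMemVT.getScalarSizeInBits();
  // Scalar memory type in VT's domain, e.g. i32 -> f32 when VT is fp.
  MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize)
                                   : MVT::getIntegerVT(MemSize);
  // Element type of the new load result, keeping SrcVT's element width.
  MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize)
                                    : MVT::getIntegerVT(SrcVTSize);
  return {MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements()), MemVT};
}

// e.g. VT = v4f32, SrcVT = v4i32, OldMemVT = i32 yields LoadVT = v4f32 and
// MemVT = f32, so the final DAG.getBitcast(VT, ResNode) is a no-op and the
// bitcast disappears, as the new comment in the combine describes.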