diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 908053e1342d..c8eebc1b88c2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8646,6 +8646,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
           (VMOVDDUPZ128rm addr:$src)>;
 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
+                           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-masked.ll b/llvm/test/CodeGen/X86/vector-shuffle-masked.ll
index 04d6b3733246..d57dc7194e90 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-masked.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-masked.ll
@@ -686,3 +686,33 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x doub
   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
   ret <2 x double> %res
 }
+
+define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vblendmpd (%rdi){1to2}, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %q = load double, double* %x, align 1
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
+  ret <2 x double> %res
+}
+
+define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
+; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
+; CHECK-NEXT:    retq
+  %q = load double, double* %x, align 1
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
+  ret <2 x double> %res
+}