[AVX-512] Add masked forms of the alternate MOVDDUP patterns.

I'm not too sure how to get isel to select even all of the unmasked forms, but at least we have a consistent set now.

llvm-svn: 291368
This commit is contained in:
Craig Topper 2017-01-07 22:20:23 +00:00
parent 66b5441c86
commit da84ff3ed4
2 changed files with 52 additions and 0 deletions

View File

@ -8646,6 +8646,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//

View File

@ -686,3 +686,33 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x doub
%res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
ret <2 x double> %res
}
define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vblendmpd (%rdi){1to2}, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: retq
%q = load double, double* %x, align 1
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
%mask.cast = bitcast i8 %mask to <8 x i1>
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
ret <2 x double> %res
}
define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
%q = load double, double* %x, align 1
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
%mask.cast = bitcast i8 %mask to <8 x i1>
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
ret <2 x double> %res
}