forked from OSchip/llvm-project
[AVX-512] Add masked forms of the alternate MOVDDUP patterns.
I'm not sure how to get isel to select even the unmasked forms in every case, but at least the set of patterns is consistent now.
llvm-svn: 291368
This commit is contained in:
parent
66b5441c86
commit
da84ff3ed4
|
@ -8646,6 +8646,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
|||
(VMOVDDUPZ128rm addr:$src)>;
|
||||
// Unmasked register form: a v2f64 X86VBroadcast of a scalar f64 is selected
// as VMOVDDUPZ128rr. The scalar lives in FR64X, so it is first copied into the
// VR128X register class that the instruction's source operand uses.
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
|
||||
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
// Merge-masked memory form: a vselect under a v2i1 mask between X86Movddup of
// a full v2f64 load and the pass-through value $src0 is selected as
// VMOVDDUPZ128rmk, with the load address used directly as the memory operand.
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
// Zero-masked memory form: same X86Movddup-of-load as the true operand, but
// the false operand of the vselect is an all-zeros vector (written as a
// bitconvert of v4i32 immAllZerosV), so VMOVDDUPZ128rmkz is selected and no
// pass-through register operand is needed.
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
|
||||
// Merge-masked register form: a vselect under a v2i1 mask between a v2f64
// X86VBroadcast of a scalar f64 and the pass-through value $src0 is selected
// as VMOVDDUPZ128rrk. The FR64X scalar is copied into VR128X to serve as the
// instruction's vector source operand.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
|
||||
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
// Zero-masked register form: the vselect's false operand is an all-zeros
// vector (bitconvert of v4i32 immAllZerosV), so the broadcast of the scalar
// f64 is selected as VMOVDDUPZ128rrkz. Only the mask and the source, copied
// from FR64X into VR128X, are passed to the instruction.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
// Merge-masked memory form for a broadcast of a loaded scalar: a vselect under
// a v2i1 mask between a v2f64 X86VBroadcast of a loadf64 and the pass-through
// value $src0 is selected as VMOVDDUPZ128rmk, using the scalar load's address
// as the memory operand.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
// Zero-masked memory form for a broadcast of a loaded scalar: the vselect's
// false operand is an all-zeros vector (bitconvert of v4i32 immAllZerosV), so
// the X86VBroadcast of a loadf64 is selected as VMOVDDUPZ128rmkz with only the
// mask and the load address as operands.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -686,3 +686,33 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x doub
|
|||
%res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Merge-masked splat of a loaded double: loads one double from %x, inserts it
; into both elements of a <2 x double>, and selects per element between that
; splat and %passthru using two i1 lanes taken from the i8 %mask.
; The expected code moves the mask into %k1 with kmovb and then uses a masked
; vblendmpd whose memory operand is broadcast with {1to2}.
; NOTE(review): the function name mentions v4f32/v2f32, but the body only uses
; double and <2 x double> - presumably carried over from a neighbouring test;
; confirm before renaming.
define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
|
||||
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1
|
||||
; CHECK-NEXT: vblendmpd (%rdi){1to2}, %xmm0, %xmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%q = load double, double* %x, align 1
|
||||
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
|
||||
|
||||
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
||||
; Reinterpret the i8 mask as eight i1 lanes, then keep lanes 0 and 1 as the
; <2 x i1> condition for the select below.
|
||||
%mask.cast = bitcast i8 %mask to <8 x i1>
|
||||
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
||||
%res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Zero-masked splat of a loaded double: loads one double from %x, inserts it
; into both elements of a <2 x double>, and selects per element between that
; splat and zeroinitializer using two i1 lanes taken from the i8 %mask.
; The expected code moves the mask into %k1 with kmovb and then uses a
; zero-masked ({%k1} {z}) vmovddup that reads its source from memory.
; NOTE(review): the function name mentions v4f32/v2f32, but the body only uses
; double and <2 x double> - presumably carried over from a neighbouring test;
; confirm before renaming.
define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
|
||||
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: kmovb %esi, %k1
|
||||
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
|
||||
; CHECK-NEXT: retq
|
||||
%q = load double, double* %x, align 1
|
||||
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
|
||||
|
||||
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
||||
; Reinterpret the i8 mask as eight i1 lanes, then keep lanes 0 and 1 as the
; <2 x i1> condition for the select below.
|
||||
%mask.cast = bitcast i8 %mask to <8 x i1>
|
||||
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
||||
%res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue