forked from OSchip/llvm-project
[X86] Update the broadcast fallback patterns to use shuffle instructions from the appropriate execution domain.
llvm-svn: 293603
This commit is contained in:
parent
88b0a47312
commit
06e038c6de
|
@@ -8314,15 +8314,15 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
|
||||
(VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
|
||||
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
|
||||
(VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
|
||||
(VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
|
||||
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
|
||||
(VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_xmm),
|
||||
(VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), 1)>;
|
||||
|
||||
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
|
||||
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
|
||||
|
@@ -8336,7 +8336,7 @@ let Predicates = [HasAVX1Only] in {
|
|||
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
|
||||
|
||||
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
|
||||
(VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
|
||||
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@@ -49,7 +49,7 @@ define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
|
|||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: vmovq %rax, %xmm0
|
||||
; X64-NEXT: movq %rax, (%rsi)
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@@ -114,7 +114,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
|
|||
; X32-NEXT: movl (%ecx), %ecx
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: movl %ecx, (%eax)
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
@@ -123,7 +123,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
|
|||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: vmovd %eax, %xmm0
|
||||
; X64-NEXT: movl %eax, (%rsi)
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@@ -167,7 +167,7 @@ define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone s
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vmovsd %xmm0, (%eax)
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
@@ -175,7 +175,7 @@ define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone s
|
|||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X64-NEXT: vmovsd %xmm0, (%rsi)
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@@ -237,17 +237,17 @@ define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp
|
|||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovd %xmm0, (%eax)
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovss %xmm0, (%eax)
|
||||
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: D3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: vmovd %xmm0, (%rsi)
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: vmovss %xmm0, (%rsi)
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@@ -291,16 +291,16 @@ define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp
|
|||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovd %xmm0, (%eax)
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovss %xmm0, (%eax)
|
||||
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: e2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: vmovd %xmm0, (%rsi)
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: vmovss %xmm0, (%rsi)
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%q = load float, float* %ptr, align 4
|
||||
|
|
Loading…
Reference in New Issue