forked from OSchip/llvm-project
[X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32imm0)) to use masked MOVSS from memory.
The same is done for f64 and for the case of a non-zero passthru value. Previously we were not trying to fold the load at all. Using a CodeGenOnly instruction allows us to use FR32X/FR64X as the register class, which avoids a bunch of COPY_TO_REGCLASS nodes. llvm-svn: 373021
This commit is contained in:
parent
f98d2c099a
commit
c898724974
|
@ -3958,6 +3958,18 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
|
|||
!strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
|
||||
"$dst {${mask}} {z}, $src}"),
|
||||
[], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
def rmk_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
|
||||
!strconcat(asm, "\t{$src, $dst {${mask}}|",
|
||||
"$dst {${mask}}, $src}"),
|
||||
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
|
||||
def rmkz_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.ScalarMemOp:$src),
|
||||
!strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
|
||||
"$dst {${mask}} {z}, $src}"),
|
||||
[], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
|
||||
}
|
||||
}
|
||||
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
|
||||
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
|
||||
|
@ -4222,16 +4234,26 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
|
|||
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
|
||||
(v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
|
||||
|
||||
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
|
||||
(VMOVSSZrmk_alt FR32X:$src0, VK1WM:$mask, addr:$src)>;
|
||||
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
|
||||
(VMOVSSZrmkz_alt VK1WM:$mask, addr:$src)>;
|
||||
|
||||
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
|
||||
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
|
||||
(v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
|
||||
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
|
||||
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
|
||||
|
||||
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
|
||||
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
|
||||
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
|
||||
(v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
|
||||
|
||||
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
|
||||
(VMOVSDZrmk_alt FR64X:$src0, VK1WM:$mask, addr:$src)>;
|
||||
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
|
||||
(VMOVSDZrmkz_alt VK1WM:$mask, addr:$src)>;
|
||||
|
||||
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
|
||||
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src1, VR128X:$src2),
|
||||
|
|
|
@ -70,9 +70,8 @@ define float @test5(float %p) #0 {
|
|||
; ALL-NEXT: retq
|
||||
; ALL-NEXT: LBB3_1: ## %if.end
|
||||
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; ALL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
|
||||
; ALL-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1}
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
%cmp = fcmp oeq float %p, 0.000000e+00
|
||||
|
|
|
@ -13,8 +13,7 @@ define float @_Z3fn2v() {
|
|||
; CHECK-NEXT: callq _Z1av
|
||||
; CHECK-NEXT: # kill: def $al killed $al def $eax
|
||||
; CHECK-NEXT: kmovd %eax, %k1
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: cmpl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: je .LBB0_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
|
|
|
@ -84,10 +84,9 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
|
|||
;
|
||||
; X64_AVX512F-LABEL: fcmp_select_fp_constants:
|
||||
; X64_AVX512F: # %bb.0:
|
||||
; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k1
|
||||
; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64_AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
|
||||
; X64_AVX512F-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1}
|
||||
; X64_AVX512F-NEXT: retq
|
||||
%c = fcmp une float %x, -4.0
|
||||
%r = select i1 %c, float 42.0, float 23.0
|
||||
|
|
Loading…
Reference in New Issue