forked from OSchip/llvm-project
Lift alignment restrictions for load/store folding on VINSERTF128/VEXTRACTF128. Fixes PR17268.
llvm-svn: 190916
This commit is contained in:
parent
c4d7c82c7f
commit
98064b9f4d
|
@ -7716,11 +7716,11 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
|||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
|
||||
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
|
||||
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
|
||||
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
|
@ -7744,22 +7744,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
|||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
|
||||
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
|
||||
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
|
||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||
(bc_v4i32 (loadv2i64 addr:$src2)),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
|
||||
(bc_v16i8 (memopv2i64 addr:$src2)),
|
||||
(bc_v16i8 (loadv2i64 addr:$src2)),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
|
||||
(bc_v8i16 (memopv2i64 addr:$src2)),
|
||||
(bc_v8i16 (loadv2i64 addr:$src2)),
|
||||
(iPTR imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||
|
@ -7791,12 +7791,12 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
|||
(v4f64 VR256:$src1),
|
||||
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||
|
||||
def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
|
||||
(iPTR imm))), addr:$dst),
|
||||
def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
|
||||
(iPTR imm))), addr:$dst),
|
||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||
def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
|
||||
(iPTR imm))), addr:$dst),
|
||||
def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
|
||||
(iPTR imm))), addr:$dst),
|
||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ static cl::opt<int>
|
|||
"number "));
|
||||
namespace {
|
||||
|
||||
static const unsigned MinVecRegSize = 128;
|
||||
static const unsigned MinVecRegSize = 256;
|
||||
|
||||
static const unsigned RecursionMaxDepth = 12;
|
||||
|
||||
|
|
|
@ -251,8 +251,6 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
|
|||
; CHECK: swap8doubles
|
||||
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
|
||||
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
|
||||
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
|
||||
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
|
||||
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
|
||||
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
|
||||
; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
|
||||
|
|
|
@ -462,6 +462,7 @@ static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
|
|||
DisableLoopUnrolling : OptLevel == 0;
|
||||
|
||||
Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
|
||||
Builder.SLPVectorize = true;
|
||||
|
||||
Builder.populateFunctionPassManager(FPM);
|
||||
Builder.populateModulePassManager(MPM);
|
||||
|
|
Loading…
Reference in New Issue