forked from OSchip/llvm-project
AVX-512: fixed a bug in getLoadStoreRegOpcode() for the AVX-512 target
llvm-svn: 191818
This commit is contained in:
parent
375d6c1ee0
commit
34586e7d41
|
@ -1353,7 +1353,7 @@ let Predicates = [HasAVX512] in {
|
|||
// 256-bit types
|
||||
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
|
||||
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
|
||||
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
|
||||
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
|
||||
|
@ -1371,7 +1371,7 @@ let Predicates = [HasAVX512] in {
|
|||
FR64X:$src)), sub_xmm)>;
|
||||
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
|
||||
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
|
||||
|
||||
// Move low f64 and clear high bits.
|
||||
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
|
||||
|
|
|
@ -3104,14 +3104,11 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
|
|||
if (X86::VK8RegClass.hasSubClassEq(RC) ||
|
||||
X86::VK16RegClass.hasSubClassEq(RC))
|
||||
return load ? X86::KMOVWkm : X86::KMOVWmk;
|
||||
|
||||
if (X86::FR32XRegClass.hasSubClassEq(RC))
|
||||
if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
|
||||
return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
|
||||
if (X86::FR64XRegClass.hasSubClassEq(RC))
|
||||
if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
|
||||
return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
|
||||
if (X86::VR128XRegClass.hasSubClassEq(RC) ||
|
||||
X86::VR256XRegClass.hasSubClassEq(RC) ||
|
||||
X86::VR512RegClass.hasSubClassEq(RC))
|
||||
if (X86::VR512RegClass.hasSubClassEq(RC))
|
||||
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
|
||||
}
|
||||
|
||||
|
|
|
@ -116,3 +116,12 @@ define <4 x i32> @test14(i32 %x) {
|
|||
%res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32>%res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test15
|
||||
; CHECK: vmovdz (%rdi)
|
||||
; CHECK: ret
|
||||
define <4 x i32> @test15(i32* %x) {
|
||||
%y = load i32* %x, align 4
|
||||
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
|
||||
ret <4 x i32>%res
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue