AVX-512: fixed a bug in getLoadStoreRegOpcode() for AVX-512 target

llvm-svn: 191818
This commit is contained in:
Elena Demikhovsky 2013-10-02 12:20:42 +00:00
parent 375d6c1ee0
commit 34586e7d41
3 changed files with 14 additions and 8 deletions

View File

@@ -1353,7 +1353,7 @@ let Predicates = [HasAVX512] in {
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
@@ -1371,7 +1371,7 @@ let Predicates = [HasAVX512] in {
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),

View File

@@ -3104,14 +3104,11 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
if (X86::VK8RegClass.hasSubClassEq(RC) ||
X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
if (X86::FR32XRegClass.hasSubClassEq(RC))
if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
if (X86::FR64XRegClass.hasSubClassEq(RC))
if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
if (X86::VR128XRegClass.hasSubClassEq(RC) ||
X86::VR256XRegClass.hasSubClassEq(RC) ||
X86::VR512RegClass.hasSubClassEq(RC))
if (X86::VR512RegClass.hasSubClassEq(RC))
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}

View File

@@ -116,3 +116,12 @@ define <4 x i32> @test14(i32 %x) {
%res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
ret <4 x i32>%res
}
; test15: load an i32 from memory and insert it into lane 0 of an all-zero
; <4 x i32>. The directives below expect a vmovdz whose source operand is the
; memory location (%rdi), followed by ret.
; CHECK-LABEL: @test15
; CHECK: vmovdz (%rdi)
; CHECK: ret
define <4 x i32> @test15(i32* %x) {
%y = load i32* %x, align 4
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
ret <4 x i32>%res
}