From 34586e7d4147e0074e244e8db18f550ecc2079e3 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 2 Oct 2013 12:20:42 +0000 Subject: [PATCH] AVX-512: fixed a bug in getLoadStoreRegOpcode() for AVX-512 target llvm-svn: 191818 --- llvm/lib/Target/X86/X86InstrAVX512.td | 4 ++-- llvm/lib/Target/X86/X86InstrInfo.cpp | 9 +++------ llvm/test/CodeGen/X86/avx512-mov.ll | 9 +++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index aae262d516de..38d728c9f0ce 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1353,7 +1353,7 @@ let Predicates = [HasAVX512] in { // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; @@ -1371,7 +1371,7 @@ let Predicates = [HasAVX512] in { FR64X:$src)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>; + (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 80d681a13c64..6216627a9112 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3104,14 +3104,11 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, if (X86::VK8RegClass.hasSubClassEq(RC) || X86::VK16RegClass.hasSubClassEq(RC)) return load ? X86::KMOVWkm : X86::KMOVWmk; - - if (X86::FR32XRegClass.hasSubClassEq(RC)) + if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC)) return load ? X86::VMOVSSZrm : X86::VMOVSSZmr; - if (X86::FR64XRegClass.hasSubClassEq(RC)) + if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC)) return load ? X86::VMOVSDZrm : X86::VMOVSDZmr; - if (X86::VR128XRegClass.hasSubClassEq(RC) || - X86::VR256XRegClass.hasSubClassEq(RC) || - X86::VR512RegClass.hasSubClassEq(RC)) + if (X86::VR512RegClass.hasSubClassEq(RC)) return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } diff --git a/llvm/test/CodeGen/X86/avx512-mov.ll b/llvm/test/CodeGen/X86/avx512-mov.ll index c44107be2c9d..6c5c586afd0d 100644 --- a/llvm/test/CodeGen/X86/avx512-mov.ll +++ b/llvm/test/CodeGen/X86/avx512-mov.ll @@ -116,3 +116,12 @@ define <4 x i32> @test14(i32 %x) { %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 ret <4 x i32>%res } + +; CHECK-LABEL: @test15 +; CHECK: vmovdz (%rdi) +; CHECK: ret +define <4 x i32> @test15(i32* %x) { + %y = load i32* %x, align 4 + %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 + ret <4 x i32>%res +}