diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b29674d992bb..2808b41671d1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8312,6 +8312,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        // P8 and later hardware should just use LOAD.
+        !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
         (VT == MVT::v16i8 || VT == MVT::v8i16 ||
          VT == MVT::v4i32 || VT == MVT::v4f32) &&
         LD->getAlignment() < ABIAlignment) {
@@ -9204,7 +9206,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
 
   if (VT.getSimpleVT().isVector()) {
     if (Subtarget.hasVSX()) {
-      if (VT != MVT::v2f64 && VT != MVT::v2i64)
+      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+          VT != MVT::v4f32 && VT != MVT::v4i32)
         return false;
     } else {
       return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index f30e64f95d17..4b2f5a321f80 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -63,7 +63,8 @@ let Uses = [RM] in {
 
     def LXVW4X : XX1Form<31, 780,
                          (outs vsrc:$XT), (ins memrr:$src),
-                         "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+                         "lxvw4x $XT, $src", IIC_LdStLFD,
+                         [(set v4i32:$XT, (load xoaddr:$src))]>;
   }
 
   // Store indexed instructions
@@ -80,7 +81,8 @@ let Uses = [RM] in {
 
     def STXVW4X : XX1Form<31, 908,
                          (outs), (ins vsrc:$XT, memrr:$dst),
-                         "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+                         "stxvw4x $XT, $dst", IIC_LdStSTFD,
+                         [(store v4i32:$XT, xoaddr:$dst)]>;
   }
 
   // Add/Mul Instructions
@@ -811,6 +813,13 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
 def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
           (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
 
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+          (STXVW4X $rS, xoaddr:$dst)>;
+
 } // AddedComplexity
 
 } // HasVSX
diff --git a/llvm/test/CodeGen/PowerPC/unal4-std.ll b/llvm/test/CodeGen/PowerPC/unal4-std.ll
index 9f29e31cb902..e91109911161 100644
--- a/llvm/test/CodeGen/PowerPC/unal4-std.ll
+++ b/llvm/test/CodeGen/PowerPC/unal4-std.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -22,6 +23,9 @@ if.end210:                                        ; preds = %entry
 ; a multiple of 4).
 ; CHECK: @copy_to_conceal
 ; CHECK: stdx {{[0-9]+}}, 0,
+
+; CHECK-VSX: @copy_to_conceal
+; CHECK-VSX: stxvw4x {{[0-9]+}}, 0,
 }
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll b/llvm/test/CodeGen/PowerPC/unaligned.ll
index 0c59516f1186..64c03cdda35e 100644
--- a/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -92,10 +92,14 @@ entry:
 ; CHECK-DAG: stdx
 ; CHECK: stdx
 
+; For VSX on P7, unaligned loads and stores are preferable to aligned
+; stack slots, but lvsl/vperm is better still. (On P8 lxvw4x is preferable.)
+; Using unaligned stxvw4x is preferable on both machines.
 ; CHECK-VSX: @foo6
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: stdx
-; CHECK-VSX: stdx
+; CHECK-VSX-DAG: lvsl
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX: vperm
+; CHECK-VSX: stxvw4x
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
index 3239cf6c06ab..5075ff2b8c07 100644
--- a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -16,6 +17,10 @@ entry:
 ; CHECK-LABEL: @test1
 ; CHECK: stvx 2,
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test1
+; CHECK-VSX: stxvw4x 34,
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -35,6 +40,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 112
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test2
+; CHECK-VSX: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -54,6 +66,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 128
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test3
+; CHECK-VSX: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
index 304a84d49a9d..73a4a4d395da 100644
--- a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mattr=-power8-vector | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 
diff --git a/llvm/test/CodeGen/PowerPC/vrspill.ll b/llvm/test/CodeGen/PowerPC/vrspill.ll
index c3d1bf8f1ead..b990442aed87 100644
--- a/llvm/test/CodeGen/PowerPC/vrspill.ll
+++ b/llvm/test/CodeGen/PowerPC/vrspill.ll
@@ -1,5 +1,7 @@
-; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck -check-prefix=CHECK-VSX %s
 
 ; This verifies that we generate correct spill/reload code for vector regs.
 
@@ -15,4 +17,9 @@ entry:
 
 ; CHECK: stvx 2,
 
+; We would prefer to test for "stxvw4x 34," but current -O0 code
+; needlessly generates "vor 3,2,2 / stxvw4x 35,0,3", so we'll settle for
+; the opcode.
+; CHECK-VSX: stxvw4x
+
 declare void @foo(i32*)
diff --git a/llvm/test/CodeGen/PowerPC/vsx-p8.ll b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
new file mode 100644
index 000000000000..81406b6f0798
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Unaligned loads/stores on P8 and later should use VSX where possible.
+
+define <2 x double> @test28u(<2 x double>* %a) {
+  %v = load <2 x double>* %a, align 8
+  ret <2 x double> %v
+
+; CHECK-LABEL: @test28u
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test29u(<2 x double>* %a, <2 x double> %b) {
+  store <2 x double> %b, <2 x double>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test29u
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 2f226e1f614c..65343f4a9ba6 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -356,6 +356,63 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
 ; CHECK: blr
 }
 
+define <4 x float> @test32(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 16
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 16
+  ret void
+
+; CHECK-LABEL: @test33
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK-DAG: lvsl
+; CHECK-DAG: lvx
+; CHECK-DAG: lvx
+; CHECK: vperm 2,
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i32> @test34(<4 x i32>* %a) {
+  %v = load <4 x i32>* %a, align 16
+  ret <4 x i32> %v
+
+; CHECK-LABEL: @test34
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test35(<4 x i32>* %a, <4 x i32> %b) {
+  store <4 x i32> %b, <4 x i32>* %a, align 16
+  ret void
+
+; CHECK-LABEL: @test35
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
 define <2 x double> @test40(<2 x i64> %a) {
   %v = uitofp <2 x i64> %a to <2 x double>
   ret <2 x double> %v
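
As a quick sanity check, the following standalone test is a minimal sketch of how the new v4i32 patterns can be exercised; it is not part of the patch, and the file contents, function name, and RUN line are illustrative, assuming an llc built with this change. It mirrors the @test34/@test35 cases added to vsx.ll above.

; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; With VSX enabled, an aligned v4i32 load/store round-trip should now
; select lxvw4x/stxvw4x rather than lvx/stvx.
define void @copy_v4i32(<4 x i32>* %dst, <4 x i32>* %src) {
entry:
  %v = load <4 x i32>* %src, align 16
  store <4 x i32> %v, <4 x i32>* %dst, align 16
  ret void

; CHECK-LABEL: @copy_v4i32
; CHECK: lxvw4x
; CHECK: stxvw4x
; CHECK: blr
}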