forked from OSchip/llvm-project
[Power9] Add exploitation of non-permuting memory ops
This patch corresponds to review: https://reviews.llvm.org/D19825 The new lxvx/stxvx instructions do not require swaps to line the elements up correctly. In order to select them over the lxvd2x/lxvw4x instructions, which require swaps, the patterns for the old instructions have a predicate that ensures they won't be selected on Power9 and newer CPUs. llvm-svn: 282143
This commit is contained in:
parent
2ce2ab3a4d
commit
6e7879c5e6
|
@ -33,6 +33,11 @@ static cl::opt<bool>
|
|||
FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
|
||||
cl::desc("Use full register names when printing assembly"));
|
||||
|
||||
// Useful for testing purposes. Prints vs{32-63} as v{0-31} respectively.
|
||||
static cl::opt<bool>
|
||||
ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false),
|
||||
cl::desc("Prints full register names with vs{31-63} as v{0-31}"));
|
||||
|
||||
#define PRINT_ALIAS_INSTR
|
||||
#include "PPCGenAsmWriter.inc"
|
||||
|
||||
|
@ -462,6 +467,14 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
|||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
if (Op.isReg()) {
|
||||
const char *RegName = getRegisterName(Op.getReg());
|
||||
if (ShowVSRNumsAsVR) {
|
||||
unsigned RegNum = Op.getReg();
|
||||
if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31)
|
||||
O << 'v' << RegNum - PPC::VSH0;
|
||||
else
|
||||
O << RegName;
|
||||
return;
|
||||
}
|
||||
// The Linux and AIX assemblers do not take register prefixes.
|
||||
if (!isDarwinSyntax())
|
||||
RegName = stripRegisterPrefix(RegName);
|
||||
|
|
|
@ -10734,10 +10734,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
}
|
||||
|
||||
// For little endian, VSX stores require generating xxswapd/stxvd2x.
|
||||
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
|
||||
EVT VT = N->getOperand(1).getValueType();
|
||||
if (VT.isSimple()) {
|
||||
MVT StoreVT = VT.getSimpleVT();
|
||||
if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
|
||||
if (Subtarget.needsSwapsForVSXMemOps() &&
|
||||
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
|
||||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
|
||||
return expandVSXStoreForLE(N, DCI);
|
||||
|
@ -10749,9 +10750,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
EVT VT = LD->getValueType(0);
|
||||
|
||||
// For little endian, VSX loads require generating lxvd2x/xxswapd.
|
||||
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
|
||||
if (VT.isSimple()) {
|
||||
MVT LoadVT = VT.getSimpleVT();
|
||||
if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
|
||||
if (Subtarget.needsSwapsForVSXMemOps() &&
|
||||
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
|
||||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
|
||||
return expandVSXLoadForLE(N, DCI);
|
||||
|
@ -11066,7 +11068,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
break;
|
||||
case ISD::INTRINSIC_W_CHAIN: {
|
||||
// For little endian, VSX loads require generating lxvd2x/xxswapd.
|
||||
if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
|
||||
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
|
||||
if (Subtarget.needsSwapsForVSXMemOps()) {
|
||||
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
||||
default:
|
||||
break;
|
||||
|
@ -11079,7 +11082,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
}
|
||||
case ISD::INTRINSIC_VOID: {
|
||||
// For little endian, VSX stores require generating xxswapd/stxvd2x.
|
||||
if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
|
||||
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
|
||||
if (Subtarget.needsSwapsForVSXMemOps()) {
|
||||
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -273,6 +273,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
|||
case PPC::RESTORE_CRBIT:
|
||||
case PPC::LVX:
|
||||
case PPC::LXVD2X:
|
||||
case PPC::LXVX:
|
||||
case PPC::QVLFDX:
|
||||
case PPC::QVLFSXs:
|
||||
case PPC::QVLFDXb:
|
||||
|
@ -302,6 +303,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
|||
case PPC::SPILL_CRBIT:
|
||||
case PPC::STVX:
|
||||
case PPC::STXVD2X:
|
||||
case PPC::STXVX:
|
||||
case PPC::QVSTFDX:
|
||||
case PPC::QVSTFSXs:
|
||||
case PPC::QVSTFDXb:
|
||||
|
@ -1008,7 +1010,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
|
|||
FrameIdx));
|
||||
NonRI = true;
|
||||
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
|
||||
unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X;
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op))
|
||||
.addReg(SrcReg,
|
||||
getKillRegState(isKill)),
|
||||
FrameIdx));
|
||||
|
@ -1129,7 +1132,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
|
|||
FrameIdx));
|
||||
NonRI = true;
|
||||
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
|
||||
unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX : PPC::LXVD2X;
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg),
|
||||
FrameIdx));
|
||||
NonRI = true;
|
||||
} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
|
||||
|
|
|
@ -92,6 +92,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
|
|||
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
|
||||
def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
|
||||
def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
|
||||
def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
|
||||
|
||||
let Predicates = [HasVSX] in {
|
||||
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
||||
|
@ -105,6 +106,7 @@ let Uses = [RM] in {
|
|||
"lxsdx $XT, $src", IIC_LdStLFD,
|
||||
[(set f64:$XT, (load xoaddr:$src))]>;
|
||||
|
||||
let Predicates = [HasVSX, HasOnlySwappingMemOps] in
|
||||
def LXVD2X : XX1Form<31, 844,
|
||||
(outs vsrc:$XT), (ins memrr:$src),
|
||||
"lxvd2x $XT, $src", IIC_LdStLFD,
|
||||
|
@ -114,6 +116,7 @@ let Uses = [RM] in {
|
|||
(outs vsrc:$XT), (ins memrr:$src),
|
||||
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
|
||||
|
||||
let Predicates = [HasVSX, HasOnlySwappingMemOps] in
|
||||
def LXVW4X : XX1Form<31, 780,
|
||||
(outs vsrc:$XT), (ins memrr:$src),
|
||||
"lxvw4x $XT, $src", IIC_LdStLFD,
|
||||
|
@ -127,6 +130,7 @@ let Uses = [RM] in {
|
|||
"stxsdx $XT, $dst", IIC_LdStSTFD,
|
||||
[(store f64:$XT, xoaddr:$dst)]>;
|
||||
|
||||
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
|
||||
def STXVD2X : XX1Form<31, 972,
|
||||
(outs), (ins vsrc:$XT, memrr:$dst),
|
||||
"stxvd2x $XT, $dst", IIC_LdStSTFD,
|
||||
|
@ -136,7 +140,7 @@ let Uses = [RM] in {
|
|||
(outs), (ins vsrc:$XT, memrr:$dst),
|
||||
"stxvw4x $XT, $dst", IIC_LdStSTFD,
|
||||
[(store v4i32:$XT, xoaddr:$dst)]>;
|
||||
|
||||
}
|
||||
} // mayStore
|
||||
|
||||
// Add/Mul Instructions
|
||||
|
@ -948,6 +952,7 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
|
|||
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
|
||||
|
||||
// Loads.
|
||||
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
|
||||
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
|
||||
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
|
||||
def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
|
||||
|
@ -960,6 +965,7 @@ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
|
|||
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
|
||||
(STXVW4X $rS, xoaddr:$dst)>;
|
||||
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
|
||||
}
|
||||
|
||||
// Permutes.
|
||||
def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
|
||||
|
@ -2185,7 +2191,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
|
||||
|
||||
// Load Vector Indexed
|
||||
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>;
|
||||
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
|
||||
[(set v2f64:$XT, (load xoaddr:$src))]>;
|
||||
|
||||
// Load Vector (Left-justified) with Length
|
||||
def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>;
|
||||
|
@ -2221,7 +2228,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
|
||||
|
||||
// Store Vector Indexed
|
||||
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>;
|
||||
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
|
||||
[(store v2f64:$XT, xoaddr:$dst)]>;
|
||||
|
||||
// Store Vector (Left-justified) with Length
|
||||
def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
|
||||
|
@ -2282,4 +2290,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
|
||||
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
|
||||
} // IsLittleEndian, HasP9Vector
|
||||
|
||||
def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
|
||||
def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
|
||||
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
|
||||
def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
|
||||
def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
|
||||
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
|
||||
(STXVX $rS, xoaddr:$dst)>;
|
||||
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
|
||||
(STXVX $rS, xoaddr:$dst)>;
|
||||
} // end HasP9Vector, AddedComplexity
|
||||
|
|
|
@ -277,6 +277,9 @@ public:
|
|||
bool hasFloat128() const { return HasFloat128; }
|
||||
bool isISA3_0() const { return IsISA3_0; }
|
||||
bool useLongCalls() const { return UseLongCalls; }
|
||||
// Returns true only when all three conditions hold: the subtarget has VSX,
// is little endian, and does not have the Power9 vector feature
// (hasP9Vector()). Per the name, this is the case where VSX vector
// loads/stores need accompanying swaps.
bool needsSwapsForVSXMemOps() const {
|
||||
return hasVSX() && isLittleEndian() && !hasP9Vector();
|
||||
}
|
||||
|
||||
POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; }
|
||||
|
||||
|
|
|
@ -1,4 +1,13 @@
|
|||
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr8 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-unknown < %s \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd
|
||||
|
||||
; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 -mattr=-power9-vector \
|
||||
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test() {
|
||||
entry:
|
||||
|
@ -17,6 +26,8 @@ entry:
|
|||
; CHECK: lwa [[REG0:[0-9]+]],
|
||||
; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]]
|
||||
; CHECK: xxswapd [[REG1]], [[REG1]]
|
||||
; CHECK-P9: lwa [[REG0:[0-9]+]],
|
||||
; CHECK-P9: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]]
|
||||
store <4 x i32> %4, <4 x i32>* %j, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -972,10 +972,10 @@ define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) {
|
|||
entry:
|
||||
; CHECK-LABEL: insertVarF
|
||||
; CHECK: stxsspx 1,
|
||||
; CHECK: lxvd2x
|
||||
; CHECK: lxvx
|
||||
; CHECK-BE-LABEL: insertVarF
|
||||
; CHECK-BE: stxsspx 1,
|
||||
; CHECK-BE: lxvw4x
|
||||
; CHECK-BE: lxvx
|
||||
%vecins = insertelement <4 x float> %a, float %f, i32 %el
|
||||
ret <4 x float> %vecins
|
||||
}
|
||||
|
@ -983,10 +983,10 @@ define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) {
|
|||
entry:
|
||||
; CHECK-LABEL: insertVarI
|
||||
; CHECK: stwx
|
||||
; CHECK: lxvd2x
|
||||
; CHECK: lxvx
|
||||
; CHECK-BE-LABEL: insertVarI
|
||||
; CHECK-BE: stwx
|
||||
; CHECK-BE: lxvw4x
|
||||
; CHECK-BE: lxvx
|
||||
%vecins = insertelement <4 x i32> %a, i32 %i, i32 %el
|
||||
ret <4 x i32> %vecins
|
||||
}
|
||||
|
|
|
@ -1,9 +1,32 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-P9 \
|
||||
; RUN: --implicit-check-not xxswapd
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
|
||||
; RUN: --implicit-check-not xxswapd
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -mattr=-power9-vector < %s | FileCheck %s \
|
||||
; RUN: -check-prefix=CHECK-LE
|
||||
|
||||
@x = common global <1 x i128> zeroinitializer, align 16
|
||||
@y = common global <1 x i128> zeroinitializer, align 16
|
||||
|
@ -31,6 +54,11 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
|
|||
; CHECK-LE: vadduqm 2, 2, 3
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-P9-LABEL: @v1i128_increment_by_one
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: vadduqm 2, 2, 3
|
||||
; CHECK-P9: blr
|
||||
|
||||
; CHECK-BE-LABEL: @v1i128_increment_by_one
|
||||
; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd
|
||||
|
@ -171,6 +199,11 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind {
|
|||
; CHECK-LE: bl v1i128_increment_by_one
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-P9-LABEL: @call_v1i128_increment_by_one
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: bl v1i128_increment_by_one
|
||||
; CHECK-P9: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_v1i128_increment_by_one
|
||||
; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||
|
@ -198,6 +231,12 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
|
|||
; CHECK-LE: bl v1i128_increment_by_val
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-P9-LABEL: @call_v1i128_increment_by_val
|
||||
; CHECK-P9-DAG: lxvx 34
|
||||
; CHECK-P9-DAG: lxvx 35
|
||||
; CHECK-P9: bl v1i128_increment_by_val
|
||||
; CHECK-P9: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_v1i128_increment_by_val
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,17 @@
|
|||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefix=NOOPTSWAP %s
|
||||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
|
||||
; RUN: -check-prefix=NOOPTSWAP %s
|
||||
|
||||
; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr < %s | FileCheck \
|
||||
; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd %s
|
||||
|
||||
; RUN: llc -O3 -mcpu=pwr9 -disable-ppc-vsx-swap-removal -mattr=-power9-vector \
|
||||
; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
|
||||
; RUN: | FileCheck -check-prefix=NOOPTSWAP %s
|
||||
|
||||
; This test was generated from the following source:
|
||||
;
|
||||
|
@ -98,6 +110,7 @@ for.end:
|
|||
; CHECK-LABEL: @foo
|
||||
; CHECK-NOT: xxpermdi
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK-P9-NOT: xxpermdi
|
||||
|
||||
; CHECK: lxvd2x
|
||||
; CHECK: lxvd2x
|
||||
|
@ -127,7 +140,6 @@ for.end:
|
|||
; CHECK: vmuluwm
|
||||
; CHECK: stxvd2x
|
||||
|
||||
|
||||
; NOOPTSWAP-LABEL: @foo
|
||||
|
||||
; NOOPTSWAP: lxvd2x
|
||||
|
@ -145,3 +157,29 @@ for.end:
|
|||
; NOOPTSWAP-DAG: stxvd2x
|
||||
; NOOPTSWAP: stxvd2x
|
||||
|
||||
; CHECK-P9-LABEL: @foo
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9: lxvx
|
||||
; CHECK-P9-DAG: lxvx
|
||||
; CHECK-P9-DAG: lxvx
|
||||
; CHECK-P9-DAG: lxvx
|
||||
; CHECK-P9-DAG: vadduwm
|
||||
; CHECK-P9-DAG: vadduwm
|
||||
; CHECK-P9-DAG: vadduwm
|
||||
; CHECK-P9-DAG: vadduwm
|
||||
; CHECK-P9: vmuluwm
|
||||
; CHECK-P9: vmuluwm
|
||||
; CHECK-P9: vmuluwm
|
||||
; CHECK-P9-DAG: vmuluwm
|
||||
; CHECK-P9-DAG: stxvx
|
||||
; CHECK-P9: stxvx
|
||||
; CHECK-P9: stxvx
|
||||
; CHECK-P9: stxvx
|
||||
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \
|
||||
; RUN: --implicit-check-not xxswapd
|
||||
|
||||
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
|
||||
; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s
|
||||
|
||||
; These tests verify that VSX swap optimization works when loading a scalar
|
||||
; into a vector register.
|
||||
|
@ -24,6 +32,13 @@ entry:
|
|||
; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
|
||||
; CHECK: stxvd2x [[REG5]]
|
||||
|
||||
; CHECK-P9-LABEL: @bar0
|
||||
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
|
||||
; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
|
||||
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
|
||||
; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
|
||||
; CHECK-P9: stxvx [[REG5]]
|
||||
|
||||
define void @bar1() {
|
||||
entry:
|
||||
%0 = load <2 x double>, <2 x double>* @x, align 16
|
||||
|
@ -40,3 +55,10 @@ entry:
|
|||
; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
|
||||
; CHECK: stxvd2x [[REG5]]
|
||||
|
||||
; CHECK-P9-LABEL: @bar1
|
||||
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
|
||||
; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]]
|
||||
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
|
||||
; CHECK-P9: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
|
||||
; CHECK-P9: stxvx [[REG5]]
|
||||
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
|
||||
; RUN: --check-prefix=CHECK-P9
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -O2 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
|
||||
@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
|
||||
|
@ -16,59 +24,84 @@
|
|||
define void @test1() {
|
||||
entry:
|
||||
; CHECK-LABEL: test1
|
||||
; CHECK-P9-LABEL: test1
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%0 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vsi to i8*))
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <4 x i32> %0, <4 x i32>* @res_vsi, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vui to i8*))
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <4 x i32> %1, <4 x i32>* @res_vui, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x float>* @vf to i8*))
|
||||
%3 = bitcast <4 x i32> %2 to <4 x float>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <4 x float> %3, <4 x float>* @res_vf, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vsll to i8*))
|
||||
%5 = bitcast <2 x double> %4 to <2 x i64>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <2 x i64> %5, <2 x i64>* @res_vsll, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%6 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vull to i8*))
|
||||
%7 = bitcast <2 x double> %6 to <2 x i64>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <2 x i64> %7, <2 x i64>* @res_vull, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%8 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x double>* @vd to i8*))
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
store <2 x double> %8, <2 x double>* @res_vd, align 16
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%9 = load <4 x i32>, <4 x i32>* @vsi, align 16
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %9, i8* bitcast (<4 x i32>* @res_vsi to i8*))
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%10 = load <4 x i32>, <4 x i32>* @vui, align 16
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %10, i8* bitcast (<4 x i32>* @res_vui to i8*))
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%11 = load <4 x float>, <4 x float>* @vf, align 16
|
||||
%12 = bitcast <4 x float> %11 to <4 x i32>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %12, i8* bitcast (<4 x float>* @res_vf to i8*))
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%13 = load <2 x i64>, <2 x i64>* @vsll, align 16
|
||||
%14 = bitcast <2 x i64> %13 to <2 x double>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %14, i8* bitcast (<2 x i64>* @res_vsll to i8*))
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%15 = load <2 x i64>, <2 x i64>* @vull, align 16
|
||||
%16 = bitcast <2 x i64> %15 to <2 x double>
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %16, i8* bitcast (<2 x i64>* @res_vull to i8*))
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-P9-DAG: lxvx
|
||||
%17 = load <2 x double>, <2 x double>* @vd, align 16
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-P9-DAG: stxvx
|
||||
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %17, i8* bitcast (<2 x double>* @res_vd to i8*))
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -1,18 +1,28 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64-unknown-linux-gnu < %s > %t
|
||||
; RUN: grep lxvw4x < %t | count 3
|
||||
; RUN: grep lxvd2x < %t | count 3
|
||||
; RUN: grep stxvw4x < %t | count 3
|
||||
; RUN: grep stxvd2x < %t | count 3
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O0 -fast-isel=1 -mtriple=powerpc64-unknown-linux-gnu < %s > %t
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
|
||||
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s > %t
|
||||
; RUN: grep lxvw4x < %t | count 3
|
||||
; RUN: grep lxvd2x < %t | count 3
|
||||
; RUN: grep stxvw4x < %t | count 3
|
||||
; RUN: grep stxvd2x < %t | count 3
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O0 -fast-isel=1 \
|
||||
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s > %t
|
||||
; RUN: grep lxvw4x < %t | count 3
|
||||
; RUN: grep lxvd2x < %t | count 3
|
||||
; RUN: grep stxvw4x < %t | count 3
|
||||
; RUN: grep stxvd2x < %t | count 3
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
|
||||
; RUN: grep lxvd2x < %t | count 6
|
||||
; RUN: grep stxvd2x < %t | count 6
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
|
||||
; RUN: grep lxvx < %t | count 6
|
||||
; RUN: grep stxvx < %t | count 6
|
||||
|
||||
|
||||
@vsi = global <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, align 16
|
||||
@vui = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
|
||||
@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
|
||||
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
|
||||
|
||||
@uca = global <16 x i8> zeroinitializer, align 16
|
||||
@ucb = global <16 x i8> zeroinitializer, align 16
|
||||
@sca = global <16 x i8> zeroinitializer, align 16
|
||||
@scb = global <16 x i8> zeroinitializer, align 16
|
||||
@usa = global <8 x i16> zeroinitializer, align 16
|
||||
@usb = global <8 x i16> zeroinitializer, align 16
|
||||
@ssa = global <8 x i16> zeroinitializer, align 16
|
||||
@ssb = global <8 x i16> zeroinitializer, align 16
|
||||
@uia = global <4 x i32> zeroinitializer, align 16
|
||||
@uib = global <4 x i32> zeroinitializer, align 16
|
||||
@sia = global <4 x i32> zeroinitializer, align 16
|
||||
@sib = global <4 x i32> zeroinitializer, align 16
|
||||
@ulla = global <2 x i64> zeroinitializer, align 16
|
||||
@ullb = global <2 x i64> zeroinitializer, align 16
|
||||
@slla = global <2 x i64> zeroinitializer, align 16
|
||||
@sllb = global <2 x i64> zeroinitializer, align 16
|
||||
@uxa = global <1 x i128> zeroinitializer, align 16
|
||||
@uxb = global <1 x i128> zeroinitializer, align 16
|
||||
@sxa = global <1 x i128> zeroinitializer, align 16
|
||||
@sxb = global <1 x i128> zeroinitializer, align 16
|
||||
@vfa = global <4 x float> zeroinitializer, align 16
|
||||
@vfb = global <4 x float> zeroinitializer, align 16
|
||||
@vda = global <2 x double> zeroinitializer, align 16
|
||||
@vdb = global <2 x double> zeroinitializer, align 16
|
||||
|
||||
define void @_Z4testv() {
|
||||
entry:
|
||||
; CHECK-LABEL: @_Z4testv
|
||||
%0 = load <16 x i8>, <16 x i8>* @uca, align 16
|
||||
%1 = load <16 x i8>, <16 x i8>* @ucb, align 16
|
||||
%add.i = add <16 x i8> %1, %0
|
||||
tail call void (...) @sink(<16 x i8> %add.i)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vaddubm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%2 = load <16 x i8>, <16 x i8>* @sca, align 16
|
||||
%3 = load <16 x i8>, <16 x i8>* @scb, align 16
|
||||
%add.i22 = add <16 x i8> %3, %2
|
||||
tail call void (...) @sink(<16 x i8> %add.i22)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vaddubm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%4 = load <8 x i16>, <8 x i16>* @usa, align 16
|
||||
%5 = load <8 x i16>, <8 x i16>* @usb, align 16
|
||||
%add.i21 = add <8 x i16> %5, %4
|
||||
tail call void (...) @sink(<8 x i16> %add.i21)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduhm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%6 = load <8 x i16>, <8 x i16>* @ssa, align 16
|
||||
%7 = load <8 x i16>, <8 x i16>* @ssb, align 16
|
||||
%add.i20 = add <8 x i16> %7, %6
|
||||
tail call void (...) @sink(<8 x i16> %add.i20)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduhm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%8 = load <4 x i32>, <4 x i32>* @uia, align 16
|
||||
%9 = load <4 x i32>, <4 x i32>* @uib, align 16
|
||||
%add.i19 = add <4 x i32> %9, %8
|
||||
tail call void (...) @sink(<4 x i32> %add.i19)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduwm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%10 = load <4 x i32>, <4 x i32>* @sia, align 16
|
||||
%11 = load <4 x i32>, <4 x i32>* @sib, align 16
|
||||
%add.i18 = add <4 x i32> %11, %10
|
||||
tail call void (...) @sink(<4 x i32> %add.i18)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduwm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%12 = load <2 x i64>, <2 x i64>* @ulla, align 16
|
||||
%13 = load <2 x i64>, <2 x i64>* @ullb, align 16
|
||||
%add.i17 = add <2 x i64> %13, %12
|
||||
tail call void (...) @sink(<2 x i64> %add.i17)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vaddudm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%14 = load <2 x i64>, <2 x i64>* @slla, align 16
|
||||
%15 = load <2 x i64>, <2 x i64>* @sllb, align 16
|
||||
%add.i16 = add <2 x i64> %15, %14
|
||||
tail call void (...) @sink(<2 x i64> %add.i16)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vaddudm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%16 = load <1 x i128>, <1 x i128>* @uxa, align 16
|
||||
%17 = load <1 x i128>, <1 x i128>* @uxb, align 16
|
||||
%add.i15 = add <1 x i128> %17, %16
|
||||
tail call void (...) @sink(<1 x i128> %add.i15)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduqm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%18 = load <1 x i128>, <1 x i128>* @sxa, align 16
|
||||
%19 = load <1 x i128>, <1 x i128>* @sxb, align 16
|
||||
%add.i14 = add <1 x i128> %19, %18
|
||||
tail call void (...) @sink(<1 x i128> %add.i14)
|
||||
; CHECK: lxvx 34, 0, 3
|
||||
; CHECK: lxvx 35, 0, 4
|
||||
; CHECK: vadduqm 2, 3, 2
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%20 = load <4 x float>, <4 x float>* @vfa, align 16
|
||||
%21 = load <4 x float>, <4 x float>* @vfb, align 16
|
||||
%add.i13 = fadd <4 x float> %20, %21
|
||||
tail call void (...) @sink(<4 x float> %add.i13)
|
||||
; CHECK: lxvx 0, 0, 3
|
||||
; CHECK: lxvx 1, 0, 4
|
||||
; CHECK: xvaddsp 34, 0, 1
|
||||
; CHECK: stxvx 34,
|
||||
; CHECK: bl sink
|
||||
%22 = load <2 x double>, <2 x double>* @vda, align 16
|
||||
%23 = load <2 x double>, <2 x double>* @vdb, align 16
|
||||
%add.i12 = fadd <2 x double> %22, %23
|
||||
tail call void (...) @sink(<2 x double> %add.i12)
|
||||
; CHECK: lxvx 0, 0, 3
|
||||
; CHECK: lxvx 1, 0, 4
|
||||
; CHECK: xvadddp 0, 0, 1
|
||||
; CHECK: stxvx 0,
|
||||
; CHECK: bl sink
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @sink(...)
|
|
@ -1,4 +1,12 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
|
||||
; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd
|
||||
|
||||
define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
|
||||
%v = load <2 x double>, <2 x double>* %p1
|
||||
|
@ -12,6 +20,12 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxspltd 1, 1, 0
|
||||
; CHECK: xxpermdi 34, 0, 1, 1
|
||||
|
||||
; CHECK-P9-LABEL: testi0
|
||||
; CHECK-P9: lxsdx 0, 0, 4
|
||||
; CHECK-P9: lxvx 1, 0, 3
|
||||
; CHECK-P9: xxspltd 0, 0, 0
|
||||
; CHECK-P9: xxpermdi 34, 1, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
|
||||
|
@ -26,6 +40,12 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxspltd 1, 1, 0
|
||||
; CHECK: xxmrgld 34, 1, 0
|
||||
|
||||
; CHECK-P9-LABEL: testi1
|
||||
; CHECK-P9: lxsdx 0, 0, 4
|
||||
; CHECK-P9: lxvx 1, 0, 3
|
||||
; CHECK-P9: xxspltd 0, 0, 0
|
||||
; CHECK-P9: xxmrgld 34, 0, 1
|
||||
}
|
||||
|
||||
define double @teste0(<2 x double>* %p1) {
|
||||
|
@ -35,6 +55,9 @@ define double @teste0(<2 x double>* %p1) {
|
|||
|
||||
; CHECK-LABEL: teste0
|
||||
; CHECK: lxvd2x 1, 0, 3
|
||||
|
||||
; CHECK-P9-LABEL: teste0
|
||||
; CHECK-P9: lxsdx 1, 0, 3
|
||||
}
|
||||
|
||||
define double @teste1(<2 x double>* %p1) {
|
||||
|
@ -45,4 +68,8 @@ define double @teste1(<2 x double>* %p1) {
|
|||
; CHECK-LABEL: teste1
|
||||
; CHECK: lxvd2x 0, 0, 3
|
||||
; CHECK: xxswapd 1, 0
|
||||
|
||||
; CHECK-P9-LABEL: teste1
|
||||
; CHECK-P9: li 4, 8
|
||||
; CHECK-P9: lxsdx 1, 3, 4
|
||||
}
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
|
||||
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
|
||||
; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd
|
||||
|
||||
define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
%v1 = load <2 x double>, <2 x double>* %p1
|
||||
|
@ -9,6 +17,10 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: test00
|
||||
; CHECK: lxvd2x 0, 0, 3
|
||||
; CHECK: xxspltd 34, 0, 0
|
||||
|
||||
; CHECK-P9-LABEL: test00
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: xxspltd 34, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -20,6 +32,9 @@ define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: test01
|
||||
; CHECK: lxvd2x 0, 0, 3
|
||||
; CHECK: xxswapd 34, 0
|
||||
|
||||
; CHECK-P9-LABEL: test01
|
||||
; CHECK-P9: lxvx 34, 0, 3
|
||||
}
|
||||
|
||||
define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -34,6 +49,11 @@ define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxmrgld 34, 1, 0
|
||||
|
||||
; CHECK-P9-LABEL: @test02
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxmrgld 34, 1, 0
|
||||
}
|
||||
|
||||
define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -48,6 +68,11 @@ define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxpermdi 34, 1, 0, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test03
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxpermdi 34, 1, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -58,6 +83,10 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
|
||||
; CHECK-LABEL: @test10
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
|
||||
; CHECK-P9-LABEL: @test10
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: xxswapd 34, 0
|
||||
}
|
||||
|
||||
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -69,6 +98,10 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: @test11
|
||||
; CHECK: lxvd2x 0, 0, 3
|
||||
; CHECK: xxspltd 34, 0, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test11
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: xxspltd 34, 0, 0
|
||||
}
|
||||
|
||||
define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -83,6 +116,11 @@ define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxpermdi 34, 1, 0, 2
|
||||
|
||||
; CHECK-P9-LABEL: @test12
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxpermdi 34, 1, 0, 2
|
||||
}
|
||||
|
||||
define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -97,6 +135,11 @@ define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxmrghd 34, 1, 0
|
||||
|
||||
; CHECK-P9-LABEL: @test13
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxmrghd 34, 1, 0
|
||||
}
|
||||
|
||||
define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -111,6 +154,11 @@ define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxmrgld 34, 0, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test20
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxmrgld 34, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -125,6 +173,11 @@ define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxpermdi 34, 0, 1, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test21
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxpermdi 34, 0, 1, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -136,6 +189,10 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: @test22
|
||||
; CHECK: lxvd2x 0, 0, 4
|
||||
; CHECK: xxspltd 34, 0, 0
|
||||
|
||||
; CHECK-P9-LABEL: @test22
|
||||
; CHECK-P9: lxvx 0, 0, 4
|
||||
; CHECK-P9: xxspltd 34, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -147,6 +204,9 @@ define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: @test23
|
||||
; CHECK: lxvd2x 0, 0, 4
|
||||
; CHECK: xxswapd 34, 0
|
||||
|
||||
; CHECK-P9-LABEL: @test23
|
||||
; CHECK-P9: lxvx 34, 0, 4
|
||||
}
|
||||
|
||||
define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -161,6 +221,11 @@ define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxpermdi 34, 0, 1, 2
|
||||
|
||||
; CHECK-P9-LABEL: @test30
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxpermdi 34, 0, 1, 2
|
||||
}
|
||||
|
||||
define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -175,6 +240,11 @@ define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK: xxswapd 0, 0
|
||||
; CHECK: xxswapd 1, 1
|
||||
; CHECK: xxmrghd 34, 0, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test31
|
||||
; CHECK-P9: lxvx 0, 0, 3
|
||||
; CHECK-P9: lxvx 1, 0, 4
|
||||
; CHECK-P9: xxmrghd 34, 0, 1
|
||||
}
|
||||
|
||||
define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -185,6 +255,10 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
|
||||
; CHECK-LABEL: @test32
|
||||
; CHECK: lxvd2x 34, 0, 4
|
||||
|
||||
; CHECK-P9-LABEL: @test32
|
||||
; CHECK-P9: lxvx 0, 0, 4
|
||||
; CHECK-P9: xxswapd 34, 0
|
||||
}
|
||||
|
||||
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
|
||||
|
@ -196,4 +270,8 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
|
|||
; CHECK-LABEL: @test33
|
||||
; CHECK: lxvd2x 0, 0, 4
|
||||
; CHECK: xxspltd 34, 0, 1
|
||||
|
||||
; CHECK-P9-LABEL: @test33
|
||||
; CHECK-P9: lxvx 0, 0, 4
|
||||
; CHECK-P9: xxspltd 34, 0, 0
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue